rpms/libtextcat/devel libtextcat-2.2-OOo.patch, NONE, 1.1 libtextcat.spec, 1.2, 1.3

Caolan McNamara (caolanm) fedora-extras-commits at redhat.com
Wed Jul 25 10:00:27 UTC 2007


Author: caolanm

Update of /cvs/pkgs/rpms/libtextcat/devel
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv25854

Modified Files:
	libtextcat.spec 
Added Files:
	libtextcat-2.2-OOo.patch 
Log Message:
tweak api

libtextcat-2.2-OOo.patch:

--- NEW FILE libtextcat-2.2-OOo.patch ---
diff -ruN libtextcat-2.2.part1/src/constants.h libtextcat-2.2/src/constants.h
--- libtextcat-2.2.part1/src/constants.h	2007-07-25 10:46:49.000000000 +0100
+++ libtextcat-2.2/src/constants.h	2007-07-25 10:47:25.000000000 +0100
@@ -39,6 +39,8 @@
  */
 #include <limits.h>
 
+#define _UTF8_
+
 #define DESCRIPTION "out of place"
 
 /* Reported matches are those fingerprints with a score less than best
@@ -59,14 +61,21 @@
 /* Maximum number of n-grams in a fingerprint */
 #define MAXNGRAMS  400
 
-/* Maximum size of an n-gram? */
-#define MAXNGRAMSIZE 5
+/* Maximum number of characters (symbols) in an n-gram */
+#define MAXNGRAMSYMBOL 5
+
+/* Maximum size in bytes of the string representing an n-gram (must be at least the number of symbols) */
+#ifdef _UTF8_
+#define MAXNGRAMSIZE 20
+#else
+#define MAXNGRAMSIZE MAXNGRAMSYMBOL
+#endif
 
 /* Which characters are not acceptable in n-grams? */
 #define INVALID(c) (isspace((int)c) || isdigit((int)c)) 
 
 /* Minimum size (in characters) for accepting a document */
-#define MINDOCSIZE  25
+#define MINDOCSIZE  6
 
 /* Maximum penalty for missing an n-gram in fingerprint */
 #define MAXOUTOFPLACE 400
@@ -76,4 +85,7 @@
 
 #define MAXSCORE  INT_MAX
 
+/* where the fingerprints files are stored */
+#define DEFAULT_FINGERPRINTS_PATH ""
+
 #endif
diff -ruN libtextcat-2.2.part1/src/fingerprint.c libtextcat-2.2/src/fingerprint.c
--- libtextcat-2.2.part1/src/fingerprint.c	2007-07-25 10:46:49.000000000 +0100
+++ libtextcat-2.2/src/fingerprint.c	2007-07-25 10:47:25.000000000 +0100
@@ -63,6 +63,10 @@
  * - put table/heap datastructure in a separate file.
  */
 
+#ifndef _UTF8_
+#define _UTF8_
+#endif
+
 #include "config.h"
 #include <stdio.h>
 #ifdef HAVE_STDLIB_H
@@ -80,10 +84,12 @@
 #include "wg_mempool.h"
 #include "constants.h"
 
+#include "utf8misc.h"
 
 #define TABLESIZE  (1<<TABLEPOW)
 #define TABLEMASK  ((TABLESIZE)-1)
 
+
 typedef struct {
 
 	sint2 rank;
@@ -134,29 +140,14 @@
 }
 
 
-/* checks if n-gram lex is a prefix of key and of length len */
-inline int issame( char *lex, char *key, int len )
-{
-	int i;
-	for (i=0; i<len; i++) {
-		if ( key[i] != lex[i] ) {
-			return 0;
-		}
-	}
-	if ( lex[i] != 0 ) {
-		return 0;
-	}
-	return 1;
-}
-
 
 /* increases frequency of ngram(p,len) */
-static inline int increasefreq( table_t *t, char *p, int len ) 
-{	
-	uint4 hash = simplehash( p, len ) & TABLEMASK;				
+static int increasefreq( table_t *t, char *p, int len )
+{
+	uint4 hash = simplehash( p, len ) & TABLEMASK;
 	entry_t *entry = t->table[ hash ];
-	
-	while ( entry ) {				
+
+	while ( entry ) {
 		if ( issame( entry->str, p, len ) ) {
 			/*** Found it! ***/
 			entry->cnt++;
@@ -168,7 +159,7 @@
 	}
 
 	/*** Not found, so create ***/
-	entry = wgmempool_alloc( t->pool, sizeof(entry_t) );
+        entry = (entry_t*)(wgmempool_alloc( t->pool, sizeof(entry_t) ));
 	strcpy( entry->str, p );
 	entry->cnt = 1;
 
@@ -181,12 +172,12 @@
 #if 0
 
 /* looks up ngram(p,len) */
-static entry_t *findfreq( table_t *t, char *p, int len ) 
-{	
-	uint4 hash = simplehash( p, len ) & TABLEMASK;				
+static entry_t *findfreq( table_t *t, char *p, int len )
+{
+	uint4 hash = simplehash( p, len ) & TABLEMASK;
 	entry_t *entry = t->table[ hash ];
-	
-	while ( entry ) {				
+
+	while ( entry ) {
 		if ( issame( entry->str, p, len ) ) {
 			return entry;
 		}
@@ -219,7 +210,7 @@
 #define GREATER(x,y) ((x).cnt > (y).cnt)
 #define LESS(x,y)    ((x).cnt < (y).cnt)
 
-inline static void siftup( table_t *t, unsigned int child )
+static void siftup( table_t *t, unsigned int child )
 {
 	entry_t *heap = t->heap;
 	unsigned int parent = (child-1) >> 1;
@@ -241,7 +232,7 @@
 }
 
 
-inline static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
+static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
 {
 	entry_t *heap = t->heap;
 	unsigned int child = parent*2 + 1;
@@ -458,21 +449,27 @@
 	return dest;
 }
 
-
+/**
+* This function extracts all n-grams from the passed buffer and puts them into the table "t".
+* [modified] by Jocelyn Merand to accept UTF-8 multi-byte symbols, for use in OpenOffice.
+*/
 static void createngramtable( table_t *t, const char *buf )
 {
 	char n[MAXNGRAMSIZE+1];
 	const char *p = buf;
 	int i;
+        int pointer = 0;
 
 	/*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/
-	for (;;p++) {
+	while(1) {
 
-		const char *q = p;
+     const char *q = &p[pointer];   /*[modified] previously p++ above (for(;;p++)) now, it's pointer wich is increased so we have to get the new pointer on the buffer*/
 		char *m = n;
 
 		/*** First char may be an underscore ***/
-		*m++ = *q++;
+                int decay = charcopy(q, m); /*[modified] previously *q++ = *m++*/
+                q = &(p[pointer+decay]);    /*[modified] the old copying method do not manage multi-character symbols*/
+                m += decay; /*[modified]*/
 		*m = '\0';
 
 		increasefreq( t, n, 1 );
@@ -482,19 +479,22 @@
 		}
 
 		/*** Let the compiler unroll this ***/
-		for ( i=2; i<=MAXNGRAMSIZE; i++) {
+		for ( i=2; i<=MAXNGRAMSYMBOL; i++) {
 
-			*m++ = *q;
+                        decay = charcopy(q, m); /*[modified] like above*/
+                        m += decay;
 			*m = '\0';
 
 			increasefreq( t, n, i );
 
 			if ( *q == '_' ) break;
-			q++;
+                        q += decay;
 			if ( *q == '\0' ) {
 				return;
 			}
 		}
+
+  pointer = nextcharstart(p,pointer);   /*[modified] p[pointer] must point on the next start of symbol, but whith utf next start is not surely next char*/
 	}
 	return;
 }
diff -ruN libtextcat-2.2.part1/src/fingerprint.h.orig libtextcat-2.2/src/fingerprint.h.orig
--- libtextcat-2.2.part1/src/fingerprint.h.orig	1970-01-01 01:00:00.000000000 +0100
+++ libtextcat-2.2/src/fingerprint.h.orig	2007-07-25 10:47:22.000000000 +0100
@@ -0,0 +1,55 @@
+#ifndef _FINGERPRINT_H_
+#define _FINGERPRINT_H_
+/*
+ * Copyright (C) 2003 WiseGuys Internet B.V.
+ *
+ * THE BSD LICENSE
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ * 
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void *fp_Init(const char *name);
+extern void fp_Done( void *handle );
+extern int fp_Create( void *handle, const char *buffer, uint4 bufsize, uint4 maxngrams );
+extern int fp_Read( void *handle, const char *fname, int maxngrams );
+extern sint4 fp_Compare( void *cat, void *unknown, int cutoff );
+extern void fp_Show( void *handle );
+extern const char *fp_Name( void *handle );
+extern void fp_Print( void *handle, FILE *fp );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff -ruN libtextcat-2.2.part1/src/textcat.c libtextcat-2.2/src/textcat.c
--- libtextcat-2.2.part1/src/textcat.c	2007-07-25 10:46:49.000000000 +0100
+++ libtextcat-2.2/src/textcat.c	2007-07-25 10:47:25.000000000 +0100
@@ -74,6 +74,7 @@
 typedef struct {
 
 	void **fprint;
+        char *fprint_disable;
 	uint4 size;
 	uint4 maxsize;
 
@@ -112,11 +113,21 @@
 		fp_Done( h->fprint[i] );
 	}
 	wg_free( h->fprint );
+        wg_free( h->fprint_disable );
 	wg_free( h );
 
 }
 
-extern void *textcat_Init( const char *conffile )
+/** Backward-compatible entry point: forwards to special_textcat_Init() with DEFAULT_FINGERPRINTS_PATH as the fingerprint prefix. */
+extern void *textcat_Init( const char *conffile ){
+    return special_textcat_Init( conffile, DEFAULT_FINGERPRINTS_PATH );
+}
+
+/**
+ * Originally this function had only one parameter (conffile); the second one has been added for use by OOo.
+ * Basically, prefix is the directory path where the fingerprints are stored (it is prepended verbatim to each fingerprint file name).
+ */
+extern void *special_textcat_Init( const char *conffile, const char *prefix )
 {
 	textcat_t *h;
 	char line[1024];
@@ -134,11 +145,13 @@
 	h->size = 0;
 	h->maxsize = 16;
 	h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize );
+ h->fprint_disable = (char *)wg_malloc( sizeof(char*) * h->maxsize );   /*added to store the state of languages*/
 
 	while ( wg_getline( line, 1024, fp ) ) {
 		char *p;
 		char *segment[4];
-		int res;
+                char finger_print_file_name[512];
+                int res;
 
 		/*** Skip comments ***/
 #ifdef HAVE_STRCHR
@@ -156,17 +169,23 @@
 		/*** Ensure enough space ***/
 		if ( h->size == h->maxsize ) {
 			h->maxsize *= 2;
-			h->fprint = (void *)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
+			h->fprint = (void **)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
+                        h->fprint_disable = (char *)wg_realloc( h->fprint_disable, sizeof(char*) * h->maxsize );
 		}
 
 		/*** Load data ***/
 		if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) {
 			goto ERROR;
 		}
-		if ( fp_Read( h->fprint[h->size], segment[0], 400 ) == 0 ) {
+                finger_print_file_name[0] = '\0';
+                strcat(finger_print_file_name, prefix);
+                strcat(finger_print_file_name, segment[0]);
+
+                if ( fp_Read( h->fprint[h->size], finger_print_file_name, 400 ) == 0 ) {
 			textcat_Done(h);
 			goto ERROR;
-		}		
+		}
+                h->fprint_disable[h->size] = 0xF0;  /*0xF0 is the code for enabled languages, 0x0F is for disabled*/
 		h->size++;
 	}
 
@@ -203,11 +222,18 @@
 		result = _TEXTCAT_RESULT_SHORT;
 		goto READY;
 	}
-	
+
 	/*** Calculate the score for each category. ***/
 	for (i=0; i<h->size; i++) {
-		int score = fp_Compare( h->fprint[i], unknown, threshold );
-		candidates[i].score = score;
+                int score;
+                if(h->fprint_disable[i] & 0x0F){    /*if this language is disabled*/
+                    score = MAXSCORE;
+                }
+                else{
+                    score = fp_Compare( h->fprint[i], unknown, threshold );
+                    /*printf("Score for %s : %i\n", fp_Name(h->fprint[i]), score);*/
+                }
+                candidates[i].score = score;
 		candidates[i].name = fp_Name( h->fprint[i] );
 		if ( score < minscore ) {
 			minscore = score;
diff -ruN libtextcat-2.2.part1/src/textcat.h libtextcat-2.2/src/textcat.h
--- libtextcat-2.2.part1/src/textcat.h	2007-07-25 10:46:49.000000000 +0100
+++ libtextcat-2.2/src/textcat.h	2007-07-25 10:48:18.000000000 +0100
@@ -55,10 +54,19 @@
  * Returns: handle on success, NULL on error. (At the moment, the
  * only way errors can occur, is when the library cannot read the
  * conffile, or one of the fingerprint files listed in it.)
+ *
+ * Replaces the older function (and has exactly the same behaviour);
+ * see special_textcat_Init() below.
  */
 extern void *textcat_Init( const char *conffile );
 
 /**
+ * Originally this function had only one parameter (conffile); it has been modified because OOo must be able to load an alternative DB.
+ * Basically, prefix is the directory path where the fingerprints are stored.
+ */
+extern void *special_textcat_Init( const char *conffile, const char *prefix );
+
+/**
  * textcat_Done() - Free up resources for handle
  */
 extern void textcat_Done( void *handle );
diff -ruN libtextcat-2.2.part1/src/utf8misc.c libtextcat-2.2/src/utf8misc.c
--- libtextcat-2.2.part1/src/utf8misc.c	1970-01-01 01:00:00.000000000 +0100
+++ libtextcat-2.2/src/utf8misc.c	2007-07-25 10:48:57.000000000 +0100
@@ -0,0 +1,132 @@
+/***************************************************************************
+ *   Copyright (C) 2006 by Jocelyn Merand                                  *
+ *   joc.mer at gmail.com                                                     *
+ *                                                                         *
+ * THE BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+/* constants.h defines _UTF8_; it must come first so that utf8misc.h selects
+ * the UTF-8 masks instead of its single-byte fallback values. */
+#include "constants.h"
+#include "utf8misc.h"
+
+/* Return the index of the start of the symbol that follows the one beginning at str[position]. */
+int nextcharstart(const char *str, int position){
+    int pointer = position;
+
+    if(str[pointer] & ESCAPE_MASK){ /*str[pointer] is an escape (lead) byte*/
+        /*unsigned on purpose: left-shifting a negative plain char is undefined behaviour*/
+        unsigned char escape_char = (unsigned char)((str[pointer] & WEIGHT_MASK) << 1);
+
+        /*every leading 1 bit left in escape_char announces one more byte; stop early on \0*/
+        while((escape_char & ESCAPE_MASK) && str[pointer]){
+            escape_char = (unsigned char)(escape_char << 1);
+            ++pointer;
+        }
+    }
+    if(str[pointer]){   /*finally, if we are not on the \0 character, step past the current byte*/
+        ++pointer;
+    }
+    return pointer;
+}
+
+
+/* Copy the symbol starting at str into dest (no \0 is appended) and return the number of bytes copied. */
+int charcopy(const char *str, char *dest){
+    int pointer = 0;
+
+    if(str[pointer] & ESCAPE_MASK){ /*str[pointer] is an escape (lead) byte*/
+        /*unsigned on purpose: left-shifting a negative plain char is undefined behaviour*/
+        unsigned char escape_char = (unsigned char)((str[pointer] & WEIGHT_MASK) << 1);
+
+        /*copy one byte per leading 1 bit left in escape_char; stop early on \0*/
+        while((escape_char & ESCAPE_MASK) && str[pointer]){
+            dest[pointer] = str[pointer];
+            escape_char = (unsigned char)(escape_char << 1);
+            ++pointer;
+        }
+    }
+    if(str[pointer]){   /*copy the last (or only) byte of the symbol, never the \0*/
+        dest[pointer] = str[pointer];
+        ++pointer;
+    }
+
+    return pointer;
+}
+
+
+/*
+ * Return 1 when lex is exactly the first len symbols of key, 0 otherwise.
+ * len counts symbols, so it can be smaller than the number of bytes compared.
+ */
+int issame( char *lex, char *key, int len )
+{
+    int char_counter = 0;
+    int pointer = 0;
+
+    while(char_counter < len) {
+        if(key[pointer] & ESCAPE_MASK){ /*key[pointer] is an escape (lead) byte*/
+            /*unsigned on purpose: left-shifting a negative plain char is undefined behaviour*/
+            unsigned char escape_char = (unsigned char)((key[pointer] & WEIGHT_MASK) << 1);
+
+            /*walk the bytes announced by the lead byte for as long as both strings agree*/
+            while((escape_char & ESCAPE_MASK) && key[pointer] == lex[pointer] ){
+                escape_char = (unsigned char)(escape_char << 1);
+                ++pointer;
+            }
+        }
+        ++char_counter; /*and we are on a new symbol*/
+        /*pointer is now on the last byte of the symbol, or on the first byte that differs*/
+        if ( key[pointer] != lex[pointer] ) {
+            return 0;
+        }
+        ++pointer;
+    }
+
+    /*lex has to end here, otherwise it is longer than the len symbols taken from key*/
+    if ( lex[pointer] != '\0' ) {
+        return 0;
+    }
+    return 1;
+}
+
+
+/* Count the symbols of str by hopping from one symbol start to the next. */
+extern int utfstrlen(const char* str){
+    int symbols = 0;
+    int offset;
+
+    for(offset = 0; str[offset] != '\0'; offset = nextcharstart(str, offset)) {
+        ++symbols;
+    }
+    return symbols;
+}
+
diff -ruN libtextcat-2.2.part1/src/utf8misc.h libtextcat-2.2/src/utf8misc.h
--- libtextcat-2.2.part1/src/utf8misc.h	1970-01-01 01:00:00.000000000 +0100
+++ libtextcat-2.2/src/utf8misc.h	2007-07-25 10:48:57.000000000 +0100
@@ -0,0 +1,88 @@
+/***************************************************************************
+ *   Copyright (C) 2006 by Jocelyn Merand                                  *
+ *   joc.mer at gmail.com                                                     *
+ *                                                                         *
+ * THE BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ***************************************************************************/
+
+#ifndef _UTF8_MISC_H_
+#define _UTF8_MISC_H_
+
+/**
+ * Masks used by the character processing functions to handle UTF-8 escape (lead) bytes.
+ * _UTF8_ must be defined before this header is included; otherwise the #else values below are used.
+ */
+#ifdef _UTF8_
+#define ESCAPE_MASK 0x80
+#define WEIGHT_MASK 0xF0
+#else
+#define ESCAPE_MASK 0xFF
+#define WEIGHT_MASK 0x00
+#endif
+
+
+/*
+ * Returns the index in str of the start of the character that follows the one at position.
+ * Of course it is only useful when the encoding is UTF-8.
+ * This function has been added by Jocelyn Merand to use libtextcat in OOo.
+ */
+int nextcharstart(const char *str, int position);
+
+
+/* Copies the character at the start of str to dest.
+ * Of course it is only useful when the encoding is UTF-8 and the symbol is encoded with more than 1 char.
+ * Returns the number of chars (bytes) copied.
+ * This function has been added by Jocelyn Merand to use libtextcat in OOo.
+ */
+int charcopy(const char *str, char *dest);
+
+
+/* Checks if n-gram lex is a prefix of key and of length len.
+* If _UTF8_ is defined, it uses escape characters and len is not really the length of lex:
+* in this case, len is the number of UTF-8 characters, e.g. strlen("€") == 3 but len == 1.
+*/
+int issame( char *lex, char *key, int len );
+
+
+/* Counts the number of characters.
+* If _UTF8_ is defined, it uses escape characters and the result is not really the length of str:
+* in this case, the result is the number of UTF-8 characters, e.g. strlen("€") == 3 but utfstrlen("€") == 1.
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern int utfstrlen(const char* str);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
--- libtextcat-2.2.part2/src/Makefile.am	2007-07-25 10:55:02.000000000 +0100
+++ libtextcat-2.2/src/Makefile.am	2007-07-25 10:55:52.000000000 +0100
@@ -12,11 +12,11 @@
 
 libtextcat_includedir = $(includedir)/libtextcat
 libtextcat_include_HEADERS = \
-	common.h constants.h fingerprint.h textcat.h
+	common.h constants.h fingerprint.h textcat.h utf8misc.h
 
 lib_LTLIBRARIES =	libtextcat.la
 libtextcat_la_SOURCES = \
-	common.c fingerprint.c textcat.c wg_mempool.c
+	common.c fingerprint.c textcat.c wg_mempool.c utf8misc.c
 
 bin_PROGRAMS =		createfp
 createfp_SOURCES =	createfp.c


Index: libtextcat.spec
===================================================================
RCS file: /cvs/pkgs/rpms/libtextcat/devel/libtextcat.spec,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- libtextcat.spec	6 Jul 2007 18:41:02 -0000	1.2
+++ libtextcat.spec	25 Jul 2007 09:59:55 -0000	1.3
@@ -1,7 +1,7 @@
 Summary: Text Categorization Library
 Name: libtextcat
 Version: 2.2
-Release: 1%{?dist}
+Release: 2%{?dist}
 Group: System Environment/Libraries
 License: BSD
 Source: http://software.wise-guys.nl/download/%{name}-%{version}.tar.gz
@@ -9,6 +9,7 @@
 Buildroot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
 BuildRequires: libtool
 Patch0: libtextcat-2.2-exportapi.patch
+Patch1: libtextcat-2.2-OOo.patch
 
 %description
 Libtextcat is a library with functions that implement the classification
@@ -28,6 +29,7 @@
 %prep
 %setup -q
 %patch0 -p1 -b .exportapi.patch
+%patch1 -p1 -b .ooo.patch
 
 %build
 autoreconf -f -i
@@ -58,5 +60,8 @@
 rm -r $RPM_BUILD_ROOT
 
 %changelog
+* Wed Jul 25 2007 Caolan McNamara <caolanm at redhat.com> 2.2-2
+- fiddle with api for OOo 2.3
+
 * Wed Jun 27 2007 Caolan McNamara <caolanm at redhat.com> 2.2-1
 - Initial version




More information about the fedora-extras-commits mailing list