Date: prev next · Thread: first prev next last
2011 Archives by date, by thread · List index


2011/1/31 Michael Meeks <michael.meeks@novell.com>


On Sun, 2011-01-30 at 14:35 +0100, Kenneth Venken wrote:
So, should I make the changes to the code or have you already done
them?

        Judging by the git log (they're not pushed), I'd say that Norbert
wanted you to re-submit a patch with his suggestions :-)

I think I've implemented all the suggestions. There's one hard-coded limit
(i < 0x1FFF) I don't get. Why isn't it 0x2000, since existMark has a size of
0x10000? I changed it to i < 0x2000. This will not have an influence on
xdictionary, since the 0x1FFF limit is hard-coded there too. I'll look into
xdictionary next to see if I can fix the FIXME in gendict. Maybe then this
will make sense to me too.

I've also added support to print to standard output, but I don't know how to
fix the build so it will pipe the output to the file.


       Looks like some nice cleanups though,

       Thanks !

               Michael.

--
 michael.meeks@novell.com  <><, Pseudo Engineer, itinerant idiot



From 758f67f16c673dddede6f734a81c3fecccbf2657 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Thu, 27 Jan 2011 22:27:24 +0100
Subject: [PATCH 01/18] added some documentation to gendict

---
 i18npool/source/breakiterator/gendict.cxx |   17 ++++++++++++++++-
 1 files changed, 16 insertions(+), 1 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 9f49f67..8a6354b 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -41,7 +41,22 @@ using std::vector;
 
 using namespace ::rtl;
 
-/* Main Procedure */
+/* Utility gendict:
+
+   "BreakIterator_CJK provides input string caching and dictionary searching for
+   longest matching. You can provide a sorted dictionary (the encoding must be
+   UTF-8) by creating the following file:
+            i18npool/source/breakiterator/data/<language>.dict.
+
+   The utility gendict will convert the file to C code, which will be compiled
+   into a shared library for dynamic loading.
+
+   All dictionary searching and loading is performed in the xdictionary class.
+   The only thing you need to do is to derive your class from BreakIterator_CJK
+   and create an instance of the xdictionary with the language name and
+   pass it to the parent class." (from http://wiki.services.openoffice.org/wiki/
+   /Documentation/DevGuide/OfficeDev/Implementing_a_New_Locale - 27/01/2011)
+*/
 
 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
 {
-- 
1.7.1

From acc59ce73d0d44898d9402edc18c1111323e3cfc Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Thu, 27 Jan 2011 22:43:49 +0100
Subject: [PATCH 02/18] refactored out some simple print functions

---
 i18npool/source/breakiterator/gendict.cxx |   35 +++++++++++++++++++----------
 1 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 8a6354b..df7e144 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -58,6 +58,9 @@ using namespace ::rtl;
    /Documentation/DevGuide/OfficeDev/Implementing_a_New_Locale - 27/01/2011)
 */
 
+void printIncludes(FILE *source_fp);
+void printFunctions(FILE *source_fp);
+
 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
 {
     FILE *sfp, *cfp;
@@ -79,12 +82,7 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
         return -1;
     }
 
-    fprintf(cfp, "/*\n");
-    fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
-    fprintf(cfp, " * All Rights Reserved.\n");
-    fprintf(cfp, " */\n\n");
-    fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
-    fprintf(cfp, "#include <sal/types.h>\n\n");
+    printIncludes(cfp);
     fprintf(cfp, "extern \"C\" {\n");
 
     sal_Int32 count, i, j;
@@ -209,12 +207,6 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
     }
     fprintf (cfp, "\n};\n");
 
-    // create function to return arrays
-    fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
-    fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
-    fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
-    fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
-    fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
     fprintf (cfp, "}\n");
 
     fclose(sfp);
@@ -223,4 +215,23 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
     return 0;
 }      // End of main
 
+void printIncludes(FILE* source_fp)
+{
+    fprintf(source_fp, "/*\n");
+    fprintf(source_fp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
+    fprintf(source_fp, " * All Rights Reserved.\n");
+    fprintf(source_fp, " */\n\n");
+    fprintf(source_fp, "/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n");
+    fprintf(source_fp, "#include <sal/types.h>\n\n");
+}
+
+void printFunctions(FILE* source_fp)
+{
+    fprintf (source_fp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
+    fprintf (source_fp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
+    fprintf (source_fp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
+    fprintf (source_fp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
+    fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
-- 
1.7.1

From 80624b1927566c23d484db81a2b51cb9294acfd7 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Thu, 27 Jan 2011 23:02:11 +0100
Subject: [PATCH 03/18] refactored out dataArea

---
 i18npool/source/breakiterator/gendict.cxx |  104 +++++++++++++++++------------
 1 files changed, 60 insertions(+), 44 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index df7e144..eb654f5 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -59,6 +59,10 @@ using namespace ::rtl;
 */
 
 void printIncludes(FILE *source_fp);
+void initArrays(sal_Bool *exists, sal_Int32 *charArray);
+void printDataArea(FILE *sfp, FILE *cfp, sal_Int32 count, sal_Int32 i,
+                   sal_Int32 lenArrayCurr, sal_Int32 *charArray,
+                   vector<sal_Int32>& lenArray, sal_Bool *exists);
 void printFunctions(FILE *source_fp);
 
 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
@@ -89,51 +93,9 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
     sal_Int32 lenArrayCurr = 0, charArray[0x10000];
     vector<sal_Int32> lenArray;
     sal_Bool exist[0x10000];
-    for (i = 0; i < 0x10000; i++) {
-        exist[i] = sal_False;
-        charArray[i] = 0;
-    }
-
-    // generate main dict. data array
-    fprintf(cfp, "static const sal_Unicode dataArea[] = {");
-    sal_Char str[1024];
-    sal_Unicode current = 0;
-    count = 0;
-    while (fgets(str, 1024, sfp)) {
-        // input file is in UTF-8 encoding
-        // don't convert last new line character to Ostr.
-        OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
-        const sal_Unicode *u = Ostr.getStr();
-
-        sal_Int32 len = Ostr.getLength();
-
-        i=0;
-        Ostr.iterateCodePoints(&i, 1);
-        if (len == i) continue;        // skip one character word
-
-        if (*u != current) {
-        if (*u < current)
-        printf("u %x, current %x, count %d, lenArray.size() %d\n", *u, current,
-                    sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArray.size()));
-        current = *u;
-        charArray[current] = lenArray.size();
-        }
+    initArrays( exist, charArray );
 
-        lenArray.push_back(lenArrayCurr);
-
-        exist[u[0]] = sal_True;
-        for (i = 1; i < len; i++) {            // start from second character, 
-        exist[u[i]] = sal_True;        // since the first character is captured in charArray.
-        lenArrayCurr++;
-        if ((count++) % 0x10 == 0)
-            fprintf(cfp, "\n\t");
-        fprintf(cfp, "0x%04x, ", u[i]);
-        }
-    }
-    lenArray.push_back( lenArrayCurr ); // store last ending pointer
-
-    charArray[current+1] = lenArray.size();
-    fprintf(cfp, "\n};\n");
+    printDataArea(sfp, cfp, count, i, lenArrayCurr, charArray, lenArray, exist);
 
     // generate lenArray 
     fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
@@ -215,6 +177,14 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
     return 0;
 }      // End of main
 
+void initArrays(sal_Bool* exists, sal_Int32* charArray)
+{
+    for (sal_Int32 i = 0; i < 0x10000; i++) {
+        exists[i] = sal_False;
+        charArray[i] = 0;
+    }
+}
+
 void printIncludes(FILE* source_fp)
 {
     fprintf(source_fp, "/*\n");
@@ -234,4 +204,50 @@ void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
+void printDataArea(FILE *sfp, FILE *cfp, sal_Int32 count, sal_Int32 i,
+                   sal_Int32 lenArrayCurr, sal_Int32 *charArray,
+                   vector<sal_Int32>& lenArray, sal_Bool *exists)
+{
+    // generate main dict. data array
+    fprintf(cfp, "static const sal_Unicode dataArea[] = {");
+    sal_Char str[1024];
+    sal_Unicode current = 0;
+    count = 0;
+    while (fgets(str, 1024, sfp)) {
+        // input file is in UTF-8 encoding
+        // don't convert last new line character to Ostr.
+        OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
+        const sal_Unicode *u = Ostr.getStr();
+
+        sal_Int32 len = Ostr.getLength();
+
+        i=0;
+        Ostr.iterateCodePoints(&i, 1);
+        if (len == i) continue;        // skip one character word
+
+        if (*u != current) {
+        if (*u < current)
+        printf("u %x, current %x, count %d, lenArray.size() %d\n", *u, current,
+                    sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArray.size()));
+        current = *u;
+        charArray[current] = lenArray.size();
+        }
+
+        lenArray.push_back(lenArrayCurr);
+
+        exists[u[0]] = sal_True;
+        for (i = 1; i < len; i++) {            // start from second character,
+        exists[u[i]] = sal_True;       // since the first character is captured in charArray.
+        lenArrayCurr++;
+        if ((count++) % 0x10 == 0)
+            fprintf(cfp, "\n\t");
+        fprintf(cfp, "0x%04x, ", u[i]);
+        }
+    }
+    lenArray.push_back( lenArrayCurr ); // store last ending pointer
+
+    charArray[current+1] = lenArray.size();
+    fprintf(cfp, "\n};\n");
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
-- 
1.7.1

From eaaad5d2e6997f07eb0010192bff96774838af79 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Thu, 27 Jan 2011 23:43:38 +0100
Subject: [PATCH 04/18] refactored out all array functions

---
 i18npool/source/breakiterator/gendict.cxx |  188 ++++++++++++++++-------------
 1 files changed, 104 insertions(+), 84 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index eb654f5..315acfa 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -60,9 +60,15 @@ using namespace ::rtl;
 
 void printIncludes(FILE *source_fp);
 void initArrays(sal_Bool *exists, sal_Int32 *charArray);
-void printDataArea(FILE *sfp, FILE *cfp, sal_Int32 count, sal_Int32 i,
+void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 count, sal_Int32 i,
                    sal_Int32 lenArrayCurr, sal_Int32 *charArray,
                    vector<sal_Int32>& lenArray, sal_Bool *exists);
+void printLenArray(FILE *source_fp, const vector<sal_Int32>& lenArray,
+                   sal_Int32 count);
+void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int32 count,
+                 sal_Int16 *set);
+void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
+void printExistMark(FILE *source_fp, sal_Bool *exists, sal_Int32 count);
 void printFunctions(FILE *source_fp);
 
 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
@@ -86,89 +92,21 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
         return -1;
     }
 
-    printIncludes(cfp);
-    fprintf(cfp, "extern \"C\" {\n");
-
-    sal_Int32 count, i, j;
+    sal_Int32 count, i;
     sal_Int32 lenArrayCurr = 0, charArray[0x10000];
     vector<sal_Int32> lenArray;
     sal_Bool exist[0x10000];
-    initArrays( exist, charArray );
-
-    printDataArea(sfp, cfp, count, i, lenArrayCurr, charArray, lenArray, exist);
-
-    // generate lenArray 
-    fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
-    count = 1;
-    fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
-    for (size_t k = 0; k < lenArray.size(); k++)
-    {
-        fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
-        if (count == 0xf)
-        {
-            count = 0;
-            fprintf(cfp, "\n\t");
-        }
-            else count++;
-    }
-    fprintf(cfp, "\n};\n");
-
-    // generate index1 array
-    fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
     sal_Int16 set[0x100];
-    count = 0;
-    for (i = 0; i < 0x100; i++) {
-        for (j = 0; j < 0x100; j++)
-        if (charArray[(i*0x100) + j] != 0)
-            break;
-
-        fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
-        if ((i+1) % 0x10 == 0)
-        fprintf (cfp, "\n\t");
-    }
-    fprintf (cfp, "};\n");
-
-    // generate index2 array
-    fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
-    sal_Int32 prev = 0;
-    for (i = 0; i < 0x100; i++) {
-        if (set[i] != 0xff) {
-        for (j = 0; j < 0x100; j++) {
-            sal_Int32 k = (i*0x100) + j;
-            if (prev != 0 && charArray[k] == 0) {
-            for (k++; k < 0x10000; k++)
-                if (charArray[k] != 0)
-                break;
-            }
-            prev = charArray[(i*0x100) + j];
-            fprintf(
-                cfp, "0x%lx, ",
-                sal::static_int_cast< unsigned long >(
-                    k < 0x10000 ? charArray[k] + 1 : 0));
-            if ((j+1) % 0x10 == 0)
-            fprintf (cfp, "\n\t");
-        }
-        fprintf (cfp, "\n\t");
-        }
-    }
-    fprintf (cfp, "\n};\n");
-
-    // generate existMark array
-    count = 0;
-    fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
-    for (i = 0; i < 0x1FFF; i++) {
-        sal_uInt8 bit = 0;
-        for (j = 0; j < 8; j++)
-        if (exist[i * 8 + j])
-            bit |= 1 << j;
-        fprintf(cfp, "0x%02x, ", bit);
-        if (count == 0xf) {
-        count = 0;
-        fprintf(cfp, "\n\t");
-        } else count++;
-    }
-    fprintf (cfp, "\n};\n");
+    initArrays( exist, charArray );
 
+    printIncludes(cfp);
+    fprintf(cfp, "extern \"C\" {\n");
+        printDataArea(sfp, cfp, count, i, lenArrayCurr, charArray, lenArray, exist);
+        printLenArray(cfp, lenArray, count);
+        printIndex1(cfp, charArray, count, set);
+        printIndex2(cfp, charArray, set);
+        printExistMark(cfp, exist, count);
+        printFunctions(cfp);
     fprintf (cfp, "}\n");
 
     fclose(sfp);
@@ -204,12 +142,12 @@ void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
-void printDataArea(FILE *sfp, FILE *cfp, sal_Int32 count, sal_Int32 i,
+void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 count, sal_Int32 i,
                    sal_Int32 lenArrayCurr, sal_Int32 *charArray,
                    vector<sal_Int32>& lenArray, sal_Bool *exists)
 {
     // generate main dict. data array
-    fprintf(cfp, "static const sal_Unicode dataArea[] = {");
+    fprintf(source_fp, "static const sal_Unicode dataArea[] = {");
     sal_Char str[1024];
     sal_Unicode current = 0;
     count = 0;
@@ -240,14 +178,96 @@ void printDataArea(FILE *sfp, FILE *cfp, sal_Int32 count, sal_Int32 i,
         exists[u[i]] = sal_True;       // since the first character is captured in charArray.
         lenArrayCurr++;
         if ((count++) % 0x10 == 0)
-            fprintf(cfp, "\n\t");
-        fprintf(cfp, "0x%04x, ", u[i]);
+            fprintf(source_fp, "\n\t");
+        fprintf(source_fp, "0x%04x, ", u[i]);
         }
     }
     lenArray.push_back( lenArrayCurr ); // store last ending pointer
 
     charArray[current+1] = lenArray.size();
-    fprintf(cfp, "\n};\n");
+    fprintf(source_fp, "\n};\n");
+}
+
+void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray,
+                   sal_Int32 count)
+{
+    fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
+    count = 1;
+    fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
+    for (size_t k = 0; k < lenArray.size(); k++)
+    {
+        fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
+        if (count == 0xf)
+        {
+            count = 0;
+            fprintf(source_fp, "\n\t");
+        }
+            else count++;
+    }
+    fprintf(source_fp, "\n};\n");
+}
+
+void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int32 count,
+                 sal_Int16 *set)
+{
+    fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
+    count = 0;
+    sal_Int32 j;
+    for (sal_Int32 i = 0; i < 0x100; i++) {
+        for (j = 0; j < 0x100; j++)
+        if (charArray[(i*0x100) + j] != 0)
+            break;
+
+        fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
+        if ((i+1) % 0x10 == 0)
+        fprintf (source_fp, "\n\t");
+    }
+    fprintf (source_fp, "};\n");
+}
+
+void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
+{
+    fprintf (source_fp, "static const sal_Int32 index2[] = {\n\t");
+    sal_Int32 prev = 0;
+    for (sal_Int32 i = 0; i < 0x100; i++) {
+        if (set[i] != 0xff) {
+        for (sal_Int32 j = 0; j < 0x100; j++) {
+            sal_Int32 k = (i*0x100) + j;
+            if (prev != 0 && charArray[k] == 0) {
+            for (k++; k < 0x10000; k++)
+                if (charArray[k] != 0)
+                break;
+            }
+            prev = charArray[(i*0x100) + j];
+            fprintf(
+                source_fp, "0x%lx, ",
+                sal::static_int_cast< unsigned long >(
+                    k < 0x10000 ? charArray[k] + 1 : 0));
+            if ((j+1) % 0x10 == 0)
+            fprintf (source_fp, "\n\t");
+        }
+        fprintf (source_fp, "\n\t");
+        }
+    }
+    fprintf (source_fp, "\n};\n");
+}
+
+void printExistMark(FILE *source_fp, sal_Bool *exists, sal_Int32 count)
+{
+    count = 0;
+    fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
+    for (sal_Int32 i = 0; i < 0x1FFF; i++) {
+        sal_uInt8 bit = 0;
+        for (sal_Int32 j = 0; j < 8; j++)
+        if (exists[i * 8 + j])
+            bit |= 1 << j;
+        fprintf(source_fp, "0x%02x, ", bit);
+        if (count == 0xf) {
+        count = 0;
+        fprintf(source_fp, "\n\t");
+        } else count++;
+    }
+    fprintf (source_fp, "\n};\n");
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
-- 
1.7.1

From 0fcf8c2bbb5befb39e7892a7b6a236baced08f47 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Thu, 27 Jan 2011 23:52:52 +0100
Subject: [PATCH 05/18] reduced scope of some variables

---
 i18npool/source/breakiterator/gendict.cxx |   43 +++++++++++++----------------
 1 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 315acfa..93a359b 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -60,15 +60,12 @@ using namespace ::rtl;
 
 void printIncludes(FILE *source_fp);
 void initArrays(sal_Bool *exists, sal_Int32 *charArray);
-void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 count, sal_Int32 i,
-                   sal_Int32 lenArrayCurr, sal_Int32 *charArray,
+void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 *charArray,
                    vector<sal_Int32>& lenArray, sal_Bool *exists);
-void printLenArray(FILE *source_fp, const vector<sal_Int32>& lenArray,
-                   sal_Int32 count);
-void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int32 count,
-                 sal_Int16 *set);
+void printLenArray(FILE *source_fp, const vector<sal_Int32>& lenArray);
+void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
 void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
-void printExistMark(FILE *source_fp, sal_Bool *exists, sal_Int32 count);
+void printExistMark(FILE *source_fp, sal_Bool *exists);
 void printFunctions(FILE *source_fp);
 
 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
@@ -92,7 +89,6 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
         return -1;
     }
 
-    sal_Int32 count, i;
     sal_Int32 lenArrayCurr = 0, charArray[0x10000];
     vector<sal_Int32> lenArray;
     sal_Bool exist[0x10000];
@@ -101,11 +97,11 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
 
     printIncludes(cfp);
     fprintf(cfp, "extern \"C\" {\n");
-        printDataArea(sfp, cfp, count, i, lenArrayCurr, charArray, lenArray, exist);
-        printLenArray(cfp, lenArray, count);
-        printIndex1(cfp, charArray, count, set);
+        printDataArea(sfp, cfp, charArray, lenArray, exist);
+        printLenArray(cfp, lenArray);
+        printIndex1(cfp, charArray, set);
         printIndex2(cfp, charArray, set);
-        printExistMark(cfp, exist, count);
+        printExistMark(cfp, exist);
         printFunctions(cfp);
     fprintf (cfp, "}\n");
 
@@ -142,15 +138,16 @@ void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
-void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 count, sal_Int32 i,
-                   sal_Int32 lenArrayCurr, sal_Int32 *charArray,
+void printDataArea(FILE *sfp, FILE *source_fp,
+                    sal_Int32 *charArray,
                    vector<sal_Int32>& lenArray, sal_Bool *exists)
 {
     // generate main dict. data array
     fprintf(source_fp, "static const sal_Unicode dataArea[] = {");
     sal_Char str[1024];
+    sal_Int32 lenArrayCurr = 0;
     sal_Unicode current = 0;
-    count = 0;
+    sal_Int32 count = 0;
     while (fgets(str, 1024, sfp)) {
         // input file is in UTF-8 encoding
         // don't convert last new line character to Ostr.
@@ -159,7 +156,7 @@ void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 count, sal_Int32 i,
 
         sal_Int32 len = Ostr.getLength();
 
-        i=0;
+        sal_Int32 i=0;
         Ostr.iterateCodePoints(&i, 1);
         if (len == i) continue;        // skip one character word
 
@@ -188,11 +185,10 @@ void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 count, sal_Int32 i,
     fprintf(source_fp, "\n};\n");
 }
 
-void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray,
-                   sal_Int32 count)
+void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
 {
     fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
-    count = 1;
+    sal_Int32 count = 1;
     fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
     for (size_t k = 0; k < lenArray.size(); k++)
     {
@@ -207,11 +203,10 @@ void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray,
     fprintf(source_fp, "\n};\n");
 }
 
-void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int32 count,
-                 sal_Int16 *set)
+void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
-    count = 0;
+    sal_Int32 count = 0;
     sal_Int32 j;
     for (sal_Int32 i = 0; i < 0x100; i++) {
         for (j = 0; j < 0x100; j++)
@@ -252,9 +247,9 @@ void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
     fprintf (source_fp, "\n};\n");
 }
 
-void printExistMark(FILE *source_fp, sal_Bool *exists, sal_Int32 count)
+void printExistMark(FILE *source_fp, sal_Bool *exists)
 {
-    count = 0;
+    sal_Int32 count = 0;
     fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
     for (sal_Int32 i = 0; i < 0x1FFF; i++) {
         sal_uInt8 bit = 0;
-- 
1.7.1

From 5cbdc1c28efe4e4d0227cf805f5a28ccf1301787 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Fri, 28 Jan 2011 00:14:53 +0100
Subject: [PATCH 06/18] readability changes

---
 i18npool/source/breakiterator/gendict.cxx |  106 +++++++++++++++--------------
 1 files changed, 55 insertions(+), 51 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 93a359b..3d0b627 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -41,6 +41,16 @@ using std::vector;
 
 using namespace ::rtl;
 
+void printIncludes(FILE *source_fp);
+void initArrays(sal_Bool *exists, sal_Int32 *charArray);
+void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
+                   vector<sal_Int32>& lenArray, sal_Bool *exists);
+void printLenArray(FILE *source_fp, const vector<sal_Int32>& lenArray);
+void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
+void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
+void printExistMark(FILE *source_fp, sal_Bool *exists);
+void printFunctions(FILE *source_fp);
+
 /* Utility gendict:
 
    "BreakIterator_CJK provides input string caching and dictionary searching for
@@ -58,58 +68,52 @@ using namespace ::rtl;
    /Documentation/DevGuide/OfficeDev/Implementing_a_New_Locale - 27/01/2011)
 */
 
-void printIncludes(FILE *source_fp);
-void initArrays(sal_Bool *exists, sal_Int32 *charArray);
-void printDataArea(FILE *sfp, FILE *source_fp, sal_Int32 *charArray,
-                   vector<sal_Int32>& lenArray, sal_Bool *exists);
-void printLenArray(FILE *source_fp, const vector<sal_Int32>& lenArray);
-void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
-void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
-void printExistMark(FILE *source_fp, sal_Bool *exists);
-void printFunctions(FILE *source_fp);
-
 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
 {
-    FILE *sfp, *cfp;
+    FILE *dictionary_fp, *source_fp;
 
-    if (argc < 3) exit(-1);
+    if (argc < 3)
+    {
+        printf("2 arguments required: dictionary_file_name source_file_name");
+        exit(-1);
+    }
 
-    sfp = fopen(argv[1], "rb");        // open the source file for read;
-    if (sfp == NULL) 
+    dictionary_fp = fopen(argv[1], "rb");      // open the source file for read;
+    if (dictionary_fp == NULL)
     {
         printf("Open the dictionary source file failed.");
         return -1;
     }
 
     // create the C source file to write
-    cfp = fopen(argv[2], "wb");
-    if (cfp == NULL) {
-        fclose(sfp);
+    source_fp = fopen(argv[2], "wb");
+    if (source_fp == NULL) {
+        fclose(dictionary_fp);
         printf("Can't create the C source file.");
         return -1;
     }
 
-    sal_Int32 lenArrayCurr = 0, charArray[0x10000];
+    sal_Int32 charArray[0x10000];
     vector<sal_Int32> lenArray;
     sal_Bool exist[0x10000];
     sal_Int16 set[0x100];
     initArrays( exist, charArray );
 
-    printIncludes(cfp);
-    fprintf(cfp, "extern \"C\" {\n");
-        printDataArea(sfp, cfp, charArray, lenArray, exist);
-        printLenArray(cfp, lenArray);
-        printIndex1(cfp, charArray, set);
-        printIndex2(cfp, charArray, set);
-        printExistMark(cfp, exist);
-        printFunctions(cfp);
-    fprintf (cfp, "}\n");
+    printIncludes(source_fp);
+    fprintf(source_fp, "extern \"C\" {\n");
+        printDataArea(dictionary_fp, source_fp, charArray, lenArray, exist);
+        printLenArray(source_fp, lenArray);
+        printIndex1(source_fp, charArray, set);
+        printIndex2(source_fp, charArray, set);
+        printExistMark(source_fp, exist);
+        printFunctions(source_fp);
+    fprintf (source_fp, "}\n");
 
-    fclose(sfp);
-    fclose(cfp);
+    fclose(dictionary_fp);
+    fclose(source_fp);
 
     return 0;
-}      // End of main
+}
 
 void initArrays(sal_Bool* exists, sal_Int32* charArray)
 {
@@ -138,7 +142,7 @@ void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
-void printDataArea(FILE *sfp, FILE *source_fp,
+void printDataArea(FILE *dictionary_fp, FILE *source_fp,
                     sal_Int32 *charArray,
                    vector<sal_Int32>& lenArray, sal_Bool *exists)
 {
@@ -148,7 +152,7 @@ void printDataArea(FILE *sfp, FILE *source_fp,
     sal_Int32 lenArrayCurr = 0;
     sal_Unicode current = 0;
     sal_Int32 count = 0;
-    while (fgets(str, 1024, sfp)) {
+    while (fgets(str, 1024, dictionary_fp)) {
         // input file is in UTF-8 encoding
         // don't convert last new line character to Ostr.
         OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
@@ -160,23 +164,23 @@ void printDataArea(FILE *sfp, FILE *source_fp,
         Ostr.iterateCodePoints(&i, 1);
         if (len == i) continue;        // skip one character word
 
-        if (*u != current) {
-        if (*u < current)
-        printf("u %x, current %x, count %d, lenArray.size() %d\n", *u, current,
-                    sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArray.size()));
-        current = *u;
-        charArray[current] = lenArray.size();
+        if (u[0] != current) {
+            if (u[0] < current)
+            printf("u %x, current %x, count %d, lenArray.size() %d\n", u[0], current,
+                        sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArray.size()));
+            current = u[0];
+            charArray[current] = lenArray.size();
         }
 
         lenArray.push_back(lenArrayCurr);
 
         exists[u[0]] = sal_True;
         for (i = 1; i < len; i++) {            // start from second character,
-        exists[u[i]] = sal_True;       // since the first character is captured in charArray.
-        lenArrayCurr++;
-        if ((count++) % 0x10 == 0)
-            fprintf(source_fp, "\n\t");
-        fprintf(source_fp, "0x%04x, ", u[i]);
+            exists[u[i]] = sal_True;   // since the first character is captured in charArray.
+            lenArrayCurr++;
+            if ((count++) % 0x10 == 0)
+                fprintf(source_fp, "\n\t");
+            fprintf(source_fp, "0x%04x, ", u[i]);
         }
     }
     lenArray.push_back( lenArrayCurr ); // store last ending pointer
@@ -210,12 +214,12 @@ void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
     sal_Int32 j;
     for (sal_Int32 i = 0; i < 0x100; i++) {
         for (j = 0; j < 0x100; j++)
-        if (charArray[(i*0x100) + j] != 0)
-            break;
+            if (charArray[(i*0x100) + j] != 0)
+                break;
 
         fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
         if ((i+1) % 0x10 == 0)
-        fprintf (source_fp, "\n\t");
+            fprintf (source_fp, "\n\t");
     }
     fprintf (source_fp, "};\n");
 }
@@ -231,7 +235,7 @@ void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
             if (prev != 0 && charArray[k] == 0) {
             for (k++; k < 0x10000; k++)
                 if (charArray[k] != 0)
-                break;
+                    break;
             }
             prev = charArray[(i*0x100) + j];
             fprintf(
@@ -254,12 +258,12 @@ void printExistMark(FILE *source_fp, sal_Bool *exists)
     for (sal_Int32 i = 0; i < 0x1FFF; i++) {
         sal_uInt8 bit = 0;
         for (sal_Int32 j = 0; j < 8; j++)
-        if (exists[i * 8 + j])
-            bit |= 1 << j;
+            if (exists[i * 8 + j])
+                bit |= 1 << j;
         fprintf(source_fp, "0x%02x, ", bit);
         if (count == 0xf) {
-        count = 0;
-        fprintf(source_fp, "\n\t");
+            count = 0;
+            fprintf(source_fp, "\n\t");
         } else count++;
     }
     fprintf (source_fp, "\n};\n");
-- 
1.7.1

From 99408d749fddfd40f5a438b328c3fff4f4c7ee32 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Fri, 28 Jan 2011 00:29:14 +0100
Subject: [PATCH 07/18] changed some loop constructs

---
 i18npool/source/breakiterator/gendict.cxx |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 3d0b627..90e6f75 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -211,11 +211,10 @@ void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
     sal_Int32 count = 0;
-    sal_Int32 j;
     for (sal_Int32 i = 0; i < 0x100; i++) {
-        for (j = 0; j < 0x100; j++)
-            if (charArray[(i*0x100) + j] != 0)
-                break;
+        sal_Int32 j = 0;
+        while( j < 0x100 && charArray[(i*0x100) + j] == 0)
+            j++;
 
         fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
         if ((i+1) % 0x10 == 0)
@@ -232,11 +231,10 @@ void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
         if (set[i] != 0xff) {
         for (sal_Int32 j = 0; j < 0x100; j++) {
             sal_Int32 k = (i*0x100) + j;
-            if (prev != 0 && charArray[k] == 0) {
-            for (k++; k < 0x10000; k++)
-                if (charArray[k] != 0)
-                    break;
-            }
+            if (prev != 0 )
+                while( charArray[k] == 0 && k < 0x10000 )
+                    k++;
+
             prev = charArray[(i*0x100) + j];
             fprintf(
                 source_fp, "0x%lx, ",
-- 
1.7.1

From 62987c99f186fc8927bc27264fa2e4c736366ceb Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Sun, 30 Jan 2011 00:00:38 +0100
Subject: [PATCH 08/18] more comments

---
 i18npool/source/breakiterator/gendict.cxx |   53 ++++++++++++++++-------------
 1 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 90e6f75..1b70f23 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -93,10 +93,10 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
         return -1;
     }
 
-    sal_Int32 charArray[0x10000];
-    vector<sal_Int32> lenArray;
-    sal_Bool exist[0x10000];
+    vector<sal_Int32> lenArray;   // stores the word boundaries in DataArea
     sal_Int16 set[0x100];
+    sal_Bool exist[0x10000];      // true if unicode character exists
+    sal_Int32 charArray[0x10000]; // keeps track where words beginning with a certain char are stored in DataArea
     initArrays( exist, charArray );
 
     printIncludes(source_fp);
@@ -142,8 +142,7 @@ void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
-void printDataArea(FILE *dictionary_fp, FILE *source_fp,
-                    sal_Int32 *charArray,
+void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
                    vector<sal_Int32>& lenArray, sal_Bool *exists)
 {
     // generate main dict. data array
@@ -162,7 +161,8 @@ void printDataArea(FILE *dictionary_fp, FILE *source_fp,
 
         sal_Int32 i=0;
         Ostr.iterateCodePoints(&i, 1);
-        if (len == i) continue;        // skip one character word
+        if (len == i)
+            continue;  // skip one character word
 
         if (u[0] != current) {
             if (u[0] < current)
@@ -184,7 +184,6 @@ void printDataArea(FILE *dictionary_fp, FILE *source_fp,
         }
     }
     lenArray.push_back( lenArrayCurr ); // store last ending pointer
-
     charArray[current+1] = lenArray.size();
     fprintf(source_fp, "\n};\n");
 }
@@ -207,6 +206,9 @@ void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
     fprintf(source_fp, "\n};\n");
 }
 
+/* FIXME?: what happens if in every range i there is at least one charArray != 0
+       => this will make index1[] = {0x00, 0x01, 0x02,... 0xfe, 0xff }
+       => then in index2, the last range will be ignored incorrectly */
 void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
@@ -229,26 +231,28 @@ void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
     sal_Int32 prev = 0;
     for (sal_Int32 i = 0; i < 0x100; i++) {
         if (set[i] != 0xff) {
-        for (sal_Int32 j = 0; j < 0x100; j++) {
-            sal_Int32 k = (i*0x100) + j;
-            if (prev != 0 )
-                while( charArray[k] == 0 && k < 0x10000 )
-                    k++;
-
-            prev = charArray[(i*0x100) + j];
-            fprintf(
-                source_fp, "0x%lx, ",
-                sal::static_int_cast< unsigned long >(
-                    k < 0x10000 ? charArray[k] + 1 : 0));
-            if ((j+1) % 0x10 == 0)
+            for (sal_Int32 j = 0; j < 0x100; j++) {
+                sal_Int32 k = (i*0x100) + j;
+                if (prev != 0 )
+                    while( charArray[k] == 0 && k < 0x10000 )
+                        k++;
+
+                prev = charArray[(i*0x100) + j];
+                fprintf(
+                    source_fp, "0x%lx, ",
+                    sal::static_int_cast< unsigned long >(
+                        k < 0x10000 ? charArray[k] + 1 : 0));
+                if ((j+1) % 0x10 == 0)
+                    fprintf (source_fp, "\n\t");
+            }
             fprintf (source_fp, "\n\t");
         }
-        fprintf (source_fp, "\n\t");
-        }
     }
     fprintf (source_fp, "\n};\n");
 }
 
+/* Generates a bitmask for the existance of sal_Unicode values in dictionary;
+   it packs 8 sal_Bool values in 1 sal_uInt8 */
 void printExistMark(FILE *source_fp, sal_Bool *exists)
 {
     sal_Int32 count = 0;
@@ -256,13 +260,14 @@ void printExistMark(FILE *source_fp, sal_Bool *exists)
     for (sal_Int32 i = 0; i < 0x1FFF; i++) {
         sal_uInt8 bit = 0;
         for (sal_Int32 j = 0; j < 8; j++)
-            if (exists[i * 8 + j])
-                bit |= 1 << j;
+            bit |= (exists[i * 8 + j]) << j;
+
         fprintf(source_fp, "0x%02x, ", bit);
         if (count == 0xf) {
             count = 0;
             fprintf(source_fp, "\n\t");
-        } else count++;
+        } else
+            count++;
     }
     fprintf (source_fp, "\n};\n");
 }
-- 
1.7.1

From d62869bfcb101cd8e008a90ed62214da011468dc Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Sun, 30 Jan 2011 14:28:42 +0100
Subject: [PATCH 09/18] some changes

---
 i18npool/source/breakiterator/gendict.cxx |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 1b70f23..97ea76e 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -166,8 +166,8 @@ void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
 
         if (u[0] != current) {
             if (u[0] < current)
-            printf("u %x, current %x, count %d, lenArray.size() %d\n", u[0], current,
-                        sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArray.size()));
+                printf("u %x, current %x, count %d, lenArray.size() %d\n", u[0], current,
+                            sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArray.size()));
             current = u[0];
             charArray[current] = lenArray.size();
         }
-- 
1.7.1

From e36e2f844413003d40af558b954ad7cf17b96e67 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Sun, 30 Jan 2011 18:39:20 +0100
Subject: [PATCH 10/18] made private functions static, reordered main

---
 i18npool/source/breakiterator/gendict.cxx |  120 +++++++++++++----------------
 1 files changed, 55 insertions(+), 65 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 97ea76e..3f37b92 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -41,16 +41,6 @@ using std::vector;
 
 using namespace ::rtl;
 
-void printIncludes(FILE *source_fp);
-void initArrays(sal_Bool *exists, sal_Int32 *charArray);
-void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
-                   vector<sal_Int32>& lenArray, sal_Bool *exists);
-void printLenArray(FILE *source_fp, const vector<sal_Int32>& lenArray);
-void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
-void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set);
-void printExistMark(FILE *source_fp, sal_Bool *exists);
-void printFunctions(FILE *source_fp);
-
 /* Utility gendict:
 
    "BreakIterator_CJK provides input string caching and dictionary searching for
@@ -68,54 +58,7 @@ void printFunctions(FILE *source_fp);
    /Documentation/DevGuide/OfficeDev/Implementing_a_New_Locale - 27/01/2011)
 */
 
-SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
-{
-    FILE *dictionary_fp, *source_fp;
-
-    if (argc < 3)
-    {
-        printf("2 arguments required: dictionary_file_name source_file_name");
-        exit(-1);
-    }
-
-    dictionary_fp = fopen(argv[1], "rb");      // open the source file for read;
-    if (dictionary_fp == NULL)
-    {
-        printf("Open the dictionary source file failed.");
-        return -1;
-    }
-
-    // create the C source file to write
-    source_fp = fopen(argv[2], "wb");
-    if (source_fp == NULL) {
-        fclose(dictionary_fp);
-        printf("Can't create the C source file.");
-        return -1;
-    }
-
-    vector<sal_Int32> lenArray;   // stores the word boundaries in DataArea
-    sal_Int16 set[0x100];
-    sal_Bool exist[0x10000];      // true if unicode character exists
-    sal_Int32 charArray[0x10000]; // keeps track where words beginning with a certain char are stored in DataArea
-    initArrays( exist, charArray );
-
-    printIncludes(source_fp);
-    fprintf(source_fp, "extern \"C\" {\n");
-        printDataArea(dictionary_fp, source_fp, charArray, lenArray, exist);
-        printLenArray(source_fp, lenArray);
-        printIndex1(source_fp, charArray, set);
-        printIndex2(source_fp, charArray, set);
-        printExistMark(source_fp, exist);
-        printFunctions(source_fp);
-    fprintf (source_fp, "}\n");
-
-    fclose(dictionary_fp);
-    fclose(source_fp);
-
-    return 0;
-}
-
-void initArrays(sal_Bool* exists, sal_Int32* charArray)
+static void initArrays(sal_Bool* exists, sal_Int32* charArray)
 {
     for (sal_Int32 i = 0; i < 0x10000; i++) {
         exists[i] = sal_False;
@@ -123,7 +66,7 @@ void initArrays(sal_Bool* exists, sal_Int32* charArray)
     }
 }
 
-void printIncludes(FILE* source_fp)
+static void printIncludes(FILE* source_fp)
 {
     fprintf(source_fp, "/*\n");
     fprintf(source_fp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
@@ -133,7 +76,7 @@ void printIncludes(FILE* source_fp)
     fprintf(source_fp, "#include <sal/types.h>\n\n");
 }
 
-void printFunctions(FILE* source_fp)
+static void printFunctions(FILE* source_fp)
 {
     fprintf (source_fp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
     fprintf (source_fp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
@@ -142,7 +85,7 @@ void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
-void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
+static void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
                    vector<sal_Int32>& lenArray, sal_Bool *exists)
 {
     // generate main dict. data array
@@ -188,7 +131,7 @@ void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
     fprintf(source_fp, "\n};\n");
 }
 
-void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
+static void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
 {
     fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
     sal_Int32 count = 1;
@@ -209,7 +152,7 @@ void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
 /* FIXME?: what happens if in every range i there is at least one charArray != 0
        => this will make index1[] = {0x00, 0x01, 0x02,... 0xfe, 0xff }
        => then in index2, the last range will be ignored incorrectly */
-void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
+static void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
     sal_Int32 count = 0;
@@ -225,7 +168,7 @@ void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
     fprintf (source_fp, "};\n");
 }
 
-void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
+static void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int32 index2[] = {\n\t");
     sal_Int32 prev = 0;
@@ -253,7 +196,7 @@ void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
 
 /* Generates a bitmask for the existance of sal_Unicode values in dictionary;
    it packs 8 sal_Bool values in 1 sal_uInt8 */
-void printExistMark(FILE *source_fp, sal_Bool *exists)
+static void printExistMark(FILE *source_fp, sal_Bool *exists)
 {
     sal_Int32 count = 0;
     fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
@@ -272,4 +215,51 @@ void printExistMark(FILE *source_fp, sal_Bool *exists)
     fprintf (source_fp, "\n};\n");
 }
 
+SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
+{
+    FILE *dictionary_fp, *source_fp;
+
+    if (argc < 3)
+    {
+        printf("2 arguments required: dictionary_file_name source_file_name");
+        exit(-1);
+    }
+
+    dictionary_fp = fopen(argv[1], "rb");      // open the source file for read;
+    if (dictionary_fp == NULL)
+    {
+        printf("Open the dictionary source file failed.");
+        return -1;
+    }
+
+    // create the C source file to write
+    source_fp = fopen(argv[2], "wb");
+    if (source_fp == NULL) {
+        fclose(dictionary_fp);
+        printf("Can't create the C source file.");
+        return -1;
+    }
+
+    vector<sal_Int32> lenArray;   // stores the word boundaries in DataArea
+    sal_Int16 set[0x100];
+    sal_Bool exist[0x10000];      // true if unicode character exists
+    sal_Int32 charArray[0x10000]; // keeps track where words beginning with a certain char are stored in DataArea
+    initArrays( exist, charArray );
+
+    printIncludes(source_fp);
+    fprintf(source_fp, "extern \"C\" {\n");
+        printDataArea(dictionary_fp, source_fp, charArray, lenArray, exist);
+        printLenArray(source_fp, lenArray);
+        printIndex1(source_fp, charArray, set);
+        printIndex2(source_fp, charArray, set);
+        printExistMark(source_fp, exist);
+        printFunctions(source_fp);
+    fprintf (source_fp, "}\n");
+
+    fclose(dictionary_fp);
+    fclose(source_fp);
+
+    return 0;
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
-- 
1.7.1

From 4e5778068b585c4cfc86b12cec041007803e7a09 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 01:36:14 +0100
Subject: [PATCH 11/18] exists and charArray declared static

---
 i18npool/source/breakiterator/gendict.cxx |   73 ++++++++++++-----------------
 1 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 3f37b92..d1b8b76 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -58,15 +58,16 @@ using namespace ::rtl;
    /Documentation/DevGuide/OfficeDev/Implementing_a_New_Locale - 27/01/2011)
 */
 
-static void initArrays(sal_Bool* exists, sal_Int32* charArray)
+// C-standard garantees that static variables are automatically initialized to 0
+static sal_uInt8 exists[0x2000];
+static sal_uInt32 charArray[0x10000];
+
+static inline void set_exists(sal_uInt32 index)
 {
-    for (sal_Int32 i = 0; i < 0x10000; i++) {
-        exists[i] = sal_False;
-        charArray[i] = 0;
-    }
+   exists[index>>3] |= 1 << (index & 0x07);
 }
 
-static void printIncludes(FILE* source_fp)
+static inline void printIncludes(FILE* source_fp)
 {
     fprintf(source_fp, "/*\n");
     fprintf(source_fp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
@@ -76,7 +77,7 @@ static void printIncludes(FILE* source_fp)
     fprintf(source_fp, "#include <sal/types.h>\n\n");
 }
 
-static void printFunctions(FILE* source_fp)
+static inline void printFunctions(FILE* source_fp)
 {
     fprintf (source_fp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
     fprintf (source_fp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
@@ -85,15 +86,14 @@ static void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
-static void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charArray,
-                   vector<sal_Int32>& lenArray, sal_Bool *exists)
+static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_Int32>& lenArray)
 {
     // generate main dict. data array
     fprintf(source_fp, "static const sal_Unicode dataArea[] = {");
     sal_Char str[1024];
-    sal_Int32 lenArrayCurr = 0;
+    sal_uInt32 lenArrayCurr = 0;
     sal_Unicode current = 0;
-    sal_Int32 count = 0;
+    unsigned int count = 0;
     while (fgets(str, 1024, dictionary_fp)) {
         // input file is in UTF-8 encoding
         // don't convert last new line character to Ostr.
@@ -117,9 +117,10 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charA
 
         lenArray.push_back(lenArrayCurr);
 
-        exists[u[0]] = sal_True;
-        for (i = 1; i < len; i++) {            // start from second character,
-            exists[u[i]] = sal_True;   // since the first character is captured in charArray.
+        set_exists(u[0]);
+        // first character is stored in charArray, so start from second
+        for (i = 1; i < len; i++) {
+            set_exists(u[i]);
             lenArrayCurr++;
             if ((count++) % 0x10 == 0)
                 fprintf(source_fp, "\n\t");
@@ -131,20 +132,16 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, sal_Int32 *charA
     fprintf(source_fp, "\n};\n");
 }
 
-static void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
+static inline void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
 {
     fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
-    sal_Int32 count = 1;
     fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
     for (size_t k = 0; k < lenArray.size(); k++)
     {
-        fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
-        if (count == 0xf)
-        {
-            count = 0;
+        if( (k & 0xf) == 0xf)
             fprintf(source_fp, "\n\t");
-        }
-            else count++;
+
+        fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
     }
     fprintf(source_fp, "\n};\n");
 }
@@ -152,7 +149,7 @@ static void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
 /* FIXME?: what happens if in every range i there is at least one charArray != 0
        => this will make index1[] = {0x00, 0x01, 0x02,... 0xfe, 0xff }
        => then in index2, the last range will be ignored incorrectly */
-static void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
+static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
     sal_Int32 count = 0;
@@ -168,7 +165,7 @@ static void printIndex1(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
     fprintf (source_fp, "};\n");
 }
 
-static void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
+static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int32 index2[] = {\n\t");
     sal_Int32 prev = 0;
@@ -196,21 +193,14 @@ static void printIndex2(FILE *source_fp, sal_Int32 *charArray, sal_Int16 *set)
 
 /* Generates a bitmask for the existance of sal_Unicode values in dictionary;
    it packs 8 sal_Bool values in 1 sal_uInt8 */
-static void printExistMark(FILE *source_fp, sal_Bool *exists)
+static inline void printExistMark(FILE *source_fp)
 {
-    sal_Int32 count = 0;
     fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
-    for (sal_Int32 i = 0; i < 0x1FFF; i++) {
-        sal_uInt8 bit = 0;
-        for (sal_Int32 j = 0; j < 8; j++)
-            bit |= (exists[i * 8 + j]) << j;
-
-        fprintf(source_fp, "0x%02x, ", bit);
-        if (count == 0xf) {
-            count = 0;
+    for (unsigned int i = 0; i < 0x2000; i++)
+    {
+        fprintf(source_fp, "0x%02x, ", exists[i]);
+        if ( (i & 0xf) == 0xf )
             fprintf(source_fp, "\n\t");
-        } else
-            count++;
     }
     fprintf (source_fp, "\n};\n");
 }
@@ -242,17 +232,14 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
 
     vector<sal_Int32> lenArray;   // stores the word boundaries in DataArea
     sal_Int16 set[0x100];
-    sal_Bool exist[0x10000];      // true if unicode character exists
-    sal_Int32 charArray[0x10000]; // keeps track where words beginning with a certain char are stored in DataArea
-    initArrays( exist, charArray );
 
     printIncludes(source_fp);
     fprintf(source_fp, "extern \"C\" {\n");
-        printDataArea(dictionary_fp, source_fp, charArray, lenArray, exist);
+        printDataArea(dictionary_fp, source_fp, lenArray);
         printLenArray(source_fp, lenArray);
-        printIndex1(source_fp, charArray, set);
-        printIndex2(source_fp, charArray, set);
-        printExistMark(source_fp, exist);
+        printIndex1(source_fp, set);
+        printIndex2(source_fp, set);
+        printExistMark(source_fp);
         printFunctions(source_fp);
     fprintf (source_fp, "}\n");
 
-- 
1.7.1

From 474cf418f07821c9b25c38bc973a5968113cbc18 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 01:59:01 +0100
Subject: [PATCH 12/18] removed use of count and casting

---
 i18npool/source/breakiterator/gendict.cxx |   32 +++++++++++++---------------
 1 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index d1b8b76..6881981 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -86,14 +86,14 @@ static inline void printFunctions(FILE* source_fp)
     fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
 }
 
-static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_Int32>& lenArray)
+static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
 {
     // generate main dict. data array
     fprintf(source_fp, "static const sal_Unicode dataArea[] = {");
     sal_Char str[1024];
     sal_uInt32 lenArrayCurr = 0;
     sal_Unicode current = 0;
-    unsigned int count = 0;
+
     while (fgets(str, 1024, dictionary_fp)) {
         // input file is in UTF-8 encoding
         // don't convert last new line character to Ostr.
@@ -109,8 +109,7 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
 
         if (u[0] != current) {
             if (u[0] < current)
-                printf("u %x, current %x, count %d, lenArray.size() %d\n", u[0], current,
-                            sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArray.size()));
+                printf("u %x, current %x, count %u, lenArray.size() %lu\n", u[0], current, lenArrayCurr, lenArray.size());
             current = u[0];
             charArray[current] = lenArray.size();
         }
@@ -119,10 +118,9 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
 
         set_exists(u[0]);
         // first character is stored in charArray, so start from second
-        for (i = 1; i < len; i++) {
+        for (i = 1; i < len; i++, lenArrayCurr++) {
             set_exists(u[i]);
-            lenArrayCurr++;
-            if ((count++) % 0x10 == 0)
+            if (lenArrayCurr % 0x10 == 0)
                 fprintf(source_fp, "\n\t");
             fprintf(source_fp, "0x%04x, ", u[i]);
         }
@@ -132,16 +130,16 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
     fprintf(source_fp, "\n};\n");
 }
 
-static inline void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenArray)
+static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray)
 {
     fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
     fprintf(source_fp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
     for (size_t k = 0; k < lenArray.size(); k++)
     {
-        if( (k & 0xf) == 0xf)
+        if( !(k & 0xf) )
             fprintf(source_fp, "\n\t");
 
-        fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
+        fprintf(source_fp, "0x%x, ", lenArray[k]);
     }
     fprintf(source_fp, "\n};\n");
 }
@@ -152,13 +150,13 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_Int32>& lenAr
 static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
 {
     fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
-    sal_Int32 count = 0;
+    sal_Int16 count = 0;
     for (sal_Int32 i = 0; i < 0x100; i++) {
         sal_Int32 j = 0;
         while( j < 0x100 && charArray[(i*0x100) + j] == 0)
             j++;
 
-        fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
+        fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
         if ((i+1) % 0x10 == 0)
             fprintf (source_fp, "\n\t");
     }
@@ -172,12 +170,12 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
     for (sal_Int32 i = 0; i < 0x100; i++) {
         if (set[i] != 0xff) {
             for (sal_Int32 j = 0; j < 0x100; j++) {
-                sal_Int32 k = (i*0x100) + j;
+                sal_Int32 k = (i<<8) + j;
                 if (prev != 0 )
                     while( charArray[k] == 0 && k < 0x10000 )
                         k++;
 
-                prev = charArray[(i*0x100) + j];
+                prev = charArray[(i<<8) + j];
                 fprintf(
                     source_fp, "0x%lx, ",
                     sal::static_int_cast< unsigned long >(
@@ -193,7 +191,7 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
 
 /* Generates a bitmask for the existance of sal_Unicode values in dictionary;
    it packs 8 sal_Bool values in 1 sal_uInt8 */
-static inline void printExistMark(FILE *source_fp)
+static inline void printExistsMask(FILE *source_fp)
 {
     fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
     for (unsigned int i = 0; i < 0x2000; i++)
@@ -230,7 +228,7 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
         return -1;
     }
 
-    vector<sal_Int32> lenArray;   // stores the word boundaries in DataArea
+    vector<sal_uInt32> lenArray;   // stores the word boundaries in DataArea
     sal_Int16 set[0x100];
 
     printIncludes(source_fp);
@@ -239,7 +237,7 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
         printLenArray(source_fp, lenArray);
         printIndex1(source_fp, set);
         printIndex2(source_fp, set);
-        printExistMark(source_fp);
+        printExistsMask(source_fp);
         printFunctions(source_fp);
     fprintf (source_fp, "}\n");
 
-- 
1.7.1

From 0dba24e06f2a6a69f8b18bf8d860148f487dc875 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 02:21:17 +0100
Subject: [PATCH 13/18] some small changes

---
 i18npool/source/breakiterator/gendict.cxx |    7 ++-----
 1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index 6881981..a192484 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -153,7 +153,7 @@ static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
     sal_Int16 count = 0;
     for (sal_Int32 i = 0; i < 0x100; i++) {
         sal_Int32 j = 0;
-        while( j < 0x100 && charArray[(i*0x100) + j] == 0)
+        while( j < 0x100 && charArray[(i<<8) + j] == 0)
             j++;
 
         fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
@@ -176,10 +176,7 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
                         k++;
 
                 prev = charArray[(i<<8) + j];
-                fprintf(
-                    source_fp, "0x%lx, ",
-                    sal::static_int_cast< unsigned long >(
-                        k < 0x10000 ? charArray[k] + 1 : 0));
+                fprintf(source_fp, "0x%x, ",(k < 0x10000 ? charArray[k] + 1 : 0));
                 if ((j+1) % 0x10 == 0)
                     fprintf (source_fp, "\n\t");
             }
-- 
1.7.1

From 6b044dd5b26dfe21295d296525d50687ccc74049 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 02:31:30 +0100
Subject: [PATCH 14/18] use of fputs

---
 i18npool/source/breakiterator/gendict.cxx |   48 +++++++++++++---------------
 1 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index a192484..cb5754b 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -69,27 +69,23 @@ static inline void set_exists(sal_uInt32 index)
 
 static inline void printIncludes(FILE* source_fp)
 {
-    fprintf(source_fp, "/*\n");
-    fprintf(source_fp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
-    fprintf(source_fp, " * All Rights Reserved.\n");
-    fprintf(source_fp, " */\n\n");
-    fprintf(source_fp, "/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n");
-    fprintf(source_fp, "#include <sal/types.h>\n\n");
+    fputs("/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n", source_fp);
+    fputs("#include <sal/types.h>\n\n", source_fp);
 }
 
 static inline void printFunctions(FILE* source_fp)
 {
-    fprintf (source_fp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
-    fprintf (source_fp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
-    fprintf (source_fp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
-    fprintf (source_fp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
-    fprintf (source_fp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
+    fputs ("\tconst sal_uInt8* getExistMark() { return existMark; }\n", source_fp);
+    fputs ("\tconst sal_Int16* getIndex1() { return index1; }\n", source_fp);
+    fputs ("\tconst sal_Int32* getIndex2() { return index2; }\n", source_fp);
+    fputs ("\tconst sal_Int32* getLenArray() { return lenArray; }\n", source_fp);
+    fputs ("\tconst sal_Unicode* getDataArea() { return dataArea; }\n", source_fp);
 }
 
 static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
 {
     // generate main dict. data array
-    fprintf(source_fp, "static const sal_Unicode dataArea[] = {");
+    fputs("static const sal_Unicode dataArea[] = {", source_fp);
     sal_Char str[1024];
     sal_uInt32 lenArrayCurr = 0;
     sal_Unicode current = 0;
@@ -121,13 +117,13 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
         for (i = 1; i < len; i++, lenArrayCurr++) {
             set_exists(u[i]);
             if (lenArrayCurr % 0x10 == 0)
-                fprintf(source_fp, "\n\t");
+                fputs("\n\t", source_fp);
             fprintf(source_fp, "0x%04x, ", u[i]);
         }
     }
     lenArray.push_back( lenArrayCurr ); // store last ending pointer
     charArray[current+1] = lenArray.size();
-    fprintf(source_fp, "\n};\n");
+    fputs("\n};\n", source_fp);
 }
 
 static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray)
@@ -137,11 +133,11 @@ static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenA
     for (size_t k = 0; k < lenArray.size(); k++)
     {
         if( !(k & 0xf) )
-            fprintf(source_fp, "\n\t");
+            fputs("\n\t", source_fp);
 
         fprintf(source_fp, "0x%x, ", lenArray[k]);
     }
-    fprintf(source_fp, "\n};\n");
+    fputs("\n};\n", source_fp );
 }
 
 /* FIXME?: what happens if in every range i there is at least one charArray != 0
@@ -158,14 +154,14 @@ static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
 
         fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
         if ((i+1) % 0x10 == 0)
-            fprintf (source_fp, "\n\t");
+            fputs ("\n\t", source_fp);
     }
-    fprintf (source_fp, "};\n");
+    fputs("};\n", source_fp);
 }
 
 static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
 {
-    fprintf (source_fp, "static const sal_Int32 index2[] = {\n\t");
+    fputs ("static const sal_Int32 index2[] = {\n\t", source_fp);
     sal_Int32 prev = 0;
     for (sal_Int32 i = 0; i < 0x100; i++) {
         if (set[i] != 0xff) {
@@ -178,12 +174,12 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
                 prev = charArray[(i<<8) + j];
                 fprintf(source_fp, "0x%x, ",(k < 0x10000 ? charArray[k] + 1 : 0));
                 if ((j+1) % 0x10 == 0)
-                    fprintf (source_fp, "\n\t");
+                    fputs ("\n\t", source_fp);
             }
-            fprintf (source_fp, "\n\t");
+            fputs ("\n\t", source_fp);
         }
     }
-    fprintf (source_fp, "\n};\n");
+    fputs ("\n};\n", source_fp);
 }
 
 /* Generates a bitmask for the existance of sal_Unicode values in dictionary;
@@ -195,9 +191,9 @@ static inline void printExistsMask(FILE *source_fp)
     {
         fprintf(source_fp, "0x%02x, ", exists[i]);
         if ( (i & 0xf) == 0xf )
-            fprintf(source_fp, "\n\t");
+            fputs("\n\t", source_fp);
     }
-    fprintf (source_fp, "\n};\n");
+    fputs("\n};\n", source_fp);
 }
 
 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
@@ -229,14 +225,14 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
     sal_Int16 set[0x100];
 
     printIncludes(source_fp);
-    fprintf(source_fp, "extern \"C\" {\n");
+    fputs("extern \"C\" {\n", source_fp);
         printDataArea(dictionary_fp, source_fp, lenArray);
         printLenArray(source_fp, lenArray);
         printIndex1(source_fp, set);
         printIndex2(source_fp, set);
         printExistsMask(source_fp);
         printFunctions(source_fp);
-    fprintf (source_fp, "}\n");
+    fputs("}\n", source_fp);
 
     fclose(dictionary_fp);
     fclose(source_fp);
-- 
1.7.1

From 70d79f45fdd2131ef5286b20e5cfe2cab4f4a8b0 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 12:08:19 +0100
Subject: [PATCH 15/18] printing to stderr

---
 i18npool/source/breakiterator/gendict.cxx |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index cb5754b..b2cf491 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -116,7 +116,7 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
         // first character is stored in charArray, so start from second
         for (i = 1; i < len; i++, lenArrayCurr++) {
             set_exists(u[i]);
-            if (lenArrayCurr % 0x10 == 0)
+            if ((lenArrayCurr & 0x10) == 0x10)
                 fputs("\n\t", source_fp);
             fprintf(source_fp, "0x%04x, ", u[i]);
         }
@@ -202,7 +202,7 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
 
     if (argc < 3)
     {
-        printf("2 arguments required: dictionary_file_name source_file_name");
+        fputs("2 arguments required: dictionary_file_name source_file_name", stderr);
         exit(-1);
     }
 
-- 
1.7.1

From c3b29e489e7450734b41e02ac2e36c8edae8de4e Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 12:16:31 +0100
Subject: [PATCH 16/18] added support to write to stdout

---
 i18npool/source/breakiterator/gendict.cxx |   19 ++++++++++++-------
 1 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index b2cf491..e8f96d7 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -200,7 +200,7 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
 {
     FILE *dictionary_fp, *source_fp;
 
-    if (argc < 3)
+    if (argc == 1 || argc > 3)
     {
         fputs("2 arguments required: dictionary_file_name source_file_name", stderr);
         exit(-1);
@@ -213,12 +213,17 @@ SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
         return -1;
     }
 
-    // create the C source file to write
-    source_fp = fopen(argv[2], "wb");
-    if (source_fp == NULL) {
-        fclose(dictionary_fp);
-        printf("Can't create the C source file.");
-        return -1;
+    if(argc == 2)
+        source_fp = stdout;
+    else
+    {
+        // create the C source file to write
+        source_fp = fopen(argv[2], "wb");
+        if (source_fp == NULL) {
+            fclose(dictionary_fp);
+            printf("Can't create the C source file.");
+            return -1;
+        }
     }
 
     vector<sal_uInt32> lenArray;   // stores the word boundaries in DataArea
-- 
1.7.1

From 1e10cdc1bce56c343037432c53a472f3d0790d5c Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 12:22:25 +0100
Subject: [PATCH 17/18] OSL_ENSURE

---
 i18npool/source/breakiterator/gendict.cxx |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index e8f96d7..f4493f5 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -36,6 +36,7 @@
 #include <sal/types.h>
 #include <rtl/strbuf.hxx>
 #include <rtl/ustring.hxx>
+#include <osl/diagnose.h>
 #include <vector>
 using std::vector;
 
@@ -104,8 +105,7 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
             continue;  // skip one character word
 
         if (u[0] != current) {
-            if (u[0] < current)
-                printf("u %x, current %x, count %u, lenArray.size() %lu\n", u[0], current, lenArrayCurr, lenArray.size());
+            OSL_ENSURE( (u[0] > current), "Dictionary file should be sorted");
             current = u[0];
             charArray[current] = lenArray.size();
         }
-- 
1.7.1

From 440d13737c3a43f3178648103033a139d938b212 Mon Sep 17 00:00:00 2001
From: Kenneth Venken <kenneth.venken@gmail.com>
Date: Mon, 31 Jan 2011 12:43:11 +0100
Subject: [PATCH 18/18] replaced % count with &

---
 i18npool/source/breakiterator/gendict.cxx |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index f4493f5..974ca02 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -86,7 +86,7 @@ static inline void printFunctions(FILE* source_fp)
 static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
 {
     // generate main dict. data array
-    fputs("static const sal_Unicode dataArea[] = {", source_fp);
+    fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp);
     sal_Char str[1024];
     sal_uInt32 lenArrayCurr = 0;
     sal_Unicode current = 0;
@@ -97,7 +97,7 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
         OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
         const sal_Unicode *u = Ostr.getStr();
 
-        sal_Int32 len = Ostr.getLength();
+        const sal_Int32 len = Ostr.getLength();
 
         sal_Int32 i=0;
         Ostr.iterateCodePoints(&i, 1);
@@ -116,9 +116,9 @@ static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sa
         // first character is stored in charArray, so start from second
         for (i = 1; i < len; i++, lenArrayCurr++) {
             set_exists(u[i]);
-            if ((lenArrayCurr & 0x10) == 0x10)
-                fputs("\n\t", source_fp);
             fprintf(source_fp, "0x%04x, ", u[i]);
+            if ((lenArrayCurr & 0x0f) == 0x0f)
+                fputs("\n\t", source_fp);
         }
     }
     lenArray.push_back( lenArrayCurr ); // store last ending pointer
@@ -153,7 +153,7 @@ static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
             j++;
 
         fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
-        if ((i+1) % 0x10 == 0)
+        if ((i & 0x0f) == 0x0f)
             fputs ("\n\t", source_fp);
     }
     fputs("};\n", source_fp);
@@ -173,7 +173,7 @@ static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
 
                 prev = charArray[(i<<8) + j];
                 fprintf(source_fp, "0x%x, ",(k < 0x10000 ? charArray[k] + 1 : 0));
-                if ((j+1) % 0x10 == 0)
+                if ((j & 0x0f) == 0x0f)
                     fputs ("\n\t", source_fp);
             }
             fputs ("\n\t", source_fp);
-- 
1.7.1


Context


Privacy Policy | Impressum (Legal Info) | Copyright information: Unless otherwise specified, all text and images on this website are licensed under the Creative Commons Attribution-Share Alike 3.0 License. This does not include the source code of LibreOffice, which is licensed under the Mozilla Public License (MPLv2). "LibreOffice" and "The Document Foundation" are registered trademarks of their corresponding registered owners or are in actual use as trademarks in one or more countries. Their respective logos and icons are also subject to international copyright laws. Use thereof is explained in our trademark policy.