Date: prev next · Thread: first prev next last
2011 Archives by date, by thread · List index


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

This patch enables hunspell to handle compound words whose components
carry two levels of inflection.

For example, in Kichwa:
wiñana -> wiñashka (two levels of inflexion) + compounds =
wiñashka+kuna+ntin+mi


Note: the file lingucomponent/unxlngx6.pro/lib/libspelllo.so must be
removed before rebuilding, or else this patch won't be picked up.

Perhaps there are better ways to make the build process aware of the
need to rebuild libspelllo.so?

As you've already figured out, this is one of my first patches o_O -- beware.

Arno

- -----

$ hunspell -d qu_EC -m
wiñashkakunantinmi
wiñashkakunantinmi  pa:wiñashka st:wiñana # stem # Perfect 3rd person
singular pa:kuna st:kuna pa:ntin st:ntin pa:mi
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iEYEARECAAYFAk5v6pkACgkQEMIGVCc8BjDLHwCggGEL66N/53csBRyuPDP+ITGs
/CoAoMqx7TgMYWhnBr5GliItDLUHSlqc
=X0J+
-----END PGP SIGNATURE-----
From ecea3f9e53793140e10a4e6b79d7ef5cb3714f19 Mon Sep 17 00:00:00 2001
From: Arno Teigseth <arnotixe@gmail.com>
Date: Tue, 13 Sep 2011 18:34:16 -0500
Subject: [PATCH] added twofold affix+compound to hunspell, as the official fixed 
https://sourceforge.net/tracker/index.php?func=detail&aid=3288562&group_id=143754&atid=756395

---
 hunspell/hunspell-twoaffixcompound.patch |   80 ++++++++++++++++++++++++++++++
 hunspell/makefile.mk                     |    1 +
 2 files changed, 81 insertions(+), 0 deletions(-)
 create mode 100644 hunspell/hunspell-twoaffixcompound.patch

diff --git a/hunspell/hunspell-twoaffixcompound.patch b/hunspell/hunspell-twoaffixcompound.patch
new file mode 100644
index 0000000..71881a2
--- /dev/null
+++ b/hunspell/hunspell-twoaffixcompound.patch
@@ -0,0 +1,80 @@
+--- misc/hunspell-1.3.2/src/hunspell/affixmgr.cxx      2010-06-17 15:56:41.000000000 +0200
++++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.cxx        2011-02-10 20:47:22.000000000 +0100
+@@ -48,6 +48,7 @@
+   compoundroot = FLAG_NULL; // compound word signing flag
+   compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
+   compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
++  compoundmoresuffixes = 0; // allow more suffixes within compound words
+   checkcompounddup = 0; // forbid double words in compounds
+   checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
+   checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
+@@ -404,6 +405,10 @@
+           }
+        }
+ 
++       if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) {
++                   compoundmoresuffixes = 1;
++       }
++
+        if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
+                    checkcompounddup = 1;
+        }
+@@ -1626,8 +1631,9 @@
+             if (onlycpdrule) break;
+             if (compoundflag && 
+              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, 
compoundflag))) {
+-                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+-                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && 
!hu_mov_rule &&
++                if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
++                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || 
++                        (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, 
compoundflag)))) && !hu_mov_rule &&
+                     sfx->getCont() &&
+                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
+                             sfx->getContLen())) || (compoundend &&
+@@ -1640,9 +1646,11 @@
+             if (rv ||
+               (((wordnum == 0) && compoundbegin &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, 
hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, 
compoundbegin))) || // twofold suffixes + compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, 
compoundbegin)))) ||
+               ((wordnum > 0) && compoundmiddle &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, 
hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, 
compoundmiddle))) || // twofold suffixes + compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, 
compoundmiddle)))))
+               ) checked_prefix = 1;
+         // else check forbiddenwords and needaffix
+@@ -2118,8 +2126,9 @@
+             if (onlycpdrule) break;
+             if (compoundflag &&
+              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, 
compoundflag))) {
+-                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+-                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && 
!hu_mov_rule &&
++                if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
++                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                        (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, 
compoundflag)))) && !hu_mov_rule &&
+                     sfx->getCont() &&
+                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
+                             sfx->getContLen())) || (compoundend &&
+@@ -2132,9 +2141,11 @@
+             if (rv ||
+               (((wordnum == 0) && compoundbegin &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, 
hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, 
compoundbegin))) ||  // twofold suffix+compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, 
compoundbegin)))) ||
+               ((wordnum > 0) && compoundmiddle &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, 
hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, 
compoundmiddle))) ||  // twofold suffix+compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, 
compoundmiddle)))))
+               ) {
+                 // char * p = prefix_check_morph(st, i, 0, compound);
+--- misc/hunspell-1.3.2/src/hunspell/affixmgr.hxx      2010-06-17 15:56:41.000000000 +0200
++++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.hxx        2011-02-10 20:47:22.000000000 +0100
+@@ -41,6 +41,7 @@
+   FLAG                compoundroot;
+   FLAG                compoundforbidflag;
+   FLAG                compoundpermitflag;
++  int                 compoundmoresuffixes;
+   int                 checkcompounddup;
+   int                 checkcompoundrep;
+   int                 checkcompoundcase;
+
diff --git a/hunspell/makefile.mk b/hunspell/makefile.mk
index bf6f0b9..8cf2e4e 100644
--- a/hunspell/makefile.mk
+++ b/hunspell/makefile.mk
@@ -41,6 +41,7 @@ TARFILE_MD5=3121aaf3e13e5d88dfff13fb4a5f1ab8
 ADDITIONAL_FILES+=config.h
 
 PATCH_FILES=\
+    hunspell-twoaffixcompound.patch \
     hunspell-static.patch \
     hunspell-wntconfig.patch \
     hunspell-solaris.patch \
-- 
1.7.4.1

Attachment: twofoldaffix_compound.patch.sig
Description: Binary data


Context


Privacy Policy | Impressum (Legal Info) | Copyright information: Unless otherwise specified, all text and images on this website are licensed under the Creative Commons Attribution-Share Alike 3.0 License. This does not include the source code of LibreOffice, which is licensed under the Mozilla Public License (MPLv2). "LibreOffice" and "The Document Foundation" are registered trademarks of their corresponding registered owners or are in actual use as trademarks in one or more countries. Their respective logos and icons are also subject to international copyright laws. Use thereof is explained in our trademark policy.