-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 This patch enables hunspell to deal with two-level-inflected compound words, as in Kichwa: wiñana -> wiñashka (two levels of inflexion) + compounds = wiñashka+kuna+ntin+mi. Note: the file lingucomponent/unxlngx6.pro/lib/libspelllo.so must be removed before rebuilding, or else this patch won't be picked up. Perhaps there are better ways to make the build process aware of the need to rebuild libspelllo.so? As you've already figured out, this is one of my first patches o_O beware. Arno - ----- $ hunspell -d qu_EC -m wiñashkakunantinmi wiñashkakunantinmi pa:wiñashka st:wiñana # stem # Perfect 3rd person singular pa:kuna st:kuna pa:ntin st:ntin pa:mi -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/ iEYEARECAAYFAk5v6pkACgkQEMIGVCc8BjDLHwCggGEL66N/53csBRyuPDP+ITGs /CoAoMqx7TgMYWhnBr5GliItDLUHSlqc =X0J+ -----END PGP SIGNATURE-----
From ecea3f9e53793140e10a4e6b79d7ef5cb3714f19 Mon Sep 17 00:00:00 2001 From: Arno Teigseth <arnotixe@gmail.com> Date: Tue, 13 Sep 2011 18:34:16 -0500 Subject: [PATCH] added twofold affix+compound to hunspell, as the official fixed https://sourceforge.net/tracker/index.php?func=detail&aid=3288562&group_id=143754&atid=756395 --- hunspell/hunspell-twoaffixcompound.patch | 80 ++++++++++++++++++++++++++++++ hunspell/makefile.mk | 1 + 2 files changed, 81 insertions(+), 0 deletions(-) create mode 100644 hunspell/hunspell-twoaffixcompound.patch diff --git a/hunspell/hunspell-twoaffixcompound.patch b/hunspell/hunspell-twoaffixcompound.patch new file mode 100644 index 0000000..71881a2 --- /dev/null +++ b/hunspell/hunspell-twoaffixcompound.patch @@ -0,0 +1,80 @@ +--- misc/hunspell-1.3.2/src/hunspell/affixmgr.cxx 2010-06-17 15:56:41.000000000 +0200 ++++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.cxx 2011-02-10 20:47:22.000000000 +0100 +@@ -48,6 +48,7 @@ + compoundroot = FLAG_NULL; // compound word signing flag + compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word + compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word ++ compoundmoresuffixes = 0; // allow more suffixes within compound words + checkcompounddup = 0; // forbid double words in compounds + checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution) + checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds +@@ -404,6 +405,10 @@ + } + } + ++ if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) { ++ compoundmoresuffixes = 1; ++ } ++ + if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) { + checkcompounddup = 1; + } +@@ -1626,8 +1631,9 @@ + if (onlycpdrule) break; + if (compoundflag && + !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { +- if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, +- FLAG_NULL, compoundflag, hu_mov_rule ? 
IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && ++ if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, ++ FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || ++ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && + sfx->getCont() && + ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())) || (compoundend && +@@ -1640,9 +1646,11 @@ + if (rv || + (((wordnum == 0) && compoundbegin && + ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || ++ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound + (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || + ((wordnum > 0) && compoundmiddle && + ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || ++ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound + (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) + ) checked_prefix = 1; + // else check forbiddenwords and needaffix +@@ -2118,8 +2126,9 @@ + if (onlycpdrule) break; + if (compoundflag && + !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) { +- if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, +- FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule && ++ if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, ++ FLAG_NULL, compoundflag, hu_mov_rule ? 
IN_CPD_OTHER : IN_CPD_BEGIN)) || ++ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule && + sfx->getCont() && + ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())) || (compoundend && +@@ -2132,9 +2141,11 @@ + if (rv || + (((wordnum == 0) && compoundbegin && + ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || ++ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffix+compound + (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) || + ((wordnum > 0) && compoundmiddle && + ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || ++ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffix+compound + (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle))))) + ) { + // char * p = prefix_check_morph(st, i, 0, compound); +--- misc/hunspell-1.3.2/src/hunspell/affixmgr.hxx 2010-06-17 15:56:41.000000000 +0200 ++++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.hxx 2011-02-10 20:47:22.000000000 +0100 +@@ -41,6 +41,7 @@ + FLAG compoundroot; + FLAG compoundforbidflag; + FLAG compoundpermitflag; ++ int compoundmoresuffixes; + int checkcompounddup; + int checkcompoundrep; + int checkcompoundcase; + diff --git a/hunspell/makefile.mk b/hunspell/makefile.mk index bf6f0b9..8cf2e4e 100644 --- a/hunspell/makefile.mk +++ b/hunspell/makefile.mk @@ -41,6 +41,7 @@ TARFILE_MD5=3121aaf3e13e5d88dfff13fb4a5f1ab8 ADDITIONAL_FILES+=config.h PATCH_FILES=\ + hunspell-twoaffixcompound.patch \ hunspell-static.patch \ hunspell-wntconfig.patch \ hunspell-solaris.patch \ -- 1.7.4.1
Attachment:
twofoldaffix_compound.patch.sig
Description: Binary data