Text-Hunspell-2.08/0000755000175000017500000000000012140304161013513 5ustar cosimocosimoText-Hunspell-2.08/t/0000755000175000017500000000000012140304161013756 5ustar cosimocosimoText-Hunspell-2.08/t/07-add-dictionary.t0000644000175000017500000000103112124375254017272 0ustar cosimocosimouse strict; use warnings; use Data::Dumper; use Test::More tests => 4; use Text::Hunspell; my $speller = Text::Hunspell->new(qw(./t/test.aff ./t/test.dic)); die unless $speller; ok($speller, qq(Created a Text::Hunspell object [$speller])); my $word = q(munkey); ok( !$speller->check($word), qq(Word '$word' shouldn't be in the test dictionary) ); ok( !$speller->add_dic(q(./t/supp.dic)), q(Added a supplemental dictionary) ); ok( $speller->check($word), qq(Word '$word' is in the supplemental dictionary) ); Text-Hunspell-2.08/t/00-prereq.t0000644000175000017500000000031412124255763015673 0ustar cosimocosimouse Test::More tests => 1; use lib qw(inc ../inc); use Devel::CheckLib; check_lib_or_exit( lib => 'hunspell', header => 'hunspell/hunspell.h', ); ok(1, 'Prerequisite hunspell library found'); Text-Hunspell-2.08/t/05-core.t0000644000175000017500000000133412124375254015333 0ustar cosimocosimouse strict; use warnings; use Data::Dumper; use Test::More tests => 5; use Text::Hunspell; my $speller = Text::Hunspell->new(qw(./t/test.aff ./t/test.dic)); die unless $speller; ok($speller, qq(Created a Text::Hunspell object [$speller])); my $word = q(lótól); ok( $speller->check($word), qq(Word '$word' should be in the test dictionary) ); $word = q(lóotól); ok( ! 
$speller->check($word), qq(Word '$word' shouldn't be in the test dictionary) ); # Check spell suggestions my $misspelled = q(lóo); my @suggestions = $speller->suggest($misspelled); ok(scalar @suggestions > 0, q(Got some suggestions)); is_deeply( \@suggestions => [ qw(lói ló lót) ], q(List of suggestions should be correct) ); Text-Hunspell-2.08/t/06-analyze.t0000644000175000017500000000132212124255763016046 0ustar cosimocosimouse strict; use warnings; use Data::Dumper; use Test::More tests => 7; use Text::Hunspell; my $speller = Text::Hunspell->new(qw(./t/test.aff ./t/test.dic)); ok($speller, qq(Created a Text::Hunspell object [$speller])); # Sample analysis: # # 'st:ló po:noun ts:NOM al:lovak is:ABL' # my $word = q(lótól); my $analysis = $speller->analyze($word); ok($analysis, q(Got something back)); diag("Analysis result: [$analysis]"); # I'm a total newbie in dictionary stuff ok($analysis =~ m{st:ló}, q(Stemming root)); ok($analysis =~ m{po:noun}, q(Word supposed to be a noun)); ok($analysis =~ m{ts:NOM}, q(Have no idea)); ok($analysis =~ m{al:lovak}, q(Also here, no idea)); ok($analysis =~ m{is:ABL}, q(Guess what?)); Text-Hunspell-2.08/t/test.dic0000644000175000017500000000002212124255763015427 0ustar cosimocosimo2 ló/A 6 rom/B 1 Text-Hunspell-2.08/t/02-pod.t0000644000175000017500000000031412124255763015161 0ustar cosimocosimo# Thanks sri use Test::More; eval "use Test::Pod 1.14"; plan skip_all => 'Test::Pod 1.14 required' if $@; plan skip_all => 'set TEST_POD to enable this test' unless $ENV{TEST_POD}; all_pod_files_ok(); Text-Hunspell-2.08/t/supp.dic0000644000175000017500000000001112124255763015435 0ustar cosimocosimo1 munkey Text-Hunspell-2.08/t/test.aff0000644000175000017500000000062612124255763015436 0ustar cosimocosimoAM 7 AM po:noun ts:NOM AM po:noun ts:ABL AM po:noun ts:DAT AM po:noun ts:ACC AM po:adj ts:NOM AM po:noun ts:NOM al:lovak AM is:ABL SET UTF-8 KEY öüó|qwertzuiopőú|asdfghjkléáű|íyxcvbnm TRY íóútaeslzánorhgkiédmyőpvöbucfjüűxwq-.à SFX A Y 4 SFX A 
0 i . 5 SFX A 0 t . 4 SFX A 0 nak . 3 SFX A 0 tól . 7 SFX B Y 4 SFX B 0 i . 5 SFX B 0 ot . 4 SFX B 0 nak . 3 SFX B 0 tól . 7 Text-Hunspell-2.08/t/01-use.t0000644000175000017500000000006612124255763015176 0ustar cosimocosimouse Test::More tests => 1; use_ok('Text::Hunspell'); Text-Hunspell-2.08/Changes0000644000175000017500000000350712140304015015011 0ustar cosimocosimoRevision history for Perl extension Text::Hunspell. 2.08 2013/05/01 23:22:35 Europe/Oslo (cosimo) Improved main POD documentation for Hunspell.pm. Fixes RT#84964. Thanks to Paul Flo Williams for the patch. (https://github.com/cosimo/perl5-text-hunspell/pull/2) 2.07 2013/03/26 20:45:00 Europe/Oslo (cosimo) DEPRECATED the delete() method, and implemented proper object handles in the hunspell XS glue. Again, thanks to Paul Flo Williams for patch and test case. Closes RT#84054. 2.06 2013/03/09 11:39:28 Europe/Oslo (cosimo) Implemented new add_dic() function from hunspell API. Fixes RT#83765. Thanks to Paul Flo Williams for patch and test case. 2.05 2012/09/21 08:17:00 (cosimo) Fixed RT#79630 incorrect encoding of Hunspell.pm POD documentation. Thanks to Paul for the patch. 2.04 2012/09/20 23:05:00 (cosimo) Fixed t/02_pod.t test due to newest Test::Pod and missing =encoding directive. Thanks to SHLOMIF for the patch. 2.03 2011/06/24 11:12:00 (cosimo) Fixed use of "qw()" as parenthesis in inc/Devel/CheckLib.pm because deprecated in perl 5.14. Thanks to H. Merijn Brand for reporting. 2.02 2010/10/24 13:02:00 (cosimo) Added an explicit warning if the unversioned libhunspell.so symlink or library is not found. Linking will almost certainly fail. 2.01 2010/09 (cosimo) Fixed build/test problems 2.00 2010/09 (cosimo) Now cosimo at cpan dot org is a co-maintainer for Text::Hunspell. Added a dirty hack to work around some underlying changes in libhunspell that made the XS code unable to initialize any Hunspell object. 
1.xx Author and maintainer was ELEONORA Text-Hunspell-2.08/examples/0000755000175000017500000000000012140304161015331 5ustar cosimocosimoText-Hunspell-2.08/examples/basic.pl0000644000175000017500000000244012124255763016766 0ustar cosimocosimo#!/usr/bin/env perl # # Basic tests of Text::Hunspell # Taken from the main POD documentation. # # For this example to work, you have to have # the US english dictionary installed! # # On Debian/Ubuntu systems, it should be # enough to type: # # sudo apt-get install hunspell libhunspell-dev # # Have fun! # # Cosimo, 06/Sep/2010 # use strict; use warnings; use Text::Hunspell; # You can use relative or absolute paths. my $speller = Text::Hunspell->new( "/usr/share/hunspell/en_US.aff", # Hunspell affix file "/usr/share/hunspell/en_US.dic" # Hunspell dictionary file ); die unless $speller; # Check a word against the dictionary my $word = 'opera'; print $speller->check($word) ? "'$word' found in the dictionary\n" : "'$word' not found in the dictionary!\n"; # Spell check suggestions my $misspelled = 'programmng'; my @suggestions = $speller->suggest($misspelled); print "\n", "You typed '$misspelled'. 
Did you mean?\n"; for (@suggestions) { print " - $_\n"; } # Analysis of a word $word = 'automatic'; my $analysis = $speller->analyze($word); print "\n", "Analysis of '$word' returns '$analysis'\n"; # Word stemming $word = 'development'; my @stemming = $speller->stem($word); print "\n", "Stemming of '$word' returns:\n"; for (@stemming) { print " - $_\n"; } Text-Hunspell-2.08/MANIFEST0000644000175000017500000000060112140304161014641 0ustar cosimocosimoChanges examples/basic.pl hunspell.h Hunspell.pm Hunspell.xs inc/Devel/CheckLib.pm Makefile.PL MANIFEST META.yml Module meta-data (added by MakeMaker) perlobject.map README t/00-prereq.t t/01-use.t t/02-pod.t t/05-core.t t/06-analyze.t t/07-add-dictionary.t t/supp.dic t/test.aff t/test.dic typemap META.json Module JSON meta-data (added by MakeMaker) Text-Hunspell-2.08/typemap0000644000175000017500000000126312124375254015134 0ustar cosimocosimo# $Id: typemap,v 1.3 2002/08/26 02:40:25 moseley Exp $ TYPEMAP Hunspell * O_OBJECT # From: "perlobject.map" Dean Roehrich, version 19960302 # O_OBJECT -> link an opaque C or C++ object to a blessed Perl object. OUTPUT # The Perl object is blessed into 'CLASS', which should be a # char* having the name of the package for the blessing. 
O_OBJECT sv_setref_pv( $arg, CLASS, (void*)$var ); INPUT O_OBJECT if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) $var = ($type)SvIV((SV*)SvRV( $arg )); else{ warn( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); XSRETURN_UNDEF; } Text-Hunspell-2.08/hunspell.h0000644000175000017500000007617712124255763015557 0ustar cosimocosimo#ifndef _HTYPES_HXX_ #define _HTYPES_HXX_ #include #include #include #include #define MAXCONDLEN 20 #define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *)) #define MAXDELEN 256 #define ROTATE_LEN 5 #define ROTATE(v,q) \ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1)); // hentry options #define H_OPT (1 << 0) #define H_OPT_ALIASM (1 << 1) #define H_OPT_PHON (1 << 2) // see also csutil.hxx #define HENTRY_WORD(h) &(h->word) // approx. number of user defined words #define USERWORD 1000 #ifdef HUNSPELL_STATIC #define DLLEXPORT #else #ifdef HUNSPELL_EXPORTS #define DLLEXPORT __declspec( dllexport ) #else #define DLLEXPORT __declspec( dllimport ) #endif #endif #define MAXCONDLEN 20 #define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *)) #ifndef _HUNZIP_HXX_ #define _HUNZIP_HXX_ #define BUFSIZE 65536 #define HZIP_EXTENSION ".hz" #define MSG_OPEN "error: %s: cannot open\n" #define MSG_FORMAT "error: %s: not in hzip format\n" #define MSG_MEMORY "error: %s: missing memory\n" #define MSG_KEY "error: %s: missing or bad password\n" struct bit { unsigned char c[2]; int v[2]; }; class Hunzip { protected: char * filename; FILE * fin; int bufsiz, lastbit, inc, inbits, outc; struct bit * dec; // code table char in[BUFSIZE]; // input buffer char out[BUFSIZE + 1]; // Huffman-decoded buffer char line[BUFSIZE + 50]; // decoded line int getcode(const char * key); int getbuf(); int fail(const char * err, const char * par); public: Hunzip(const char * filename, const char * key = NULL); ~Hunzip(); const char * getline(); }; #endif #ifndef _FILEMGR_HXX_ #define _FILEMGR_HXX_ class FileMgr { protected: FILE * fin; Hunzip * hin; char 
in[BUFSIZE + 50]; // input buffer int fail(const char * err, const char * par); int linenum; public: FileMgr(const char * filename, const char * key = NULL); ~FileMgr(); char * getline(); int getlinenum(); }; #endif #ifndef __WCHARHXX__ #define __WCHARHXX__ typedef struct { unsigned char l; unsigned char h; } w_char; // two character arrays struct replentry { char * pattern; char * pattern2; }; #endif #ifndef __CSUTILHXX__ #define __CSUTILHXX__ // First some base level utility routines // convert UTF-16 characters to UTF-8 char * u16_u8(char * dest, int size, const w_char * src, int srclen); // convert UTF-8 characters to UTF-16 int u8_u16(w_char * dest, int size, const char * src); // sort 2-byte vector void flag_qsort(unsigned short flags[], short begin, short end); // binary search in 2-byte vector int flag_bsearch(unsigned short flags[], unsigned short flag, short right); // remove end of line char(s) void mychomp(char * s); // duplicate string char * mystrdup(const char * s); // duplicate reverse of string char * myrevstrdup(const char * s); // parse into tokens with char delimiter char * mystrsep(char ** sptr, const char delim); // parse into tokens with char delimiter char * mystrrep(char *, const char *, const char *); // is one string a leading subset of another int isSubset(const char * s1, const char * s2); // is one reverse string a leading subset of the end of another int isRevSubset(const char * s1, const char * s2, int len); // append s to ends of every lines in text void strlinecat(char * lines, const char * s); // tokenize into lines with new line int line_tok(const char * text, char *** lines); // tokenize into lines with new line and uniq in place char * line_uniq(char * text); // change \n to c in place char * line_join(char * text, char c); // leave only last {[^}]*} pattern in string char * delete_zeros(char * morphout); // reverse word void reverseword(char *); // reverse word void reverseword_utf(char *); // character encoding information 
struct cs_info { unsigned char ccase; unsigned char clower; unsigned char cupper; }; // Unicode character encoding information struct unicode_info { unsigned short ccase; unsigned short cupper; unsigned short clower; }; struct unicode_info2 { char ccase; unsigned short cupper; unsigned short clower; }; struct enc_entry { const char * enc_name; struct cs_info * cs_table; }; // language to encoding default map struct lang_map { const char * lang; const char * def_enc; int num; }; struct cs_info * get_current_cs(const char * es); struct unicode_info * get_utf_cs(); int get_utf_cs_len(); const char * get_default_enc(const char * lang); int get_lang_num(const char * lang); // convert null terminated string to all caps using encoding void enmkallcap(char * d, const char * p, const char * encoding); // convert null terminated string to all little using encoding void enmkallsmall(char * d, const char * p, const char * encoding); // convert null terminated string to have intial capital using encoding void enmkinitcap(char * d, const char * p, const char * encoding); // convert null terminated string to all caps void mkallcap(char * p, const struct cs_info * csconv); // convert null terminated string to all little void mkallsmall(char * p, const struct cs_info * csconv); // convert null terminated string to have intial capital void mkinitcap(char * p, const struct cs_info * csconv); // convert first nc characters of UTF-8 string to little void mkallsmall_utf(w_char * u, int nc, struct unicode_info2 * utfconv); #endif struct hentry { unsigned char blen; // word length in bytes unsigned char clen; // word length in characters (different for UTF-8 enc.) 
short alen; // length of affix flag vector unsigned short * astr; // affix flag vector struct hentry * next; // next word with same hash code struct hentry * next_homonym; // next homonym word (with same hash code) char var; // variable fields (only for special pronounciation yet) char word; // variable-length word (8-bit or UTF-8 encoding) }; #endif #ifndef _HASHMGR_HXX_ #define _HASHMGR_HXX_ enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; class HashMgr { int tablesize; struct hentry ** tableptr; int userword; flag flag_mode; int complexprefixes; int utf8; unsigned short forbiddenword; int langnum; char * enc; char * lang; struct cs_info * csconv; char * ignorechars; unsigned short * ignorechars_utf16; int ignorechars_utf16_len; int numaliasf; // flag vector `compression' with aliases unsigned short ** aliasf; unsigned short * aliasflen; int numaliasm; // morphological desciption `compression' with aliases char ** aliasm; public: HashMgr(const char * tpath, const char * apath, const char * key = NULL); ~HashMgr(); struct hentry * lookup(const char *) const; int hash(const char *) const; struct hentry * walk_hashtable(int & col, struct hentry * hp) const; int add(const char * word); int add_with_affix(const char * word, const char * pattern); int remove(const char * word); int decode_flags(unsigned short ** result, char * flags, FileMgr * af); unsigned short decode_flag(const char * flag); char * encode_flag(unsigned short flag); int is_aliasf(); int get_aliasf(int index, unsigned short ** fvec, FileMgr * af); int is_aliasm(); char * get_aliasm(int index); private: int get_clen_and_captype(const char * word, int wbl, int * captype); int load_tables(const char * tpath, const char * key); int add_word(const char * word, int wbl, int wcl, unsigned short * ap, int al, const char * desc, bool onlyupcase); int load_config(const char * affpath, const char * key); int parse_aliasf(char * line, FileMgr * af); int add_hidden_capitalized_word(char * word, int wbl, int 
wcl, unsigned short * flags, int al, char * dp, int captype); int parse_aliasm(char * line, FileMgr * af); int remove_forbidden_flag(const char * word); }; #endif #ifndef _ATYPES_HXX_ #define _ATYPES_HXX_ // HUNSTEM def. #define HUNSTEM #define SETSIZE 256 #define CONTSIZE 65536 #define MAXWORDLEN 100 #define MAXWORDUTF8LEN (MAXWORDLEN * 4) #define XPRODUCT (1 << 0) enum {IN_CPD_NOT, IN_CPD_BEGIN, IN_CPD_END}; #define MAXLNLEN 8192 * 4 #define MAXCOMPOUND 10 #define MAXACC 1000 #define MISSING_DESCRIPTION "[]" #define FLAG unsigned short #define FLAG_NULL 0x00 #define FREE_FLAG(a) a = 0 #define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c) struct affentry { char * strip; char * appnd; unsigned char stripl; unsigned char appndl; char numconds; char opts; unsigned short aflag; unsigned short * contclass; short contclasslen; union { char conds[MAXCONDLEN]; struct { char conds1[MAXCONDLEN_1]; char * conds2; } l; } c; char * morphcode; }; struct guessword { char * word; bool allow; char * orig; }; struct mapentry { char * set; w_char * set_utf16; int len; }; struct flagentry { FLAG * def; int len; }; struct patentry { char * pattern; char * pattern2; char * pattern3; FLAG cond; FLAG cond2; }; #endif #ifndef __PHONETHXX__ #define __PHONETHXX__ #define HASHSIZE 256 #define MAXPHONETLEN 256 #define MAXPHONETUTF8LEN (MAXPHONETLEN * 4) struct phonetable { char utf8; cs_info * lang; int num; char * * rules; int hash[HASHSIZE]; }; void init_phonet_hash(phonetable & parms); int phonet (const char * inword, char * target, int len, phonetable & phone); #endif #ifndef _BASEAFF_HXX_ #define _BASEAFF_HXX_ class AffEntry { public: protected: char * appnd; char * strip; unsigned char appndl; unsigned char stripl; char numconds; char opts; unsigned short aflag; union { char conds[MAXCONDLEN]; struct { char conds1[MAXCONDLEN_1]; char * conds2; } l; } c; char * morphcode; unsigned short * contclass; short contclasslen; }; #endif #ifndef _HASHMGR_HXX_ 
#define _HASHMGR_HXX_ e enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; class HashMgr { int tablesize; struct hentry ** tableptr; int userword; flag flag_mode; int complexprefixes; int utf8; unsigned short forbiddenword; int langnum; char * enc; char * lang; struct cs_info * csconv; char * ignorechars; unsigned short * ignorechars_utf16; int ignorechars_utf16_len; int numaliasf; // flag vector `compression' with aliases unsigned short ** aliasf; unsigned short * aliasflen; int numaliasm; // morphological desciption `compression' with aliases char ** aliasm; public: HashMgr(const char * tpath, const char * apath, const char * key = NULL); ~HashMgr(); struct hentry * lookup(const char *) const; int hash(const char *) const; struct hentry * walk_hashtable(int & col, struct hentry * hp) const; int add(const char * word); int add_with_affix(const char * word, const char * pattern); int remove(const char * word); int decode_flags(unsigned short ** result, char * flags, FileMgr * af); unsigned short decode_flag(const char * flag); char * encode_flag(unsigned short flag); int is_aliasf(); int get_aliasf(int index, unsigned short ** fvec, FileMgr * af); int is_aliasm(); char * get_aliasm(int index); private: int get_clen_and_captype(const char * word, int wbl, int * captype); int load_tables(const char * tpath, const char * key); int add_word(const char * word, int wbl, int wcl, unsigned short * ap, int al, const char * desc, bool onlyupcase); int load_config(const char * affpath, const char * key); int parse_aliasf(char * line, FileMgr * af); int add_hidden_capitalized_word(char * word, int wbl, int wcl, unsigned short * flags, int al, char * dp, int captype); int parse_aliasm(char * line, FileMgr * af); int remove_forbidden_flag(const char * word); }; #endif #ifndef _REPLIST_HXX_ #define _REPLIST_HXX_ class RepList { protected: replentry ** dat; int size; int pos; public: RepList(int n); ~RepList(); int get_pos(); int add(char * pat1, char * pat2); replentry * 
item(int n); int near(const char * word); int match(const char * word, int n); int conv(const char * word, char * dest); }; #endif #ifndef _AFFIXMGR_HXX_ #define _AFFIXMGR_HXX_ #define dupSFX (1 << 0) #define dupPFX (1 << 1) class AffixMgr { AffEntry * pStart[SETSIZE]; AffEntry * sStart[SETSIZE]; AffEntry * pFlag[SETSIZE]; AffEntry * sFlag[SETSIZE]; HashMgr * pHMgr; HashMgr ** alldic; int * maxdic; char * keystring; char * trystring; char * encoding; struct cs_info * csconv; int utf8; int complexprefixes; FLAG compoundflag; FLAG compoundbegin; FLAG compoundmiddle; FLAG compoundend; FLAG compoundroot; FLAG compoundforbidflag; FLAG compoundpermitflag; int checkcompounddup; int checkcompoundrep; int checkcompoundcase; int checkcompoundtriple; int simplifiedtriple; FLAG forbiddenword; FLAG nosuggest; FLAG needaffix; int cpdmin; int numrep; replentry * reptable; RepList * iconvtable; RepList * oconvtable; int nummap; mapentry * maptable; int numbreak; char ** breaktable; int numcheckcpd; patentry * checkcpdtable; int simplifiedcpd; int numdefcpd; flagentry * defcpdtable; phonetable * phone; int maxngramsugs; int nosplitsugs; int sugswithdots; int cpdwordmax; int cpdmaxsyllable; char * cpdvowels; w_char * cpdvowels_utf16; int cpdvowels_utf16_len; char * cpdsyllablenum; const char * pfxappnd; // BUG: not stateless const char * sfxappnd; // BUG: not stateless FLAG sfxflag; // BUG: not stateless char * derived; // BUG: not stateless AffEntry * sfx; // BUG: not stateless AffEntry * pfx; // BUG: not stateless int checknum; char * wordchars; unsigned short * wordchars_utf16; int wordchars_utf16_len; char * ignorechars; unsigned short * ignorechars_utf16; int ignorechars_utf16_len; char * version; char * lang; int langnum; FLAG lemma_present; FLAG circumfix; FLAG onlyincompound; FLAG keepcase; FLAG substandard; int checksharps; int fullstrip; int havecontclass; // boolean variable char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) public: 
AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key = NULL); ~AffixMgr(); struct hentry * affix_check(const char * word, int len, const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT); struct hentry * prefix_check(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL); inline int isSubset(const char * s1, const char * s2); struct hentry * prefix_check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL); inline int isRevSubset(const char * s1, const char * end_of_s2, int len); struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); struct hentry * suffix_check_twosfx(const char * word, int len, int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL); char * affix_check_morph(const char * word, int len, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); char * prefix_check_morph(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL); char * suffix_check_morph (const char * word, int len, int sfxopts, AffEntry * ppfx, const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); char * prefix_check_twosfx_morph(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL); char * suffix_check_twosfx_morph(const char * word, int len, int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL); char * morphgen(char * ts, int wl, const unsigned short * ap, unsigned short al, char * morph, char * targetmorph, int level); int expand_rootword(struct guessword * wlst, int maxn, const char * ts, int wl, const unsigned short * ap, unsigned short al, char * bad, int, char *); short get_syllable (const char * word, int wlen); int cpdrep_check(const char * word, int len); int cpdpat_check(const char * word, int len, 
hentry * r1, hentry * r2); int defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** rwords, char all); int cpdcase_check(const char * word, int len); inline int candidate_check(const char * word, int len); void setcminmax(int * cmin, int * cmax, const char * word, int len); struct hentry * compound_check(const char * word, int len, short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words, char hu_mov_rule, char is_sug); int compound_check_morph(const char * word, int len, short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words, char hu_mov_rule, char ** result, char * partresult); struct hentry * lookup(const char * word); int get_numrep(); struct replentry * get_reptable(); RepList * get_iconvtable(); RepList * get_oconvtable(); struct phonetable * get_phonetable(); int get_nummap(); struct mapentry * get_maptable(); int get_numbreak(); char ** get_breaktable(); char * get_encoding(); int get_langnum(); char * get_key_string(); char * get_try_string(); const char * get_wordchars(); unsigned short * get_wordchars_utf16(int * len); char * get_ignore(); unsigned short * get_ignore_utf16(int * len); int get_compound(); FLAG get_compoundflag(); FLAG get_compoundbegin(); FLAG get_forbiddenword(); FLAG get_nosuggest(); FLAG get_needaffix(); FLAG get_onlyincompound(); FLAG get_compoundroot(); FLAG get_lemma_present(); int get_checknum(); char * get_possible_root(); const char * get_prefix(); const char * get_suffix(); const char * get_derived(); const char * get_version(); const int have_contclass(); int get_utf8(); int get_complexprefixes(); char * get_suffixed(char ); int get_maxngramsugs(); int get_nosplitsugs(); int get_sugswithdots(void); FLAG get_keepcase(void); int get_checksharps(void); char * encode_flag(unsigned short aflag); int get_fullstrip(); private: int parse_file(const char * affpath, const char * key); int parse_flag(char * line, unsigned short * out, FileMgr * af); int parse_num(char * line, 
int * out, FileMgr * af); int parse_cpdsyllable(char * line, FileMgr * af); int parse_reptable(char * line, FileMgr * af); int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword); int parse_phonetable(char * line, FileMgr * af); int parse_maptable(char * line, FileMgr * af); int parse_breaktable(char * line, FileMgr * af); int parse_checkcpdtable(char * line, FileMgr * af); int parse_defcpdtable(char * line, FileMgr * af); int parse_affix(char * line, const char at, FileMgr * af, char * dupflags); void reverse_condition(char *); void debugflag(char * result, unsigned short flag); int condlen(char *); int encodeit(struct affentry * ptr, char * cs); int build_pfxtree(AffEntry* pfxptr); int build_sfxtree(AffEntry* sfxptr); int process_pfx_order(); int process_sfx_order(); AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr); AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr); int process_pfx_tree_to_list(); int process_sfx_tree_to_list(); int redundant_condition(char, char * strip, int stripl, const char * cond, int); }; #endif #ifndef _SUGGESTMGR_HXX_ #define _SUGGESTMGR_HXX_ #define MAXSWL 100 #define MAXSWUTF8L (MAXSWL * 4) #define MAX_ROOTS 100 #define MAX_WORDS 100 #define MAX_GUESS 200 #define MAXNGRAMSUGS 4 #define MAXPHONSUGS 2 // timelimit: max ~1/4 sec (process time on Linux) for a time consuming function #define TIMELIMIT (CLOCKS_PER_SEC >> 2) #define MINTIMER 100 #define MAXPLUSTIMER 100 #define NGRAM_LONGER_WORSE (1 << 0) #define NGRAM_ANY_MISMATCH (1 << 1) #define NGRAM_LOWERING (1 << 2) #include enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; class SuggestMgr { char * ckey; int ckeyl; w_char * ckey_utf; char * ctry; int ctryl; w_char * ctry_utf; AffixMgr* pAMgr; int maxSug; struct cs_info * csconv; int utf8; int langnum; int nosplitsugs; int maxngramsugs; int complexprefixes; public: SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr); ~SuggestMgr(); int suggest(char*** slst, const char * word, int nsug, 
int * onlycmpdsug); int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md); int suggest_auto(char*** slst, const char * word, int nsug); int suggest_stems(char*** slst, const char * word, int nsug); int suggest_pos_stems(char*** slst, const char * word, int nsug); char * suggest_morph(const char * word); char * suggest_gen(char ** pl, int pln, char * pattern); char * suggest_morph_for_spelling_error(const char * word); private: int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest, int * timer, clock_t * timelimit); int checkword(const char *, int, int, int *, clock_t *); int check_forbidden(const char *, int); int capchars(char **, const char *, int, int); int replchars(char**, const char *, int, int); int doubletwochars(char**, const char *, int, int); int forgotchar(char **, const char *, int, int); int swapchar(char **, const char *, int, int); int longswapchar(char **, const char *, int, int); int movechar(char **, const char *, int, int); int extrachar(char **, const char *, int, int); int badcharkey(char **, const char *, int, int); int badchar(char **, const char *, int, int); int twowords(char **, const char *, int, int); int fixstems(char **, const char *, int); int capchars_utf(char **, const w_char *, int wl, int, int); int doubletwochars_utf(char**, const w_char *, int wl, int, int); int forgotchar_utf(char**, const w_char *, int wl, int, int); int extrachar_utf(char**, const w_char *, int wl, int, int); int badcharkey_utf(char **, const w_char *, int wl, int, int); int badchar_utf(char **, const w_char *, int wl, int, int); int swapchar_utf(char **, const w_char *, int wl, int, int); int longswapchar_utf(char **, const w_char *, int, int, int); int movechar_utf(char **, const w_char *, int, int, int); int mapchars(char**, const char *, int, int); int map_related(const char *, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *); int map_related_utf(w_char *, int, int, int, char ** wlst, int, 
const mapentry*, int, int *, clock_t *); int ngram(int n, char * s1, const char * s2, int opt); int mystrlen(const char * word); int leftcommonsubstring(char * s1, const char * s2); int commoncharacterpositions(char * s1, const char * s2, int * is_swap); void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n); void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result); int lcslen(const char * s, const char* s2); char * suggest_hentry_gen(hentry * rv, char * pattern); }; #endif #ifndef _LANGNUM_HXX_ #define _LANGNUM_HXX_ /* language numbers for language specific codes see http://l10n.openoffice.org/languages.html */ enum { LANG_ar=96, LANG_az=100, // custom number LANG_bg=41, LANG_ca=37, LANG_cs=42, LANG_da=45, LANG_de=49, LANG_el=30, LANG_en=01, LANG_es=34, LANG_eu=10, LANG_fr=02, LANG_gl=38, LANG_hr=78, LANG_hu=36, LANG_it=39, LANG_la=99, // custom number LANG_lv=101, // custom number LANG_nl=31, LANG_pl=48, LANG_pt=03, LANG_ru=07, LANG_sv=50, LANG_tr=90, LANG_uk=80, LANG_xx=999 }; #endif #define SPELL_COMPOUND (1 << 0) #define SPELL_FORBIDDEN (1 << 1) #define SPELL_ALLCAP (1 << 2) #define SPELL_NOCAP (1 << 3) #define SPELL_INITCAP (1 << 4) #define SPELL_XML "" #define MAXDIC 20 #define MAXSUGGESTION 15 #define MAXSHARPS 5 #ifndef _MYSPELLMGR_HXX_ #define _MYSPELLMGR_HXX_ #ifdef HUNSPELL_STATIC #define DLLEXPORT #else #ifdef HUNSPELL_EXPORTS #define DLLEXPORT __declspec( dllexport ) #else #define DLLEXPORT __declspec( dllimport ) #endif #endif #ifdef WIN32 class DLLEXPORT Hunspell #else class Hunspell #endif { AffixMgr* pAMgr; HashMgr* pHMgr[MAXDIC]; int maxdic; SuggestMgr* pSMgr; char * affixpath; char * encoding; struct cs_info * csconv; int langnum; int utf8; int complexprefixes; char** wordbreak; public: /* Hunspell(aff, dic) - constructor of Hunspell class * input: path of affix file and dictionary file */ Hunspell(const char * affpath, const char * dpath, const char * key = NULL); ~Hunspell(); /* load extra dictionaries (only dic 
files) */ int add_dic(const char * dpath, const char * key = NULL); /* spell(word) - spellcheck word * output: 0 = bad word, not 0 = good word * * plus output: * info: information bit array, fields: * SPELL_COMPOUND = a compound word * SPELL_FORBIDDEN = an explicit forbidden word * root: root (stem), when input is a word with affix(es) */ int spell(const char * word, int * info = NULL, char ** root = NULL); /* suggest(suggestions, word) - search suggestions * input: pointer to an array of strings pointer and the (bad) word * array of strings pointer (here *slst) may not be initialized * output: number of suggestions in string array, and suggestions in * a newly allocated array of strings (*slts will be NULL when number * of suggestion equals 0.) */ int suggest(char*** slst, const char * word); /* deallocate suggestion lists */ void free_list(char *** slst, int n); char * get_dic_encoding(); /* morphological functions */ /* analyze(result, word) - morphological analysis of the word */ int analyze(char*** slst, const char * word); /* stem(result, word) - stemmer function */ int stem(char*** slst, const char * word); /* stem(result, analysis, n) - get stems from a morph. analysis * example: * char ** result, result2; * int n1 = analyze(&result, "words"); * int n2 = stem(&result2, result, n1); */ int stem(char*** slst, char ** morph, int n); /* generate(result, word, word2) - morphological generation by example(s) */ int generate(char*** slst, const char * word, const char * word2); /* generate(result, word, desc, n) - generation by morph. 
description(s) * example: * char ** result; * char * affix = "is:plural"; // description depends from dictionaries, too * int n = generate(&result, "word", &affix, 1); * for (int i = 0; i < n; i++) printf("%s\n", result[i]); */ int generate(char*** slst, const char * word, char ** desc, int n); /* functions for run-time modification of the dictionary */ /* add word to the run-time dictionary */ int add(const char * word); /* add word to the run-time dictionary with affix flags of * the example (a dictionary word): Hunspell will recognize * affixed forms of the new word, too. */ int add_with_affix(const char * word, const char * example); /* remove word from the run-time dictionary */ int remove(const char * word); /* other */ /* get extra word characters definied in affix file for tokenization */ const char * get_wordchars(); unsigned short * get_wordchars_utf16(int * len); struct cs_info * get_csconv(); const char * get_version(); /* experimental and deprecated functions */ #ifdef HUNSPELL_EXPERIMENTAL /* suffix is an affix flag string, similarly in dictionary files */ int put_word_suffix(const char * word, const char * suffix); char * morph_with_correction(const char * word); /* spec. 
suggestions */ int suggest_auto(char*** slst, const char * word); int suggest_pos_stems(char*** slst, const char * word); char * get_possible_root(); #endif private: int cleanword(char *, const char *, int * pcaptype, int * pabbrev); int cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev); void mkinitcap(char *); int mkinitcap2(char * p, w_char * u, int nc); int mkinitsmall2(char * p, w_char * u, int nc); void mkallcap(char *); int mkallcap2(char * p, w_char * u, int nc); void mkallsmall(char *); int mkallsmall2(char * p, w_char * u, int nc); struct hentry * checkword(const char *, int * info, char **root); char * sharps_u8_l1(char * dest, char * source); hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root); int is_keepcase(const hentry * rv); int insert_sug(char ***slst, char * word, int ns); void cat_result(char * result, char * st); char * stem_description(const char * desc); int spellml(char*** slst, const char * word); int get_xml_par(char * dest, const char * par, int maxl); const char * get_xml_pos(const char * s, const char * attr); int get_xml_list(char ***slst, char * list, const char * tag); int check_xml_par(const char * q, const char * attr, const char * value); }; #endif Text-Hunspell-2.08/inc/0000755000175000017500000000000012140304161014264 5ustar cosimocosimoText-Hunspell-2.08/inc/Devel/0000755000175000017500000000000012140304161015323 5ustar cosimocosimoText-Hunspell-2.08/inc/Devel/CheckLib.pm0000644000175000017500000003217312124255763017352 0ustar cosimocosimo# $Id: CheckLib.pm,v 1.25 2008/10/27 12:16:23 drhyde Exp $ package # Devel::CheckLib; use strict; use vars qw($VERSION @ISA @EXPORT); $VERSION = '0.7'; use Config; use File::Spec; use File::Temp; require Exporter; @ISA = qw(Exporter); @EXPORT = qw(assert_lib check_lib_or_exit check_lib); # localising prevents the warningness leaking out of this module local $^W = 1; # use warnings is a 5.6-ism _findcc(); # bomb out early 
if there's no compiler =head1 NAME Devel::CheckLib - check that a library is available =head1 DESCRIPTION Devel::CheckLib is a perl module that checks whether a particular C library and its headers are available. =head1 SYNOPSIS use Devel::CheckLib; check_lib_or_exit( lib => 'jpeg', header => 'jpeglib.h' ); check_lib_or_exit( lib => [ 'iconv', 'jpeg' ] ); # or prompt for path to library and then do this: check_lib_or_exit( lib => 'jpeg', libpath => $additional_path ); =head1 USING IT IN Makefile.PL or Build.PL If you want to use this from Makefile.PL or Build.PL, do not simply copy the module into your distribution as this may cause problems when PAUSE and search.cpan.org index the distro. Instead, use the use-devel-checklib script. =head1 HOW IT WORKS You pass named parameters to a function, describing to it how to build and link to the libraries. It works by trying to compile some code - which defaults to this: int main(void) { return 0; } and linking it to the specified libraries. If something pops out the end which looks executable, it gets executed, and if main() returns 0 we know that it worked. That tiny program is built once for each library that you specify, and (without linking) once for each header file. If you want to check for the presence of particular functions in a library, or even that those functions return particular results, then you can pass your own function body for main() thus: check_lib_or_exit( function => 'foo();if(libversion() > 5) return 0; else return 1;' incpath => ... libpath => ... lib => ... header => ... ); In that case, it will fail to build if either foo() or libversion() don't exist, and main() will return the wrong value if libversion()'s return value isn't what you want. =head1 FUNCTIONS All of these take the same named parameters and are exported by default. To avoid exporting them, C. 
=head2 assert_lib This takes several named parameters, all of which are optional, and dies with an error message if any of the libraries listed cannot be found. B<NOTE>: dying in a Makefile.PL or Build.PL may provoke a 'FAIL' report from CPAN Testers' automated smoke testers. Use C<check_lib_or_exit> instead. The named parameters are: =over =item lib Must be either a string with the name of a single library or a reference to an array of strings of library names. Depending on the compiler found, library names will be fed to the compiler either as C<-l> arguments or as C<.lib> file names. (E.g. C<-ljpeg> or C<jpeg.lib>) =item libpath a string or an array of strings representing additional paths to search for libraries. =item LIBS a C<ExtUtils::MakeMaker>-style space-separated list of libraries (each preceded by '-l') and directories (preceded by '-L'). This can also be supplied on the command-line. =back And libraries are no use without header files, so ... =over =item header Must be either a string with the name of a single header file or a reference to an array of strings of header file names. =item incpath a string or an array of strings representing additional paths to search for headers. =item INC a C<ExtUtils::MakeMaker>-style space-separated list of incpaths, each preceded by '-I'. This can also be supplied on the command-line. =back =head2 check_lib_or_exit This behaves exactly the same as C<assert_lib()> except that instead of dying, it warns (with exactly the same error message) and exits. This is intended for use in Makefile.PL / Build.PL when you might want to prompt the user for various paths and things before checking that what they've told you is sane. If any library or header is missing, it exits with an exit value of 0 to avoid causing a CPAN Testers 'FAIL' report. CPAN Testers should ignore this result -- which is what you want if an external library dependency is not available. =head2 check_lib This behaves exactly the same as C<assert_lib()> except that it is silent, returning false instead of dying, or true otherwise.
=cut

# Run assert_lib() with the caller's arguments; if it dies, print its
# error message as a warning and exit the process.  A bare exit() gives
# status 0, so a missing library is not reported as a build failure.
sub check_lib_or_exit {
    eval { assert_lib(@_) };
    if ($@) {
        warn $@;
        exit;
    }
}

# Silent variant: returns 1 when assert_lib() succeeds and 0 when it dies.
sub check_lib {
    eval { assert_lib(@_) };
    return $@ ? 0 : 1;
}

# Normalise a named parameter that may be a single string or a reference
# to an array of strings into a flat list.  A false value (missing, undef,
# empty string, 0) yields the empty list.
sub _as_list {
    my ($value) = @_;
    return if !$value;
    return ref($value) ? @{$value} : $value;
}

# Check that every requested header can be compiled against and every
# requested library can be linked against; die with a message naming the
# offenders otherwise.
#
# Named parameters (all optional):
#   lib, libpath, header, incpath - a string or an array reference of strings
#   LIBS, INC  - Makefile.PL-style strings ("-lfoo -L/dir", "-I/dir"); values
#                given as LIBS=... / INC=... in @ARGV are appended
#   function   - C source used as the body of main() in the link test
#   debug      - when true, echo each compiler command and do not silence
#                the compiler's output
#
# Dies with "LIBS argument badly-formed: ..." / "INC argument badly-formed:
# ..." on malformed LIBS/INC entries, "Can't link/include ..." when a header
# or library cannot be built against, and "wrong result: ..." when the test
# program builds but exits non-zero.
sub assert_lib {
    my %args = @_;

    my @libs     = _as_list($args{lib});
    my @libpaths = _as_list($args{libpath});
    my @headers  = _as_list($args{header});
    my @incpaths = _as_list($args{incpath});

    # work-a-like for Makefile.PL's LIBS and INC arguments
    # if given as command-line argument, append to %args
    for my $arg (@ARGV) {
        for my $mm_attr_key (qw(LIBS INC)) {
            if (my ($mm_attr_value) = $arg =~ /\A $mm_attr_key = (.*)/x) {
                # it is tempting to put some \s* into the expression, but the
                # MM command-line parser only accepts LIBS etc. followed by =,
                # so we should not be any more lenient with whitespace than that
                $args{$mm_attr_key} .= " $mm_attr_value";
            }
        }
    }

    # using special form of split to trim whitespace
    if (defined($args{LIBS})) {
        foreach my $arg (split(' ', $args{LIBS})) {
            die("LIBS argument badly-formed: $arg\n") unless($arg =~ /^-l/i);
            # lower-case -l names a library, upper-case -L a search path
            push @{$arg =~ /^-l/ ? \@libs : \@libpaths}, substr($arg, 2);
        }
    }
    if (defined($args{INC})) {
        foreach my $arg (split(' ', $args{INC})) {
            die("INC argument badly-formed: $arg\n") unless($arg =~ /^-I/);
            push @incpaths, substr($arg, 2);
        }
    }

    my @cc = _findcc();
    my @missing;
    my @wrongresult;

    # first figure out which headers we can't find ...
    for my $header (@headers) {
        my ($ch, $cfile) = File::Temp::tempfile(
            'assertlibXXXXXXXX', SUFFIX => '.c'
        );
        print $ch qq{#include <$header>\nint main(void) { return 0; }\n};
        close($ch);
        my $exefile = File::Temp::mktemp( 'assertlibXXXXXXXX' ) . $Config{_exe};
        my @sys_cmd;
        # FIXME: re-factor - almost identical code later when linking
        if ( $Config{cc} eq 'cl' ) {                 # Microsoft compiler
            require Win32;
            @sys_cmd = (
                @cc,
                $cfile,
                "/Fe$exefile",
                (map { '/I'.Win32::GetShortPathName($_) } @incpaths)
            );
        }
        elsif ($Config{cc} =~ /bcc32(\.exe)?/) {    # Borland
            @sys_cmd = (
                @cc,
                (map { "-I$_" } @incpaths),
                "-o$exefile",
                $cfile
            );
        }
        else { # Unix-ish: gcc, Sun, AIX (gcc, cc), ...
            @sys_cmd = (
                @cc,
                $cfile,
                (map { "-I$_" } @incpaths),
                "-o", "$exefile"
            );
        }
        warn "# @sys_cmd\n" if $args{debug};
        my $rv = $args{debug} ? system(@sys_cmd) : _quiet_system(@sys_cmd);
        push @missing, $header if $rv != 0 || ! -x $exefile;
        _cleanup_exe($exefile);
        unlink $cfile;
    }

    # now do each library in turn with headers
    my ($ch, $cfile) = File::Temp::tempfile(
        'assertlibXXXXXXXX', SUFFIX => '.c'
    );
    print $ch qq{#include <$_>\n} foreach (@headers);
    print $ch "int main(void) { ".($args{function} || 'return 0;')." }\n";
    close($ch);
    for my $lib ( @libs ) {
        my $exefile = File::Temp::mktemp( 'assertlibXXXXXXXX' ) . $Config{_exe};
        my @sys_cmd;
        if ( $Config{cc} eq 'cl' ) {                 # Microsoft compiler
            require Win32;
            # this is horribly sensitive to the order of arguments
            @sys_cmd = (
                @cc,
                $cfile,
                "${lib}.lib",
                "/Fe$exefile",
                (map { '/I'.Win32::GetShortPathName($_) } @incpaths),
                "/link",
                (map {'/libpath:'.Win32::GetShortPathName($_)} @libpaths),
            );
        }
        elsif ($Config{cc} eq 'CC/DECC') {          # VMS
            # NOTE(review): no command is built for VMS, so @sys_cmd stays
            # empty here - confirm whether VMS support was ever intended.
        }
        elsif ($Config{cc} =~ /bcc32(\.exe)?/) {    # Borland
            @sys_cmd = (
                @cc,
                "-o$exefile",
                "-l$lib",
                (map { "-I$_" } @incpaths),
                (map { "-L$_" } @libpaths),
                $cfile);
        }
        else {                                       # Unix-ish
                                                     # gcc, Sun, AIX (gcc, cc)
            @sys_cmd = (
                @cc,
                $cfile,
                "-o", "$exefile",
                "-l$lib",
                (map { "-I$_" } @incpaths),
                (map { "-L$_" } @libpaths)
            );
        }
        warn "# @sys_cmd\n" if $args{debug};
        my $rv = $args{debug} ? system(@sys_cmd) : _quiet_system(@sys_cmd);
        push @missing, $lib if $rv != 0 || ! -x $exefile;
        # the program built: run it and record a non-zero exit status
        push @wrongresult, $lib
            if $rv == 0 && -x $exefile
            && system(File::Spec->rel2abs($exefile)) != 0;
        _cleanup_exe($exefile);
    }
    unlink $cfile;

    my $miss_string = join( q{, }, map { qq{'$_'} } @missing );
    die("Can't link/include $miss_string\n") if @missing;
    my $wrong_string = join( q{, }, map { qq{'$_'} } @wrongresult);
    die("wrong result: $wrong_string\n") if @wrongresult;
}

# Remove the test executable together with the object file and the
# ".manifest" file that may have been left next to it.
sub _cleanup_exe {
    my ($exefile) = @_;
    my $ofile = $exefile;
    # \Q...\E: the executable suffix is literal text (e.g. ".exe"), not a
    # pattern, so its dot must not match arbitrary characters.
    $ofile =~ s/\Q$Config{_exe}\E$/$Config{_o}/;
    unlink $exefile if -f $exefile;
    unlink $ofile if -f $ofile;
    unlink "$exefile\.manifest" if -f "$exefile\.manifest";
    return
}

# Locate the C compiler named by $Config{cc}: first as given, then in each
# directory of $ENV{PATH}.  Returns the compiler followed by any extra
# words from $Config{cc}; dies if no executable is found.
sub _findcc {
    my @paths = split(/\Q$Config{path_sep}\E/, $ENV{PATH});
    my @cc = split(/\s+/, $Config{cc});
    return @cc if -x $cc[0];
    foreach my $path (@paths) {
        my $compiler = File::Spec->catfile($path, $cc[0]) . $Config{_exe};
        return ($compiler, @cc[1 .. $#cc]) if -x $compiler;
    }
    die("Couldn't find your C compiler\n");
}

# code substantially borrowed from IPC::Run3
#
# Run a command via system() with STDOUT and STDERR redirected to the null
# device, restore both handles afterwards, and return system()'s value.
sub _quiet_system {
    my (@cmd) = @_;

    # save handles
    local *STDOUT_SAVE;
    local *STDERR_SAVE;
    open STDOUT_SAVE, ">&STDOUT" or die "CheckLib: $! saving STDOUT";
    open STDERR_SAVE, ">&STDERR" or die "CheckLib: $! saving STDERR";

    # redirect to nowhere
    local *DEV_NULL;
    open DEV_NULL, ">" . File::Spec->devnull
        or die "CheckLib: $! opening handle to null device";
    open STDOUT, ">&" . fileno DEV_NULL
        or die "CheckLib: $! redirecting STDOUT to null handle";
    open STDERR, ">&" . fileno DEV_NULL
        or die "CheckLib: $! redirecting STDERR to null handle";

    # run system command
    my $rv = system(@cmd);

    # restore handles
    open STDOUT, ">&" . fileno STDOUT_SAVE
        or die "CheckLib: $! restoring STDOUT handle";
    open STDERR, ">&" . fileno STDERR_SAVE
        or die "CheckLib: $! restoring STDERR handle";

    return $rv;
}

=head1 PLATFORMS SUPPORTED

You must have a C compiler installed. We check for C<$Config{cc}>, both literally as it is in Config.pm and also in the $PATH.
It has been tested with varying degrees of rigorousness on: =over =item gcc (on Linux, *BSD, Mac OS X, Solaris, Cygwin) =item Sun's compiler tools on Solaris =item IBM's tools on AIX =item SGI's tools on Irix 6.5 =item Microsoft's tools on Windows =item MinGW on Windows (with Strawberry Perl) =item Borland's tools on Windows =item QNX =back =head1 WARNINGS, BUGS and FEEDBACK This is a very early release intended primarily for feedback from people who have discussed it. The interface may change and it has not been adequately tested. Feedback is most welcome, including constructive criticism. Bug reports should be made using L<http://rt.cpan.org/> or by email. When submitting a bug report, please include the output from running: perl -V perl -MDevel::CheckLib -e0 =head1 SEE ALSO L<Devel::CheckOS> L<Probe::Perl> =head1 AUTHORS David Cantrell E<lt>david@cantrell.org.ukE<gt> David Golden E<lt>dagolden@cpan.orgE<gt> Yasuhiro Matsumoto E<lt>mattn@cpan.orgE<gt> Thanks to the cpan-testers-discuss mailing list for prompting us to write it in the first place; to Chris Williams for help with Borland support; to Tony Cook for help with Microsoft compiler command-line options. =head1 COPYRIGHT and LICENCE Copyright 2007 David Cantrell. Portions copyright 2007 David Golden. This module is free-as-in-speech software, and may be used, distributed, and modified under the same conditions as perl itself. =head1 CONSPIRACY This module is also free-as-in-mason software.
=cut 1; Text-Hunspell-2.08/Hunspell.xs0000644000175000017500000000571112124375254015702 0ustar cosimocosimo#include "hunspell.h" #include "assert.h" #ifdef __cplusplus extern "C" { #endif #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #ifdef __cplusplus } #endif using namespace std; /*using namespace Hunspell;*/ /* $Id: Hunspell.xs,v 1.5 2002/08/29 20:28:00 moseley Exp $ */ static void * get_mortalspace ( size_t nbytes ) { SV * mortal; mortal = sv_2mortal( NEWSV(0, nbytes ) ); return (void *)SvPVX(mortal); } MODULE = Text::Hunspell PACKAGE = Text::Hunspell PROTOTYPES: ENABLE # Make sure that we have at least xsubpp version 1.922. REQUIRE: 1.922 Hunspell * Hunspell::new(aff,dic ) char *aff; char *dic; CODE: RETVAL = new Hunspell(aff, dic); OUTPUT: RETVAL int Hunspell::delete(h) Hunspell *h; CODE: warn("Text::Hunspell::delete() is deprecated and no replacement is needed"); RETVAL = 1; OUTPUT: RETVAL void Hunspell::DESTROY() int Hunspell::add_dic(dic) char *dic; CODE: RETVAL = THIS->add_dic(dic); OUTPUT: RETVAL int Hunspell::check(buf) char *buf; CODE: RETVAL = THIS->spell(buf); OUTPUT: RETVAL void Hunspell::suggest(buf) char *buf; PREINIT: char **wlsti; int i, val; PPCODE: val = THIS->suggest(&wlsti, buf); for (int i = 0; i < val; i++) { PUSHs(sv_2mortal(newSVpv( wlsti[i] ,0 ))); free(wlsti[i]); } void Hunspell::analyze(buf) char *buf; PREINIT: char **wlsti; int i, val; PPCODE: val = THIS->analyze(&wlsti, buf); for (i = 0; i < val; i++) { PUSHs(sv_2mortal(newSVpv(wlsti[i], 0))); free(wlsti[i]); } void Hunspell::stem( buf) char *buf; PREINIT: char **wlsti; int i, val; PPCODE: val = THIS->stem(&wlsti, buf); for (int i = 0; i < val; i++) { PUSHs(sv_2mortal(newSVpv( wlsti[i] ,0 ))); free(wlsti[i]); } void Hunspell::generate( buf, sample) char *buf; char *sample; PREINIT: char **wlsti; int i, val; PPCODE: val = THIS->generate(&wlsti, buf, sample); for (int i = 0; i < val; i++) { PUSHs(sv_2mortal(newSVpv( wlsti[i] ,0 ))); free(wlsti[i]); } void 
Hunspell::generate2( buf, avref) AV * avref; char *buf; PREINIT: char ** array; char **wlsti; int len; SV ** elem; int i, val; PPCODE: len = av_len(avref) + 1; /* First allocate some memory for the pointers */ array = (char **) get_mortalspace( len * sizeof( *array )); /* Loop over each element copying pointers to the new array */ for (i=0; igenerate(&wlsti, buf, array, len); for (int i = 0; i < val; i++) { PUSHs(sv_2mortal(newSVpv( wlsti[i] ,0 ))); free(wlsti[i]); } Text-Hunspell-2.08/perlobject.map0000644000175000017500000000507712124255763016373 0ustar cosimocosimo# "perlobject.map" Dean Roehrich, version 19960302 # # TYPEMAPs # # HV * -> unblessed Perl HV object. # AV * -> unblessed Perl AV object. # # INPUT/OUTPUT maps # # O_* -> opaque blessed objects # T_* -> opaque blessed or unblessed objects # # O_OBJECT -> link an opaque C or C++ object to a blessed Perl object. # T_OBJECT -> link an opaque C or C++ object to an unblessed Perl object. # O_HvRV -> a blessed Perl HV object. # T_HvRV -> an unblessed Perl HV object. # O_AvRV -> a blessed Perl AV object. # T_AvRV -> an unblessed Perl AV object. TYPEMAP HV * T_HvRV AV * T_AvRV ###################################################################### OUTPUT # The Perl object is blessed into 'CLASS', which should be a # char* having the name of the package for the blessing. O_OBJECT sv_setref_pv( $arg, CLASS, (void*)$var ); T_OBJECT sv_setref_pv( $arg, Nullch, (void*)$var ); # Cannot use sv_setref_pv() because that will destroy # the HV-ness of the object. Remember that newRV() will increment # the refcount. O_HvRV $arg = sv_bless( newRV((SV*)$var), gv_stashpv(CLASS,1) ); T_HvRV $arg = newRV((SV*)$var); # Cannot use sv_setref_pv() because that will destroy # the AV-ness of the object. Remember that newRV() will increment # the refcount. 
O_AvRV $arg = sv_bless( newRV((SV*)$var), gv_stashpv(CLASS,1) ); T_AvRV $arg = newRV((SV*)$var); ###################################################################### INPUT O_OBJECT if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) $var = ($type)SvIV((SV*)SvRV( $arg )); else{ warn( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); XSRETURN_UNDEF; } T_OBJECT if( SvROK($arg) ) $var = ($type)SvIV((SV*)SvRV( $arg )); else{ warn( \"${Package}::$func_name() -- $var is not an SV reference\" ); XSRETURN_UNDEF; } O_HvRV if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVHV) ) $var = (HV*)SvRV( $arg ); else { warn( \"${Package}::$func_name() -- $var is not a blessed HV reference\" ); XSRETURN_UNDEF; } T_HvRV if( SvROK($arg) && (SvTYPE(SvRV($arg)) == SVt_PVHV) ) $var = (HV*)SvRV( $arg ); else { warn( \"${Package}::$func_name() -- $var is not an HV reference\" ); XSRETURN_UNDEF; } O_AvRV if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVAV) ) $var = (AV*)SvRV( $arg ); else { warn( \"${Package}::$func_name() -- $var is not a blessed AV reference\" ); XSRETURN_UNDEF; } T_AvRV if( SvROK($arg) && (SvTYPE(SvRV($arg)) == SVt_PVAV) ) $var = (AV*)SvRV( $arg ); else { warn( \"${Package}::$func_name() -- $var is not an AV reference\" ); XSRETURN_UNDEF; } Text-Hunspell-2.08/Makefile.PL0000755000175000017500000000352512124376154015512 0ustar cosimocosimouse lib qw(inc); use Config; use Devel::CheckLib; # # Try to warn the user if linking might fail. # Some systems seem to not install the unversioned library symlinks. # # Probably too platform specific, but seemed an acceptable compromise. # my $dlext = $Config{dlext} || 'so'; my $candidate_lib; for (split " " => $Config{libpth}) { my $lib_path = "$_/libhunspell.$dlext"; #warn "Checking $lib_path\n"; if (-e $lib_path) { $candidate_lib++; warn "Found '$lib_path'. 
Good.\n"; } } if (0 == $candidate_lib) { my $def_lib = "/usr/lib/libhunspell"; my ($possible_candidate) = glob("$def_lib-*.so"); $possible_candidate ||= "/usr/lib/libhunspell-.so"; warn "---------------------------------------------------------------------\n"; warn "Your system doesn't appear to have a libhunspell.$dlext link.\n"; warn "Linking might fail. If it does, you might want to try installing\n"; warn "the libhunspell-dev package (or the equivalent on your OS) or try:\n"; warn "\n"; warn " $Config{lns} $possible_candidate $def_lib.so\n"; warn "---------------------------------------------------------------------\n"; warn "\n"; } check_lib_or_exit( lib => 'hunspell', header => 'hunspell/hunspell.h', ); use ExtUtils::MakeMaker; my $CC = $ENV{"CXX"} || 'g++'; WriteMakefile( NAME => 'Text::Hunspell', VERSION_FROM => 'Hunspell.pm', LIBS => ['-lhunspell'], CC => $CC, LD => '$(CC)', PREREQ_PM => {}, # e.g., Module::Name => 1.1 XSOPT => '-C++', TYPEMAPS => ['perlobject.map', 'typemap'], META_MERGE => { resources => { repository => 'https://github.com/cosimo/perl5-text-hunspell', }, keywords => [ qw(hunspell spelling spell-checker text-processing) ], }, ); Text-Hunspell-2.08/Hunspell.pm0000644000175000017500000001236012140303613015646 0ustar cosimocosimopackage Text::Hunspell; require DynaLoader; use vars qw/ @ISA $VERSION /; @ISA = 'DynaLoader'; $VERSION = '2.08'; bootstrap Text::Hunspell $VERSION; # Preloaded methods go here. 1; __END__ =encoding utf8 =head1 NAME Text::Hunspell - Perl interface to the Hunspell library =head1 SYNOPSIS use Text::Hunspell; # You can use relative or absolute paths. my $speller = Text::Hunspell->new( "/usr/share/hunspell/en_US.aff", # Hunspell affix file "/usr/share/hunspell/en_US.dic" # Hunspell dictionary file ); die unless $speller; # Check a word against the dictionary my $word = 'opera'; print $speller->check($word) ? 
"'$word' found in the dictionary\n" : "'$word' not found in the dictionary!\n"; # Spell check suggestions my $misspelled = 'programmng'; my @suggestions = $speller->suggest($misspelled); print "\n", "You typed '$misspelled'. Did you mean?\n"; for (@suggestions) { print " - $_\n"; } # Add dictionaries later $speller->add_dic('dictionary_file.dic'); =head1 DESCRIPTION This module provides a Perl interface to the B library. This module is to meet the need of looking up many words, one at a time, in a single session, such as spell-checking a document in memory. The example code describes the interface on http://hunspell.sf.net =head1 DEPENDENCIES B on your system before installing this C Perl module. Hunspell location is: http://hunspell.sf.net There have been a number of bug reports because people failed to install hunspell before installing this module. This is an interface to the hunspell library installed on your system, not a replacement for hunspell. You must also have one hunspell dictionary installed when running the module's test suite. Also, please see the README and Changes files. README may have specific information about your platform. =head1 METHODS The following methods are available: =head2 Text::Hunspell->new($full_path_to_affix, $full_path_to_dic) Creates a new speller object. Parameters are: =over 4 =item full path of affix (.aff) file =item full path of dictionary (.dic) file =back Returns C if the object could not be created, which is unlikely. =head2 add_dic($path_to_dic) Adds a new dictionary to the current C object. This dictionary will use the same affix file as the original dictionary, so this is like using a personal word list in a given language. To check spellings in several different languages, use multiple C objects. =head2 check($word) Check the word. Returns 1 if the word is found, 0 otherwise. =head2 suggest($misspelled_word) Returns the list of suggestions for the misspelled word. 
The following methods are used for morphological analysis, which is looking at the structure of words; parts of speech, inflectional suffixes and so on. However, most of the dictionaries that Hunspell can use are missing this information and only contain affix flags which allow, for example, 'cat' to turn into 'cats' but not 'catability'. (Users of the French and Hungarian dictionaries will find that they have more information available.) =head2 analyze($word) Returns the analysis list for the word. This will be a list of strings that contain a stem word and the morphological information about the changes that have taken place from the stem. This will most likely be 'fl:X' strings that indicate that affix flag 'X' was applied to the stem. Words may have more than one stem, and each one will be returned as a different item in the list. However, with a French dictionary loaded, C will return st:chanson po:nom is:fem is:sg to tell you that "chanson" is a feminine singular noun, and C will return st:chanson po:nom is:fem is:pl to tell you that you've analyzed the plural of the same noun. =head2 stem($word) Returns the stem list for the word. This is a simpler version of the results from C. =head2 generate2($stem, \@suggestions) Returns a morphologically modified stem as defined in C<@suggestions> (got by analysis). With a French dictionary: $feminine_form = 'chanteuse'; @ana = $speller->analyze($feminine_form); $ana[0] =~ s/is:fem/is:mas/; print $speller->generate2($feminine_form, \@ana) will print 'chanteur'. =head2 generate($stem, $word) Returns morphologically modified stem like $word. $french_speller->generate('danseuse', 'chanteur'); tells us that the masculine form of 'danseuse' is 'danseur'. =head1 BUGS Probably. Yes, definitely. =head1 LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHORS Originally written by Eleonora, Eeleonora46_at_gmx_dot_netE. 
The current maintainer is Cosimo Streppone, Ecosimo@cpan.orgE This module is based on L written by Bill Moseley moseley at hank dot org. Hunspell is written as myspell by Kevin B. Hendricks. Hunspell is maintained by Németh László. Please see: http://hunspell.sf.net For the dictionaries: http://lingucomponent.openoffice.org/spell_dic.html http://magyarispell.sf.net for Hungarian dictionary =cut Text-Hunspell-2.08/README0000644000175000017500000001774112124255763014424 0ustar cosimocosimoNAME Text::Hunspell - Perl interface to the Hunspell library DESCRIPTION Text::Hunspell is an interface to the GNU Hunspell library. GNU Hunspell is a Free and Open Source spell checker. The Text::Hunspell module is a thin XS wrapper around the Hunspell C++ Library. Hopefully this is obvious, but you MUST have the Hunspell library installed on your system as a .so library, before installing this module. You also MUST have any hunspell dictionary installed for "make test" to pass. Hunspell's location is http://hunspell.sourceforge.net See BUILD NOTES and CURRENT ISSUES below before reporting any bugs. This module has been built and passed all tests on the following platforms: perl 5.8.8 on Linux, gcc version 4.0.1 hunspell 1.2.8 or higher Mandriva Linux 2006 Knoppix Linux (Debian Linux) Please read SUPPORT below if you have trouble building Text::Hunspell. INSTALLATION Windows users see below. 1) Install Hunspell and a Dictionary Make sure you have a current version of Hunspell installed. You must install both the Hunspell program and a dictionary. They are distributed as separate packages. The dictionary can be copied anywhere, since at startup the full path of the .dic/.aff files will be givern to hunspell. Hunspell and the dictionary packages contain README files that include installation instructions. Here's a basic overview: Hunspell must be installed first, then install the dictionary file and finally install the Text::Hunspell module. Hunspell installation example: 1. 
download and untar hunspell 2. cd hunspell 3. ./configure 4. make 5. cd ./src/hunspell 6. ./configure 7. make 8. copy makeso.sh file here 9. sh makeso.sh and manually run the copy and 2 link commands as described at the end of makeso.sh 10. cd ../tools 11. ./configure 12. make If you used a --prefix option to install Hunspell in a non-standard location you will need to adjust your path to include $PREFIX/bin. The configure script for the dictionary needs to find programs installed in the previous step. At this point you should be able to run Hunspell in interactive mode. For example: cd hunspell/src/tools ./example full_path_aff full_path_dic file_to_test 2) Build and install this Text::Hunspell module. $ perl Makefile.PL $ make $ make test # make install If you installed Hunspell in a non-standard location (for example, if you don't have root access) then you will need to tell Makefile.PL where to find the library. For example, if Hunspell was installed in $HOME/local (--prefix=$HOME/local) and the perl module should be installed in the perl library $HOME/perl_lib: $ perl Makefile.PL PREFIX=$HOME/perl_lib \ CCFLAGS=-I$HOME/local/include \ LIBS="-L$HOME/local/lib -lhunspell" $ LD_RUN_PATH=$HOME/local/lib make $ make test $ make install Perl must have been built with thread support in order to work with Text::Hunspell. WINDOWS USERS Randy Kobes has provided a PPM and the following instructions for installing Text::Hunspell on Windows. Thanks very much Randy. For installing on Win32, first get and install the "Full installer" executable at http://hunspell.net/win32/; this will install Hunspell into a location such as C:\Program Files\Hunspell. You will also need to fetch and install at least one of the precompiled dictionaries found on the same page. Make sure that the path to the Hunspell bin directory (e.g. C:\Program Files\Hunspell\bin\) is in your PATH environment variable. For help with setting your path see "set environment variables" in the Windows Help Utility.
You may need to reboot or open a new shell window after setting your path. The Hunspell .dll file must be located in the PATH before using Text::Hunspell. Then, to install Text::Hunspell, type at a DOS prompt (all on one line) C:\> ppm install http://theoryx5.uwinnipeg.ca/ppms/Text-Hunspell.ppd for an ActivePerl 8xx version, or C:\> ppm install http://theoryx5.uwinnipeg.ca/ppmpackages/Text-Hunspell.ppd for an ActivePerl 6xx version. If you wish to build Text::Hunspell from source: If you want to build Text::Hunspell yourself, you'll need a C compiler, which must be the same one that your Perl is built with (for ActivePerl, this means VC++ 6). Assuming you have that, in addition to the "Full installer" binary package at http://hunspell.net/win32/, you'll also need to get and install the hunspell-dev package (also located at http://hunspell.net/win32/), which contains the necessary .lib and .h files needed to compile the Perl module. As above, make sure the PATH environment variable points to the location of the installed Hunspell .dll file before building Text::Hunspell. Installation then proceeds as described for the Unix version: perl Makefile.PL nmake nmake test nmake install with the additional requirement of passing to 'perl Makefile.PL' the necessary arguments (e.g. INC and LIBS) to specify the locations of the lib and header files, if these were installed in a non-standard location. If a non-standard location was used, make sure that it is added to your PATH environment variable before running the tests. SUPPORT Before contacting me with problems building Text::Hunspell please try to debug as much as possible. For example, if "make test" fails, then run in verbose mode: make test TEST_VERBOSE=1 That may show which test is failing. It's easy to run the test script manually -- and you can even edit and add a few print statements to aid in debugging.
For example: perl -Iblib/lib -Iblib/arch t/test.t | less Use of LD_RUN_PATH, CCFLAGS and LIBS as above may also help if the build process fails. Remember that you *must* have the English dictionary installed for tests to pass (sorry, have to check against some dictionary). Also, you may need to set your LANG variable to "en_US" so that the English dictionary is selected. If all else fails, use the request tracker at: http://rt.cpan.org/NoAuth/Bugs.html?Dist=Text-Hunspell BUILD NOTES / CURRENT ISSUES A number of people have reported the $speller->store_replacement() test failing on some platforms. I don't know why. Search Google to find out why I don't know why. The code (from what I can tell) looks like it should be returning an error value, but fails on some platforms. Until I can get a better answer I'm going to assume Hunspell is broken and ignore the return value in t/test.t. So don't test the return value in your code, either. Some users of OS X have reported the test failing after calling clear_session(). The test is supposed to check if a word added to the session is removed after calling clear_session(). It's another error I have not been able to reproduce on the machines I tested on. "Pax" provided these notes: I just got Text::Hunspell to build and work under OpenBSD 3.7 and thought you might like a tiny suggestion: in the README, you might note that under this operating system you need to add -lstdc++ to the list of libraries you link against, e.g.: $ perl Makefile.PL \ PREFIX=/usr/local INSTALLDIRS=site \ CCFLAGS=-I/usr/local/include \ LIBS="-L/usr/local/lib -lhunspell -lstdc++" If you don't do this, then any attempt to use Text::Hunspell will throw thousands of unresolved symbol errors trying to load your shared object. This is against libhunspell in /usr/local/lib installed from the hunspell port in the OpenBSD 3.7 ports tree, FWIW.
A number of Solaris users have reported the need to set: LDFLAGS='-L/usr/local/lib -R/usr/local/lib' and also having to copy libstdc++* to /usr/lib. YMMV. Changes compared to Hunspell::text hunspell.h was modified, since class Hunspell was heavily modified between 1.2.8 and 1.4.x; among other changes, the methods spell and suggest got new parameters, which causes the function name to get modified from U _ZN8Hunspell5spellEPKc to U _ZN8Hunspell5spellEPKcPiPPc Text-Hunspell-2.08/META.yml0000664000175000017500000000107612140304161014772 0ustar cosimocosimo--- abstract: unknown author: - unknown build_requires: ExtUtils::MakeMaker: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.120921' keywords: - hunspell - spelling - spell-checker - text-processing license: unknown meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: Text-Hunspell no_index: directory: - t - inc requires: {} resources: repository: https://github.com/cosimo/perl5-text-hunspell version: 2.08 Text-Hunspell-2.08/META.json0000664000175000017500000000177312140304161015146 0ustar cosimocosimo{ "abstract" : "unknown", "author" : [ "unknown" ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.120921", "keywords" : [ "hunspell", "spelling", "spell-checker", "text-processing" ], "license" : [ "unknown" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Text-Hunspell", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : {} } }, "release_status" : "stable", "resources" : { "repository" : { "url" : "https://github.com/cosimo/perl5-text-hunspell" } }, "version" : "2.08" }