123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#   static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#   static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep3(char *aBuf, int *pnBuf);
#   static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
# Rule tables, one per generated step function.  The array key is appended
# to "fts5Porter" to form the C function name.  Each rule is a list:
#
#     { SUFFIX CONDITION REPLACEMENT ?RETFLAG? }
#
#   SUFFIX       Text that must appear at the end of the token.
#   CONDITION    Name of the C function invoked on the stem before the rule
#                is applied, or "" when the rule is unconditional.
#   REPLACEMENT  Text written in place of SUFFIX ("" just removes SUFFIX).
#   RETFLAG      Optional.  When true, the generated function sets its
#                return value to 1 after applying the rule.
#
# Rules sharing the same second-to-last suffix character are tested in the
# order listed here, so the relative order of entries is significant.

set O(Step1B2) {
  { "at"  ""  "ate"  1 }
  { "bl"  ""  "ble"  1 }
  { "iz"  ""  "ize"  1 }
}

set O(Step1B) {
  { "eed"  fts5Porter_MGt0   "ee"  0 }
  { "ed"   fts5Porter_Vowel  ""    1 }
  { "ing"  fts5Porter_Vowel  ""    1 }
}

set O(Step2) {
  { "ational"  fts5Porter_MGt0  "ate"  }
  { "tional"   fts5Porter_MGt0  "tion" }
  { "enci"     fts5Porter_MGt0  "ence" }
  { "anci"     fts5Porter_MGt0  "ance" }
  { "izer"     fts5Porter_MGt0  "ize"  }
  { "logi"     fts5Porter_MGt0  "log"  }
  { "bli"      fts5Porter_MGt0  "ble"  }
  { "alli"     fts5Porter_MGt0  "al"   }
  { "entli"    fts5Porter_MGt0  "ent"  }
  { "eli"      fts5Porter_MGt0  "e"    }
  { "ousli"    fts5Porter_MGt0  "ous"  }
  { "ization"  fts5Porter_MGt0  "ize"  }
  { "ation"    fts5Porter_MGt0  "ate"  }
  { "ator"     fts5Porter_MGt0  "ate"  }
  { "alism"    fts5Porter_MGt0  "al"   }
  { "iveness"  fts5Porter_MGt0  "ive"  }
  { "fulness"  fts5Porter_MGt0  "ful"  }
  { "ousness"  fts5Porter_MGt0  "ous"  }
  { "aliti"    fts5Porter_MGt0  "al"   }
  { "iviti"    fts5Porter_MGt0  "ive"  }
  { "biliti"   fts5Porter_MGt0  "ble"  }
}

set O(Step3) {
  { "icate"  fts5Porter_MGt0  "ic" }
  { "ative"  fts5Porter_MGt0  ""   }
  { "alize"  fts5Porter_MGt0  "al" }
  { "iciti"  fts5Porter_MGt0  "ic" }
  { "ical"   fts5Porter_MGt0  "ic" }
  { "ful"    fts5Porter_MGt0  ""   }
  { "ness"   fts5Porter_MGt0  ""   }
}

set O(Step4) {
  { "al"     fts5Porter_MGt1              "" }
  { "ance"   fts5Porter_MGt1              "" }
  { "ence"   fts5Porter_MGt1              "" }
  { "er"     fts5Porter_MGt1              "" }
  { "ic"     fts5Porter_MGt1              "" }
  { "able"   fts5Porter_MGt1              "" }
  { "ible"   fts5Porter_MGt1              "" }
  { "ant"    fts5Porter_MGt1              "" }
  { "ement"  fts5Porter_MGt1              "" }
  { "ment"   fts5Porter_MGt1              "" }
  { "ent"    fts5Porter_MGt1              "" }
  { "ion"    fts5Porter_MGt1_and_S_or_T   "" }
  { "ou"     fts5Porter_MGt1              "" }
  { "ism"    fts5Porter_MGt1              "" }
  { "ate"    fts5Porter_MGt1              "" }
  { "iti"    fts5Porter_MGt1              "" }
  { "ous"    fts5Porter_MGt1              "" }
  { "ive"    fts5Porter_MGt1              "" }
  { "ize"    fts5Porter_MGt1              "" }
}
# Comparison callback for two rule entries.  Each argument is a rule list
# whose element 0 is the suffix text; the rules are ordered by the
# second-to-last character of that suffix.  Returns -1, 0 or 1 in the manner
# of [string compare].  A suffix shorter than two characters contributes an
# empty key.
#
# NOTE(review): no caller of this proc appears in the visible part of the
# script.
proc sort_cb {lhs rhs} {
  set lhsKey [string index [lindex $lhs 0] end-1]
  set rhsKey [string index [lindex $rhs 0] end-1]
  return [string compare $lhsKey $rhsKey]
}
# Write the C source of one generated porter-stemmer step function to stdout.
#
#   name  Text appended to "fts5Porter" to form the C function name.
#   data  List of rules.  Each rule is a list of three or four elements:
#           0: suffix text to match at the end of the token buffer
#           1: name of a C condition function, called as f(aBuf, nBuf-n)
#              where n is the suffix length, or "" for no condition
#           2: replacement text ("" just strips the suffix)
#           3: optional boolean; when true the generated code sets ret = 1
#              after applying the rule
#
# The generated function switches on aBuf[nBuf-2], so rules are grouped by
# the second-to-last character of their suffix.  Within a group the rules
# become an if / else-if chain in the order they appear in $data; the
# condition test is nested inside the suffix test, so the first rule whose
# suffix matches ends the chain even when its condition fails.
#
# NOTE(review): the generated code reads aBuf[nBuf-2] without checking nBuf.
# Presumably the C caller only passes tokens of two or more bytes -- confirm.
proc create_step_function {name data} {

  # Outer skeleton of the generated function.  All templates are expanded
  # with [subst -nocommands], so the square brackets stay literal while
  # variable references such as ${name} and ${switchbody} are replaced.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  # One "case" arm per distinct second-to-last suffix character.
  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # Per-rule templates, named T(if_<cond>_<memcpy>_<ret>):
  #   <cond>    1 if the rule has a condition function
  #   <memcpy>  1 if the rule has non-empty replacement text
  #   <ret>     1 if the rule sets the return value
  # All eight combinations are defined so that any rule shape is accepted.
  set T(if_0_0_0) {
    if( ${match} ){
      *pnBuf = nBuf - $n;
    }
  }
  set T(if_1_0_0) {
    if( ${match} ){
      if( ${cond} ){
        *pnBuf = nBuf - $n;
      }
    }
  }
  set T(if_0_1_0) {
    if( ${match} ){
      ${memcpy}
      *pnBuf = nBuf - $n + $nRep;
    }
  }
  set T(if_1_1_0) {
    if( ${match} ){
      if( ${cond} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
    }
  }
  # Previously missing: unconditional strip that also sets the return value.
  set T(if_0_0_1) {
    if( ${match} ){
      *pnBuf = nBuf - $n;
      ret = 1;
    }
  }
  set T(if_1_0_1) {
    if( ${match} ){
      if( ${cond} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
    }
  }
  set T(if_0_1_1) {
    if( ${match} ){
      ${memcpy}
      *pnBuf = nBuf - $n + $nRep;
      ret = 1;
    }
  }
  set T(if_1_1_1) {
    if( ${match} ){
      if( ${cond} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
    }
  }

  set switchbody ""

  # Bucket the rules by the second-to-last character of the suffix,
  # preserving their relative order inside each bucket.
  foreach I $data {
    set k [string range [lindex $I 0] end-1 end-1]
    lappend aCase($k) $I
  }

  # Emit the case arms in sorted order of the switch character.
  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep [lindex $I 2]            ;# Replacement text for rule
      set xCond [lindex $I 1]           ;# Condition callback (or "")

      set n [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments referenced by the templates.  The match test requires
      # the token to be strictly longer than the suffix.
      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      # Flags that select the template.
      set bMemcpy [expr {$nRep>0}]
      set bCond [expr {$xCond!=""}]
      set bRet [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Chain the rules of this bucket together as if / else-if.
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}
# Driver: write the banner, one generated C function per entry of the O
# array, then the closing banner, all on stdout.
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]

# Iterate the step names in sorted order.  [array names] returns them in
# hash-table order, which would make the order of the generated functions
# unstable; sorting makes the output reproducible.
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}

puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]
|