# mkportersteps.tcl
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#     static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#     static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep3(char *aBuf, int *pnBuf);
#     static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
  22. set O(Step1B2) {
  23. { at {} ate 1 }
  24. { bl {} ble 1 }
  25. { iz {} ize 1 }
  26. }
  27. set O(Step1B) {
  28. { "eed" fts5Porter_MGt0 "ee" 0 }
  29. { "ed" fts5Porter_Vowel "" 1 }
  30. { "ing" fts5Porter_Vowel "" 1 }
  31. }
  32. set O(Step2) {
  33. { "ational" fts5Porter_MGt0 "ate" }
  34. { "tional" fts5Porter_MGt0 "tion" }
  35. { "enci" fts5Porter_MGt0 "ence" }
  36. { "anci" fts5Porter_MGt0 "ance" }
  37. { "izer" fts5Porter_MGt0 "ize" }
  38. { "logi" fts5Porter_MGt0 "log" }
  39. { "bli" fts5Porter_MGt0 "ble" }
  40. { "alli" fts5Porter_MGt0 "al" }
  41. { "entli" fts5Porter_MGt0 "ent" }
  42. { "eli" fts5Porter_MGt0 "e" }
  43. { "ousli" fts5Porter_MGt0 "ous" }
  44. { "ization" fts5Porter_MGt0 "ize" }
  45. { "ation" fts5Porter_MGt0 "ate" }
  46. { "ator" fts5Porter_MGt0 "ate" }
  47. { "alism" fts5Porter_MGt0 "al" }
  48. { "iveness" fts5Porter_MGt0 "ive" }
  49. { "fulness" fts5Porter_MGt0 "ful" }
  50. { "ousness" fts5Porter_MGt0 "ous" }
  51. { "aliti" fts5Porter_MGt0 "al" }
  52. { "iviti" fts5Porter_MGt0 "ive" }
  53. { "biliti" fts5Porter_MGt0 "ble" }
  54. }
  55. set O(Step3) {
  56. { "icate" fts5Porter_MGt0 "ic" }
  57. { "ative" fts5Porter_MGt0 "" }
  58. { "alize" fts5Porter_MGt0 "al" }
  59. { "iciti" fts5Porter_MGt0 "ic" }
  60. { "ical" fts5Porter_MGt0 "ic" }
  61. { "ful" fts5Porter_MGt0 "" }
  62. { "ness" fts5Porter_MGt0 "" }
  63. }
  64. set O(Step4) {
  65. { "al" fts5Porter_MGt1 "" }
  66. { "ance" fts5Porter_MGt1 "" }
  67. { "ence" fts5Porter_MGt1 "" }
  68. { "er" fts5Porter_MGt1 "" }
  69. { "ic" fts5Porter_MGt1 "" }
  70. { "able" fts5Porter_MGt1 "" }
  71. { "ible" fts5Porter_MGt1 "" }
  72. { "ant" fts5Porter_MGt1 "" }
  73. { "ement" fts5Porter_MGt1 "" }
  74. { "ment" fts5Porter_MGt1 "" }
  75. { "ent" fts5Porter_MGt1 "" }
  76. { "ion" fts5Porter_MGt1_and_S_or_T "" }
  77. { "ou" fts5Porter_MGt1 "" }
  78. { "ism" fts5Porter_MGt1 "" }
  79. { "ate" fts5Porter_MGt1 "" }
  80. { "iti" fts5Porter_MGt1 "" }
  81. { "ous" fts5Porter_MGt1 "" }
  82. { "ive" fts5Porter_MGt1 "" }
  83. { "ize" fts5Porter_MGt1 "" }
  84. }
  85. proc sort_cb {lhs rhs} {
  86. set L [string range [lindex $lhs 0] end-1 end-1]
  87. set R [string range [lindex $rhs 0] end-1 end-1]
  88. string compare $L $R
  89. }
  90. proc create_step_function {name data} {
  91. set T(function) {
  92. static int fts5Porter${name}(char *aBuf, int *pnBuf){
  93. int ret = 0;
  94. int nBuf = *pnBuf;
  95. switch( aBuf[nBuf-2] ){
  96. ${switchbody}
  97. }
  98. return ret;
  99. }
  100. }
  101. set T(case) {
  102. case '${k}':
  103. ${ifstmts}
  104. break;
  105. }
  106. set T(if_0_0_0) {
  107. if( ${match} ){
  108. *pnBuf = nBuf - $n;
  109. }
  110. }
  111. set T(if_1_0_0) {
  112. if( ${match} ){
  113. if( ${cond} ){
  114. *pnBuf = nBuf - $n;
  115. }
  116. }
  117. }
  118. set T(if_0_1_0) {
  119. if( ${match} ){
  120. ${memcpy}
  121. *pnBuf = nBuf - $n + $nRep;
  122. }
  123. }
  124. set T(if_1_1_0) {
  125. if( ${match} ){
  126. if( ${cond} ){
  127. ${memcpy}
  128. *pnBuf = nBuf - $n + $nRep;
  129. }
  130. }
  131. }
  132. set T(if_1_0_1) {
  133. if( ${match} ){
  134. if( ${cond} ){
  135. *pnBuf = nBuf - $n;
  136. ret = 1;
  137. }
  138. }
  139. }
  140. set T(if_0_1_1) {
  141. if( ${match} ){
  142. ${memcpy}
  143. *pnBuf = nBuf - $n + $nRep;
  144. ret = 1;
  145. }
  146. }
  147. set T(if_1_1_1) {
  148. if( ${match} ){
  149. if( ${cond} ){
  150. ${memcpy}
  151. *pnBuf = nBuf - $n + $nRep;
  152. ret = 1;
  153. }
  154. }
  155. }
  156. set switchbody ""
  157. foreach I $data {
  158. set k [string range [lindex $I 0] end-1 end-1]
  159. lappend aCase($k) $I
  160. }
  161. foreach k [lsort [array names aCase]] {
  162. set ifstmts ""
  163. foreach I $aCase($k) {
  164. set zSuffix [lindex $I 0] ;# Suffix text for this rule
  165. set zRep [lindex $I 2] ;# Replacement text for rule
  166. set xCond [lindex $I 1] ;# Condition callback (or "")
  167. set n [string length $zSuffix]
  168. set nRep [string length $zRep]
  169. set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
  170. set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
  171. set cond "${xCond}(aBuf, nBuf-$n)"
  172. set bMemcpy [expr {$nRep>0}]
  173. set bCond [expr {$xCond!=""}]
  174. set bRet [expr {[llength $I]>3 && [lindex $I 3]}]
  175. set t $T(if_${bCond}_${bMemcpy}_${bRet})
  176. lappend ifstmts [string trim [subst -nocommands $t]]
  177. }
  178. set ifstmts [join $ifstmts "else "]
  179. append switchbody [subst -nocommands $T(case)]
  180. }
  181. puts [subst -nocommands $T(function)]
  182. }
  183. puts [string trim {
  184. /**************************************************************************
  185. ***************************************************************************
  186. ** GENERATED CODE STARTS HERE (mkportersteps.tcl)
  187. */
  188. }]
  189. foreach step [array names O] {
  190. create_step_function $step $O($step)
  191. }
  192. puts [string trim {
  193. /*
  194. ** GENERATED CODE ENDS HERE (mkportersteps.tcl)
  195. ***************************************************************************
  196. **************************************************************************/
  197. }]