spell.txt 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771
  1. *spell.txt* Nvim
  2. VIM REFERENCE MANUAL by Bram Moolenaar
  3. Spell checking *spell*
  4. Type |gO| to see the table of contents.
  5. ==============================================================================
  6. 1. Quick start *spell-quickstart* *E756*
  7. This command switches on spell checking: >
  8. :setlocal spell spelllang=en_us
  9. This switches on the 'spell' option and specifies to check for US English.
  10. The words that are not recognized are highlighted with one of these:
  11. SpellBad word not recognized |hl-SpellBad|
  12. SpellCap word not capitalised |hl-SpellCap|
  13. SpellRare rare word |hl-SpellRare|
  14. SpellLocal wrong spelling for selected region |hl-SpellLocal|
  15. Vim only checks words for spelling, there is no grammar check.
  16. If the 'mousemodel' option is set to "popup" and the cursor is on a badly
  17. spelled word or it is "popup_setpos" and the mouse pointer is on a badly
  18. spelled word, then the popup menu will contain a submenu to replace the bad
  19. word. Note: this slows down the appearance of the popup menu.
  20. To search for the next misspelled word:
  21. *]s*
  22. ]s Move to next misspelled word after the cursor.
  23. A count before the command can be used to repeat.
  24. 'wrapscan' applies.
  25. *[s*
  26. [s Like "]s" but search backwards, find the misspelled
  27. word before the cursor. Doesn't recognize words
  28. split over two lines, thus may stop at words that are
  29. not highlighted as bad. Does not stop at a word with a
  30. missing capital at the start of a line.
  31. *]S*
  32. ]S Like "]s" but only stop at bad words, not at rare
  33. words or words for another region.
  34. *[S*
  35. [S Like "]S" but search backwards.
  36. To add words to your own word list:
  37. *zg*
  38. zg Add word under the cursor as a good word to the first
  39. name in 'spellfile'. A count may precede the command
  40. to indicate the entry in 'spellfile' to be used. A
  41. count of two uses the second entry.
  42. In Visual mode the selected characters are added as a
  43. word (including white space!).
  44. When the cursor is on text that is marked as badly
  45. spelled then the marked text is used.
  46. Otherwise the word under the cursor, separated by
  47. non-word characters, is used.
  48. If the word is explicitly marked as a bad word in
  49. another spell file the result is unpredictable.
  50. *zG*
  51. zG Like "zg" but add the word to the internal word list
  52. |internal-wordlist|.
  53. *zw*
  54. zw Like "zg" but mark the word as a wrong (bad) word.
  55. If the word already appears in 'spellfile' it is
  56. turned into a comment line. See |spellfile-cleanup|
  57. for getting rid of those.
  58. *zW*
  59. zW Like "zw" but add the word to the internal word list
  60. |internal-wordlist|.
  61. zuw *zug* *zuw*
  62. zug Undo |zw| and |zg|, remove the word from the entry in
  63. 'spellfile'. Count used as with |zg|.
  64. zuW *zuG* *zuW*
  65. zuG Undo |zW| and |zG|, remove the word from the internal
  66. word list. Count used as with |zg|.
  67. *:spe* *:spellgood* *E1280*
  68. :[count]spe[llgood] {word}
  69. Add {word} as a good word to 'spellfile', like with
  70. |zg|. Without count the first name is used, with a
  71. count of two the second entry, etc.
  72. :spe[llgood]! {word} Add {word} as a good word to the internal word list,
  73. like with |zG|.
  74. *:spellw* *:spellwrong*
  75. :[count]spellw[rong] {word}
  76. Add {word} as a wrong (bad) word to 'spellfile', as
  77. with |zw|. Without count the first name is used, with
  78. a count of two the second entry, etc.
  79. :spellw[rong]! {word} Add {word} as a wrong (bad) word to the internal word
  80. list, like with |zW|.
  81. *:spellra* *:spellrare*
  82. :[count]spellra[re] {word}
  83. Add {word} as a rare word to 'spellfile', similar to
  84. |zw|. Without count the first name is used, with
  85. a count of two the second entry, etc.
  86. There are no normal mode commands to mark words as
  87. rare as this is a fairly uncommon command and all
  88. intuitive commands for this are already taken. If you
  89. want you can add mappings with e.g.: >
  90. nnoremap z? :exe ':spellrare ' .. expand('<cWORD>')<CR>
  91. nnoremap z/ :exe ':spellrare! ' .. expand('<cWORD>')<CR>
  92. < |:spellundo|, |zuw|, or |zuW| can be used to undo this.
  93. :spellra[re]! {word} Add {word} as a rare word to the internal word
  94. list, similar to |zW|.
  95. :[count]spellu[ndo] {word} *:spellu* *:spellundo*
  96. Like |zuw|. [count] used as with |:spellgood|.
  97. :spellu[ndo]! {word} Like |zuW|. [count] used as with |:spellgood|.
  98. After adding a word to 'spellfile' with the above commands its associated
  99. ".spl" file will automatically be updated and reloaded. If you change
  100. 'spellfile' manually you need to use the |:mkspell| command. This sequence of
  101. commands mostly works well: >
  102. :edit <file in 'spellfile'>
  103. < (make changes to the spell file) >
  104. :mkspell! %
  105. More details about the 'spellfile' format below |spell-wordlist-format|.
  106. *internal-wordlist*
  107. The internal word list is used for all buffers where 'spell' is set. It is
  108. not stored, it is lost when you exit Vim. It is also cleared when 'encoding'
  109. is set.
  110. Finding suggestions for bad words:
  111. *z=*
  112. z= For the word under/after the cursor suggest correctly
  113. spelled words. This also works to find alternatives
  114. for a word that is not highlighted as a bad word,
  115. e.g., when the word after it is bad.
  116. In Visual mode the highlighted text is taken as the
  117. word to be replaced.
  118. The results are sorted on similarity to the word being
  119. replaced.
  120. This may take a long time. Hit CTRL-C when you get
  121. bored.
  122. If the command is used without a count the
  123. alternatives are listed and you can enter the number
  124. of your choice or press <Enter> if you don't want to
  125. replace. You can also use the mouse to click on your
  126. choice (only works if the mouse can be used in Normal
  127. mode and when there are no line wraps). Click on the
  128. first line (the header) to cancel.
  129. The suggestions listed normally replace a highlighted
  130. bad word. Sometimes they include other text, in that
  131. case the replaced text is also listed after a "<".
  132. If a count is used that suggestion is used, without
  133. prompting. For example, "1z=" always takes the first
  134. suggestion.
  135. If 'verbose' is non-zero a score will be displayed
  136. with the suggestions to indicate the similarity to the
  137. badly spelled word (the higher the score the more
  138. different).
  139. When a word was replaced the redo command "." will
  140. repeat the word replacement. This works like "ciw",
  141. the good word and <Esc>. This does NOT work for Thai
  142. and other languages without spaces between words.
  143. *:spellr* *:spellrepall* *E752* *E753*
  144. :spellr[epall] Repeat the replacement done by |z=| for all matches
  145. with the replaced word in the current window.
  146. In Insert mode, when the cursor is after a badly spelled word, you can use
  147. CTRL-X s to find suggestions. This works like Insert mode completion. Use
  148. CTRL-N to use the next suggestion, CTRL-P to go back. |i_CTRL-X_s|
  149. The 'spellsuggest' option influences how the list of suggestions is generated
  150. and sorted. See |'spellsuggest'|.
  151. The 'spellcapcheck' option is used to check that the first word of a sentence
  152. starts with a capital. This doesn't work for the first word in the file.
  153. When there is a line break right after a sentence the highlighting of the next
  154. line may be postponed. Use |CTRL-L| when needed. Also see |set-spc-auto| for
  155. how it can be set automatically when 'spelllang' is set.
  156. The 'spelloptions' option has a few more flags that influence the way spell
  157. checking works.
  158. Vim counts the number of times a good word is encountered. This is used to
  159. sort the suggestions: words that have been seen before get a small bonus,
  160. words that have been seen often get a bigger bonus. The COMMON item in the
  161. affix file can be used to define common words, so that this mechanism also
  162. works in a new or short file |spell-COMMON|.
  163. ==============================================================================
  164. 2. Remarks on spell checking *spell-remarks*
  165. PERFORMANCE
  166. Vim does on-the-fly spell checking. To make this work fast the word list is
  167. loaded in memory. Thus this uses a lot of memory (1 Mbyte or more). There
  168. might also be a noticeable delay when the word list is loaded, which happens
  169. when 'spell' is set and when 'spelllang' is set while 'spell' was already set.
  170. To minimize the delay each word list is only loaded once, it is not deleted
  171. when 'spelllang' is made empty or 'spell' is reset. When 'encoding' is set
  172. all the word lists are reloaded, thus you may notice a delay then too.
  173. REGIONS
  174. A word may be spelled differently in various regions. For example, English
  175. comes in (at least) these variants:
  176. en all regions
  177. en_au Australia
  178. en_ca Canada
  179. en_gb Great Britain
  180. en_nz New Zealand
  181. en_us USA
  182. Words that are not used in one region but are used in another region are
  183. highlighted with SpellLocal |hl-SpellLocal|.
  184. Always use lowercase letters for the language and region names.
  185. When adding a word with |zg| or another command it's always added for all
  186. regions. You can change that by manually editing the 'spellfile'. See
  187. |spell-wordlist-format|. Note that the regions as specified in the files in
  188. 'spellfile' are only used when all entries in 'spelllang' specify the same
  189. region (not counting files specified by their .spl name).
  190. *spell-german*
  191. Specific exception: For German these special regions are used:
  192. de all German words accepted
  193. de_de old and new spelling
  194. de_19 old spelling
  195. de_20 new spelling
  196. de_at Austria
  197. de_ch Switzerland
  198. *spell-russian*
  199. Specific exception: For Russian these special regions are used:
  200. ru all Russian words accepted
  201. ru_ru "IE" letter spelling
  202. ru_yo "YO" letter spelling
  203. *spell-yiddish*
  204. Yiddish requires using "utf-8" encoding, because of the special characters
  205. used. If you are using latin1 Vim will use transliterated (romanized) Yiddish
  206. instead. If you want to use transliterated Yiddish with utf-8 use "yi-tr".
  207. In a table:
  208. 'encoding' 'spelllang'
  209. utf-8 yi Yiddish
  210. latin1 yi transliterated Yiddish
  211. utf-8 yi-tr transliterated Yiddish
  212. *spell-cjk*
  213. Chinese, Japanese and other East Asian characters are normally marked as
  214. errors, because spell checking of these characters is not supported. If
  215. 'spelllang' includes "cjk", these characters are not marked as errors. This
  216. is useful when editing text with spell checking while some Asian words are
  217. present.
  218. SPELL FILES *spell-load*
  219. Vim searches for spell files in the "spell" subdirectory of the directories in
  220. 'runtimepath'. The name is: LL.EEE.spl, where:
  221. LL the language name
  222. EEE the value of 'encoding'
  223. The value for "LL" comes from 'spelllang', but excludes the region name.
  224. Examples:
  225. 'spelllang' LL ~
  226. en_us en
  227. en-rare en-rare
  228. medical_ca medical
  229. Only the first file is loaded, the one that is first in 'runtimepath'. If
  230. this succeeds then additionally files with the name LL.EEE.add.spl are loaded.
  231. All the ones that are found are used.
  232. If no spell file is found the |SpellFileMissing| autocommand event is
  233. triggered. This may trigger the |spellfile.vim| plugin to offer to
  234. download the spell file for you.
  235. Additionally, the files related to the names in 'spellfile' are loaded. These
  236. are the files that |zg| and |zw| add good and wrong words to.
  237. Exceptions:
  238. - Vim uses "latin1" when 'encoding' is "iso-8859-15". The euro sign doesn't
  239. matter for spelling.
  240. - When no spell file for 'encoding' is found "ascii" is tried. This only
  241. works for languages where nearly all words are ASCII, such as English. It
  242. helps when 'encoding' is not "latin1", such as iso-8859-2, and English text
  243. is being edited. For the ".add" files the same name as the found main
  244. spell file is used.
  245. For example, with these values:
  246. 'runtimepath' is "~/.config/nvim,/usr/share/nvim/runtime/,~/.config/nvim/after"
  247. 'encoding' is "iso-8859-2"
  248. 'spelllang' is "pl"
  249. Vim will look for:
  250. 1. ~/.config/nvim/spell/pl.iso-8859-2.spl
  251. 2. /usr/share/nvim/runtime/spell/pl.iso-8859-2.spl
  252. 3. ~/.config/nvim/spell/pl.iso-8859-2.add.spl
  253. 4. /usr/share/nvim/runtime/spell/pl.iso-8859-2.add.spl
  254. 5. ~/.config/nvim/after/spell/pl.iso-8859-2.add.spl
  255. This assumes 1. is not found and 2. is found.
  256. If 'encoding' is "latin1" Vim will look for:
  257. 1. ~/.config/nvim/spell/pl.latin1.spl
  258. 2. /usr/share/nvim/runtime/spell/pl.latin1.spl
  259. 3. ~/.config/nvim/after/spell/pl.latin1.spl
  260. 4. ~/.config/nvim/spell/pl.ascii.spl
  261. 5. /usr/share/nvim/runtime/spell/pl.ascii.spl
  262. 6. ~/.config/nvim/after/spell/pl.ascii.spl
  263. This assumes none of them are found (Polish doesn't make sense when leaving
  264. out the non-ASCII characters).
  265. A spell file might not be available in the current 'encoding'. See
  266. |spell-mkspell| about how to create a spell file. Converting a spell file
  267. with "iconv" will NOT work!
  268. *spell-sug-file* *E781*
  269. If there is a file with exactly the same name as the ".spl" file but ending in
  270. ".sug", that file will be used for giving better suggestions. It isn't loaded
  271. before suggestions are made to reduce memory use.
  272. *E758* *E759* *E778* *E779* *E780* *E782*
  273. When loading a spell file Vim checks that it is properly formatted. If you
  274. get an error the file may be truncated, modified or intended for another Vim
  275. version.
  276. SPELLFILE CLEANUP *spellfile-cleanup*
  277. The |zw| command turns existing entries in 'spellfile' into comment lines.
  278. This avoids having to write a new file every time, but results in the file
  279. only getting longer, never shorter. To clean up the comment lines in all
  280. ".add" spell files do this: >
  281. :runtime spell/cleanadd.vim
  282. This deletes all comment lines, except the ones that start with "##". Use
  283. "##" lines to add comments that you want to keep.
  284. You can invoke this script as often as you like. A variable is provided to
  285. skip updating files that have been changed recently. Set it to the number of
  286. seconds that must have passed since a file was changed before it will be cleaned.
  287. For example, to clean only files that were not changed in the last hour: >
  288. let g:spell_clean_limit = 60 * 60
  289. The default is one second.
  290. WORDS
  291. Vim uses a fixed method to recognize a word. This is independent of
  292. 'iskeyword', so that it also works in help files and for languages that
  293. include characters like '-' in 'iskeyword'. The word characters do depend on
  294. 'encoding'.
  295. The table with word characters is stored in the main .spl file. Therefore it
  296. matters what the current locale is when generating it! A .add.spl file does
  297. not contain a word table though.
  298. For a word that starts with a digit the digit is ignored, unless the word as a
  299. whole is recognized. Thus if "3D" is a word and "D" is not then "3D" is
  300. recognized as a word, but if "3D" is not a word then only the "D" is marked as
  301. bad. Hex numbers in the form 0x12ab and 0X12AB are recognized.
  302. WORD COMBINATIONS
  303. It is possible to spell-check words that include a space. This is used to
  304. recognize words that are invalid when used by themselves, e.g. for "et al.".
  305. It can also be used to recognize "the the" and highlight it.
  306. The number of spaces is irrelevant. In most cases a line break may also
  307. appear. However, this makes it difficult to find out where to start checking
  308. for spelling mistakes. When you make a change to one line and only that line
  309. is redrawn Vim won't look in the previous line, thus when "et" is at the end
  310. of the previous line "al." will be flagged as an error. And when you type
  311. "the<CR>the" the highlighting doesn't appear until the first line is redrawn.
  312. Use |CTRL-L| to redraw right away. "[s" will also stop at a word combination
  313. with a line break.
  314. When encountering a line break Vim skips characters such as '*', '>' and '"',
  315. so that comments in C, shell and Vim code can be spell checked.
  316. SYNTAX HIGHLIGHTING *spell-syntax*
  317. Files that use syntax highlighting can specify where spell checking should be
  318. done:
  319. 1. everywhere default
  320. 2. in specific items use "contains=@Spell"
  321. 3. everywhere but specific items use "contains=@NoSpell"
  322. For the second method adding the @NoSpell cluster will disable spell checking
  323. again. This can be used, for example, to add @Spell to the comments of a
  324. program, and add @NoSpell for items that shouldn't be checked.
  325. Also see |:syn-spell| for text that is not in a syntax item.
  326. VIM SCRIPTS
  327. If you want to write a Vim script that does something with spelling, you may
  328. find these functions useful:
  329. spellbadword() find badly spelled word at the cursor
  330. spellsuggest() get list of spelling suggestions
  331. soundfold() get the sound-a-like version of a word
  332. SETTING 'spellcapcheck' AUTOMATICALLY *set-spc-auto*
  333. After the 'spelllang' option has been set successfully, Vim will source the
  334. files "spell/LANG.vim" in 'runtimepath'. "LANG" is the value of 'spelllang'
  335. up to the first comma, dot or underscore. This can be used to set options
  336. specifically for the language, especially 'spellcapcheck'.
  337. The distribution includes a few of these files. Use this command to see what
  338. they do: >
  339. :next $VIMRUNTIME/spell/*.vim
  340. Note that the default scripts don't set 'spellcapcheck' if it was changed from
  341. the default value. This assumes the user prefers another value then.
  342. DOUBLE SCORING *spell-double-scoring*
  343. The 'spellsuggest' option can be used to select "double" scoring. This
  344. mechanism is based on the principle that there are two kinds of spelling
  345. mistakes:
  346. 1. You know how to spell the word, but mistype something. This results in a
  347. small editing distance (character swapped/omitted/inserted) and possibly a
  348. word that sounds completely different.
  349. 2. You don't know how to spell the word and type something that sounds right.
  350. The edit distance can be big but the word is similar after sound-folding.
  351. Since scores for these two mistakes will be very different we use a list
  352. for each and mix them.
  353. The sound-folding is slow and people that know the language won't make the
  354. second kind of mistakes. Therefore 'spellsuggest' can be set to select the
  355. preferred method for scoring the suggestions.
  356. ==============================================================================
  357. 3. Generating a spell file *spell-mkspell*
  358. Vim uses a binary file format for spelling. This greatly speeds up loading
  359. the word list and keeps it small.
  360. *.aff* *.dic* *Myspell*
  361. You can create a Vim spell file from the .aff and .dic files that Myspell
  362. uses. Myspell is used by OpenOffice.org and Mozilla. The OpenOffice .oxt
  363. files are zip files which contain the .aff and .dic files. You should be able
  364. to find them here:
  365. https://extensions.services.openoffice.org/dictionary
  366. The older, OpenOffice 2 files may be used if this doesn't work:
  367. http://wiki.services.openoffice.org/wiki/Dictionaries
  368. You can also use a plain word list. The results are the same, the choice
  369. depends on what word lists you can find.
  370. If you install Aap (from www.a-a-p.org) you can use the recipes in the
  371. runtime/spell/??/ directories. Aap will take care of downloading the files,
  372. apply patches needed for Vim and build the .spl file.
  373. Make sure your current locale is set properly, otherwise Vim doesn't know what
  374. characters are upper/lower case letters. If the locale isn't available (e.g.,
  375. when using an MS-Windows codepage on Unix) add tables to the .aff file
  376. |spell-affix-chars|. If the .aff file doesn't define a table then the word
  377. table of the currently active spelling is used. If spelling is not active
  378. then Vim will try to guess.
  379. *:mksp* *:mkspell*
  380. :mksp[ell][!] [-ascii] {outname} {inname} ...
  381. Generate a Vim spell file from word lists. Example: >
  382. :mkspell /tmp/nl nl_NL.words
  383. < *E751*
  384. When {outname} ends in ".spl" it is used as the output
  385. file name. Otherwise it should be a language name,
  386. such as "en", without the region name. The file
  387. written will be "{outname}.{encoding}.spl", where
  388. {encoding} is the value of the 'encoding' option.
  389. When the output file already exists [!] must be used
  390. to overwrite it.
  391. When the [-ascii] argument is present, words with
  392. non-ascii characters are skipped. The resulting file
  393. ends in "ascii.spl".
  394. The input can be the Myspell format files {inname}.aff
  395. and {inname}.dic. If {inname}.aff does not exist then
  396. {inname} is used as the file name of a plain word
  397. list.
  398. Multiple {inname} arguments can be given to combine
  399. regions into one Vim spell file. Example: >
  400. :mkspell ~/.config/nvim/spell/en /tmp/en_US /tmp/en_CA /tmp/en_AU
  401. < This combines the English word lists for US, CA and AU
  402. into one en.spl file.
  403. Up to eight regions can be combined. *E754* *E755*
  404. The REP and SAL items of the first .aff file where
  405. they appear are used. |spell-REP| |spell-SAL|
  406. *E845*
  407. This command uses a lot of memory, required to find
  408. the optimal word tree (Polish, Italian and Hungarian
  409. require several hundred Mbyte). The final result will
  410. be much smaller, because compression is used. To
  411. avoid running out of memory compression will be done
  412. now and then. This can be tuned with the 'mkspellmem'
  413. option.
  414. After the spell file was written and it was being used
  415. in a buffer it will be reloaded automatically.
  416. :mksp[ell] [-ascii] {name}.{enc}.add
  417. Like ":mkspell" above, using {name}.{enc}.add as the
  418. input file and producing an output file in the same
  419. directory that has ".spl" appended.
  420. :mksp[ell] [-ascii] {name}
  421. Like ":mkspell" above, using {name} as the input file
  422. and producing an output file in the same directory
  423. that has ".{enc}.spl" appended.
  424. Vim will report the number of duplicate words. This might be a mistake in the
  425. list of words. But sometimes it is used to have different prefixes and
  426. suffixes for the same basic word to avoid them combining (e.g. Czech uses
  427. this). If you want Vim to report all duplicate words set the 'verbose'
  428. option.
  429. Since you might want to change a Myspell word list for use with Vim the
  430. following procedure is recommended:
  431. 1. Obtain the xx_YY.aff and xx_YY.dic files from Myspell.
  432. 2. Make a copy of these files to xx_YY.orig.aff and xx_YY.orig.dic.
  433. 3. Change the xx_YY.aff and xx_YY.dic files to remove bad words, add missing
  434. words, define word characters with FOL/LOW/UPP, etc. The distributed
  435. "*.diff" files can be used.
  436. 4. Start Vim with the right locale and use |:mkspell| to generate the Vim
  437. spell file.
  438. 5. Try out the spell file with ":set spell spelllang=xx" if you wrote it in
  439. a spell directory in 'runtimepath', or ":set spelllang=xx.enc.spl" if you
  440. wrote it somewhere else.
  441. When the Myspell files are updated you can merge the differences:
  442. 1. Obtain the new Myspell files as xx_YY.new.aff and xx_YY.new.dic.
  443. 2. Use |diff-mode| to see what changed: >
  444. nvim -d xx_YY.orig.dic xx_YY.new.dic
  445. 3. Take over the changes you like in xx_YY.dic.
  446. You may also need to change xx_YY.aff.
  447. 4. Rename xx_YY.new.dic to xx_YY.orig.dic and xx_YY.new.aff to xx_YY.orig.aff.
  448. SPELL FILE VERSIONS *E770* *E771* *E772*
  449. Spell checking is a relatively new feature in Vim, thus it's possible that the
  450. .spl file format will be changed to support more languages. Vim will check
  451. the validity of the spell file and report anything wrong.
  452. E771: Old spell file, needs to be updated ~
  453. This spell file is older than your Vim. You need to update the .spl file.
  454. E772: Spell file is for newer version of Vim ~
  455. This means the spell file was made for a later version of Vim. You need to
  456. update Vim.
  457. E770: Unsupported section in spell file ~
  458. This means the spell file was made for a later version of Vim and contains a
  459. section that is required for the spell file to work. In this case it's
  460. probably a good idea to upgrade your Vim.
  461. SPELL FILE DUMP
  462. If for some reason you want to check what words are supported by the currently
  463. used spelling files, use this command:
  464. *:spelldump* *:spelld*
  465. :spelld[ump] Open a new window and fill it with all currently valid
  466. words. Compound words are not included.
  467. Note: For some languages the result may be enormous,
  468. causing Vim to run out of memory.
  469. :spelld[ump]! Like ":spelldump" and include the word count. This is
  470. the number of times the word was found while
  471. updating the screen. Words that are in COMMON items
  472. get a starting count of 10.
  473. The word list uses the format of |spell-wordlist-format|. You should be
  474. able to read it with ":mkspell" to generate one .spl file that includes all
  475. the words.
  476. When all entries to 'spelllang' use the same regions or no regions at all then
  477. the region information is included in the dumped words. Otherwise only words
  478. for the current region are included and no "/regions" line is generated.
  479. Comment lines with the name of the .spl file are used as a header above the
  480. words that were generated from that .spl file.
  481. SPELL FILE MISSING *spell-SpellFileMissing* *spellfile.vim*
  482. If the spell file for the language you are using is not available, you will
  483. get an error message. But if the "spellfile.vim" plugin is active it will
  484. offer you to download the spell file. Just follow the instructions, it will
  485. ask you where to write the file (there must be a writable directory in
  486. 'runtimepath' for this).
  487. The plugin has a default place where to look for spell files, on the Vim ftp
  488. server. The protocol used is SSL (https://) for security. If you want to use
  489. another location or another protocol, set the g:spellfile_URL variable to the
  490. directory that holds the spell files. You can use http:// or ftp://, but you
  491. are taking a security risk then. The |netrw| plugin is used for getting the
  492. file, look there for the specific syntax of the URL. Example: >
  493. let g:spellfile_URL = 'https://ftp.nluug.nl/vim/runtime/spell'
  494. You may need to escape special characters.
  495. The plugin will only ask about downloading a language once. If you want to
  496. try again anyway restart Vim, or set g:spellfile_URL to another value (e.g.,
  497. prepend a space).
  498. To avoid using the "spellfile.vim" plugin do this in your vimrc file: >
  499. let loaded_spellfile_plugin = 1
  500. Instead of using the plugin you can define a |SpellFileMissing| autocommand to
  501. handle the missing file yourself. You can use it like this: >
  502. :au SpellFileMissing * call Download_spell_file(expand('<amatch>'))
  503. Thus the <amatch> item contains the name of the language. Another important
  504. value is 'encoding', since every encoding has its own spell file. With two
  505. exceptions:
  506. - For ISO-8859-15 (latin9) the name "latin1" is used (the encodings only
  507. differ in characters not used in dictionary words).
  508. - The name "ascii" may also be used for some languages where the words use
  509. only ASCII letters for most of the words.
  510. The default "spellfile.vim" plugin uses this autocommand, if you define your
  511. autocommand afterwards you may want to use ":au! SpellFileMissing" to overrule
  512. it. If you define your autocommand before the plugin is loaded it will notice
  513. this and not do anything.
  514. *E797*
  515. Note that the SpellFileMissing autocommand must not change or destroy the
  516. buffer the user was editing.
  517. ==============================================================================
  518. 4. Spell file format *spell-file-format*
  519. This is the format of the files that are used by the person who creates and
  520. maintains a word list.
  521. Note that we avoid the word "dictionary" here. That is because the goal of
  522. spell checking differs from writing a dictionary (as in the book). For
  523. spelling we need a list of words that are OK, thus should not be highlighted.
  524. Person and company names will not appear in a dictionary, but do appear in a
  525. word list. And some old words are rarely used while they are common
  526. misspellings. These do appear in a dictionary but not in a word list.
  527. There are two formats: A straight list of words and a list using affix
  528. compression. The files with affix compression are used by Myspell (Mozilla
  529. and OpenOffice.org). This requires two files, one with .aff and one with .dic
  530. extension.
  531. FORMAT OF STRAIGHT WORD LIST *spell-wordlist-format*
  532. The words must appear one per line. That is all that is required.
  533. Additionally the following items are recognized:
  534. - Empty and blank lines are ignored.
  535. # comment ~
  536. - Lines starting with a # are ignored (comment lines).
  537. /encoding=utf-8 ~
  538. - A line starting with "/encoding=", before any word, specifies the encoding
  539. of the file. After the '=' comes an encoding name. This tells Vim
  540. to setup conversion from the specified encoding to 'encoding'. Thus you can
  541. use one word list for several target encodings.
  542. /regions=usca ~
  543. - A line starting with "/regions=" specifies the region names that are
  544. supported. Each region name must be two ASCII letters. The first one is
  545. region 1. Thus "/regions=usca" has region 1 "us" and region 2 "ca".
  546. In an addition word list the region names should be equal to the main word
  547. list!
  548. - Other lines starting with '/' are reserved for future use. The ones that
  549. are not recognized are ignored. You do get a warning message, so that you
  550. know something won't work.
  551. - A "/" may follow the word with the following items:
  552. = Case must match exactly.
  553. ? Rare word.
  554. ! Bad (wrong) word.
  555. 1 to 9 A region in which the word is valid. If no regions are
  556. specified the word is valid in all regions.
  557. Example:
  558. # This is an example word list comment
  559. /encoding=latin1 encoding of the file
  560. /regions=uscagb regions "us", "ca" and "gb"
  561. example word for all regions
  562. blah/12 word for regions "us" and "ca"
  563. vim/! bad word
  564. Campbell/?3 rare word in region 3 "gb"
  565. 's mornings/= keep-case word
  566. Note that when "/=" is used the same word with all upper-case letters is not
  567. accepted. This is different from a word with mixed case that is automatically
  568. marked as keep-case, those words may appear in all upper-case letters.
  569. FORMAT WITH .AFF AND .DIC FILES *aff-dic-format*
  570. There are two files: the basic word list and an affix file. The affix file
  571. specifies settings for the language and can contain affixes. The affixes are
  572. used to modify the basic words to get the full word list. This significantly
  573. reduces the number of words, especially for a language like Polish. This is
  574. called affix compression.
  575. The basic word list and the affix file are combined with the ":mkspell"
  576. command and results in a binary spell file. All the preprocessing has been
  577. done, thus this file loads fast. The binary spell file format is described in
  578. the source code (src/spell.c). But only developers need to know about it.
  579. The preprocessing also allows us to take the Myspell language files and modify
  580. them before the Vim word list is made. The tools for this can be found in the
  581. "src/spell" directory.
  582. The format for the affix and word list files is based on what Myspell uses
  583. (the spell checker of Mozilla and OpenOffice.org). A description can be found
  584. here:
  585. https://lingucomponent.openoffice.org/affix.readme ~
  586. Note that affixes are case sensitive, this isn't obvious from the description.
  587. Vim supports quite a few extras. They are described below |spell-affix-vim|.
  588. Attempts have been made to keep this compatible with other spell checkers, so
  589. that the same files can often be used. One other project that offers more
  590. than Myspell is Hunspell ( https://hunspell.github.io ).
  591. WORD LIST FORMAT *spell-dic-format*
  592. A short example, with line numbers:
  593. 1 1234 ~
  594. 2 aan ~
  595. 3 Als ~
  596. 4 Etten-Leur ~
  597. 5 et al. ~
  598. 6 's-Gravenhage ~
  599. 7 's-Gravenhaags ~
  600. 8 # word that differs between regions ~
  601. 9 kado/1 ~
  602. 10 cadeau/2 ~
  603. 11 TCP,IP ~
  604. 12 /the S affix may add a 's' ~
  605. 13 bedel/S ~
  606. The first line contains the number of words. Vim ignores it, but you do get
  607. an error message if it's not there. *E760*
  608. What follows is one word per line. White space at the end of the line is
  609. ignored, all other white space matters. The encoding is specified in the
  610. affix file |spell-SET|.
  611. Comment lines start with '#' or '/'. See the example lines 8 and 12. Note
  612. that putting a comment after a word is NOT allowed:
  613. someword # comment that causes an error! ~
  614. After the word there is an optional slash and flags. Most of these flags are
  615. letters that indicate the affixes that can be used with this word. These are
  616. specified with SFX and PFX lines in the .aff file, see |spell-SFX| and
  617. |spell-PFX|. Vim allows using other flag types with the FLAG item in the
  618. affix file |spell-FLAG|.
  619. When the word only has lower-case letters it will also match with the word
  620. starting with an upper-case letter.
  621. When the word includes an upper-case letter, this means the upper-case letter
  622. is required at this position. The same word with a lower-case letter at this
  623. position will not match. When some of the other letters are upper-case it will
  624. not match either.
  625. The word with all upper-case characters will always be OK:
  626. word list matches does not match ~
  627. als als Als ALS ALs AlS aLs aLS
  628. Als Als ALS als ALs AlS aLs aLS
  629. ALS ALS als Als ALs AlS aLs aLS
  630. AlS AlS ALS als Als ALs aLs aLS
  631. The KEEPCASE affix ID can be used to specifically match a word with identical
  632. case only, see below |spell-KEEPCASE|.
  633. Note: in lines 5 to 7 non-word characters are used. You can include any
  634. character in a word. When checking the text a word still only matches when it
  635. appears with a non-word character before and after it. For Myspell a word
  636. starting with a non-word character probably won't work.
  637. In line 11 the word "TCP/IP" is defined. Since the slash has a special
  638. meaning the comma is used instead. This is defined with the SLASH item in the
  639. affix file, see |spell-SLASH|. Note that without this SLASH item the word
  640. will be "TCP,IP".
  641. AFFIX FILE FORMAT *spell-aff-format* *spell-affix-vim*
  642. *spell-affix-comment*
  643. Comment lines in the .aff file start with a '#':
  644. # comment line ~
  645. Items with a fixed number of arguments can be followed by a comment. But only
  646. if none of the arguments can contain white space. The comment must start with
  647. a "#" character. Example:
  648. KEEPCASE = # fix case for words with this flag ~
  649. ENCODING *spell-SET*
  650. The affix file can be in any encoding that is supported by "iconv". However,
  651. in some cases the current locale should also be set properly at the time
  652. |:mkspell| is invoked. Adding FOL/LOW/UPP lines removes this requirement
  653. |spell-FOL|.
  654. The encoding should be specified before anything where the encoding matters.
  655. The encoding applies both to the affix file and the dictionary file. It is
  656. done with a SET line:
  657. SET utf-8 ~
  658. The encoding can be different from the value of the 'encoding' option at the
  659. time ":mkspell" is used. Vim will then convert everything to 'encoding' and
  660. generate a spell file for 'encoding'. If some of the used characters do not
  661. fit in 'encoding' you will get an error message.
  662. *spell-affix-mbyte*
  663. When using a multibyte encoding it's possible to use more different affix
  664. flags. But Myspell doesn't support that, thus you may not want to use it
  665. anyway. For compatibility use an 8-bit encoding.
  666. INFORMATION
  667. These entries in the affix file can be used to add information to the spell
  668. file. There are no restrictions on the format, but they should be in the
  669. right encoding.
  670. *spell-NAME* *spell-VERSION* *spell-HOME*
  671. *spell-AUTHOR* *spell-EMAIL* *spell-COPYRIGHT*
  672. NAME Name of the language
  673. VERSION 1.0.1 with fixes
  674. HOME https://www.example.com
  675. AUTHOR John Doe
  676. EMAIL john AT Doe DOT net
  677. COPYRIGHT LGPL
  678. These fields are put in the .spl file as-is. The |:spellinfo| command can be
  679. used to view the info.
  680. *:spellinfo* *:spelli*
  681. :spelli[nfo] Display the information for the spell file(s) used for
  682. the current buffer.
  683. CHARACTER TABLES
  684. *spell-affix-chars*
  685. When using an 8-bit encoding the affix file should define what characters are
  686. word characters. This is because the system where ":mkspell" is used may not
  687. support a locale with this encoding and isalpha() won't work. For example
  688. when using "cp1250" on Unix.
  689. *E761* *E762* *spell-FOL*
  690. *spell-LOW* *spell-UPP*
  691. Three lines in the affix file are needed. Simplistic example:
  692. FOL áëñ ~
  693. LOW áëñ ~
  694. UPP ÁËÑ ~
  695. All three lines must have exactly the same number of characters.
  696. The "FOL" line specifies the case-folded characters. These are used to
  697. compare words while ignoring case. For most encodings this is identical to
  698. the lower case line.
  699. The "LOW" line specifies the characters in lower-case. Mostly it's equal to
  700. the "FOL" line.
  701. The "UPP" line specifies the characters with upper-case. That is, a character
  702. is upper-case where it's different from the character at the same position in
  703. "FOL".
  704. An exception is made for the German sharp s ß. The upper-case version is
  705. "SS". In the FOL/LOW/UPP lines it should be included, so that it's recognized
  706. as a word character, but use the ß character in all three.
  707. ASCII characters should be omitted, Vim always handles these in the same way.
  708. When the encoding is UTF-8 no word characters need to be specified.
  709. *E763*
  710. Vim allows you to use spell checking for several languages in the same file.
  711. You can list them in the 'spelllang' option. As a consequence all spell files
  712. for the same encoding must use the same word characters, otherwise they can't
  713. be combined without errors.
  714. If you get an E763 warning that the word tables differ you need to update your
  715. ".spl" spell files. If you downloaded the files, get the latest version of
  716. all spell files you use. If you are only using one, e.g., German, then also
  717. download the recent English spell files. Otherwise generate the .spl file
  718. again with |:mkspell|. If you still get errors check the FOL, LOW and UPP
  719. lines in the used .aff files.
  720. The XX.ascii.spl spell file generated with the "-ascii" argument will not
  721. contain the table with characters, so that it can be combined with spell files
  722. for any encoding. The .add.spl files also do not contain the table.
  723. MID-WORD CHARACTERS
  724. *spell-midword*
  725. Some characters are only to be considered word characters if they are used in
  726. between two ordinary word characters. An example is the single quote: It is
  727. often used to put text in quotes, thus it can't be recognized as a word
  728. character, but when it appears in between word characters it must be part of
  729. the word. This is needed to detect a spelling error such as they'are. That
  730. should be they're, but since "they" and "are" are words themselves that would
  731. go unnoticed.
  732. These characters are defined with MIDWORD in the .aff file. Example:
  733. MIDWORD '- ~
  734. FLAG TYPES *spell-FLAG*
  735. Flags are used to specify the affixes that can be used with a word and for
  736. other properties of the word. Normally single-character flags are used. This
  737. limits the number of possible flags, especially for 8-bit encodings. The FLAG
  738. item can be used if more affixes are to be used. Possible values:
  739. FLAG long use two-character flags
  740. FLAG num use numbers, from 1 up to 65000
  741. FLAG caplong use one-character flags without A-Z and two-character
  742. flags that start with A-Z
  743. With "FLAG num" the numbers in a list of affixes need to be separated with a
  744. comma: "234,2143,1435". This method is inefficient, but useful if the file is
  745. generated with a program.
  746. When using "caplong" the two-character flags all start with a capital: "Aa",
  747. "B1", "BB", etc. This is useful to use one-character flags for the most
  748. common items and two-character flags for uncommon items.
  749. Note: When using utf-8 only characters up to 65000 may be used for flags.
  750. Note: even when using "num" or "long" the number of flags available to
  751. compounding and prefixes is limited to about 250.
  752. AFFIXES
  753. *spell-PFX* *spell-SFX*
  754. The usual PFX (prefix) and SFX (suffix) lines are supported (see the Myspell
  755. documentation or the Aspell manual:
  756. http://aspell.net/man-html/Affix-Compression.html).
  757. Summary:
  758. SFX L Y 2 ~
  759. SFX L 0 re [^x] ~
  760. SFX L 0 ro x ~
  761. The first line is a header and has four fields:
  762. SFX {flag} {combine} {count}
  763. {flag} The name used for the suffix. Mostly it's a single letter,
  764. but other characters can be used, see |spell-FLAG|.
  765. {combine} Can be 'Y' or 'N'. When 'Y' then the word plus suffix can
  766. also have a prefix. When 'N' then a prefix is not allowed.
  767. {count} The number of lines following. If this is wrong you will get
  768. an error message.
  769. For PFX the fields are exactly the same.
  770. The basic format for the following lines is:
  771. SFX {flag} {strip} {add} {condition} {extra}
  772. {flag} Must be the same as the {flag} used in the first line.
  773. {strip} Characters removed from the basic word. There is no check if
  774. the characters are actually there, only the length is used (in
  775. bytes). This should match the {condition}, otherwise strange
  776. things may happen. If the {strip} length is equal to or
  777. longer than the basic word the suffix won't be used.
  778. When {strip} is 0 (zero) then nothing is stripped.
  779. {add} Characters added to the basic word, after removing {strip}.
  780. Optionally there is a '/' followed by flags. The flags apply
  781. to the word plus affix. See |spell-affix-flags|
  782. {condition} A simplistic pattern. Only when this matches with a basic
  783. word will the suffix be used for that word. This is normally
  784. for using one suffix letter with different {add} and {strip}
  785. fields for words with different endings.
  786. When {condition} is a . (dot) there is no condition.
  787. The pattern may contain:
  788. - Literal characters.
  789. - A set of characters in []. [abc] matches a, b and c.
  790. A dash is allowed for a range [a-c], but this is
  791. Vim-specific.
  792. - A set of characters that starts with a ^, meaning the
  793. complement of the specified characters. [^abc] matches any
  794. character but a, b and c.
  795. {extra} Optional extra text:
  796. # comment Comment is ignored
  797. - Hunspell uses this, ignored
  798. For PFX the fields are the same, but the {strip}, {add} and {condition} apply
  799. to the start of the word.
  800. Note: Myspell ignores any extra text after the relevant info. Vim requires
  801. this text to start with a "#" so that mistakes don't go unnoticed. Example:
  802. SFX F 0 in [^i]n # Spion > Spionin ~
  803. SFX F 0 nen in # Bauerin > Bauerinnen ~
  804. However, to avoid lots of errors in affix files written for Myspell, you can
  805. add the IGNOREEXTRA flag.
  806. Apparently Myspell allows an affix name to appear more than once. Since this
  807. might also be a mistake, Vim checks for an extra "S". The affix files for
  808. Myspell that use this feature apparently have this flag. Example:
  809. SFX a Y 1 S ~
  810. SFX a 0 an . ~
  811. SFX a Y 2 S ~
  812. SFX a 0 en . ~
  813. SFX a 0 on . ~
  814. AFFIX FLAGS *spell-affix-flags*
  815. This is a feature that comes from Hunspell: The affix may specify flags. This
  816. works similar to flags specified on a basic word. The flags apply to the
  817. basic word plus the affix (but there are restrictions). Example:
  818. SFX S Y 1 ~
  819. SFX S 0 s . ~
  820. SFX A Y 1 ~
  821. SFX A 0 able/S . ~
  822. When the dictionary file contains "drink/AS" then these words are possible:
  823. drink
  824. drinks uses S suffix
  825. drinkable uses A suffix
  826. drinkables uses A suffix and then S suffix
  827. Generally the flags of the suffix are added to the flags of the basic word,
  828. both are used for the word plus suffix. But the flags of the basic word are
  829. only used once for affixes, except that both one prefix and one suffix can be
  830. used when both support combining.
  831. Specifically, the affix flags can be used for:
  832. - Suffixes on suffixes, as in the example above. This works once, thus you
  833. can have two suffixes on a word (plus one prefix).
  834. - Making the word with the affix rare, by using the |spell-RARE| flag.
  835. - Exclude the word with the affix from compounding, by using the
  836. |spell-COMPOUNDFORBIDFLAG| flag.
  837. - Allow the word with the affix to be part of a compound word on the side of
  838. the affix with the |spell-COMPOUNDPERMITFLAG|.
  839. - Use the NEEDCOMPOUND flag: word plus affix can only be used as part of a
  840. compound word. |spell-NEEDCOMPOUND|
  841. - Compound flags: word plus affix can be part of a compound word at the end,
  842. middle, start, etc. The flags are combined with the flags of the basic
  843. word. |spell-compound|
  844. - NEEDAFFIX: another affix is needed to make a valid word.
  845. - CIRCUMFIX, as explained just below.
  846. IGNOREEXTRA *spell-IGNOREEXTRA*
  847. Normally Vim gives an error for an extra field that does not start with '#'.
  848. This avoids errors going unnoticed. However, some files created for Myspell
  849. or Hunspell may contain many entries with an extra field. Use the IGNOREEXTRA
  850. flag to avoid lots of errors.
  851. CIRCUMFIX *spell-CIRCUMFIX*
  852. The CIRCUMFIX flag means a prefix and suffix must be added at the same time.
  853. If a prefix has the CIRCUMFIX flag then only suffixes with the CIRCUMFIX flag
  854. can be added, and the other way around.
  855. An alternative is to only specify the suffix, and give that suffix two flags:
  856. the required prefix and the NEEDAFFIX flag. |spell-NEEDAFFIX|
  857. PFXPOSTPONE *spell-PFXPOSTPONE*
  858. When an affix file has very many prefixes that apply to many words it's not
  859. possible to build the whole word list in memory. This applies to Hebrew (a
  860. list with all words is over a Gbyte). In that case applying prefixes must be
  861. postponed. This makes spell checking slower. It is indicated by this keyword
  862. in the .aff file:
  863. PFXPOSTPONE ~
  864. Only prefixes without a chop string and without flags can be postponed.
  865. Prefixes with a chop string or with flags will still be included in the word
  866. list. An exception is when the chop string is one character and equal to the last
  867. character of the added string, but in lower case. That is, when the chop string
  868. is used to allow the following word to start with an upper case letter.
  869. WORDS WITH A SLASH *spell-SLASH*
  870. The slash is used in the .dic file to separate the basic word from the affix
  871. letters and other flags. Unfortunately, this means you cannot use a slash in
  872. a word. Thus "TCP/IP" is not a word but "TCP" with the flags "IP". To include
  873. a slash in the word put a backslash before it: "TCP\/IP". In the rare case
  874. you want to use a backslash inside a word you need to use two backslashes.
  875. Any other use of the backslash is reserved for future expansion.
  876. KEEP-CASE WORDS *spell-KEEPCASE*
  877. In the affix file a KEEPCASE line can be used to define the affix name used
  878. for keep-case words. Example:
  879. KEEPCASE = ~
  880. This flag is not supported by Myspell. It has the meaning that case matters.
  881. This can be used if the word does not have the first letter in upper case at
  882. the start of a sentence. Example:
  883. word list matches does not match ~
  884. 's morgens/= 's morgens 'S morgens 's Morgens 'S MORGENS
  885. 's Morgens 's Morgens 'S MORGENS 'S morgens 's morgens
  886. The flag can also be used to avoid that the word matches when it is in all
  887. upper-case letters.
  888. RARE WORDS *spell-RARE*
  889. In the affix file a RARE line can be used to define the affix name used for
  890. rare words. Example:
  891. RARE ? ~
  892. Rare words are highlighted differently from bad words. This is to be used for
  893. words that are correct for the language, but are hardly ever used and could be
  894. a typing mistake anyway.
  895. This flag can also be used on an affix, so that a basic word is not rare but
  896. the basic word plus affix is rare |spell-affix-flags|. However, if the word
  897. also appears as a good word in another way (e.g., in another region) it won't
  898. be marked as rare.
  899. BAD WORDS *spell-BAD*
  900. In the affix file a BAD line can be used to define the affix name used for
  901. bad words. Example:
  902. BAD ! ~
  903. This can be used to exclude words that would otherwise be good. For example
  904. "the the" in the .dic file:
  905. the the/! ~
  906. Once a word has been marked as bad it won't be undone by encountering the same
  907. word as good.
  908. The flag also applies to the word with affixes, thus this can be used to mark
  909. a whole bunch of related words as bad.
  910. *spell-FORBIDDENWORD*
  911. FORBIDDENWORD can be used just like BAD. For compatibility with Hunspell.
  912. *spell-NEEDAFFIX*
  913. The NEEDAFFIX flag is used to require that a word is used with an affix. The
  914. word itself is not a good word (unless there is an empty affix). Example:
  915. NEEDAFFIX + ~
  916. COMPOUND WORDS *spell-compound*
  917. A compound word is a longer word made by concatenating words that appear in
  918. the .dic file. To specify which words may be concatenated a character is
  919. used. This character is put in the list of affixes after the word. We will
  920. call this character a flag here. Obviously these flags must be different from
  921. any affix IDs used.
  922. *spell-COMPOUNDFLAG*
  923. The Myspell compatible method uses one flag, specified with COMPOUNDFLAG. All
  924. words with this flag combine in any order. This means there is no control
  925. over which word comes first. Example:
  926. COMPOUNDFLAG c ~
  927. *spell-COMPOUNDRULE*
  928. A more advanced method to specify how compound words can be formed uses
  929. multiple items with multiple flags. This is not compatible with Myspell 3.0.
  930. Let's start with an example:
  931. COMPOUNDRULE c+ ~
  932. COMPOUNDRULE se ~
  933. The first line defines that words with the "c" flag can be concatenated in any
  934. order. The second line defines compound words that are made of one word with
  935. the "s" flag and one word with the "e" flag. With this dictionary:
  936. bork/c ~
  937. onion/s ~
  938. soup/e ~
  939. You can make these words:
  940. bork
  941. borkbork
  942. borkborkbork
  943. (etc.)
  944. onion
  945. soup
  946. onionsoup
  947. The COMPOUNDRULE item may appear multiple times. The argument is made out of
  948. one or more groups, where each group can be:
  949. one flag e.g., c
  950. alternate flags inside [] e.g., [abc]
  951. Optionally this may be followed by:
  952. * the group appears zero or more times, e.g., sm*e
  953. + the group appears one or more times, e.g., c+
  954. ? the group appears zero times or once, e.g., x?
  955. This is similar to the regexp pattern syntax (but not the same!). A few
  956. examples with the sequence of word flags they require:
  957. COMPOUNDRULE x+ x xx xxx etc.
  958. COMPOUNDRULE yz yz
  959. COMPOUNDRULE x+z xz xxz xxxz etc.
  960. COMPOUNDRULE yx+ yx yxx yxxx etc.
  961. COMPOUNDRULE xy?z xz xyz
  962. COMPOUNDRULE [abc]z az bz cz
  963. COMPOUNDRULE [abc]+z az aaz abaz bz baz bcbz cz caz cbaz etc.
  964. COMPOUNDRULE a[xyz]+ ax axx axyz ay ayx ayzz az azy azxy etc.
  965. COMPOUNDRULE sm*e se sme smme smmme etc.
  966. COMPOUNDRULE s[xyz]*e se sxe sxye sxyxe sye syze sze szye szyxe etc.
  967. A specific example: Allow a compound to be made of two words and a dash:
  968. In the .aff file:
  969. COMPOUNDRULE sde ~
  970. NEEDAFFIX x ~
  971. COMPOUNDWORDMAX 3 ~
  972. COMPOUNDMIN 1 ~
  973. In the .dic file:
  974. start/s ~
  975. end/e ~
  976. -/xd ~
  977. This allows for the word "start-end", but not "startend".
  978. An additional implied rule is that, without further flags, a word with a
  979. prefix cannot be compounded after another word, and a word with a suffix
  980. cannot be compounded with a following word. Thus the affix cannot appear
  981. on the inside of a compound word. This can be changed with the
  982. |spell-COMPOUNDPERMITFLAG|.
  983. *spell-NEEDCOMPOUND*
  984. The NEEDCOMPOUND flag is used to require that a word is used as part of a
  985. compound word. The word itself is not a good word. Example:
  986. NEEDCOMPOUND & ~
  987. *spell-ONLYINCOMPOUND*
  988. The ONLYINCOMPOUND flag does exactly the same as NEEDCOMPOUND. Supported for
  989. compatibility with Hunspell.
  990. *spell-COMPOUNDMIN*
  991. The minimal character length of a word used for compounding is specified with
  992. COMPOUNDMIN. Example:
  993. COMPOUNDMIN 5 ~
  994. When omitted there is no minimal length. Obviously you could just leave out
  995. the compound flag from short words instead, this feature is present for
  996. compatibility with Myspell.
  997. *spell-COMPOUNDWORDMAX*
  998. The maximum number of words that can be concatenated into a compound word is
  999. specified with COMPOUNDWORDMAX. Example:
  1000. COMPOUNDWORDMAX 3 ~
  1001. When omitted there is no maximum. It applies to all compound words.
  1002. To set a limit for words with specific flags make sure the items in
  1003. COMPOUNDRULE where they appear don't allow too many words.
  1004. *spell-COMPOUNDSYLMAX*
  1005. The maximum number of syllables that a compound word may contain is specified
  1006. with COMPOUNDSYLMAX. Example:
  1007. COMPOUNDSYLMAX 6 ~
  1008. This has no effect if there is no SYLLABLE item. Without COMPOUNDSYLMAX there
  1009. is no limit on the number of syllables.
  1010. If both COMPOUNDWORDMAX and COMPOUNDSYLMAX are defined, a compound word is
  1011. accepted if it fits one of the criteria, thus is either made from up to
  1012. COMPOUNDWORDMAX words or contains up to COMPOUNDSYLMAX syllables.
  1013. *spell-COMPOUNDFORBIDFLAG*
  1014. The COMPOUNDFORBIDFLAG specifies a flag that can be used on an affix. It
  1015. means that the word plus affix cannot be used in a compound word. Example:
  1016. affix file:
  1017. COMPOUNDFLAG c ~
  1018. COMPOUNDFORBIDFLAG x ~
  1019. SFX a Y 2 ~
  1020. SFX a 0 s . ~
  1021. SFX a 0 ize/x . ~
  1022. dictionary:
  1023. word/c ~
  1024. util/ac ~
  1025. This allows for "wordutil" and "wordutils" but not "wordutilize".
  1026. Note: this doesn't work for postponed prefixes yet.
  1027. *spell-COMPOUNDPERMITFLAG*
  1028. The COMPOUNDPERMITFLAG specifies a flag that can be used on an affix. It
  1029. means that the word plus affix can also be used in a compound word in a way
  1030. where the affix ends up halfway through the word. Without this flag that is
  1031. not allowed.
  1032. Note: this doesn't work for postponed prefixes yet.
  1033. *spell-COMPOUNDROOT*
  1034. The COMPOUNDROOT flag is used for words in the dictionary that are already a
  1035. compound. This means it counts for two words when checking the compounding
  1036. rules. Can also be used for an affix to count the affix as a compounding
  1037. word.
  1038. *spell-CHECKCOMPOUNDPATTERN*
  1039. CHECKCOMPOUNDPATTERN is used to define patterns that, when matching at the
  1040. position where two words are compounded together, forbid the compound.
  1041. For example:
  1042. CHECKCOMPOUNDPATTERN o e ~
  1043. This forbids compounding if the first word ends in "o" and the second word
  1044. starts with "e".
  1045. The arguments must be plain text, no patterns are actually supported, despite
  1046. the item name. Case is always ignored.
  1047. The Hunspell feature to use three arguments and flags is not supported.
  1048. *spell-NOCOMPOUNDSUGS*
  1049. This item indicates that using compounding to make suggestions is not a good
  1050. idea. Use this when compounding is used with very short or one-character
  1051. words. E.g. to make numbers out of digits. Without this flag creating
  1052. suggestions would spend most time trying all kinds of weird compound words.
  1053. NOCOMPOUNDSUGS ~
  1054. *spell-SYLLABLE*
  1055. The SYLLABLE item defines characters or character sequences that are used to
  1056. count the number of syllables in a word. Example:
  1057. SYLLABLE aáeéiíoóöõuúüûy/aa/au/ea/ee/ei/ie/oa/oe/oo/ou/uu/ui ~
  1058. Before the first slash is the set of characters that are counted for one
  1059. syllable, also when repeated and mixed, until the next character that is not
  1060. in this set. After the slash come sequences of characters that are counted
  1061. for one syllable. These are preferred over using characters from the set.
  1062. With the example "ideeen" has three syllables, counted by "i", "ee" and "e".
  1063. Only case-folded letters need to be included.
  1064. Another way to restrict compounding was mentioned above: Adding the
  1065. |spell-COMPOUNDFORBIDFLAG| flag to an affix causes all words that are made
  1066. with that affix to not be used for compounding.
  1067. UNLIMITED COMPOUNDING *spell-NOBREAK*
  1068. For some languages, such as Thai, there is no space in between words. This
  1069. looks like all words are compounded. To specify this use the NOBREAK item in
  1070. the affix file, without arguments:
  1071. NOBREAK ~
  1072. Vim will try to figure out where one word ends and a next starts. When there
  1073. are spelling mistakes this may not be quite right.
  1074. *spell-COMMON*
  1075. Common words can be specified with the COMMON item. This will give better
  1076. suggestions when editing a short file. Example:
  1077. COMMON the of to and a in is it you that he she was for on are ~
  1078. The words must be separated by white space, up to 25 per line.
  1079. When multiple regions are specified in a ":mkspell" command the common words
  1080. for all regions are combined and used for all regions.
  1081. *spell-NOSPLITSUGS*
  1082. This item indicates that splitting a word to make suggestions is not a good
  1083. idea. Split-word suggestions will appear only when there are few similar
  1084. words.
  1085. NOSPLITSUGS ~
  1086. *spell-NOSUGGEST*
  1087. The flag specified with NOSUGGEST can be used for words that will not be
  1088. suggested. Can be used for obscene words.
  1089. NOSUGGEST % ~
  1090. REPLACEMENTS *spell-REP*
  1091. In the affix file REP items can be used to define common mistakes. This is
  1092. used to make spelling suggestions. The items define the "from" text and the
  1093. "to" replacement. Example:
  1094. REP 4 ~
  1095. REP f ph ~
  1096. REP ph f ~
  1097. REP k ch ~
  1098. REP ch k ~
  1099. The first line specifies the number of REP lines following. Vim ignores the
  1100. number, but it must be there (for compatibility with Myspell).
  1101. Don't include simple one-character replacements or swaps. Vim will try these
  1102. anyway. You can include whole words if you want to, but you might want to use
  1103. the "file:" item in 'spellsuggest' instead.
  1104. You can include a space by using an underscore:
  1105. REP the_the the ~
  1106. SIMILAR CHARACTERS *spell-MAP* *E783*
  1107. In the affix file MAP items can be used to define letters that are very much
  1108. alike. This is mostly used for a letter with different accents. This is used
  1109. to prefer suggestions with these letters substituted. Example:
  1110. MAP 2 ~
  1111. MAP eéëêè ~
  1112. MAP uüùúû ~
  1113. The first line specifies the number of MAP lines following. Vim ignores the
  1114. number, but the line must be there.
  1115. Each letter must appear in only one of the MAP items. It's a bit more
  1116. efficient if the first letter is ASCII or at least one without accents.
  1117. .SUG FILE *spell-NOSUGFILE*
  1118. When soundfolding is specified in the affix file then ":mkspell" will normally
  1119. produce a .sug file next to the .spl file. This file is used to find
  1120. suggestions by their sound-a-like form quickly. At the cost of a lot of
  1121. memory (the amount depends on the number of words, |:mkspell| will display an
  1122. estimate when it's done).
  1123. To avoid producing a .sug file use this item in the affix file:
  1124. NOSUGFILE ~
  1125. Users can simply omit the .sug file if they don't want to use it.
  1126. SOUND-A-LIKE *spell-SAL*
  1127. In the affix file SAL items can be used to define the sound-a-like mechanism
  1128. to be used. The main items define the "from" text and the "to" replacement.
  1129. Simplistic example:
  1130. SAL CIA X ~
  1131. SAL CH X ~
  1132. SAL C K ~
  1133. SAL K K ~
  1134. There are a few rules and this can become quite complicated. An explanation
  1135. how it works can be found in the Aspell manual:
  1136. http://aspell.net/man-html/Phonetic-Code.html.
  1137. There are a few special items:
  1138. SAL followup true ~
  1139. SAL collapse_result true ~
  1140. SAL remove_accents true ~
  1141. "1" has the same meaning as "true". Any other value means "false".
  1142. SIMPLE SOUNDFOLDING *spell-SOFOFROM* *spell-SOFOTO*
  1143. The SAL mechanism is complex and slow. A simpler mechanism is mapping all
  1144. characters to another character, mapping similar sounding characters to the
  1145. same character. At the same time this does case folding. You cannot have
  1146. both SAL items and simple soundfolding.
  1147. There are two items required: one to specify the characters that are mapped
  1148. and one that specifies the characters they are mapped to. They must have
  1149. exactly the same number of characters. Example:
  1150. SOFOFROM abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ~
  1151. SOFOTO ebctefghejklnnepkrstevvkesebctefghejklnnepkrstevvkes ~
  1152. In the example all vowels are mapped to the same character 'e'. Another
  1153. method would be to leave out all vowels. Some characters that sound nearly
  1154. the same and are often mixed up, such as 'm' and 'n', are mapped to the same
  1155. character. Don't do this too much, all words will start looking alike.
  1156. Characters that do not appear in SOFOFROM will be left out, except that all
  1157. white space is replaced by one space. Sequences of the same character in
  1158. SOFOFROM are replaced by one.
  1159. You can use the |soundfold()| function to try out the results. Or set the
  1160. 'verbose' option to see the score in the output of the |z=| command.
  1161. UNSUPPORTED ITEMS *spell-affix-not-supported*
  1162. These items appear in the affix file of other spell checkers. In Vim they are
  1163. ignored, not supported or defined in another way.
  1164. ACCENT (Hunspell) *spell-ACCENT*
  1165. Use MAP instead. |spell-MAP|
  1166. BREAK (Hunspell) *spell-BREAK*
  1167. Define break points. Unclear how it works exactly.
  1168. Not supported.
  1169. CHECKCOMPOUNDCASE (Hunspell) *spell-CHECKCOMPOUNDCASE*
  1170. Disallow uppercase letters at compound word boundaries.
  1171. Not supported.
  1172. CHECKCOMPOUNDDUP (Hunspell) *spell-CHECKCOMPOUNDDUP*
  1173. Disallow using the same word twice in a compound. Not
  1174. supported.
  1175. CHECKCOMPOUNDREP (Hunspell) *spell-CHECKCOMPOUNDREP*
  1176. Something about using REP items and compound words. Not
  1177. supported.
  1178. CHECKCOMPOUNDTRIPLE (Hunspell) *spell-CHECKCOMPOUNDTRIPLE*
  1179. Forbid three identical characters when compounding. Not
  1180. supported.
  1181. CHECKSHARPS (Hunspell) *spell-CHECKSHARPS*
  1182. SS letter pair in uppercased (German) words may be upper case
  1183. sharp s (ß). Not supported.
  1184. COMPLEXPREFIXES (Hunspell) *spell-COMPLEXPREFIXES*
  1185. Enables using two prefixes. Not supported.
  1186. COMPOUND (Hunspell) *spell-COMPOUND*
  1187. This is one line with the count of COMPOUND items, followed by
  1188. that many COMPOUND lines with a pattern.
  1189. Remove the first line with the count and rename the other
  1190. items to COMPOUNDRULE |spell-COMPOUNDRULE|
  1191. COMPOUNDFIRST (Hunspell) *spell-COMPOUNDFIRST*
  1192. Use COMPOUNDRULE instead. |spell-COMPOUNDRULE|
  1193. COMPOUNDBEGIN (Hunspell) *spell-COMPOUNDBEGIN*
  1194. Words signed with COMPOUNDBEGIN may be first elements in
  1195. compound words.
  1196. Use COMPOUNDRULE instead. |spell-COMPOUNDRULE|
  1197. COMPOUNDLAST (Hunspell) *spell-COMPOUNDLAST*
  1198. Words signed with COMPOUNDLAST may be last elements in
  1199. compound words.
  1200. Use COMPOUNDRULE instead. |spell-COMPOUNDRULE|
  1201. COMPOUNDEND (Hunspell) *spell-COMPOUNDEND*
  1202. Probably the same as COMPOUNDLAST
  1203. COMPOUNDMIDDLE (Hunspell) *spell-COMPOUNDMIDDLE*
  1204. Words signed with COMPOUNDMIDDLE may be middle elements in
  1205. compound words.
  1206. Use COMPOUNDRULE instead. |spell-COMPOUNDRULE|
  1207. COMPOUNDRULES (Hunspell) *spell-COMPOUNDRULES*
  1208. Number of COMPOUNDRULE lines following. Ignored, but the
  1209. argument must be a number.
  1210. COMPOUNDSYLLABLE (Hunspell) *spell-COMPOUNDSYLLABLE*
  1211. Use SYLLABLE and COMPOUNDSYLMAX instead. |spell-SYLLABLE|
  1212. |spell-COMPOUNDSYLMAX|
  1213. KEY (Hunspell) *spell-KEY*
  1214. Define characters that are close together on the keyboard.
  1215. Used to give better suggestions. Not supported.
  1216. LANG (Hunspell) *spell-LANG*
  1217. This specifies language-specific behavior. This actually
  1218. moves part of the language knowledge into the program,
  1219. therefore Vim does not support it. Each language property
  1220. must be specified separately.
  1221. LEMMA_PRESENT (Hunspell) *spell-LEMMA_PRESENT*
  1222. Only needed for morphological analysis.
  1223. MAXNGRAMSUGS (Hunspell) *spell-MAXNGRAMSUGS*
  1224. Set number of n-gram suggestions. Not supported.
  1225. PSEUDOROOT (Hunspell) *spell-PSEUDOROOT*
  1226. Use NEEDAFFIX instead. |spell-NEEDAFFIX|
  1227. SUGSWITHDOTS (Hunspell) *spell-SUGSWITHDOTS*
  1228. Adds dots to suggestions. Vim doesn't need this.
  1229. SYLLABLENUM (Hunspell) *spell-SYLLABLENUM*
  1230. Not supported.
  1231. TRY (Myspell, Hunspell, others) *spell-TRY*
  1232. Vim does not use the TRY item, it is ignored. For making
  1233. suggestions the actual characters in the words are used, that
  1234. is much more efficient.
  1235. WORDCHARS (Hunspell) *spell-WORDCHARS*
  1236. Used to recognize words. Vim doesn't need it, because there
  1237. is no need to separate words before checking them (using a
  1238. trie instead of a hashtable).
  1239. ==============================================================================
  1240. 5. Spell checker design *develop-spell*
  1241. When spell checking was going to be added to Vim a survey was done over the
  1242. available spell checking libraries and programs. Unfortunately, the result
  1243. was that none of them provided sufficient capabilities to be used as the spell
  1244. checking engine in Vim, for various reasons:
  1245. - Missing support for multi-byte encodings. At least UTF-8 must be supported,
  1246. so that more than one language can be used in the same file.
  1247. Doing on-the-fly conversion is not always possible (would require iconv
  1248. support).
  1249. - For the programs and libraries: Using them as-is would require installing
  1250. them separately from Vim. That's mostly not impossible, but a drawback.
  1251. - Performance: A few tests showed that it's possible to check spelling on the
  1252. fly (while redrawing), just like syntax highlighting. But the mechanisms
  1253. used by other code are much slower. Myspell uses a hashtable, for example.
  1254. The affix compression that most spell checkers use makes it slower too.
  1255. - For using an external program like aspell a communication mechanism would
  1256. have to be set up. That's complicated to do in a portable way (Unix-only
  1257. would be relatively simple, but that's not good enough). And performance
  1258. will become a problem (lots of process switching involved).
  1259. - Missing support for words with non-word characters, such as "Etten-Leur" and
  1260. "et al.", would require marking the pieces of them OK, lowering the
  1261. reliability.
  1262. - Missing support for regions or dialects. Makes it difficult to accept
  1263. all English words and highlight non-Canadian words differently.
  1264. - Missing support for rare words. Many words are correct but hardly ever used
  1265. and could be a misspelled often-used word.
  1266. - For making suggestions the speed is less important and requiring to install
  1267. another program or library would be acceptable. But the word lists probably
  1268. differ, the suggestions may be wrong words.
  1269. Spelling suggestions *develop-spell-suggestions*
  1270. For making suggestions there are two basic mechanisms:
  1271. 1. Try changing the bad word a little bit and check for a match with a good
  1272. word. Or go through the list of good words, change them a little bit and
  1273. check for a match with the bad word. The changes are deleting a character,
  1274. inserting a character, swapping two characters, etc.
  1275. 2. Perform soundfolding on both the bad word and the good words and then find
  1276. matches, possibly with a few changes like with the first mechanism.
  1277. The first is good for finding typing mistakes. After experimenting with
  1278. hashtables and looking at solutions from other spell checkers the conclusion
  1279. was that a trie (a kind of tree structure) is ideal for this. Both for
  1280. reducing memory use and being able to try sensible changes. For example, when
  1281. inserting a character only characters that lead to good words need to be
  1282. tried. Other mechanisms (with hashtables) need to try all possible letters at
  1283. every position in the word. Also, a hashtable has the requirement that word
  1284. boundaries are identified separately, while a trie does not require this.
  1285. That makes the mechanism a lot simpler.
  1286. Soundfolding is useful when someone knows how the word sounds but doesn't
  1287. know how it is spelled. For example, the word "dictionary" might be written
  1288. as "daktonerie". The number of changes that the first method would need to
  1289. try is very big, it's hard to find the good word that way. After soundfolding
  1290. the words become "tktnr" and "tkxnry", these differ by only two letters.
  1291. To find words by their soundfolded equivalent (soundalike word) we need a list
  1292. of all soundfolded words. A few experiments have been done to find out what
  1293. the best method is. Alternatives:
  1294. 1. Do the sound folding on the fly when looking for suggestions. This means
  1295. walking through the trie of good words, soundfolding each word and
  1296. checking how different it is from the bad word. This is very efficient for
  1297. memory use, but takes a long time. On a fast PC it takes a couple of
  1298. seconds for English, which can be acceptable for interactive use. But for
  1299. some languages it takes more than ten seconds (e.g., German, Catalan),
  1300. which is unacceptably slow. For batch processing (automatic corrections)
  1301. it's too slow for all languages.
  1302. 2. Use a trie for the soundfolded words, so that searching can be done just
  1303. like how it works without soundfolding. This requires remembering a list
  1304. of good words for each soundfolded word. This makes finding matches very
  1305. fast but requires quite a lot of memory, in the order of 1 to 10 Mbyte.
  1306. For some languages more than the original word list.
  1307. 3. Like the second alternative, but reduce the amount of memory by using affix
  1308. compression and store only the soundfolded basic word. This is what Aspell
  1309. does. Disadvantage is that affixes need to be stripped from the bad word
  1310. before soundfolding it, which means that mistakes at the start and/or end
  1311. of the word will cause the mechanism to fail. Also, this becomes slow when
  1312. the bad word is quite different from the good word.
  1313. The choice made is to use the second mechanism and use a separate file. This
  1314. way a user with sufficient memory can get very good suggestions while a user
  1315. who is short of memory or just wants the spell checking and no suggestions
  1316. doesn't use so much memory.
  1317. Word frequency
  1318. For sorting suggestions it helps to know which words are common. In theory we
  1319. could store a word frequency with the word in the dictionary. However, this
  1320. requires storing a count per word. That degrades word tree compression a lot.
  1321. And maintaining the word frequency for all languages will be a heavy task.
  1322. Also, it would be nice to prefer words that are already in the text. This way
  1323. the words that appear in the specific text are preferred for suggestions.
  1324. What has been implemented is to count words that have been seen during
  1325. displaying. A hashtable is used to quickly find the word count. The count is
  1326. initialized from words listed in COMMON items in the affix file, so that it
  1327. also works when starting a new file.
  1328. This isn't ideal, because the longer Vim is running the higher the counts
  1329. become. But in practice it is a noticeable improvement over not using the word
  1330. count.
  1331. vim:tw=78:sw=4:ts=8:noet:ft=help:norl: