unidecode.html 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. <?xml version="1.0" encoding="utf-8" ?>
  2. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3. <!-- This file is generated by Nim. -->
  4. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" data-theme="auto">
  5. <head>
  6. <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8. <title>std/unidecode</title>
  9. <!-- Google fonts -->
  10. <link href='https://fonts.googleapis.com/css?family=Lato:400,600,900' rel='stylesheet' type='text/css'/>
  11. <link href='https://fonts.googleapis.com/css?family=Source+Code+Pro:400,500,600' rel='stylesheet' type='text/css'/>
  12. <!-- Favicon -->
  13. <link rel="shortcut icon" href="data:image/x-icon;base64,AAABAAEAEBAAAAEAIABoBAAAFgAAACgAAAAQAAAAIAAAAAEAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AAAAAAUAAAAF////AP///wD///8A////AP///wD///8A////AP///wD///8A////AAAAAAIAAABbAAAAlQAAAKIAAACbAAAAmwAAAKIAAACVAAAAWwAAAAL///8A////AP///wD///8A////AAAAABQAAADAAAAAYwAAAA3///8A////AP///wD///8AAAAADQAAAGMAAADAAAAAFP///wD///8A////AP///wAAAACdAAAAOv///wD///8A////AP///wD///8A////AP///wD///8AAAAAOgAAAJ3///8A////AP///wAAAAAnAAAAcP///wAAAAAoAAAASv///wD///8A////AP///wAAAABKAAAAKP///wAAAABwAAAAJ////wD///8AAAAAgQAAABwAAACIAAAAkAAAAJMAAACtAAAAFQAAABUAAACtAAAAkwAAAJAAAACIAAAAHAAAAIH///8A////AAAAAKQAAACrAAAAaP///wD///8AAAAARQAAANIAAADSAAAARf///wD///8AAAAAaAAAAKsAAACk////AAAAADMAAACcAAAAnQAAABj///8A////AP///wAAAAAYAAAAGP///wD///8A////AAAAABgAAACdAAAAnAAAADMAAAB1AAAAwwAAAP8AAADpAAAAsQAAAE4AAAAb////AP///wAAAAAbAAAATgAAALEAAADpAAAA/wAAAMMAAAB1AAAAtwAAAOkAAAD/AAAA/wAAAP8AAADvAAAA3gAAAN4AAADeAAAA3gAAAO8AAAD/AAAA/wAAAP8AAADpAAAAtwAAAGUAAAA/AAAA3wAAAP8AAAD/AAAA/wAAAP8AAAD/AAAA/wAAAP8AAAD/AAAA/wAAAP8AAADfAAAAPwAAAGX///8A////AAAAAEgAAADtAAAAvwAAAL0AAADGAAAA7wAAAO8AAADGAAAAvQAAAL8AAADtAAAASP///wD///8A////AP///wD///8AAAAAO////wD///8A////AAAAAIcAAACH////AP///wD///8AAAAAO////wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A////AP///wD///8A//8AAP//AAD4HwAA7/cAAN/7AAD//wAAoYUAAJ55AACf+QAAh+EAAAAAAADAAwAA4AcAAP5/AAD//wAA//8AAA=="/>
  14. <link rel="icon" type="image/png" sizes="32x32" href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABmJLR0QA/wD/AP+gvaeTAAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAB3RJTUUH4QQQEwksSS9ZWwAAAk1JREFUWMPtll2ITVEUx39nn/O7Y5qR8f05wtCUUr6ZIS++8pEnkZInPImneaCQ5METNdOkeFBKUhMPRIkHKfEuUZSUlGlKPN2TrgfncpvmnntnmlEyq1Z7t89/rf9a6+y99oZxGZf/XeIq61EdtgKXgdXA0xrYAvBjOIF1AI9zvjcC74BSpndrJPkBWDScTF8Aa4E3wDlgHbASaANmVqlcCnwHvgDvgVfAJ+AikAAvgfVZwLnSVZHZaOuKoQi3ZOMi4NkYkpe1p4J7A8BpYAD49hfIy/oqG0+hLomiKP2L5L+1ubn5115S+3OAn4EnwBlgMzCjyt6ZAnQCJ4A7wOs88iRJHvw50HoujuPBoCKwHWiosy8MdfZnAdcHk8dxXFJ3VQbQlCTJvRBCGdRbD4M6uc5glpY3eAihpN5S5w12diSEcCCEcKUO4ljdr15T76ur1FDDLIQQ3qv71EdDOe3Kxj3leRXyk+pxdWnFWod6Wt2bY3de3aSuUHcPBVimHs7mK9WrmeOF6lR1o9qnzskh2ar2qm1qizpfXaPeVGdlmGN5pb09qMxz1Xb1kLqgzn1RyH7JUXW52lr5e/Kqi9qpto7V1atuUzfnARrV7jEib1T76gG2qxdGmXyiekkt1GswPTtek0aBfJp6YySGBfWg2tPQ0FAYgf1stUfdmdcjarbYJEniKIq6gY/Aw+zWHAC+p2labGpqiorFYgGYCEzN7oQdQClN07O1/EfDyGgC0ALMBdYAi4FyK+4H3gLPsxfR1zRNi+NP7nH5J+QntnXe5B5mpfQAAAAASUVORK5CYII=">
  15. <!-- CSS -->
  16. <link rel="stylesheet" type="text/css" href="nimdoc.out.css?v=2.3.1">
  17. <!-- JS -->
  18. <script type="text/javascript" src="dochack.js?v=2.3.1"></script>
  19. </head>
  20. <body>
  21. <div class="document" id="documentId">
  22. <div class="container">
  23. <h1 class="title">std/unidecode</h1>
  24. <div class="row">
  25. <div class="three columns">
  26. <div class="theme-select-wrapper">
  27. <label for="theme-select">Theme:&nbsp;</label>
  28. <select id="theme-select" onchange="setTheme(this.value)">
  29. <option value="auto">🌗 Match OS</option>
  30. <option value="dark">🌑 Dark</option>
  31. <option value="light">🌕 Light</option>
  32. </select>
  33. </div>
  34. <div id="global-links">
  35. <ul class="simple-boot">
  36. <li><a href="manual.html">Manual</a></li>
  37. <li><a href="lib.html">Standard library</a></li>
  38. <li> <a id="indexLink" href="theindex.html">Index</a></li>
  39. <li><a href="compiler/theindex.html">Compiler docs</a></li>
  40. <li><a href="https://nim-lang.github.io/fusion/theindex.html">Fusion docs</a></li>
  41. <li><a href="https://nim-lang.github.io/Nim/">devel</a>, <a href="https://nim-lang.org/documentation.html">stable</a></li>
  42. </ul>
  43. </div>
  44. <div id="searchInputDiv">
  45. <label for="searchInput">Search:</label> <input type="search" id="searchInput"
  46. oninput="search()" />
  47. </div>
  48. <ul class="simple simple-toc" id="toc-list">
  49. <li>
  50. <a class="reference reference-toplevel" href="#6" id="56">Imports</a>
  51. </li>
  52. <li>
  53. <details open>
  54. <summary><a class="reference reference-toplevel" href="#12" id="62">Procs</a></summary>
  55. <ul class="simple simple-toc-section">
  56. <ul class="simple nested-toc-section">loadUnidecodeTable
  57. <li><a class="reference" href="#loadUnidecodeTable%2Cstring" title="loadUnidecodeTable(datafile = &quot;unidecode.dat&quot;)">loadUnidecodeTable(datafile = &quot;unidecode.dat&quot;)</a></li>
  58. </ul>
  59. <ul class="simple nested-toc-section">unidecode
  60. <li><a class="reference" href="#unidecode%2Cstring" title="unidecode(s: string): string">unidecode(s: string): string</a></li>
  61. </ul>
  62. </ul>
  63. </details>
  64. </li>
  65. </ul>
  66. </div>
  67. <div class="nine columns" id="content">
  68. <a href="https://github.com/nim-lang/Nim/tree/devel/lib/pure/unidecode/unidecode.nim#L1" class="link-seesrc" target="_blank">Source</a>&nbsp;&nbsp;
  69. <a href="https://github.com/nim-lang/Nim/edit/devel/lib/pure/unidecode/unidecode.nim#L1" class="link-seesrc" target="_blank" >Edit</a>&nbsp;&nbsp;
  70. <div id="tocRoot"></div>
  71. <p class="module-desc"><p>This module is based on Python's <a class="reference external" href="https://pypi.org/project/Unidecode/">Unidecode</a> module by Tomaz Solc, which in turn is based on the <a class="reference external" href="https://metacpan.org/pod/Text::Unidecode">Text::Unidecode</a> Perl module by Sean M. Burke.</p>
  72. <p>It provides a <a class="reference external" href="#unidecode,string">unidecode proc</a> that does Unicode to ASCII transliterations: It finds the sequence of ASCII characters that is the closest approximation to the Unicode string.</p>
  73. <p>For example, the closest ASCII approximation of the string &quot;Äußerst&quot; is &quot;Ausserst&quot;. Some information is lost in this transformation, of course, since several Unicode strings can be transformed to the same ASCII representation. So this is a strictly one-way transformation. However, a human reader will probably still be able to guess from the context what the original string was.</p>
  74. <p>This module needs the data file <tt class="docutils literal"><span class="pre"><span class="Identifier">unidecode</span><span class="Operator">.</span><span class="Identifier">dat</span></span></tt> to work. This file is embedded as a resource into your application by default. You can also define the symbol <tt class="docutils literal"><span class="pre"><span class="Operator">--</span><span class="Identifier">define</span><span class="Punctuation">:</span><span class="Identifier">noUnidecodeTable</span></span></tt> at compile time and use the <a class="reference external" href="#loadUnidecodeTable,string">loadUnidecodeTable proc</a> to initialize this module.</p>
  75. </p>
  76. <div class="section" id="6">
  77. <h1><a class="toc-backref" href="#6">Imports</a></h1>
  78. <dl class="item">
  79. <a class="reference external" href="unicode.html">unicode</a>, <a class="reference external" href="strutils.html">strutils</a>
  80. </dl>
  81. </div>
  82. <div class="section" id="12">
  83. <h1><a class="toc-backref" href="#12">Procs</a></h1>
  84. <dl class="item">
  85. <div id="loadUnidecodeTable-procs-all">
  86. <div id="loadUnidecodeTable,string">
  87. <dt><pre><span class="Keyword">proc</span> <a href="#loadUnidecodeTable%2Cstring"><span class="Identifier">loadUnidecodeTable</span></a><span class="Other">(</span><span class="Identifier">datafile</span> <span class="Other">=</span> <span class="StringLit">&quot;unidecode.dat&quot;</span><span class="Other">)</span> {.<span><span class="Other pragmadots">...</span></span><span class="pragmawrap"><span class="Identifier">raises</span><span class="Other">:</span> <span class="Other">[</span><span class="Other">]</span><span class="Other">,</span> <span class="Identifier">tags</span><span class="Other">:</span> <span class="Other">[</span><span class="Other">]</span><span class="Other">,</span>
  88. <span class="Identifier">forbids</span><span class="Other">:</span> <span class="Other">[</span><span class="Other">]</span></span>.}</pre></dt>
  89. <dd>
  90. Loads the datafile that <a class="reference external" href="#unidecode,string">unidecode</a> needs to work. This is only required if the module was compiled with the <tt class="docutils literal"><span class="pre"><span class="Operator">--</span><span class="Identifier">define</span><span class="Punctuation">:</span><span class="Identifier">noUnidecodeTable</span></span></tt> switch. This needs to be called by the main thread before any thread can make a call to <tt class="docutils literal"><span class="pre"><span class="Identifier">unidecode</span></span></tt>.
  91. <a href="https://github.com/nim-lang/Nim/tree/devel/lib/pure/unidecode/unidecode.nim#L41" class="link-seesrc" target="_blank">Source</a>&nbsp;&nbsp;
  92. <a href="https://github.com/nim-lang/Nim/edit/devel/lib/pure/unidecode/unidecode.nim#L41" class="link-seesrc" target="_blank" >Edit</a>&nbsp;&nbsp;
  93. </dd>
  94. </div>
  95. </div>
  96. <div id="unidecode-procs-all">
  97. <div id="unidecode,string">
  98. <dt><pre><span class="Keyword">proc</span> <a href="#unidecode%2Cstring"><span class="Identifier">unidecode</span></a><span class="Other">(</span><span class="Identifier">s</span><span class="Other">:</span> <a href="system.html#string"><span class="Identifier">string</span></a><span class="Other">)</span><span class="Other">:</span> <a href="system.html#string"><span class="Identifier">string</span></a> {.<span><span class="Other pragmadots">...</span></span><span class="pragmawrap"><span class="Identifier">raises</span><span class="Other">:</span> <span class="Other">[</span><span class="Other">]</span><span class="Other">,</span> <span class="Identifier">tags</span><span class="Other">:</span> <span class="Other">[</span><span class="Other">]</span><span class="Other">,</span> <span class="Identifier">forbids</span><span class="Other">:</span> <span class="Other">[</span><span class="Other">]</span></span>.}</pre></dt>
  99. <dd>
  100. Finds the sequence of ASCII characters that is the closest approximation to the UTF-8 string <tt class="docutils literal"><span class="pre"><span class="Identifier">s</span></span></tt>.
  101. <p><strong class="examples_text">Example:</strong></p>
  102. <pre class="listing"><span class="Identifier">doAssert</span> <span class="Identifier">unidecode</span><span class="Punctuation">(</span><span class="StringLit">&quot;北京&quot;</span><span class="Punctuation">)</span> <span class="Operator">==</span> <span class="StringLit">&quot;Bei Jing &quot;</span>
  103. <span class="Identifier">doAssert</span> <span class="Identifier">unidecode</span><span class="Punctuation">(</span><span class="StringLit">&quot;Äußerst&quot;</span><span class="Punctuation">)</span> <span class="Operator">==</span> <span class="StringLit">&quot;Ausserst&quot;</span></pre>
  104. <a href="https://github.com/nim-lang/Nim/tree/devel/lib/pure/unidecode/unidecode.nim#L53" class="link-seesrc" target="_blank">Source</a>&nbsp;&nbsp;
  105. <a href="https://github.com/nim-lang/Nim/edit/devel/lib/pure/unidecode/unidecode.nim#L53" class="link-seesrc" target="_blank" >Edit</a>&nbsp;&nbsp;
  106. </dd>
  107. </div>
  108. </div>
  109. </dl>
  110. </div>
  111. </div>
  112. </div>
  113. <div class="twelve-columns footer">
  114. <span class="nim-sprite"></span>
  115. <br>
  116. <small style="color: var(--hint);">Made with Nim. Generated: 2025-03-19 07:24:17 UTC</small>
  117. </div>
  118. </div>
  119. </div>
  120. <script defer data-domain="nim-lang.org" src="https://plausible.io/js/plausible.js"></script>
  121. </body>
  122. </html>