tencodings.nim 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. discard """
  2. matrix: "--mm:refc; --mm:orc"
  3. """
  4. import std/encodings
  5. import std/assertions
  # Fullwidth punctuation used in the Chinese sample sentences, spelled as UTF-8
  # byte escapes so that Unicode normalisation of this file cannot silently turn
  # them into ASCII "," / "?". In the GBK / GB2312 byte strings below they are
  # the two-byte sequences "\163\172" and "\163\191"; an ASCII character would
  # encode to a single byte instead and the comparisons would fail.
  const
    fullwidthComma = "\xEF\xBC\x8C"     # U+FF0C FULLWIDTH COMMA
    fullwidthQuestion = "\xEF\xBC\x9F"  # U+FF1F FULLWIDTH QUESTION MARK

  # Reusable converters. `open` takes (destEncoding, srcEncoding), so the
  # `from*` converters decode into UTF-8 and the `to*` converters encode from it.
  var fromGBK = open("utf-8", "gbk")
  var toGBK = open("gbk", "utf-8")
  var fromGB2312 = open("utf-8", "gb2312")
  var toGB2312 = open("gb2312", "utf-8")

  try:
    block: # GBK bytes -> UTF-8 text
      let data = "\215\237\186\243\178\187\214\170\204\236\212\218\203\174\163\172\194\250\180\178\208\199\195\206\209\185\208\199\186\211"
      doAssert fromGBK.convert(data) == "醉后不知天在水" & fullwidthComma & "满床星梦压星河"

    block: # UTF-8 text -> GBK bytes
      let data = "万两黄金容易得" & fullwidthComma & "知心一个也难求"
      doAssert toGBK.convert(data) == "\205\242\193\189\187\198\189\240\200\221\210\215\181\195\163\172\214\170\208\196\210\187\184\246\210\178\196\209\199\243"

    block: # GB2312 bytes -> UTF-8 text
      let data = "\215\212\208\197\200\203\201\250\182\254\176\217\196\234\163\172\187\225\181\177\203\174\187\247\200\253\199\167\192\239"
      doAssert fromGB2312.convert(data) == "自信人生二百年" & fullwidthComma & "会当水击三千里"

    block: # UTF-8 text -> GB2312 bytes
      let data = "谁怕" & fullwidthQuestion & "一蓑烟雨任平生"
      doAssert toGB2312.convert(data) == "\203\173\197\194\163\191\210\187\203\242\209\204\211\234\200\206\198\189\201\250"
  finally:
    # Release the converter handles even when one of the assertions above fails.
    fromGBK.close()
    toGBK.close()
    fromGB2312.close()
    toGB2312.close()
  when defined(windows):
    # Everything in this section is compiled only for Windows targets.

    # The word "тест" ("test") in each encoding exercised below.
    const
      wordUtf16le = "\x42\x04\x35\x04\x41\x04\x42\x04"  # UTF-16, little endian
      wordUtf8 = "\xd1\x82\xd0\xb5\xd1\x81\xd1\x82"
      wordCp1251 = "\xf2\xe5\xf1\xf2"
      wordKoi8r = "\xd4\xc5\xd3\xd4"

    block should_throw_on_unsupported_conversions:
      # Every (destination, source) pair listed here must raise EncodingError.
      let sample = "some string"
      const rejectedPairs = [
        ("utf-8", "utf-32"),
        ("utf-8", "unicodeFFFE"),
        ("utf-8", "utf-32BE"),
        ("unicodeFFFE", "utf-8"),
        ("utf-32", "utf-8"),
        ("utf-32BE", "utf-8"),
      ]
      for (dst, src) in rejectedPairs:
        doAssertRaises(EncodingError):
          discard convert(sample, dst, src)

    block should_convert_from_utf16_to_utf8:
      let converted = convert(wordUtf16le, "utf-8", "utf-16")
      doAssert converted == wordUtf8

    block should_convert_from_utf16_to_win1251:
      let converted = convert(wordUtf16le, "windows-1251", "utf-16")
      doAssert converted == wordCp1251

    block should_convert_from_win1251_to_koi8r:
      let converted = convert(wordCp1251, "koi8-r", "windows-1251")
      doAssert converted == wordKoi8r

    block should_convert_from_koi8r_to_win1251:
      let converted = convert(wordKoi8r, "windows-1251", "koi8-r")
      doAssert converted == wordCp1251

    block should_convert_from_utf8_to_win1251:
      let converted = convert(wordUtf8, "windows-1251", "utf-8")
      doAssert converted == wordCp1251

    block should_convert_from_utf8_to_utf16:
      let converted = convert(wordUtf8, "utf-16", "utf-8")
      doAssert converted == wordUtf16le

    block should_handle_empty_string_for_any_conversion:
      # An empty input must stay empty for each (destination, source) pair.
      let empty = ""
      const emptyPairs = [
        ("utf-16", "utf-8"),
        ("utf-8", "utf-16"),
        ("windows-1251", "koi8-r"),
      ]
      for (dst, src) in emptyPairs:
        doAssert convert(empty, dst, src) == ""
  block:
    # Chain of conversions for German umlauts:
    # UTF-8 -> CP1252 -> IBM850 -> encoding reported by getCurrentEncoding().
    let utf8Text = "öäüß"
    let asCp1252 = convert(utf8Text, "CP1252", "UTF-8")
    let asIbm850 = convert(asCp1252, "ibm850", "CP1252")
    let currentEnc = getCurrentEncoding()

    # The literal above is expected to be stored as UTF-8 bytes.
    doAssert utf8Text == "\195\182\195\164\195\188\195\159"
    doAssert asIbm850 == "\148\132\129\225"
    # NOTE(review): comparing against the UTF-8 literal presumes that the
    # reported current encoding is UTF-8 - confirm for non-UTF-8 environments.
    doAssert convert(asIbm850, currentEnc, "ibm850") == utf8Text
  block:
    # Check related to issue #23481: asking `open` for a destination encoding
    # name that does not exist must raise EncodingError.
    const bogusEncoding = "this is a invalid enc"
    doAssertRaises(EncodingError):
      discard open(destEncoding = bogusEncoding)