mimeenc.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /*
  2. * mimeenc.c - translate our internal character set codes to and
  3. * from MIME standard character-set names.
  4. *
  5. */
  6. #include <ctype.h>
  7. #include "charset.h"
  8. #include "internal.h"
  9. static const struct {
  10. const char *name;
  11. int charset;
  12. } mimeencs[] = {
  13. /*
  14. * These names are taken from
  15. *
  16. * http://www.iana.org/assignments/character-sets
  17. *
  18. * Where multiple encoding names map to the same encoding id
  19. * (such as the variety of aliases for ISO-8859-1), the first
  20. * is considered canonical and will be returned when
  21. * translating the id to a string.
  22. */
  23. { "ISO-8859-1", CS_ISO8859_1 },
  24. { "iso-ir-100", CS_ISO8859_1 },
  25. { "ISO_8859-1", CS_ISO8859_1 },
  26. { "ISO_8859-1:1987", CS_ISO8859_1 },
  27. { "latin1", CS_ISO8859_1 },
  28. { "l1", CS_ISO8859_1 },
  29. { "IBM819", CS_ISO8859_1 },
  30. { "CP819", CS_ISO8859_1 },
  31. { "csISOLatin1", CS_ISO8859_1 },
  32. { "ISO-8859-2", CS_ISO8859_2 },
  33. { "ISO_8859-2:1987", CS_ISO8859_2 },
  34. { "iso-ir-101", CS_ISO8859_2 },
  35. { "ISO_8859-2", CS_ISO8859_2 },
  36. { "latin2", CS_ISO8859_2 },
  37. { "l2", CS_ISO8859_2 },
  38. { "csISOLatin2", CS_ISO8859_2 },
  39. { "ISO-8859-3", CS_ISO8859_3 },
  40. { "ISO_8859-3:1988", CS_ISO8859_3 },
  41. { "iso-ir-109", CS_ISO8859_3 },
  42. { "ISO_8859-3", CS_ISO8859_3 },
  43. { "latin3", CS_ISO8859_3 },
  44. { "l3", CS_ISO8859_3 },
  45. { "csISOLatin3", CS_ISO8859_3 },
  46. { "ISO-8859-4", CS_ISO8859_4 },
  47. { "ISO_8859-4:1988", CS_ISO8859_4 },
  48. { "iso-ir-110", CS_ISO8859_4 },
  49. { "ISO_8859-4", CS_ISO8859_4 },
  50. { "latin4", CS_ISO8859_4 },
  51. { "l4", CS_ISO8859_4 },
  52. { "csISOLatin4", CS_ISO8859_4 },
  53. { "ISO-8859-5", CS_ISO8859_5 },
  54. { "ISO_8859-5:1988", CS_ISO8859_5 },
  55. { "iso-ir-144", CS_ISO8859_5 },
  56. { "ISO_8859-5", CS_ISO8859_5 },
  57. { "cyrillic", CS_ISO8859_5 },
  58. { "csISOLatinCyrillic", CS_ISO8859_5 },
  59. { "ISO-8859-6", CS_ISO8859_6 },
  60. { "ISO_8859-6:1987", CS_ISO8859_6 },
  61. { "iso-ir-127", CS_ISO8859_6 },
  62. { "ISO_8859-6", CS_ISO8859_6 },
  63. { "ECMA-114", CS_ISO8859_6 },
  64. { "ASMO-708", CS_ISO8859_6 },
  65. { "arabic", CS_ISO8859_6 },
  66. { "csISOLatinArabic", CS_ISO8859_6 },
  67. { "ISO-8859-7", CS_ISO8859_7 },
  68. { "ISO_8859-7:1987", CS_ISO8859_7 },
  69. { "iso-ir-126", CS_ISO8859_7 },
  70. { "ISO_8859-7", CS_ISO8859_7 },
  71. { "ELOT_928", CS_ISO8859_7 },
  72. { "ECMA-118", CS_ISO8859_7 },
  73. { "greek", CS_ISO8859_7 },
  74. { "greek8", CS_ISO8859_7 },
  75. { "csISOLatinGreek", CS_ISO8859_7 },
  76. { "ISO-8859-8", CS_ISO8859_8 },
  77. { "ISO_8859-8:1988", CS_ISO8859_8 },
  78. { "iso-ir-138", CS_ISO8859_8 },
  79. { "ISO_8859-8", CS_ISO8859_8 },
  80. { "hebrew", CS_ISO8859_8 },
  81. { "csISOLatinHebrew", CS_ISO8859_8 },
  82. { "ISO-8859-9", CS_ISO8859_9 },
  83. { "ISO_8859-9:1989", CS_ISO8859_9 },
  84. { "iso-ir-148", CS_ISO8859_9 },
  85. { "ISO_8859-9", CS_ISO8859_9 },
  86. { "latin5", CS_ISO8859_9 },
  87. { "l5", CS_ISO8859_9 },
  88. { "csISOLatin5", CS_ISO8859_9 },
  89. { "ISO-8859-10", CS_ISO8859_10 },
  90. { "iso-ir-157", CS_ISO8859_10 },
  91. { "l6", CS_ISO8859_10 },
  92. { "ISO_8859-10:1992", CS_ISO8859_10 },
  93. { "csISOLatin6", CS_ISO8859_10 },
  94. { "latin6", CS_ISO8859_10 },
  95. { "ISO-8859-13", CS_ISO8859_13 },
  96. { "ISO-8859-14", CS_ISO8859_14 },
  97. { "iso-ir-199", CS_ISO8859_14 },
  98. { "ISO_8859-14:1998", CS_ISO8859_14 },
  99. { "ISO_8859-14", CS_ISO8859_14 },
  100. { "latin8", CS_ISO8859_14 },
  101. { "iso-celtic", CS_ISO8859_14 },
  102. { "l8", CS_ISO8859_14 },
  103. { "ISO-8859-15", CS_ISO8859_15 },
  104. { "ISO_8859-15", CS_ISO8859_15 },
  105. { "Latin-9", CS_ISO8859_15 },
  106. { "ISO-8859-16", CS_ISO8859_16 },
  107. { "iso-ir-226", CS_ISO8859_16 },
  108. { "ISO_8859-16", CS_ISO8859_16 },
  109. { "ISO_8859-16:2001", CS_ISO8859_16 },
  110. { "latin10", CS_ISO8859_16 },
  111. { "l10", CS_ISO8859_16 },
  112. { "IBM437", CS_CP437 },
  113. { "cp437", CS_CP437 },
  114. { "437", CS_CP437 },
  115. { "csPC8CodePage437", CS_CP437 },
  116. { "IBM850", CS_CP850 },
  117. { "cp850", CS_CP850 },
  118. { "850", CS_CP850 },
  119. { "csPC850Multilingual", CS_CP850 },
  120. { "IBM852", CS_CP852 },
  121. { "cp852", CS_CP852 },
  122. { "852", CS_CP852 },
  123. { "csIBM852", CS_CP852 },
  124. { "IBM866", CS_CP866 },
  125. { "cp866", CS_CP866 },
  126. { "866", CS_CP866 },
  127. { "csIBM866", CS_CP866 },
  128. { "windows-1250", CS_CP1250 },
  129. { "windows-1251", CS_CP1251 },
  130. { "windows-1252", CS_CP1252 },
  131. { "windows-1253", CS_CP1253 },
  132. { "windows-1254", CS_CP1254 },
  133. { "windows-1255", CS_CP1255 },
  134. { "windows-1256", CS_CP1256 },
  135. { "windows-1257", CS_CP1257 },
  136. { "windows-1258", CS_CP1258 },
  137. { "KOI8-R", CS_KOI8_R },
  138. { "csKOI8R", CS_KOI8_R },
  139. { "KOI8-U", CS_KOI8_U },
  140. { "macintosh", CS_MAC_ROMAN_OLD },
  141. { "mac", CS_MAC_ROMAN_OLD },
  142. { "csMacintosh", CS_MAC_ROMAN_OLD },
  143. { "VISCII", CS_VISCII },
  144. { "csVISCII", CS_VISCII },
  145. { "hp-roman8", CS_HP_ROMAN8 },
  146. { "roman8", CS_HP_ROMAN8 },
  147. { "r8", CS_HP_ROMAN8 },
  148. { "csHPRoman8", CS_HP_ROMAN8 },
  149. { "DEC-MCS", CS_DEC_MCS },
  150. { "dec", CS_DEC_MCS },
  151. { "csDECMCS", CS_DEC_MCS },
  152. { "UTF-8", CS_UTF8 },
  153. };
  154. const char *charset_to_mimeenc(int charset)
  155. {
  156. int i;
  157. for (i = 0; i < (int)lenof(mimeencs); i++)
  158. if (charset == mimeencs[i].charset)
  159. return mimeencs[i].name;
  160. return NULL; /* not found */
  161. }
  162. int charset_from_mimeenc(const char *name)
  163. {
  164. int i;
  165. for (i = 0; i < (int)lenof(mimeencs); i++) {
  166. const char *p, *q;
  167. p = name;
  168. q = mimeencs[i].name;
  169. while (*p || *q) {
  170. if (tolower((unsigned char)*p) != tolower((unsigned char)*q))
  171. break;
  172. p++; q++;
  173. }
  174. if (!*p && !*q)
  175. return mimeencs[i].charset;
  176. }
  177. return CS_NONE; /* not found */
  178. }