nls.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /*
  2. * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version 2
  7. * of the License, or (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, see <http://www.gnu.org/licenses/>.
  16. */
  17. /************************************************************************/
  18. /* */
  19. /* PROJECT : exFAT & FAT12/16/32 File System */
  20. /* FILE : nls.c */
  21. /* PURPOSE : sdFAT NLS Manager */
  22. /* */
  23. /*----------------------------------------------------------------------*/
  24. /* NOTES */
  25. /* */
  26. /* */
  27. /************************************************************************/
  28. #include <linux/string.h>
  29. #include <linux/nls.h>
  30. #include "sdfat.h"
  31. #include "core.h"
  32. /*----------------------------------------------------------------------*/
  33. /* Global Variable Definitions */
  34. /*----------------------------------------------------------------------*/
  35. /*----------------------------------------------------------------------*/
  36. /* Local Variable Definitions */
  37. /*----------------------------------------------------------------------*/
  38. static u16 bad_dos_chars[] = {
  39. /* + , ; = [ ] */
  40. 0x002B, 0x002C, 0x003B, 0x003D, 0x005B, 0x005D,
  41. 0xFF0B, 0xFF0C, 0xFF1B, 0xFF1D, 0xFF3B, 0xFF3D,
  42. 0
  43. };
  44. /*
  45. * Allow full-width illegal characters :
  46. * "MS windows 7" supports full-width-invalid-name-characters.
  47. * So we should check half-width-invalid-name-characters(ASCII) only
  48. * for compatibility.
  49. *
  50. * " * / : < > ? \ |
  51. *
  52. * patch 1.2.0
  53. */
  54. static u16 bad_uni_chars[] = {
  55. 0x0022, 0x002A, 0x002F, 0x003A,
  56. 0x003C, 0x003E, 0x003F, 0x005C, 0x007C,
  57. #if 0 /* allow full-width characters */
  58. 0x201C, 0x201D, 0xFF0A, 0xFF0F, 0xFF1A,
  59. 0xFF1C, 0xFF1E, 0xFF1F, 0xFF3C, 0xFF5C,
  60. #endif
  61. 0
  62. };
  63. /*----------------------------------------------------------------------*/
  64. /* Local Function Declarations */
  65. /*----------------------------------------------------------------------*/
  66. static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy);
  67. static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy);
  68. static u16 nls_upper(struct super_block *sb, u16 a)
  69. {
  70. FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi);
  71. if (SDFAT_SB(sb)->options.casesensitive)
  72. return a;
  73. if ((fsi->vol_utbl)[get_col_index(a)] != NULL)
  74. return (fsi->vol_utbl)[get_col_index(a)][get_row_index(a)];
  75. else
  76. return a;
  77. }
  78. /*======================================================================*/
  79. /* Global Function Definitions */
  80. /*======================================================================*/
  81. u16 *nls_wstrchr(u16 *str, u16 wchar)
  82. {
  83. while (*str) {
  84. if (*(str++) == wchar)
  85. return str;
  86. }
  87. return 0;
  88. }
  89. s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b)
  90. {
  91. return strncmp((void *)a, (void *)b, DOS_NAME_LENGTH);
  92. }
  93. s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b)
  94. {
  95. s32 i;
  96. for (i = 0; i < MAX_NAME_LENGTH; i++, a++, b++) {
  97. if (nls_upper(sb, *a) != nls_upper(sb, *b))
  98. return 1;
  99. if (*a == 0x0)
  100. return 0;
  101. }
  102. return 0;
  103. }
  104. #define CASE_LOWER_BASE (0x08) /* base is lower case */
  105. #define CASE_LOWER_EXT (0x10) /* extension is lower case */
  106. s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy)
  107. {
  108. s32 i, j, len, lossy = NLS_NAME_NO_LOSSY;
  109. u8 buf[MAX_CHARSET_SIZE];
  110. u8 lower = 0, upper = 0;
  111. u8 *dosname = p_dosname->name;
  112. u16 *uniname = p_uniname->name;
  113. u16 *p, *last_period;
  114. struct nls_table *nls = SDFAT_SB(sb)->nls_disk;
  115. /* DOSNAME is filled with space */
  116. for (i = 0; i < DOS_NAME_LENGTH; i++)
  117. *(dosname+i) = ' ';
  118. /* DOT and DOTDOT are handled by VFS layer */
  119. /* search for the last embedded period */
  120. last_period = NULL;
  121. for (p = uniname; *p; p++) {
  122. if (*p == (u16) '.')
  123. last_period = p;
  124. }
  125. i = 0;
  126. while (i < DOS_NAME_LENGTH) {
  127. if (i == 8) {
  128. if (last_period == NULL)
  129. break;
  130. if (uniname <= last_period) {
  131. if (uniname < last_period)
  132. lossy |= NLS_NAME_OVERLEN;
  133. uniname = last_period + 1;
  134. }
  135. }
  136. if (*uniname == (u16) '\0') {
  137. break;
  138. } else if (*uniname == (u16) ' ') {
  139. lossy |= NLS_NAME_LOSSY;
  140. } else if (*uniname == (u16) '.') {
  141. if (uniname < last_period)
  142. lossy |= NLS_NAME_LOSSY;
  143. else
  144. i = 8;
  145. } else if (nls_wstrchr(bad_dos_chars, *uniname)) {
  146. lossy |= NLS_NAME_LOSSY;
  147. *(dosname+i) = '_';
  148. i++;
  149. } else {
  150. len = convert_uni_to_ch(nls, *uniname, buf, &lossy);
  151. if (len > 1) {
  152. if ((i >= 8) && ((i+len) > DOS_NAME_LENGTH))
  153. break;
  154. if ((i < 8) && ((i+len) > 8)) {
  155. i = 8;
  156. continue;
  157. }
  158. lower = 0xFF;
  159. for (j = 0; j < len; j++, i++)
  160. *(dosname+i) = *(buf+j);
  161. } else { /* len == 1 */
  162. if ((*buf >= 'a') && (*buf <= 'z')) {
  163. *(dosname+i) = *buf - ('a' - 'A');
  164. lower |= (i < 8) ?
  165. CASE_LOWER_BASE :
  166. CASE_LOWER_EXT;
  167. } else if ((*buf >= 'A') && (*buf <= 'Z')) {
  168. *(dosname+i) = *buf;
  169. upper |= (i < 8) ?
  170. CASE_LOWER_BASE :
  171. CASE_LOWER_EXT;
  172. } else {
  173. *(dosname+i) = *buf;
  174. }
  175. i++;
  176. }
  177. }
  178. uniname++;
  179. }
  180. if (*dosname == 0xE5)
  181. *dosname = 0x05;
  182. if (*uniname != 0x0)
  183. lossy |= NLS_NAME_OVERLEN;
  184. if (upper & lower)
  185. p_dosname->name_case = 0xFF;
  186. else
  187. p_dosname->name_case = lower;
  188. if (p_lossy)
  189. *p_lossy = lossy;
  190. return i;
  191. }
  192. s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname)
  193. {
  194. s32 i = 0, j, n = 0;
  195. u8 buf[MAX_DOSNAME_BUF_SIZE];
  196. u8 *dosname = p_dosname->name;
  197. u16 *uniname = p_uniname->name;
  198. struct nls_table *nls = SDFAT_SB(sb)->nls_disk;
  199. if (*dosname == 0x05) {
  200. *buf = 0xE5;
  201. i++;
  202. n++;
  203. }
  204. for ( ; i < 8; i++, n++) {
  205. if (*(dosname+i) == ' ')
  206. break;
  207. if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') &&
  208. (p_dosname->name_case & CASE_LOWER_BASE))
  209. *(buf+n) = *(dosname+i) + ('a' - 'A');
  210. else
  211. *(buf+n) = *(dosname+i);
  212. }
  213. if (*(dosname+8) != ' ') {
  214. *(buf+n) = '.';
  215. n++;
  216. }
  217. for (i = 8; i < DOS_NAME_LENGTH; i++, n++) {
  218. if (*(dosname+i) == ' ')
  219. break;
  220. if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') &&
  221. (p_dosname->name_case & CASE_LOWER_EXT))
  222. *(buf+n) = *(dosname+i) + ('a' - 'A');
  223. else
  224. *(buf+n) = *(dosname+i);
  225. }
  226. *(buf+n) = '\0';
  227. i = j = 0;
  228. while (j < MAX_NAME_LENGTH) {
  229. if (*(buf+i) == '\0')
  230. break;
  231. i += convert_ch_to_uni(nls, (buf+i), uniname, NULL);
  232. uniname++;
  233. j++;
  234. }
  235. *uniname = (u16) '\0';
  236. return j;
  237. }
  238. static s32 __nls_utf16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen)
  239. {
  240. s32 len;
  241. const u16 *uniname = p_uniname->name;
  242. /* always len >= 0 */
  243. len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN,
  244. p_cstring, buflen);
  245. p_cstring[len] = '\0';
  246. return len;
  247. }
  248. static s32 __nls_vfsname_to_utf16s(struct super_block *sb, const u8 *p_cstring,
  249. const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy)
  250. {
  251. s32 i, unilen, lossy = NLS_NAME_NO_LOSSY;
  252. u16 upname[MAX_NAME_LENGTH+1];
  253. u16 *uniname = p_uniname->name;
  254. BUG_ON(!len);
  255. unilen = utf8s_to_utf16s(p_cstring, len, UTF16_HOST_ENDIAN,
  256. (wchar_t *)uniname, MAX_NAME_LENGTH+2);
  257. if (unilen < 0) {
  258. MMSG("%s: failed to vfsname_to_utf16(err:%d) "
  259. "vfsnamelen:%d", __func__, unilen, len);
  260. return unilen;
  261. }
  262. if (unilen > MAX_NAME_LENGTH) {
  263. MMSG("%s: failed to vfsname_to_utf16(estr:ENAMETOOLONG) "
  264. "vfsnamelen:%d, unilen:%d>%d",
  265. __func__, len, unilen, MAX_NAME_LENGTH);
  266. return -ENAMETOOLONG;
  267. }
  268. p_uniname->name_len = (u8)(unilen & 0xFF);
  269. for (i = 0; i < unilen; i++) {
  270. if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname))
  271. lossy |= NLS_NAME_LOSSY;
  272. *(upname+i) = nls_upper(sb, *uniname);
  273. uniname++;
  274. }
  275. *uniname = (u16)'\0';
  276. p_uniname->name_len = unilen;
  277. p_uniname->name_hash = calc_chksum_2byte((void *) upname,
  278. unilen << 1, 0, CS_DEFAULT);
  279. if (p_lossy)
  280. *p_lossy = lossy;
  281. return unilen;
  282. }
  283. static s32 __nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen)
  284. {
  285. s32 i, j, len, out_len = 0;
  286. u8 buf[MAX_CHARSET_SIZE];
  287. const u16 *uniname = p_uniname->name;
  288. struct nls_table *nls = SDFAT_SB(sb)->nls_io;
  289. i = 0;
  290. while ((i < MAX_NAME_LENGTH) && (out_len < (buflen-1))) {
  291. if (*uniname == (u16)'\0')
  292. break;
  293. len = convert_uni_to_ch(nls, *uniname, buf, NULL);
  294. if (out_len + len >= buflen)
  295. len = (buflen - 1) - out_len;
  296. out_len += len;
  297. if (len > 1) {
  298. for (j = 0; j < len; j++)
  299. *p_cstring++ = (s8) *(buf+j);
  300. } else { /* len == 1 */
  301. *p_cstring++ = (s8) *buf;
  302. }
  303. uniname++;
  304. i++;
  305. }
  306. *p_cstring = '\0';
  307. return out_len;
  308. }
  309. static s32 __nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring,
  310. const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy)
  311. {
  312. s32 i, unilen, lossy = NLS_NAME_NO_LOSSY;
  313. u16 upname[MAX_NAME_LENGTH+1];
  314. u16 *uniname = p_uniname->name;
  315. struct nls_table *nls = SDFAT_SB(sb)->nls_io;
  316. BUG_ON(!len);
  317. i = unilen = 0;
  318. while ((unilen < MAX_NAME_LENGTH) && (i < len)) {
  319. i += convert_ch_to_uni(nls, (u8 *)(p_cstring+i), uniname, &lossy);
  320. if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname))
  321. lossy |= NLS_NAME_LOSSY;
  322. *(upname+unilen) = nls_upper(sb, *uniname);
  323. uniname++;
  324. unilen++;
  325. }
  326. if (*(p_cstring+i) != '\0')
  327. lossy |= NLS_NAME_OVERLEN;
  328. *uniname = (u16)'\0';
  329. p_uniname->name_len = unilen;
  330. p_uniname->name_hash =
  331. calc_chksum_2byte((void *) upname, unilen<<1, 0, CS_DEFAULT);
  332. if (p_lossy)
  333. *p_lossy = lossy;
  334. return unilen;
  335. }
  336. s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 buflen)
  337. {
  338. if (SDFAT_SB(sb)->options.utf8)
  339. return __nls_utf16s_to_vfsname(sb, uniname, p_cstring, buflen);
  340. return __nls_uni16s_to_vfsname(sb, uniname, p_cstring, buflen);
  341. }
  342. s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, const s32 len, UNI_NAME_T *uniname, s32 *p_lossy)
  343. {
  344. if (SDFAT_SB(sb)->options.utf8)
  345. return __nls_vfsname_to_utf16s(sb, p_cstring, len, uniname, p_lossy);
  346. return __nls_vfsname_to_uni16s(sb, p_cstring, len, uniname, p_lossy);
  347. }
  348. /*======================================================================*/
  349. /* Local Function Definitions */
  350. /*======================================================================*/
  351. static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy)
  352. {
  353. int len;
  354. *uni = 0x0;
  355. if (ch[0] < 0x80) {
  356. *uni = (u16) ch[0];
  357. return 1;
  358. }
  359. len = nls->char2uni(ch, MAX_CHARSET_SIZE, uni);
  360. if (len < 0) {
  361. /* conversion failed */
  362. DMSG("%s: fail to use nls\n", __func__);
  363. if (lossy != NULL)
  364. *lossy |= NLS_NAME_LOSSY;
  365. *uni = (u16) '_';
  366. if (!strcmp(nls->charset, "utf8"))
  367. return 1;
  368. return 2;
  369. }
  370. return len;
  371. } /* end of convert_ch_to_uni */
  372. static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy)
  373. {
  374. int len;
  375. ch[0] = 0x0;
  376. if (uni < 0x0080) {
  377. ch[0] = (u8) uni;
  378. return 1;
  379. }
  380. len = nls->uni2char(uni, ch, MAX_CHARSET_SIZE);
  381. if (len < 0) {
  382. /* conversion failed */
  383. DMSG("%s: fail to use nls\n", __func__);
  384. if (lossy != NULL)
  385. *lossy |= NLS_NAME_LOSSY;
  386. ch[0] = '_';
  387. return 1;
  388. }
  389. return len;
  390. } /* end of convert_uni_to_ch */
  391. /* end of nls.c */