umapfile.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. *
  6. * Copyright (C) 1999-2013, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. ******************************************************************************/
  10. /*----------------------------------------------------------------------------
  11. *
  12. * Memory mapped file wrappers for use by the ICU Data Implementation
  13. * All of the platform-specific implementation for mapping data files
  14. * is here. The rest of the ICU Data implementation uses only the
  15. * wrapper functions.
  16. *
  17. *----------------------------------------------------------------------------*/
  18. /* Defines _XOPEN_SOURCE for access to POSIX functions.
  19. * Must be before any other #includes. */
  20. #include "uposixdefs.h"
  21. #include "unicode/putil.h"
  22. #include "unicode/ustring.h"
  23. #include "udatamem.h"
  24. #include "umapfile.h"
  25. /* memory-mapping base definitions ------------------------------------------ */
  26. #if MAP_IMPLEMENTATION==MAP_WIN32
  27. #ifndef WIN32_LEAN_AND_MEAN
  28. # define WIN32_LEAN_AND_MEAN
  29. #endif
  30. # define VC_EXTRALEAN
  31. # define NOUSER
  32. # define NOSERVICE
  33. # define NOIME
  34. # define NOMCX
  35. # if U_PLATFORM_HAS_WINUWP_API == 1
  36. // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications
  37. // to use, even though UWP apps are allowed to call and use them. Temporarily change the
  38. // WINAPI family partition below to Desktop, so that function declarations are visible for UWP.
  39. # include <winapifamily.h>
  40. # if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM))
  41. # pragma push_macro("WINAPI_PARTITION_DESKTOP")
  42. # undef WINAPI_PARTITION_DESKTOP
  43. # define WINAPI_PARTITION_DESKTOP 1
  44. # define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE
  45. # endif
  46. # endif
  47. # include <windows.h>
  48. # if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE)
  49. # pragma pop_macro("WINAPI_PARTITION_DESKTOP")
  50. # endif
  51. # include "cmemory.h"
  52. typedef HANDLE MemoryMap;
  53. # define IS_MAP(map) ((map)!=nullptr)
  54. #elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL
  55. typedef size_t MemoryMap;
  56. # define IS_MAP(map) ((map)!=0)
  57. # include <unistd.h>
  58. # include <sys/mman.h>
  59. # include <sys/stat.h>
  60. # include <fcntl.h>
  61. # ifndef MAP_FAILED
  62. # define MAP_FAILED ((void*)-1)
  63. # endif
  64. # if MAP_IMPLEMENTATION==MAP_390DLL
  65. /* No memory mapping for 390 batch mode. Fake it using dll loading. */
  66. # include <dll.h>
  67. # include "cstring.h"
  68. # include "cmemory.h"
  69. # include "unicode/udata.h"
  70. # define LIB_PREFIX "lib"
  71. # define LIB_SUFFIX ".dll"
  72. /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
  73. # define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat"
  74. # endif
  75. #elif MAP_IMPLEMENTATION==MAP_STDIO
  76. # include <stdio.h>
  77. # include "cmemory.h"
  78. typedef void *MemoryMap;
  79. # define IS_MAP(map) ((map)!=nullptr)
  80. #endif
  81. /*----------------------------------------------------------------------------*
  82. * *
  83. * Memory Mapped File support. Platform dependent implementation of *
  84. * functions used by the rest of the implementation.*
  85. * *
  86. *----------------------------------------------------------------------------*/
  87. #if MAP_IMPLEMENTATION==MAP_NONE
  88. U_CFUNC UBool
  89. uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
  90. if (U_FAILURE(*status)) {
  91. return false;
  92. }
  93. UDataMemory_init(pData); /* Clear the output struct. */
  94. return false; /* no file access */
  95. }
  96. U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
  97. /* nothing to do */
  98. }
  99. #elif MAP_IMPLEMENTATION==MAP_WIN32
  100. U_CFUNC UBool
  101. uprv_mapFile(
  102. UDataMemory *pData, /* Fill in with info on the result doing the mapping. */
  103. /* Output only; any original contents are cleared. */
  104. const char *path, /* File path to be opened/mapped. */
  105. UErrorCode *status /* Error status, used to report out-of-memory errors. */
  106. )
  107. {
  108. if (U_FAILURE(*status)) {
  109. return false;
  110. }
  111. HANDLE map = nullptr;
  112. HANDLE file = INVALID_HANDLE_VALUE;
  113. UDataMemory_init(pData); /* Clear the output struct. */
  114. /* open the input file */
  115. #if U_PLATFORM_HAS_WINUWP_API == 0
  116. // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from
  117. // the CRT 'getenv' function, and would be therefore be encoded in the default ANSI code page.
  118. // This means that we can't call the *W version of API below, whereas in the UWP code-path
  119. // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters.
  120. file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
  121. OPEN_EXISTING,
  122. FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr);
  123. #else
  124. // Convert from UTF-8 string to UTF-16 string.
  125. wchar_t utf16Path[MAX_PATH];
  126. int32_t pathUtf16Len = 0;
  127. u_strFromUTF8(reinterpret_cast<char16_t*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status);
  128. if (U_FAILURE(*status)) {
  129. return false;
  130. }
  131. if (*status == U_STRING_NOT_TERMINATED_WARNING) {
  132. // Report back an error instead of a warning.
  133. *status = U_BUFFER_OVERFLOW_ERROR;
  134. return false;
  135. }
  136. file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr,
  137. OPEN_EXISTING,
  138. FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr);
  139. #endif
  140. if (file == INVALID_HANDLE_VALUE) {
  141. // If we failed to open the file due to an out-of-memory error, then we want
  142. // to report that error back to the caller.
  143. if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
  144. *status = U_MEMORY_ALLOCATION_ERROR;
  145. }
  146. return false;
  147. }
  148. // Note: We use nullptr/nullptr for lpAttributes parameter below.
  149. // This means our handle cannot be inherited and we will get the default security descriptor.
  150. /* create an unnamed Windows file-mapping object for the specified file */
  151. map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr);
  152. CloseHandle(file);
  153. if (map == nullptr) {
  154. // If we failed to create the mapping due to an out-of-memory error, then
  155. // we want to report that error back to the caller.
  156. if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
  157. *status = U_MEMORY_ALLOCATION_ERROR;
  158. }
  159. return false;
  160. }
  161. /* map a view of the file into our address space */
  162. pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0));
  163. if (pData->pHeader == nullptr) {
  164. CloseHandle(map);
  165. return false;
  166. }
  167. pData->map = map;
  168. return true;
  169. }
  170. U_CFUNC void
  171. uprv_unmapFile(UDataMemory *pData) {
  172. if (pData != nullptr && pData->map != nullptr) {
  173. UnmapViewOfFile(pData->pHeader);
  174. CloseHandle(pData->map);
  175. pData->pHeader = nullptr;
  176. pData->map = nullptr;
  177. }
  178. }
  179. #elif MAP_IMPLEMENTATION==MAP_POSIX
  180. U_CFUNC UBool
  181. uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
  182. int fd;
  183. int length;
  184. struct stat mystat;
  185. void *data;
  186. if (U_FAILURE(*status)) {
  187. return false;
  188. }
  189. UDataMemory_init(pData); /* Clear the output struct. */
  190. /* determine the length of the file */
  191. if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
  192. return false;
  193. }
  194. length=mystat.st_size;
  195. /* open the file */
  196. fd=open(path, O_RDONLY);
  197. if(fd==-1) {
  198. return false;
  199. }
  200. /* get a view of the mapping */
  201. #if U_PLATFORM != U_PF_HPUX
  202. data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0);
  203. #else
  204. data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
  205. #endif
  206. close(fd); /* no longer needed */
  207. if(data==MAP_FAILED) {
  208. // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
  209. return false;
  210. }
  211. pData->map = (char *)data + length;
  212. pData->pHeader=(const DataHeader *)data;
  213. pData->mapAddr = data;
  214. #if U_PLATFORM == U_PF_IPHONE
  215. posix_madvise(data, length, POSIX_MADV_RANDOM);
  216. #endif
  217. return true;
  218. }
  219. U_CFUNC void
  220. uprv_unmapFile(UDataMemory *pData) {
  221. if(pData!=nullptr && pData->map!=nullptr) {
  222. size_t dataLen = (char *)pData->map - (char *)pData->mapAddr;
  223. if(munmap(pData->mapAddr, dataLen)==-1) {
  224. }
  225. pData->pHeader=nullptr;
  226. pData->map=0;
  227. pData->mapAddr=nullptr;
  228. }
  229. }
  230. #elif MAP_IMPLEMENTATION==MAP_STDIO
  231. /* copy of the filestrm.c/T_FileStream_size() implementation */
  232. static int32_t
  233. umap_fsize(FILE *f) {
  234. int32_t savedPos = ftell(f);
  235. int32_t size = 0;
  236. /*Changes by Bertrand A. D. doesn't affect the current position
  237. goes to the end of the file before ftell*/
  238. fseek(f, 0, SEEK_END);
  239. size = (int32_t)ftell(f);
  240. fseek(f, savedPos, SEEK_SET);
  241. return size;
  242. }
  243. U_CFUNC UBool
  244. uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
  245. FILE *file;
  246. int32_t fileLength;
  247. void *p;
  248. if (U_FAILURE(*status)) {
  249. return false;
  250. }
  251. UDataMemory_init(pData); /* Clear the output struct. */
  252. /* open the input file */
  253. file=fopen(path, "rb");
  254. if(file==nullptr) {
  255. return false;
  256. }
  257. /* get the file length */
  258. fileLength=umap_fsize(file);
  259. if(ferror(file) || fileLength<=20) {
  260. fclose(file);
  261. return false;
  262. }
  263. /* allocate the memory to hold the file data */
  264. p=uprv_malloc(fileLength);
  265. if(p==nullptr) {
  266. fclose(file);
  267. *status = U_MEMORY_ALLOCATION_ERROR;
  268. return false;
  269. }
  270. /* read the file */
  271. if(fileLength!=fread(p, 1, fileLength, file)) {
  272. uprv_free(p);
  273. fclose(file);
  274. return false;
  275. }
  276. fclose(file);
  277. pData->map=p;
  278. pData->pHeader=(const DataHeader *)p;
  279. pData->mapAddr=p;
  280. return true;
  281. }
  282. U_CFUNC void
  283. uprv_unmapFile(UDataMemory *pData) {
  284. if(pData!=nullptr && pData->map!=nullptr) {
  285. uprv_free(pData->map);
  286. pData->map = nullptr;
  287. pData->mapAddr = nullptr;
  288. pData->pHeader = nullptr;
  289. }
  290. }
  291. #elif MAP_IMPLEMENTATION==MAP_390DLL
/* 390 specific Library Loading.
 * This is the only platform left that dynamically loads an ICU Data Library.
 * All other platforms use .data files when dynamic loading is required, but
 * this turns out to be awkward to support in 390 batch mode.
 *
 * The idea here is to hide the fact that 390 is using dll loading from the
 * rest of ICU, and make it look like there is file loading happening.
 *
 */
  301. static char *strcpy_returnEnd(char *dest, const char *src)
  302. {
  303. while((*dest=*src)!=0) {
  304. ++dest;
  305. ++src;
  306. }
  307. return dest;
  308. }
  309. /*------------------------------------------------------------------------------
  310. *
  311. * computeDirPath given a user-supplied path of an item to be opened,
  312. * compute and return
  313. * - the full directory path to be used
  314. * when opening the file.
  315. * - Pointer to null at end of above returned path
  316. *
  317. * Parameters:
  318. * path: input path. Buffer is not altered.
  319. * pathBuffer: Output buffer. Any contents are overwritten.
  320. *
  321. * Returns:
  322. * Pointer to null termination in returned pathBuffer.
  323. *
  324. * TODO: This works the way ICU historically has, but the
  325. * whole data fallback search path is so complicated that
  326. * probably almost no one will ever really understand it,
  327. * the potential for confusion is large. (It's not just
  328. * this one function, but the whole scheme.)
  329. *
  330. *------------------------------------------------------------------------------*/
  331. static char *uprv_computeDirPath(const char *path, char *pathBuffer)
  332. {
  333. char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */
  334. int32_t pathLen; /* Length of the returned directory path */
  335. finalSlash = 0;
  336. if (path != 0) {
  337. finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR);
  338. }
  339. *pathBuffer = 0;
  340. if (finalSlash == 0) {
  341. /* No user-supplied path.
  342. * Copy the ICU_DATA path to the path buffer and return that*/
  343. const char *icuDataDir;
  344. icuDataDir=u_getDataDirectory();
  345. if(icuDataDir!=nullptr && *icuDataDir!=0) {
  346. return strcpy_returnEnd(pathBuffer, icuDataDir);
  347. } else {
  348. /* there is no icuDataDir either. Just return the empty pathBuffer. */
  349. return pathBuffer;
  350. }
  351. }
  352. /* User supplied path did contain a directory portion.
  353. * Copy it to the output path buffer */
  354. pathLen = (int32_t)(finalSlash - path + 1);
  355. uprv_memcpy(pathBuffer, path, pathLen);
  356. *(pathBuffer+pathLen) = 0;
  357. return pathBuffer+pathLen;
  358. }
  359. # define DATA_TYPE "dat"
  360. U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
  361. const char *inBasename;
  362. char *basename;
  363. char pathBuffer[1024];
  364. const DataHeader *pHeader;
  365. dllhandle *handle;
  366. void *val=0;
  367. if (U_FAILURE(*status)) {
  368. return false;
  369. }
  370. inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR);
  371. if(inBasename==nullptr) {
  372. inBasename = path;
  373. } else {
  374. inBasename++;
  375. }
  376. basename=uprv_computeDirPath(path, pathBuffer);
  377. if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) {
  378. /* must mmap file... for build */
  379. int fd;
  380. int length;
  381. struct stat mystat;
  382. void *data;
  383. UDataMemory_init(pData); /* Clear the output struct. */
  384. /* determine the length of the file */
  385. if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
  386. return false;
  387. }
  388. length=mystat.st_size;
  389. /* open the file */
  390. fd=open(path, O_RDONLY);
  391. if(fd==-1) {
  392. return false;
  393. }
  394. /* get a view of the mapping */
  395. data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
  396. close(fd); /* no longer needed */
  397. if(data==MAP_FAILED) {
  398. // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
  399. return false;
  400. }
  401. pData->map = (char *)data + length;
  402. pData->pHeader=(const DataHeader *)data;
  403. pData->mapAddr = data;
  404. return true;
  405. }
  406. # ifdef OS390BATCH
  407. /* ### hack: we still need to get u_getDataDirectory() fixed
  408. for OS/390 (batch mode - always return "//"? )
  409. and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!)
  410. This is probably due to the strange file system on OS/390. It's more like
  411. a database with short entry names than a typical file system. */
  412. /* U_ICUDATA_NAME should always have the correct name */
  413. /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */
  414. /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */
  415. /* PROJECT!!!!! */
  416. uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA");
  417. # else
  418. /* set up the library name */
  419. uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX);
  420. # endif
  421. # ifdef UDATA_DEBUG
  422. fprintf(stderr, "dllload: %s ", pathBuffer);
  423. # endif
  424. handle=dllload(pathBuffer);
  425. # ifdef UDATA_DEBUG
  426. fprintf(stderr, " -> %08X\n", handle );
  427. # endif
  428. if(handle != nullptr) {
  429. /* we have a data DLL - what kind of lookup do we need here? */
  430. /* try to find the Table of Contents */
  431. UDataMemory_init(pData); /* Clear the output struct. */
  432. val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME);
  433. if(val == 0) {
  434. /* failed... so keep looking */
  435. return false;
  436. }
  437. # ifdef UDATA_DEBUG
  438. fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val);
  439. # endif
  440. pData->pHeader=(const DataHeader *)val;
  441. return true;
  442. } else {
  443. return false; /* no handle */
  444. }
  445. }
  446. U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
  447. if(pData!=nullptr && pData->map!=nullptr) {
  448. uprv_free(pData->map);
  449. pData->map = nullptr;
  450. pData->mapAddr = nullptr;
  451. pData->pHeader = nullptr;
  452. }
  453. }
  454. #else
  455. # error MAP_IMPLEMENTATION is set incorrectly
  456. #endif