nsReadableUtils.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsReadableUtils.h"
  6. #include "nsReadableUtilsImpl.h"
  7. #include <algorithm>
  8. #include "mozilla/CheckedInt.h"
  9. #include "nscore.h"
  10. #include "nsMemory.h"
  11. #include "nsString.h"
  12. #include "nsTArray.h"
  13. #include "nsUTF8Utils.h"
  14. using mozilla::IsASCII;
  15. /**
  16. * Fallback implementation for finding the first non-ASCII character in a
  17. * UTF-16 string.
  18. */
  19. static inline int32_t
  20. FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
  21. {
  22. typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
  23. const size_t kMask = p::mask();
  24. const uintptr_t kAlignMask = p::alignMask();
  25. const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
  26. const char16_t* idx = aBegin;
  27. // Align ourselves to a word boundary.
  28. for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
  29. if (!IsASCII(*idx)) {
  30. return idx - aBegin;
  31. }
  32. }
  33. // Check one word at a time.
  34. const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
  35. for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
  36. const size_t word = *reinterpret_cast<const size_t*>(idx);
  37. if (word & kMask) {
  38. return idx - aBegin;
  39. }
  40. }
  41. // Take care of the remainder one character at a time.
  42. for (; idx != aEnd; idx++) {
  43. if (!IsASCII(*idx)) {
  44. return idx - aBegin;
  45. }
  46. }
  47. return -1;
  48. }
  49. /*
  50. * This function returns -1 if all characters in str are ASCII characters.
  51. * Otherwise, it returns a value less than or equal to the index of the first
  52. * ASCII character in str. For example, if first non-ASCII character is at
  53. * position 25, it may return 25, 24, or 16. But it guarantees
  54. * there are only ASCII characters before returned value.
  55. */
  56. static inline int32_t
  57. FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
  58. {
  59. #ifdef MOZILLA_MAY_SUPPORT_SSE2
  60. if (mozilla::supports_sse2()) {
  61. return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
  62. }
  63. #endif
  64. return FirstNonASCIIUnvectorized(aBegin, aEnd);
  65. }
  66. void
  67. LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
  68. {
  69. aDest.Truncate();
  70. LossyAppendUTF16toASCII(aSource, aDest);
  71. }
  72. void
  73. CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
  74. {
  75. aDest.Truncate();
  76. AppendASCIItoUTF16(aSource, aDest);
  77. }
  78. void
  79. LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
  80. {
  81. aDest.Truncate();
  82. if (aSource) {
  83. LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
  84. }
  85. }
  86. void
  87. CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
  88. {
  89. aDest.Truncate();
  90. if (aSource) {
  91. AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
  92. }
  93. }
  94. void
  95. CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
  96. {
  97. if (!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
  98. // Note that this may wildly underestimate the allocation that failed, as
  99. // we report the length of aSource as UTF-16 instead of UTF-8.
  100. aDest.AllocFailed(aDest.Length() + aSource.Length());
  101. }
  102. }
  103. bool
  104. CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
  105. const mozilla::fallible_t& aFallible)
  106. {
  107. aDest.Truncate();
  108. if (!AppendUTF16toUTF8(aSource, aDest, aFallible)) {
  109. return false;
  110. }
  111. return true;
  112. }
  113. void
  114. CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
  115. {
  116. aDest.Truncate();
  117. AppendUTF8toUTF16(aSource, aDest);
  118. }
  119. void
  120. CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
  121. {
  122. aDest.Truncate();
  123. AppendUTF16toUTF8(aSource, aDest);
  124. }
  125. void
  126. CopyUTF8toUTF16(const char* aSource, nsAString& aDest)
  127. {
  128. aDest.Truncate();
  129. AppendUTF8toUTF16(aSource, aDest);
  130. }
  131. void
  132. LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest)
  133. {
  134. uint32_t old_dest_length = aDest.Length();
  135. aDest.SetLength(old_dest_length + aSource.Length());
  136. nsAString::const_iterator fromBegin, fromEnd;
  137. nsACString::iterator dest;
  138. aDest.BeginWriting(dest);
  139. dest.advance(old_dest_length);
  140. // right now, this won't work on multi-fragment destinations
  141. LossyConvertEncoding16to8 converter(dest.get());
  142. copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  143. converter);
  144. }
  145. void
  146. AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
  147. {
  148. if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible)) {
  149. aDest.AllocFailed(aDest.Length() + aSource.Length());
  150. }
  151. }
  152. bool
  153. AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
  154. const mozilla::fallible_t& aFallible)
  155. {
  156. uint32_t old_dest_length = aDest.Length();
  157. if (!aDest.SetLength(old_dest_length + aSource.Length(),
  158. aFallible)) {
  159. return false;
  160. }
  161. nsACString::const_iterator fromBegin, fromEnd;
  162. nsAString::iterator dest;
  163. aDest.BeginWriting(dest);
  164. dest.advance(old_dest_length);
  165. // right now, this won't work on multi-fragment destinations
  166. LossyConvertEncoding8to16 converter(dest.get());
  167. copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  168. converter);
  169. return true;
  170. }
  171. void
  172. LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
  173. {
  174. if (aSource) {
  175. LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
  176. }
  177. }
  178. bool
  179. AppendASCIItoUTF16(const char* aSource, nsAString& aDest, const mozilla::fallible_t& aFallible)
  180. {
  181. if (aSource) {
  182. return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible);
  183. }
  184. return true;
  185. }
  186. void
  187. AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
  188. {
  189. if (aSource) {
  190. AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
  191. }
  192. }
  193. void
  194. AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
  195. {
  196. if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
  197. // Note that this may wildly underestimate the allocation that failed, as
  198. // we report the length of aSource as UTF-16 instead of UTF-8.
  199. aDest.AllocFailed(aDest.Length() + aSource.Length());
  200. }
  201. }
  202. bool
  203. AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
  204. const mozilla::fallible_t& aFallible)
  205. {
  206. // At 16 characters analysis showed better performance of both the all ASCII
  207. // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
  208. // that length.
  209. const nsAString::size_type kFastPathMinLength = 16;
  210. int32_t firstNonASCII = 0;
  211. if (aSource.Length() >= kFastPathMinLength) {
  212. firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
  213. }
  214. if (firstNonASCII == -1) {
  215. // This is all ASCII, we can use the more efficient lossy append.
  216. mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
  217. new_length += aDest.Length();
  218. if (!new_length.isValid() ||
  219. !aDest.SetCapacity(new_length.value(), aFallible)) {
  220. return false;
  221. }
  222. LossyAppendUTF16toASCII(aSource, aDest);
  223. return true;
  224. }
  225. nsAString::const_iterator source_start, source_end;
  226. CalculateUTF8Size calculator;
  227. aSource.BeginReading(source_start);
  228. aSource.EndReading(source_end);
  229. // Skip the characters that we know are single byte.
  230. source_start.advance(firstNonASCII);
  231. copy_string(source_start,
  232. source_end, calculator);
  233. // Include the ASCII characters that were skipped in the count.
  234. size_t count = calculator.Size() + firstNonASCII;
  235. if (count) {
  236. auto old_dest_length = aDest.Length();
  237. // Grow the buffer if we need to.
  238. mozilla::CheckedInt<nsACString::size_type> new_length(count);
  239. new_length += old_dest_length;
  240. if (!new_length.isValid() ||
  241. !aDest.SetLength(new_length.value(), aFallible)) {
  242. return false;
  243. }
  244. // All ready? Time to convert
  245. nsAString::const_iterator ascii_end;
  246. aSource.BeginReading(ascii_end);
  247. if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
  248. // Use the more efficient lossy converter for the ASCII portion.
  249. LossyConvertEncoding16to8 lossy_converter(
  250. aDest.BeginWriting() + old_dest_length);
  251. nsAString::const_iterator ascii_start;
  252. aSource.BeginReading(ascii_start);
  253. ascii_end.advance(firstNonASCII);
  254. copy_string(ascii_start, ascii_end, lossy_converter);
  255. } else {
  256. // Not using the lossy shortcut, we need to include the leading ASCII
  257. // chars.
  258. firstNonASCII = 0;
  259. }
  260. ConvertUTF16toUTF8 converter(
  261. aDest.BeginWriting() + old_dest_length + firstNonASCII);
  262. copy_string(ascii_end,
  263. aSource.EndReading(source_end), converter);
  264. NS_ASSERTION(converter.Size() == count - firstNonASCII,
  265. "Unexpected disparity between CalculateUTF8Size and "
  266. "ConvertUTF16toUTF8");
  267. }
  268. return true;
  269. }
  270. void
  271. AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
  272. {
  273. if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible)) {
  274. aDest.AllocFailed(aDest.Length() + aSource.Length());
  275. }
  276. }
  277. bool
  278. AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest,
  279. const mozilla::fallible_t& aFallible)
  280. {
  281. nsACString::const_iterator source_start, source_end;
  282. CalculateUTF8Length calculator;
  283. copy_string(aSource.BeginReading(source_start),
  284. aSource.EndReading(source_end), calculator);
  285. uint32_t count = calculator.Length();
  286. // Avoid making the string mutable if we're appending an empty string
  287. if (count) {
  288. uint32_t old_dest_length = aDest.Length();
  289. // Grow the buffer if we need to.
  290. if (!aDest.SetLength(old_dest_length + count, aFallible)) {
  291. return false;
  292. }
  293. // All ready? Time to convert
  294. ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
  295. copy_string(aSource.BeginReading(source_start),
  296. aSource.EndReading(source_end), converter);
  297. NS_ASSERTION(converter.ErrorEncountered() ||
  298. converter.Length() == count,
  299. "CalculateUTF8Length produced the wrong length");
  300. if (converter.ErrorEncountered()) {
  301. NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
  302. aDest.SetLength(old_dest_length);
  303. }
  304. }
  305. return true;
  306. }
  307. void
  308. AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
  309. {
  310. if (aSource) {
  311. AppendUTF16toUTF8(nsDependentString(aSource), aDest);
  312. }
  313. }
  314. void
  315. AppendUTF8toUTF16(const char* aSource, nsAString& aDest)
  316. {
  317. if (aSource) {
  318. AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
  319. }
  320. }
  321. /**
  322. * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
  323. *
  324. * @param aSource an string you will eventually be making a copy of
  325. * @return a new buffer (of the type specified by the second parameter) which you must free with |free|.
  326. *
  327. */
  328. template <class FromStringT, class ToCharT>
  329. inline
  330. ToCharT*
  331. AllocateStringCopy(const FromStringT& aSource, ToCharT*)
  332. {
  333. return static_cast<ToCharT*>(moz_xmalloc(
  334. (aSource.Length() + 1) * sizeof(ToCharT)));
  335. }
  336. char*
  337. ToNewCString(const nsAString& aSource)
  338. {
  339. char* result = AllocateStringCopy(aSource, (char*)0);
  340. if (!result) {
  341. return nullptr;
  342. }
  343. nsAString::const_iterator fromBegin, fromEnd;
  344. LossyConvertEncoding16to8 converter(result);
  345. copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  346. converter).write_terminator();
  347. return result;
  348. }
  349. char*
  350. ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
  351. {
  352. nsAString::const_iterator start, end;
  353. CalculateUTF8Size calculator;
  354. copy_string(aSource.BeginReading(start), aSource.EndReading(end),
  355. calculator);
  356. if (aUTF8Count) {
  357. *aUTF8Count = calculator.Size();
  358. }
  359. char* result = static_cast<char*>
  360. (moz_xmalloc(calculator.Size() + 1));
  361. if (!result) {
  362. return nullptr;
  363. }
  364. ConvertUTF16toUTF8 converter(result);
  365. copy_string(aSource.BeginReading(start), aSource.EndReading(end),
  366. converter).write_terminator();
  367. NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
  368. return result;
  369. }
  370. char*
  371. ToNewCString(const nsACString& aSource)
  372. {
  373. // no conversion needed, just allocate a buffer of the correct length and copy into it
  374. char* result = AllocateStringCopy(aSource, (char*)0);
  375. if (!result) {
  376. return nullptr;
  377. }
  378. nsACString::const_iterator fromBegin, fromEnd;
  379. char* toBegin = result;
  380. *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  381. toBegin) = char(0);
  382. return result;
  383. }
  384. char16_t*
  385. ToNewUnicode(const nsAString& aSource)
  386. {
  387. // no conversion needed, just allocate a buffer of the correct length and copy into it
  388. char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
  389. if (!result) {
  390. return nullptr;
  391. }
  392. nsAString::const_iterator fromBegin, fromEnd;
  393. char16_t* toBegin = result;
  394. *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  395. toBegin) = char16_t(0);
  396. return result;
  397. }
  398. char16_t*
  399. ToNewUnicode(const nsACString& aSource)
  400. {
  401. char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
  402. if (!result) {
  403. return nullptr;
  404. }
  405. nsACString::const_iterator fromBegin, fromEnd;
  406. LossyConvertEncoding8to16 converter(result);
  407. copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  408. converter).write_terminator();
  409. return result;
  410. }
  411. uint32_t
  412. CalcUTF8ToUnicodeLength(const nsACString& aSource)
  413. {
  414. nsACString::const_iterator start, end;
  415. CalculateUTF8Length calculator;
  416. copy_string(aSource.BeginReading(start), aSource.EndReading(end),
  417. calculator);
  418. return calculator.Length();
  419. }
  420. char16_t*
  421. UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
  422. uint32_t* aUTF16Count)
  423. {
  424. nsACString::const_iterator start, end;
  425. ConvertUTF8toUTF16 converter(aBuffer);
  426. copy_string(aSource.BeginReading(start),
  427. aSource.EndReading(end),
  428. converter).write_terminator();
  429. if (aUTF16Count) {
  430. *aUTF16Count = converter.Length();
  431. }
  432. return aBuffer;
  433. }
  434. char16_t*
  435. UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
  436. {
  437. const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
  438. const size_t buffer_size = (length + 1) * sizeof(char16_t);
  439. char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size));
  440. if (!buffer) {
  441. return nullptr;
  442. }
  443. uint32_t copied;
  444. UTF8ToUnicodeBuffer(aSource, buffer, &copied);
  445. NS_ASSERTION(length == copied, "length mismatch");
  446. if (aUTF16Count) {
  447. *aUTF16Count = copied;
  448. }
  449. return buffer;
  450. }
  451. char16_t*
  452. CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
  453. uint32_t aLength)
  454. {
  455. nsAString::const_iterator fromBegin, fromEnd;
  456. char16_t* toBegin = aDest;
  457. copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
  458. aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)),
  459. toBegin);
  460. return aDest;
  461. }
  462. void
  463. CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
  464. const nsAString::const_iterator& aSrcEnd,
  465. nsAString& aDest)
  466. {
  467. aDest.SetLength(Distance(aSrcStart, aSrcEnd));
  468. nsAString::char_iterator dest = aDest.BeginWriting();
  469. nsAString::const_iterator fromBegin(aSrcStart);
  470. copy_string(fromBegin, aSrcEnd, dest);
  471. }
  472. void
  473. AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
  474. const nsAString::const_iterator& aSrcEnd,
  475. nsAString& aDest)
  476. {
  477. uint32_t oldLength = aDest.Length();
  478. aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
  479. nsAString::char_iterator dest = aDest.BeginWriting() + oldLength;
  480. nsAString::const_iterator fromBegin(aSrcStart);
  481. copy_string(fromBegin, aSrcEnd, dest);
  482. }
  483. bool
  484. IsASCII(const nsAString& aString)
  485. {
  486. static const char16_t NOT_ASCII = char16_t(~0x007F);
  487. // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
  488. nsAString::const_iterator iter, done_reading;
  489. aString.BeginReading(iter);
  490. aString.EndReading(done_reading);
  491. const char16_t* c = iter.get();
  492. const char16_t* end = done_reading.get();
  493. while (c < end) {
  494. if (*c++ & NOT_ASCII) {
  495. return false;
  496. }
  497. }
  498. return true;
  499. }
  500. bool
  501. IsASCII(const nsACString& aString)
  502. {
  503. static const char NOT_ASCII = char(~0x7F);
  504. // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
  505. nsACString::const_iterator iter, done_reading;
  506. aString.BeginReading(iter);
  507. aString.EndReading(done_reading);
  508. const char* c = iter.get();
  509. const char* end = done_reading.get();
  510. while (c < end) {
  511. if (*c++ & NOT_ASCII) {
  512. return false;
  513. }
  514. }
  515. return true;
  516. }
  517. bool
  518. IsUTF8(const nsACString& aString, bool aRejectNonChar)
  519. {
  520. nsReadingIterator<char> done_reading;
  521. aString.EndReading(done_reading);
  522. int32_t state = 0;
  523. bool overlong = false;
  524. bool surrogate = false;
  525. bool nonchar = false;
  526. uint16_t olupper = 0; // overlong byte upper bound.
  527. uint16_t slower = 0; // surrogate byte lower bound.
  528. nsReadingIterator<char> iter;
  529. aString.BeginReading(iter);
  530. const char* ptr = iter.get();
  531. const char* end = done_reading.get();
  532. while (ptr < end) {
  533. uint8_t c;
  534. if (0 == state) {
  535. c = *ptr++;
  536. if (UTF8traits::isASCII(c)) {
  537. continue;
  538. }
  539. if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong.
  540. return false;
  541. } else if (UTF8traits::is2byte(c)) {
  542. state = 1;
  543. } else if (UTF8traits::is3byte(c)) {
  544. state = 2;
  545. if (c == 0xE0) { // to exclude E0[80-9F][80-BF]
  546. overlong = true;
  547. olupper = 0x9F;
  548. } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint
  549. surrogate = true;
  550. slower = 0xA0;
  551. } else if (c == 0xEF) { // EF BF [BE-BF] : non-character
  552. nonchar = true;
  553. }
  554. } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
  555. state = 3;
  556. nonchar = true;
  557. if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2}
  558. overlong = true;
  559. olupper = 0x8F;
  560. } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF]
  561. // actually not surrogates but codepoints beyond 0x10FFFF
  562. surrogate = true;
  563. slower = 0x90;
  564. }
  565. } else {
  566. return false; // Not UTF-8 string
  567. }
  568. }
  569. if (nonchar && !aRejectNonChar) {
  570. nonchar = false;
  571. }
  572. while (ptr < end && state) {
  573. c = *ptr++;
  574. --state;
  575. // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
  576. if (nonchar &&
  577. ((!state && c < 0xBE) ||
  578. (state == 1 && c != 0xBF) ||
  579. (state == 2 && 0x0F != (0x0F & c)))) {
  580. nonchar = false;
  581. }
  582. if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) ||
  583. (surrogate && slower <= c) || (nonchar && !state)) {
  584. return false; // Not UTF-8 string
  585. }
  586. overlong = surrogate = false;
  587. }
  588. }
  589. return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
  590. }
  /**
   * A character sink for in-place case conversion.
   *
   * write() casts away the constness of the buffer it is handed and rewrites
   * the ASCII letters 'a'..'z' in it to upper case; every other byte is left
   * untouched.
   */
  class ConvertToUpperCase
  {
  public:
    typedef char value_type;

    /**
     * Upper-cases aSourceLength bytes starting at aSource, in place.
     *
     * @return aSourceLength
     */
    uint32_t
    write(const char* aSource, uint32_t aSourceLength)
    {
      char* buffer = const_cast<char*>(aSource);

      for (uint32_t i = 0; i < aSourceLength; ++i) {
        const char ch = buffer[i];
        if ('a' <= ch && ch <= 'z') {
          buffer[i] = ch - ('a' - 'A');
        }
      }

      return aSourceLength;
    }
  };
  613. void
  614. ToUpperCase(nsCSubstring& aCString)
  615. {
  616. ConvertToUpperCase converter;
  617. char* start;
  618. converter.write(aCString.BeginWriting(start), aCString.Length());
  619. }
  620. /**
  621. * A character sink for copying with case conversion.
  622. */
  623. class CopyToUpperCase
  624. {
  625. public:
  626. typedef char value_type;
  627. explicit CopyToUpperCase(nsACString::iterator& aDestIter,
  628. const nsACString::iterator& aEndIter)
  629. : mIter(aDestIter)
  630. , mEnd(aEndIter)
  631. {
  632. }
  633. uint32_t
  634. write(const char* aSource, uint32_t aSourceLength)
  635. {
  636. uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
  637. char* cp = mIter.get();
  638. const char* end = aSource + len;
  639. while (aSource != end) {
  640. char ch = *aSource;
  641. if ((ch >= 'a') && (ch <= 'z')) {
  642. *cp = ch - ('a' - 'A');
  643. } else {
  644. *cp = ch;
  645. }
  646. ++aSource;
  647. ++cp;
  648. }
  649. mIter.advance(len);
  650. return len;
  651. }
  652. protected:
  653. nsACString::iterator& mIter;
  654. const nsACString::iterator& mEnd;
  655. };
  656. void
  657. ToUpperCase(const nsACString& aSource, nsACString& aDest)
  658. {
  659. nsACString::const_iterator fromBegin, fromEnd;
  660. nsACString::iterator toBegin, toEnd;
  661. aDest.SetLength(aSource.Length());
  662. CopyToUpperCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
  663. copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  664. converter);
  665. }
  /**
   * A character sink for in-place case conversion.
   *
   * write() casts away the constness of the buffer it is handed and rewrites
   * the ASCII letters 'A'..'Z' in it to lower case; every other byte is left
   * untouched.
   */
  class ConvertToLowerCase
  {
  public:
    typedef char value_type;

    /**
     * Lower-cases aSourceLength bytes starting at aSource, in place.
     *
     * @return aSourceLength
     */
    uint32_t
    write(const char* aSource, uint32_t aSourceLength)
    {
      char* buffer = const_cast<char*>(aSource);

      for (uint32_t i = 0; i < aSourceLength; ++i) {
        const char ch = buffer[i];
        if ('A' <= ch && ch <= 'Z') {
          buffer[i] = ch + ('a' - 'A');
        }
      }

      return aSourceLength;
    }
  };
  688. void
  689. ToLowerCase(nsCSubstring& aCString)
  690. {
  691. ConvertToLowerCase converter;
  692. char* start;
  693. converter.write(aCString.BeginWriting(start), aCString.Length());
  694. }
  695. /**
  696. * A character sink for copying with case conversion.
  697. */
  698. class CopyToLowerCase
  699. {
  700. public:
  701. typedef char value_type;
  702. explicit CopyToLowerCase(nsACString::iterator& aDestIter,
  703. const nsACString::iterator& aEndIter)
  704. : mIter(aDestIter)
  705. , mEnd(aEndIter)
  706. {
  707. }
  708. uint32_t
  709. write(const char* aSource, uint32_t aSourceLength)
  710. {
  711. uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
  712. char* cp = mIter.get();
  713. const char* end = aSource + len;
  714. while (aSource != end) {
  715. char ch = *aSource;
  716. if ((ch >= 'A') && (ch <= 'Z')) {
  717. *cp = ch + ('a' - 'A');
  718. } else {
  719. *cp = ch;
  720. }
  721. ++aSource;
  722. ++cp;
  723. }
  724. mIter.advance(len);
  725. return len;
  726. }
  727. protected:
  728. nsACString::iterator& mIter;
  729. const nsACString::iterator& mEnd;
  730. };
  731. void
  732. ToLowerCase(const nsACString& aSource, nsACString& aDest)
  733. {
  734. nsACString::const_iterator fromBegin, fromEnd;
  735. nsACString::iterator toBegin, toEnd;
  736. aDest.SetLength(aSource.Length());
  737. CopyToLowerCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
  738. copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
  739. converter);
  740. }
  741. bool
  742. ParseString(const nsACString& aSource, char aDelimiter,
  743. nsTArray<nsCString>& aArray)
  744. {
  745. nsACString::const_iterator start, end;
  746. aSource.BeginReading(start);
  747. aSource.EndReading(end);
  748. uint32_t oldLength = aArray.Length();
  749. for (;;) {
  750. nsACString::const_iterator delimiter = start;
  751. FindCharInReadable(aDelimiter, delimiter, end);
  752. if (delimiter != start) {
  753. if (!aArray.AppendElement(Substring(start, delimiter))) {
  754. aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);
  755. return false;
  756. }
  757. }
  758. if (delimiter == end) {
  759. break;
  760. }
  761. start = ++delimiter;
  762. if (start == end) {
  763. break;
  764. }
  765. }
  766. return true;
  767. }
  /**
   * Searches [aSearchStart, aSearchEnd) from left to right for the first
   * occurrence of aPattern.
   *
   * @param aPattern the text to look for
   * @param aSearchStart in: start of the range to search; out: start of the
   *        match, or equal to aSearchEnd when nothing was found in a non-empty
   *        range
   * @param aSearchEnd in: end of the range to search; out: end of the match
   *        (unchanged when nothing was found)
   * @param aCompare called as aCompare(patternPtr, searchPtr, 1, 1); a
   *        non-zero result means the two characters differ
   * @return true if a match was found. An empty search range returns false
   *         without touching the iterators. An empty pattern is reported as
   *         not found: there is no first character to compare, and comparing
   *         anyway would read past the end of the pattern.
   */
  template <class StringT, class IteratorT, class Comparator>
  bool
  FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                      IteratorT& aSearchEnd, const Comparator& aCompare)
  {
    bool found_it = false;

    // only bother searching at all if we're given a non-empty range to search
    if (aSearchStart != aSearchEnd) {
      IteratorT aPatternStart, aPatternEnd;
      aPattern.BeginReading(aPatternStart);
      aPattern.EndReading(aPatternEnd);

      // Nothing to match against: report "not found" the same way the search
      // loop does when it runs out of string.
      if (aPatternStart == aPatternEnd) {
        aSearchStart = aSearchEnd;
        return false;
      }

      // outer loop keeps searching till we find it or run out of string to search
      while (!found_it) {
        // fast inner loop (that's what it's called, not what it is) looks for a potential match
        while (aSearchStart != aSearchEnd &&
               aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
          ++aSearchStart;
        }

        // if we broke out of the `fast' loop because we're out of string ... we're done: no match
        if (aSearchStart == aSearchEnd) {
          break;
        }

        // otherwise, we're at a potential match, let's see if we really hit one
        IteratorT testPattern(aPatternStart);
        IteratorT testSearch(aSearchStart);

        // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
        for (;;) {
          // we already compared the first character in the outer loop,
          // so we'll advance before the next comparison
          ++testPattern;
          ++testSearch;

          // if we verified all the way to the end of the pattern, then we found it!
          if (testPattern == aPatternEnd) {
            found_it = true;
            aSearchEnd = testSearch; // return the exact found range through the parameters
            break;
          }

          // if we got to end of the string we're searching before we hit the end of the
          // pattern, we'll never find what we're looking for
          if (testSearch == aSearchEnd) {
            aSearchStart = aSearchEnd;
            break;
          }

          // else if we mismatched ... it's time to advance to the next search position
          // and get back into the `fast' loop
          if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
            ++aSearchStart;
            break;
          }
        }
      }
    }

    return found_it;
  }
  /**
   * This searches the range [aSearchStart, aSearchEnd) from right to left,
   * and returns the first match found that way (i.e. the right-most
   * occurrence), if any.
   *
   * @param aPattern     the string to look for
   * @param aSearchStart in: start of the range to search; out: start of the
   *                     match on success, equal to aSearchEnd on failure
   * @param aSearchEnd   in: end of the range to search; out: end of the match
   *                     on success, unchanged on failure
   * @param aCompare     comparator invoked as aCompare(lhs, rhs, 1, 1) on
   *                     single characters; a zero result means "equal"
   * @return true if the pattern was found
   *
   * An empty pattern is reported as "not found".  Without the explicit check,
   * stepping back from the pattern's end would move before its first
   * character; the loops below could never report a match in that situation,
   * so the result is the same but no out-of-range character is read.
   */
  template <class StringT, class IteratorT, class Comparator>
  bool
  RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                       IteratorT& aSearchEnd, const Comparator& aCompare)
  {
    IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
    aPattern.BeginReading(patternStart);
    aPattern.EndReading(patternEnd);

    // Nothing to match against: report failure using the usual
    // "start == end" convention.
    if (patternStart == patternEnd) {
      aSearchStart = aSearchEnd;
      return false;
    }

    // Point to the last character in the pattern
    --patternEnd;

    // outer loop keeps searching till we run out of string to search
    while (aSearchStart != searchEnd) {
      // Point to the end position of the next possible match
      --searchEnd;

      // Only explore further if the last pattern character matches here
      if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) != 0) {
        continue;
      }

      // We're at a potential match, let's see if we really hit one
      IteratorT testPattern(patternEnd);
      IteratorT testSearch(searchEnd);

      // inner loop verifies the potential match, walking both iterators
      // backwards one character at a time
      do {
        // verified all the way back to the first pattern character: found it
        if (testPattern == patternStart) {
          aSearchStart = testSearch;  // point to start of match
          aSearchEnd = ++searchEnd;   // point to end of match
          return true;
        }

        // reached the start of the searched range before the start of the
        // pattern; positions further left have even less room, so give up
        if (testSearch == aSearchStart) {
          aSearchStart = aSearchEnd;
          return false;
        }

        // test previous character for a match
        --testPattern;
        --testSearch;
      } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
    }

    aSearchStart = aSearchEnd;
    return false;
  }
  867. bool
  868. FindInReadable(const nsAString& aPattern,
  869. nsAString::const_iterator& aSearchStart,
  870. nsAString::const_iterator& aSearchEnd,
  871. const nsStringComparator& aComparator)
  872. {
  873. return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  874. }
  875. bool
  876. FindInReadable(const nsACString& aPattern,
  877. nsACString::const_iterator& aSearchStart,
  878. nsACString::const_iterator& aSearchEnd,
  879. const nsCStringComparator& aComparator)
  880. {
  881. return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  882. }
  883. bool
  884. CaseInsensitiveFindInReadable(const nsACString& aPattern,
  885. nsACString::const_iterator& aSearchStart,
  886. nsACString::const_iterator& aSearchEnd)
  887. {
  888. return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
  889. nsCaseInsensitiveCStringComparator());
  890. }
  891. bool
  892. RFindInReadable(const nsAString& aPattern,
  893. nsAString::const_iterator& aSearchStart,
  894. nsAString::const_iterator& aSearchEnd,
  895. const nsStringComparator& aComparator)
  896. {
  897. return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  898. }
  899. bool
  900. RFindInReadable(const nsACString& aPattern,
  901. nsACString::const_iterator& aSearchStart,
  902. nsACString::const_iterator& aSearchEnd,
  903. const nsCStringComparator& aComparator)
  904. {
  905. return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  906. }
  907. bool
  908. FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
  909. const nsAString::const_iterator& aSearchEnd)
  910. {
  911. int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
  912. const char16_t* charFoundAt =
  913. nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
  914. if (charFoundAt) {
  915. aSearchStart.advance(charFoundAt - aSearchStart.get());
  916. return true;
  917. }
  918. aSearchStart.advance(fragmentLength);
  919. return false;
  920. }
  921. bool
  922. FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
  923. const nsACString::const_iterator& aSearchEnd)
  924. {
  925. int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
  926. const char* charFoundAt =
  927. nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
  928. if (charFoundAt) {
  929. aSearchStart.advance(charFoundAt - aSearchStart.get());
  930. return true;
  931. }
  932. aSearchStart.advance(fragmentLength);
  933. return false;
  934. }
  935. uint32_t
  936. CountCharInReadable(const nsAString& aStr, char16_t aChar)
  937. {
  938. uint32_t count = 0;
  939. nsAString::const_iterator begin, end;
  940. aStr.BeginReading(begin);
  941. aStr.EndReading(end);
  942. while (begin != end) {
  943. if (*begin == aChar) {
  944. ++count;
  945. }
  946. ++begin;
  947. }
  948. return count;
  949. }
  950. uint32_t
  951. CountCharInReadable(const nsACString& aStr, char aChar)
  952. {
  953. uint32_t count = 0;
  954. nsACString::const_iterator begin, end;
  955. aStr.BeginReading(begin);
  956. aStr.EndReading(end);
  957. while (begin != end) {
  958. if (*begin == aChar) {
  959. ++count;
  960. }
  961. ++begin;
  962. }
  963. return count;
  964. }
  965. bool
  966. StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring)
  967. {
  968. nsAString::size_type src_len = aSource.Length(),
  969. sub_len = aSubstring.Length();
  970. if (sub_len > src_len) {
  971. return false;
  972. }
  973. return Substring(aSource, 0, sub_len).Equals(aSubstring);
  974. }
  975. bool
  976. StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
  977. const nsStringComparator& aComparator)
  978. {
  979. nsAString::size_type src_len = aSource.Length(),
  980. sub_len = aSubstring.Length();
  981. if (sub_len > src_len) {
  982. return false;
  983. }
  984. return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
  985. }
  986. bool
  987. StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring)
  988. {
  989. nsACString::size_type src_len = aSource.Length(),
  990. sub_len = aSubstring.Length();
  991. if (sub_len > src_len) {
  992. return false;
  993. }
  994. return Substring(aSource, 0, sub_len).Equals(aSubstring);
  995. }
  996. bool
  997. StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
  998. const nsCStringComparator& aComparator)
  999. {
  1000. nsACString::size_type src_len = aSource.Length(),
  1001. sub_len = aSubstring.Length();
  1002. if (sub_len > src_len) {
  1003. return false;
  1004. }
  1005. return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
  1006. }
  1007. bool
  1008. StringEndsWith(const nsAString& aSource, const nsAString& aSubstring)
  1009. {
  1010. nsAString::size_type src_len = aSource.Length(),
  1011. sub_len = aSubstring.Length();
  1012. if (sub_len > src_len) {
  1013. return false;
  1014. }
  1015. return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
  1016. }
  1017. bool
  1018. StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
  1019. const nsStringComparator& aComparator)
  1020. {
  1021. nsAString::size_type src_len = aSource.Length(),
  1022. sub_len = aSubstring.Length();
  1023. if (sub_len > src_len) {
  1024. return false;
  1025. }
  1026. return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
  1027. aComparator);
  1028. }
  1029. bool
  1030. StringEndsWith(const nsACString& aSource, const nsACString& aSubstring)
  1031. {
  1032. nsACString::size_type src_len = aSource.Length(),
  1033. sub_len = aSubstring.Length();
  1034. if (sub_len > src_len) {
  1035. return false;
  1036. }
  1037. return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
  1038. }
  1039. bool
  1040. StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
  1041. const nsCStringComparator& aComparator)
  1042. {
  1043. nsACString::size_type src_len = aSource.Length(),
  1044. sub_len = aSubstring.Length();
  1045. if (sub_len > src_len) {
  1046. return false;
  1047. }
  1048. return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
  1049. aComparator);
  1050. }
  1051. static const char16_t empty_buffer[1] = { '\0' };
  1052. const nsAFlatString&
  1053. EmptyString()
  1054. {
  1055. static const nsDependentString sEmpty(empty_buffer);
  1056. return sEmpty;
  1057. }
  1058. const nsAFlatCString&
  1059. EmptyCString()
  1060. {
  1061. static const nsDependentCString sEmpty((const char*)empty_buffer);
  1062. return sEmpty;
  1063. }
  1064. const nsAFlatString&
  1065. NullString()
  1066. {
  1067. static const nsXPIDLString sNull;
  1068. return sNull;
  1069. }
  1070. const nsAFlatCString&
  1071. NullCString()
  1072. {
  1073. static const nsXPIDLCString sNull;
  1074. return sNull;
  1075. }
  1076. int32_t
  1077. CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
  1078. const nsASingleFragmentString& aUTF16String)
  1079. {
  1080. static const uint32_t NOT_ASCII = uint32_t(~0x7F);
  1081. const char* u8;
  1082. const char* u8end;
  1083. aUTF8String.BeginReading(u8);
  1084. aUTF8String.EndReading(u8end);
  1085. const char16_t* u16;
  1086. const char16_t* u16end;
  1087. aUTF16String.BeginReading(u16);
  1088. aUTF16String.EndReading(u16end);
  1089. while (u8 != u8end && u16 != u16end) {
  1090. // Cast away the signedness of *u8 to prevent signextension when
  1091. // converting to uint32_t
  1092. uint32_t c8_32 = (uint8_t)*u8;
  1093. if (c8_32 & NOT_ASCII) {
  1094. bool err;
  1095. c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
  1096. if (err) {
  1097. return INT32_MIN;
  1098. }
  1099. uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
  1100. // The above UTF16CharEnumerator::NextChar() calls can
  1101. // fail, but if it does for anything other than no data to
  1102. // look at (which can't happen here), it returns the
  1103. // Unicode replacement character 0xFFFD for the invalid
  1104. // data they were fed. Ignore that error and treat invalid
  1105. // UTF16 as 0xFFFD.
  1106. //
  1107. // This matches what our UTF16 to UTF8 conversion code
  1108. // does, and thus a UTF8 string that came from an invalid
  1109. // UTF16 string will compare equal to the invalid UTF16
  1110. // string it came from. Same is true for any other UTF16
  1111. // string differs only in the invalid part of the string.
  1112. if (c8_32 != c16_32) {
  1113. return c8_32 < c16_32 ? -1 : 1;
  1114. }
  1115. } else {
  1116. if (c8_32 != *u16) {
  1117. return c8_32 > *u16 ? 1 : -1;
  1118. }
  1119. ++u8;
  1120. ++u16;
  1121. }
  1122. }
  1123. if (u8 != u8end) {
  1124. // We get to the end of the UTF16 string, but no to the end of
  1125. // the UTF8 string. The UTF8 string is longer than the UTF16
  1126. // string
  1127. return 1;
  1128. }
  1129. if (u16 != u16end) {
  1130. // We get to the end of the UTF8 string, but no to the end of
  1131. // the UTF16 string. The UTF16 string is longer than the UTF8
  1132. // string
  1133. return -1;
  1134. }
  1135. // The two strings match.
  1136. return 0;
  1137. }
  1138. void
  1139. AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
  1140. {
  1141. NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
  1142. if (IS_IN_BMP(aSource)) {
  1143. aDest.Append(char16_t(aSource));
  1144. } else {
  1145. aDest.Append(H_SURROGATE(aSource));
  1146. aDest.Append(L_SURROGATE(aSource));
  1147. }
  1148. }
  1149. extern "C" {
  1150. void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
  1151. {
  1152. AppendUTF16toUTF8(*aOther, *aThis);
  1153. }
  1154. void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther)
  1155. {
  1156. AppendUTF8toUTF16(*aOther, *aThis);
  1157. }
  1158. }