FString.java 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. // Copyright (c) 2001, 2004, 2017 Per M.A. Bothner and Brainfood Inc.
  2. // This is free software; for terms and warranty disclaimer see ./COPYING.
  3. package gnu.lists;
  4. import gnu.text.Char;
  5. import java.io.IOException;
  6. import java.io.Writer;
  7. /** Simple adjustable-length vector whose elements are 32-bit code points
  8. * Used for the Scheme string type.
  9. * This isn't a regular SimpleVector because character indexing
  10. * isn't a simple lookup.
  11. * "Sub-range mode" is not used (at least not for Kawa Scheme):
  12. * if you need an immutable sub-string, use an IString (or java.lang.String).
  13. * @author Per Bothner
  14. */
  15. public class FString extends AbstractCharVector<Char>
  16. implements Appendable, CharSeq, Consumable
  17. {
  18. public FString() {
  19. data = empty;
  20. }
  21. public FString(int num) {
  22. data = new char[num];
  23. }
  24. public FString(int num, int value) {
  25. data = new char[value < 0x10000 ? num : 2 * num];
  26. if (value != 0 && num != 0)
  27. insertRepeatedRaw(0, value, num);
  28. }
  29. /** Create an FString from a char[].
  30. * Note that this contructor does *not* copy the argument. */
  31. public FString(char[] values) {
  32. data = values;
  33. }
  34. /** This constructor makes a copy. */
  35. public FString(char[] buffer, int offset, int length) {
  36. data = new char[length];
  37. System.arraycopy(buffer, offset, data, 0, length);
  38. }
  39. public FString(CharSequence seq) {
  40. this(seq, 0, seq.length());
  41. }
  42. /** Copy a substring of a CharSequence.
  43. * @param offset - start offset in 16-bit char units
  44. * @param length - length in 16-bit char units
  45. */
  46. public FString(CharSequence seq, int offset, int length) {
  47. char[] data = new char[length];
  48. if (seq instanceof CharSeq)
  49. ((CharSeq) seq).getChars(offset, offset+length, data, 0);
  50. else if (seq instanceof String || seq instanceof IString) {
  51. seq.toString().getChars(offset, offset+length, data, 0);
  52. this.data = data;
  53. }
  54. else if (seq instanceof StringBuilder)
  55. ((StringBuilder) seq).getChars(offset, offset+length, data, 0);
  56. else if (seq instanceof StringBuffer)
  57. ((StringBuffer) seq).getChars(offset, offset+length, data, 0);
  58. else {
  59. for (int i = length; --i >= 0; )
  60. data[i] = seq.charAt(offset+i);
  61. }
  62. this.data = data;
  63. }
  64. @Override
  65. public int size() { return Strings.sizeInCodePoints(this); }
  66. /** @param index measured in Unicode code points (characters)
  67. */
  68. @Override
  69. public int effectiveIndex(int index) {
  70. return relativeIndex(0, index);
  71. }
  72. private int relativeIndex(int start, int offset) {
  73. if (isSubRange()) throw new InternalError();
  74. int eff = start;
  75. char prev = 0;
  76. int dlen = data.length;
  77. boolean beforeGap = ! isVerySimple();
  78. int limit = beforeGap ? getGapStart() : dlen;
  79. for (;;) {
  80. if (eff >= limit && beforeGap) {
  81. eff = getGapEnd();
  82. limit = dlen;
  83. beforeGap = false;
  84. }
  85. if (offset == 0)
  86. return eff;
  87. char ch = data[eff];
  88. eff++;
  89. if (! (prev >= 0xD800 && prev <= 0xDBFF
  90. && ch >= 0xDC00 && ch <= 0xDFFF))
  91. offset--;
  92. prev = ch;
  93. }
  94. }
  95. @Override
  96. public int createPos (int index, boolean isAfter) {
  97. int eff = effectiveIndex(index);
  98. // createPos ignores buffer gap; effectiveIndex includes it.
  99. // (At least that is what we do for other SimpleVector types.)
  100. if (isGapBuffer() && eff >= getGapEnd())
  101. eff -= getGapSize();
  102. return (eff << 1) | (isAfter ? 1 : 0);
  103. }
  104. @Override
  105. protected int nextIndex(int ipos) {
  106. int dlen = data.length;
  107. int pos = ipos == -1 ? dlen : ipos >>> 1;
  108. if (isGapBuffer() && pos >= getGapStart())
  109. pos += getGapSize();
  110. int eff = 0;
  111. char prev = 0;
  112. int index = 0;
  113. boolean beforeGap = ! isVerySimple();
  114. int limit = beforeGap ? getGapStart() : dlen;
  115. for (;;) {
  116. if (eff >= limit && beforeGap) {
  117. eff = getGapEnd();
  118. limit = dlen;
  119. beforeGap = false;
  120. }
  121. if (eff == pos) {
  122. return index;
  123. }
  124. char ch = data[eff];
  125. eff++;
  126. if (! (prev >= 0xD800 && prev <= 0xDBFF
  127. && ch >= 0xDC00 && ch <= 0xDFFF))
  128. index++;
  129. prev = ch;
  130. }
  131. }
  132. public int createRelativePos(int pos, int delta, boolean isAfter) {
  133. if (delta >= 0 && pos >= 0) {
  134. return (relativeIndex(pos >>> 1, delta) << 1) | (isAfter ? 1 : 0);
  135. }
  136. return super.createRelativePos(pos, delta, isAfter);
  137. }
  138. /** Create a empty string, but with a given initial buffer size. */
  139. public static FString alloc(int sz) {
  140. if (sz > MAX_GAP_SIZE)
  141. sz = MAX_GAP_SIZE;
  142. FString str = new FString(sz);
  143. str.setGapBounds(0, sz);
  144. return str;
  145. }
  146. public final Char getRaw(int index) {
  147. int ch1 = data[index];
  148. if (ch1 >= 0xD800 && ch1 <= 0xDBFF) {
  149. char ch2 = data[index+1];
  150. if (ch2 >= 0xDC00 && ch2 <= 0xDFFF)
  151. ch1 = ((ch1 - 0xD800) << 10) + (ch2 - 0xDC00) + 0x10000;
  152. }
  153. return Char.valueOf(ch1);
  154. }
  155. /** @param index offset in character units (Unicode code points) */
  156. public final Char get(int index) {
  157. return Char.valueOf(Strings.indexByCodePoints(this, index));
  158. }
  159. /** @param fromChar offset in 16-bit code units */
  160. public int indexOf(int ch, int fromChar) {
  161. char c1, c2;
  162. if (ch >= 0x10000) {
  163. c1 = (char) (((ch - 0x10000) >> 10) + 0xD800);
  164. c2 = (char) ((ch & 0x3FF) + 0xDC00);
  165. } else {
  166. c1 = 0;
  167. c2 = (char) ch;
  168. }
  169. int sz = length();
  170. char prev = 0;
  171. for (int i = fromChar; i < sz; i++) {
  172. char cur = charAt(i);
  173. if (cur == c2) {
  174. if (c1 == 0)
  175. return i;
  176. if (prev == c1)
  177. return i-1;
  178. }
  179. prev = cur;
  180. }
  181. return -1;
  182. }
  183. /** @param fromChar offset in 16-bit code units */
  184. public int lastIndexOf(int ch, int fromChar) {
  185. char c1, c2;
  186. if (ch >= 0x10000) {
  187. c1 = (char) (((ch - 0x10000) >> 10) + 0xD800);
  188. c2 = (char) ((ch & 0x3FF) + 0xDC00);
  189. } else {
  190. c1 = 0;
  191. c2 = (char) ch;
  192. }
  193. for (int i = fromChar; --i >= 0; ) {
  194. if (charAt(i) == c2) {
  195. if (c1 == 0)
  196. return i;
  197. if (i > 0 && charAt(i-1) == c1)
  198. return i - 1;
  199. }
  200. }
  201. return -1;
  202. }
  203. /** @param index offset in character units (Unicode code points) */
  204. public Char set(int index, Char value) {
  205. checkCanWrite();
  206. index = Character.offsetByCodePoints(this, 0, index);
  207. Char old = Char.valueOf(characterAt(index));
  208. setCharacterAt(index, value.intValue());
  209. return old;
  210. }
  211. /** @param index offset in character units (Unicode code points) */
  212. @Override
  213. public final void setRaw(int index, Char value) {
  214. index = Character.offsetByCodePoints(this, 0, index);
  215. setCharacterAt(index, value.intValue());
  216. }
  217. /** @param index offset in 16-bit code units */
  218. public final int characterAt(int index) {
  219. // The following uses charAt, which handles adjusting for indexes.
  220. return Strings.characterAt(this, 0, length(), index);
  221. }
  222. /** Return a char[] contain the characters of this string.
  223. * It is unspecified if the result is a copy or shares with this FString.
  224. */
  225. public char[] toCharArray()
  226. {
  227. if (isVerySimple())
  228. return data;
  229. int seq_length = length();
  230. char[] arr = new char[seq_length];
  231. for (int i = 0; i < seq_length; i++)
  232. arr[i] = charAt(i);
  233. return arr;
  234. }
  235. public void shift(int srcStart, int dstStart, int count)
  236. {
  237. System.arraycopy(data, srcStart, data, dstStart, count);
  238. }
  239. public FString copy (int start, int end)
  240. {
  241. char[] copy = new char[end-start];
  242. char[] src = data; // Move to local to help optimizer.
  243. for (int i = start; i < end; i++)
  244. copy[i-start] = src[i];
  245. return new FString(copy);
  246. }
  247. public boolean addAll (CharSequence s)
  248. {
  249. int ssize = s.length();
  250. int sz = length();
  251. addSpace(sz, ssize);
  252. if (s instanceof String)
  253. ((String) s).getChars(0, ssize, data, sz);
  254. else if (s instanceof CharSeq)
  255. ((CharSeq) s).getChars(0, ssize, data, sz);
  256. else
  257. for (int i = ssize; --i >= 0; )
  258. data[sz+i] = s.charAt(i);
  259. return ssize > 0;
  260. }
  261. public void insert(int where, int ch, boolean beforeMarkers) {
  262. int len;
  263. char c1, c2;
  264. if (ch >= 0x10000) {
  265. c1 = (char) (((ch - 0x10000) >> 10) + 0xD800);
  266. c2 = (char) ((ch & 0x3FF) + 0xDC00);
  267. len = 2;
  268. } else {
  269. c1 = (char) ch;
  270. c2 = 0;
  271. len = 1;
  272. }
  273. addSpace(where, len);
  274. data[where] = c1;
  275. if (c2 > 0)
  276. data[where+1] = c2;
  277. }
  278. public void insert(int where, String str, boolean beforeMarkers) {
  279. int len = str.length();
  280. addSpace(where, len);
  281. str.getChars(0, len, data, where);
  282. }
  283. /** Append arguments to this FString.
  284. * Used to implement Scheme's string-append.
  285. * @param args an array of FString value
  286. * @param startIndex index of first string in <code>args</code> to use
  287. */
  288. public void addAllStrings(Object[] args, int startIndex)
  289. {
  290. int sz = length();
  291. int count = 0;
  292. for (int i = startIndex; i < args.length; ++i)
  293. {
  294. Object arg = args[i];
  295. count += ((CharSequence) arg).length();
  296. }
  297. //if (data.length < total) copyBuffer(total);
  298. gapReserve(sz, count);
  299. for (int i = startIndex; i < args.length; ++i)
  300. {
  301. addAll((CharSequence) args[i]);
  302. }
  303. }
  304. public String toString() {
  305. return substring(0, length());
  306. }
  307. public String substring(int start, int end) {
  308. if (isVerySimple())
  309. return new String(data, start, end - start);
  310. // FIXME aybe also optimize isSubRange() case
  311. else
  312. return new StringBuilder().append(this, start, end).toString();
  313. }
  314. public CharSeq subSequence(int start, int end)
  315. {
  316. // FIXME should maybe share? Or use Strings.substring?
  317. return new FString(this, start, end-start);
  318. }
  319. public void setCharAt(int index, char ch) {
  320. checkCanWrite(); // FIXME maybe inline and fold into following
  321. data[super.effectiveIndex(index)] = ch;
  322. }
  323. public void setCharacterAt(int index, int ch) {
  324. int sz = length();
  325. if (index < 0 || index >= sz)
  326. throw new StringIndexOutOfBoundsException(index);
  327. char old1 = charAt(index);
  328. char old2;
  329. boolean oldIsSupp = old1 >= 0xD800 && old1 <= 0xDBFF
  330. && index+1 < sz
  331. && (old2 = charAt(index+1)) >= 0xDC00 && old2 <= 0xDFFF;
  332. if (ch <= 0xFFFF) {
  333. if (oldIsSupp)
  334. delete(index+1, index+2);
  335. setCharAt(index, (char) ch);
  336. } else {
  337. char c1 = (char) (((ch - 0x10000) >> 10) + 0xD800);
  338. char c2 = (char) ((ch & 0x3FF) + 0xDC00);
  339. setCharAt(index, c1);
  340. if (oldIsSupp) {
  341. setCharAt(index+1, c2);
  342. } else {
  343. insert(index+1, c2, true);
  344. }
  345. }
  346. }
  347. /** Replace a substring of this string with another.
  348. * The two strings may have different lengths, so this
  349. * generalizes insertion and deletion.
  350. * All indexes are code-unit (16-bit char) offsets.
  351. */
  352. public void replace(CharSequence src, int srcStart, int srcEnd,
  353. int dstStart, int dstEnd) {
  354. if (dstStart < 0 || dstStart > dstEnd || dstEnd > length()
  355. || srcStart < 0 || srcStart > srcEnd || srcEnd > src.length())
  356. throw new StringIndexOutOfBoundsException();
  357. int srcLength = srcEnd - srcStart;
  358. int dstLength = dstEnd - dstStart;
  359. int grow = srcLength - dstLength;
  360. if (grow > 0) {
  361. gapReserve(dstEnd, grow);
  362. }
  363. if (src instanceof FString) {
  364. FString fsrc = (FString) src;
  365. int sstart = fsrc.getSegmentReadOnly(srcStart, srcLength);
  366. if (sstart >= 0) {
  367. System.arraycopy(fsrc.data, sstart, data, dstStart, srcLength);
  368. if (grow < 0)
  369. delete(dstEnd+grow, dstEnd);
  370. else if (grow > 0)
  371. setGapBounds(getGapStart() + grow, getGapEnd());
  372. return;
  373. }
  374. }
  375. if (src != this
  376. && ! (src instanceof IString || src instanceof String)
  377. && ! Sequences.copyInPlaceIsSafe(this, src)) {
  378. src = src.subSequence(srcStart, srcEnd).toString();
  379. srcEnd = srcLength;
  380. srcStart = 0;
  381. }
  382. if (grow < 0)
  383. delete(dstEnd+grow, dstEnd);
  384. else if (grow > 0)
  385. setGapBounds(getGapStart()+grow, getGapEnd());
  386. if (srcStart < dstStart) {
  387. int i = dstStart + srcLength;
  388. int j = srcEnd;
  389. while (--j >= srcStart)
  390. data[--i] = src.charAt(j);
  391. } else {
  392. int i = dstStart;
  393. int j = srcStart;
  394. for (; j < srcEnd; i++, j++)
  395. data[i] = src.charAt(j);
  396. }
  397. }
  398. public void setCharAtBuffer (int index, char ch)
  399. {
  400. data[index] = ch;
  401. }
  402. public void insertRepeated(int where, int value, int count) {
  403. addSpace(where, value < 0x10000 ? count : 2 * count);
  404. insertRepeatedRaw(where, value, count);
  405. }
  406. private void insertRepeatedRaw(int where, int value, int count) {
  407. char c1, c2;
  408. int len;
  409. if (value >= 0x10000) {
  410. c1 = (char) (((value - 0x10000) >> 10) + 0xD800);
  411. c2 = (char) ((value & 0x3FF) + 0xDC00);
  412. len = 2 * count;
  413. } else {
  414. c1 = (char) value;
  415. c2 = 0;
  416. len = count;
  417. }
  418. char[] array = data;
  419. int end = where + len;
  420. for (int i = where; i < end; ) {
  421. array[i++] = c1;
  422. if (c2 != 0)
  423. array[where + i++] = c2;
  424. }
  425. }
  426. public void replace(int where, char[] chars, int start, int count)
  427. {
  428. System.arraycopy(chars, start, data, where, count);
  429. }
  430. public void replace(int where, String string)
  431. {
  432. string.getChars(0, string.length(), data, where);
  433. }
  434. public boolean equals(Object obj) {
  435. return obj instanceof FString && equals(this, (FString) obj);
  436. }
  437. @Override
  438. protected FString newInstance(int newLength) {
  439. return new FString(newLength < 0 ? data : new char[newLength]);
  440. }
  441. public int getElementKind()
  442. {
  443. return CHAR_VALUE;
  444. }
  445. public String getTag() { return "c32"; }
  446. public void consumePosRange(int iposStart, int iposEnd, Consumer out) {
  447. if (out.ignoring())
  448. return;
  449. int i = iposStart >>> 3;
  450. int end = iposEnd == -1 ? length() : iposEnd >>> 1;
  451. while (i < end) {
  452. long result = getSegment(i);
  453. int where = (int) result;
  454. int size = (int) (result >> 32);
  455. out.write(data, where, size);
  456. i += size;
  457. }
  458. }
  459. public FString append(char c) {
  460. int sz = length();
  461. addSpace(sz, 1);
  462. char[] d = data;
  463. d[sz] = c;
  464. return this;
  465. }
  466. /** Append a Unicode code point. */
  467. public FString appendCharacter(int c) {
  468. int delta;
  469. if (c < 0x10000)
  470. delta = 1;
  471. else if (c == Char.IGNORABLE_CHAR)
  472. return this;
  473. else
  474. delta = 2;
  475. int sz = length();
  476. addSpace(sz, delta);
  477. char[] d = data;
  478. if (delta > 1) {
  479. d[sz++] = (char) (((c - 0x10000) >> 10) + 0xD800);
  480. c = (c & 0x3FF) + 0xDC00;
  481. }
  482. d[sz++] = (char) c;
  483. return this;
  484. }
  485. public FString append(CharSequence csq) {
  486. if (csq == null)
  487. csq = "null";
  488. return append(csq, 0, csq.length());
  489. }
  490. public FString append(CharSequence csq, int start, int end) {
  491. if (csq == null)
  492. csq = "null";
  493. int len = end - start;
  494. int sz = length();
  495. addSpace(sz, len);
  496. char[] d = data;
  497. if (csq instanceof String)
  498. ((String) csq).getChars(start, end, d, sz);
  499. else if (csq instanceof CharSeq)
  500. ((CharSeq) csq).getChars(start, end, d, sz);
  501. else {
  502. int j = sz;
  503. for (int i = start; i < end; i++)
  504. d[j++] = csq.charAt(i);;
  505. }
  506. return this;
  507. }
  508. public FString append(Object obj) {
  509. if (obj instanceof gnu.text.Char)
  510. appendCharacter(((gnu.text.Char) obj).intValue());
  511. else if (obj instanceof java.lang.Character)
  512. appendCharacter(((java.lang.Character) obj).charValue());
  513. else {
  514. CharSequence str = obj instanceof CharSequence ? (CharSequence) obj
  515. : obj.toString();
  516. append(str, 0, str.length());
  517. }
  518. return this;
  519. }
  520. public FString prependCharacter(int c) {
  521. int delta;
  522. if (c < 0x10000)
  523. delta = 1;
  524. else if (c == Char.IGNORABLE_CHAR)
  525. return this;
  526. else
  527. delta = 2;
  528. int sz = length();
  529. gapReserve(0, delta);
  530. int p = getGapEnd()-delta;
  531. setGapBounds(getGapStart(), p);
  532. char[] d = data;
  533. if (delta > 1) {
  534. d[p++] = (char) (((c - 0x10000) >> 10) + 0xD800);
  535. c = (c & 0x3FF) + 0xDC00;
  536. }
  537. d[p++] = (char) c;
  538. return this;
  539. }
  540. public FString prepend(CharSequence str, int start, int end) {
  541. int len = end - start;
  542. gapReserve(0, len);
  543. int p = getGapEnd()-len;
  544. setGapBounds(getGapStart(), p);
  545. char[] d = data;
  546. if (str instanceof String)
  547. ((String) str).getChars(start, end, d, p);
  548. else if (str instanceof CharSeq)
  549. ((CharSeq) str).getChars(start, end, d, p);
  550. else {
  551. for (int i = start; i < end; i++)
  552. d[p++] = str.charAt(i);;
  553. }
  554. return this;
  555. }
  556. public FString prepend(Object obj) {
  557. if (obj instanceof gnu.text.Char)
  558. prependCharacter(((gnu.text.Char) obj).intValue());
  559. else if (obj instanceof java.lang.Character)
  560. prependCharacter(((java.lang.Character) obj).charValue());
  561. else {
  562. CharSequence str = obj instanceof CharSequence ? (CharSequence) obj
  563. : obj.toString();
  564. prepend(str, 0, str.length());
  565. }
  566. return this;
  567. }
  568. public void writeTo(int start, int count, Appendable dest)
  569. throws IOException {
  570. if (dest instanceof Writer) {
  571. Writer wr = (Writer) dest;
  572. while (count > 0) {
  573. long result = getSegment(start);
  574. int where = (int) result;
  575. int size = (int) (result >> 32);
  576. if (size > count)
  577. size = count;
  578. wr.write(data, where, size);
  579. start += size;
  580. count -= size;
  581. }
  582. }
  583. else
  584. dest.append(this, start, start+count);
  585. }
  586. public void writeTo(Appendable dest) throws IOException {
  587. writeTo(0, length(), dest);
  588. }
  589. }