IString.java 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. package gnu.lists;
  2. import gnu.text.Char;
  3. import java.io.*;
  4. import java.util.RandomAccess;
  5. // FIXME should perhaps also implement CharSeq
  6. // FIXME should perhaps (unlikely) extend SimpleVector
  7. /** A string implementation with contant-time codepoint indexing.
  8. * Suitable for SRFI-135 or SRFI-140.
  9. */
  10. public class IString extends AbstractSequence<Char>
  11. implements CharSequence, Externalizable, Comparable<IString>,
  12. AVector<Char>, RandomAccess, Consumable {
  13. String str;
  14. int cplength; // number of codepoints
  15. /* Index of every 16-th character.
  16. * Null if all characters are in the basic plane. */
  17. int[] offsets;
  18. private static int[] NO_OFFSETS = { };
  19. private static final int INDEX_STEP = 16;
  20. private static final int INDEX_STEP_LOG = 4;
  21. private static final int numSteps(int len) {
  22. return len >> INDEX_STEP_LOG;
  23. }
  24. private static final int restStep(int i) {
  25. return i % INDEX_STEP;
  26. //return i & (INDEX_STEP-1);
  27. }
  28. IString() {
  29. }
  30. public IString(String str) {
  31. init(str);
  32. }
  33. public static IString valueOf(CharSequence str) {
  34. if (str instanceof IString)
  35. return (IString) str;
  36. else
  37. return new IString(str.toString());
  38. }
  39. public static IString valueOf(CharSequence str, int start, int count) {
  40. int slen = str.length();
  41. if (start < 0 || count < 0 || start + count > slen)
  42. throw new IndexOutOfBoundsException();
  43. if (str instanceof IString) {
  44. IString istr = (IString) str;
  45. if (start == 0 && count == slen)
  46. return istr;
  47. return new SubString(istr, start, start+count);
  48. }
  49. int jlStart = Character.offsetByCodePoints(str, 0, start);
  50. int jlEnd = Character.offsetByCodePoints(str, jlStart, count);
  51. return new IString(str.subSequence(jlStart, jlEnd).toString());
  52. }
  53. private void init(String str) {
  54. this.str = str;
  55. cplength = Strings.sizeInCodePoints(str);
  56. if (cplength != str.length()) {
  57. int n = numSteps(cplength);
  58. offsets = n == 0 ? NO_OFFSETS : new int[n];
  59. int off = 0;
  60. for (int i = 0; i < n; i++) {
  61. off = str.offsetByCodePoints(off, INDEX_STEP);
  62. offsets[i] = off;
  63. }
  64. }
  65. }
  66. public int effectiveIndex(int index) {
  67. if (index < 0 || index >= cplength)
  68. throw new StringIndexOutOfBoundsException(); // FIXME add args
  69. return offsetByCodePoints(index)+jlStart();
  70. }
  71. public Char getRaw(int index) {
  72. return Char.valueOf(str.codePointAt(index));
  73. }
  74. /** used for string-ref */
  75. public int indexByCodePoints(int index) {
  76. return str.codePointAt(effectiveIndex(index));
  77. }
  78. /** Map character offset to char offset.
  79. * Caller is responsible for checking that {@code i >=0 && i <= cplength}.
  80. */
  81. public int offsetByCodePoints(int i) {
  82. if (offsets == null)
  83. return i;
  84. int jlOffset = jlStart();
  85. i += cpStart();
  86. int step = i>>INDEX_STEP_LOG;
  87. int ilo, ihi, clo, chi;
  88. // ilo <= i && i < ihi && ihi <= cplength
  89. // clo and chi are the indexes of ilo and ihi in str.
  90. ilo = i - restStep(i);
  91. clo = step == 0 ? 0 : offsets[step-1];
  92. if (ilo <= cplength - INDEX_STEP) {
  93. ihi = ilo + INDEX_STEP;
  94. chi = offsets[step];
  95. } else {
  96. ihi = cplength;
  97. chi = str.length();
  98. }
  99. // Optimization: all characters in [ilo..ihi) are in BMP
  100. if (chi - clo == ihi - ilo)
  101. return clo + restStep(i) - jlOffset;
  102. // Optimization: no characters in range are in BMP
  103. if (chi - clo == 2 * (ihi - ilo))
  104. return clo + 2 * restStep(i) - jlOffset;
  105. // scan linearly from pos, at most INDEX_STEP-1 characters forward:
  106. return str.offsetByCodePoints(clo, restStep(i)) - jlOffset;
  107. }
  108. /* used for string-length */
  109. public int lengthByCodePoints() { return cplength; }
  110. public int size() { return cplength; }
  111. /** To implement CharSequence */
  112. public char charAt(int i) { return str.charAt(i); }
  113. public String toString() { return str; }
  114. public int length() { return str.length(); }
  115. /** Substring using offsets in code-units (16-bit chars). */
  116. public IString subSequence(int from, int to) {
  117. /* FIXME
  118. int ifrom = indexFromCharOffset(from);
  119. int iend = indexFromCharOffset(end);
  120. return new SubString(this, ifrom, ident, from, to);
  121. */
  122. if (to < from || to-from > length())
  123. throw new StringIndexOutOfBoundsException();
  124. int jlStart = jlStart();
  125. return new IString(str.substring(from+jlStart, to+jlStart));
  126. }
  127. public void writeExternal(ObjectOutput out) throws IOException {
  128. out.writeObject(str);
  129. }
  130. public void readExternal(ObjectInput in)
  131. throws IOException, ClassNotFoundException {
  132. init((String) in.readObject());
  133. }
  134. public char[] toCharArray() { return toString().toCharArray(); }
  135. public byte[] getBytes(String charsetName)
  136. throws java.io.UnsupportedEncodingException
  137. { return toString().getBytes(charsetName); }
  138. public void consume(Consumer out) {
  139. out.write(str, jlStart(), length());
  140. }
  141. public int hashCode() { return toString().hashCode(); }
  142. public boolean equals(Object other) {
  143. if (other instanceof IString) {
  144. IString str2 = (IString) other;
  145. return this.length() == str2.length()
  146. && Strings.compareTo(this, str2) == 0;
  147. }
  148. return false;
  149. }
  150. public int compareTo(IString other) {
  151. return Strings.compareTo(this, other);
  152. }
  153. int cpStart() { return 0; }
  154. int jlStart() { return 0; }
  155. public static final class SubString extends IString {
  156. int cpStart;
  157. int jlStart;
  158. int jlLength;
  159. private String jlString; // cache of substring
  160. /** Create a substring of the given base string.
  161. * Assumes caller has validated start and end.
  162. */
  163. public SubString(IString base, int start, int end) {
  164. this(base, start, end,
  165. base.offsetByCodePoints(start),
  166. base.offsetByCodePoints(end));
  167. }
  168. public SubString(IString base, int start, int end,
  169. int jlStart, int jlEnd) {
  170. super();
  171. this.str = base.str;
  172. this.jlStart = base.jlStart() + jlStart;
  173. this.jlLength = jlEnd - jlStart;
  174. this.cpStart = start + base.cpStart();
  175. this.cplength = end - start;
  176. if (jlLength != cplength) {
  177. this.offsets = base.offsets;
  178. }
  179. }
  180. @Override
  181. int cpStart() { return cpStart; }
  182. @Override
  183. int jlStart() { return jlStart; }
  184. @Override
  185. public char charAt(int i) {
  186. if (i >= jlLength)
  187. throw new StringIndexOutOfBoundsException(i);
  188. return str.charAt(i+jlStart); }
  189. @Override
  190. public String toString() {
  191. String jstr = jlString;
  192. if (jstr == null) {
  193. jstr = str.substring(jlStart, jlStart+jlLength);
  194. jlString = jstr; // atomic cache update
  195. }
  196. return jstr;
  197. }
  198. @Override
  199. public int length() { return jlLength; }
  200. public void writeExternal(ObjectOutput out) throws IOException {
  201. out.writeObject(toString());
  202. }
  203. public void readExternal(ObjectInput in)
  204. throws IOException, ClassNotFoundException {
  205. this.str = (String) in.readObject();
  206. }
  207. public Object readResolve() throws ObjectStreamException {
  208. return new IString(str);
  209. }
  210. }
  211. }