// gb18030.js
  1. import { inRange, decoderError, encoderError, isASCIICodePoint,
  2. end_of_stream, finished, isASCIIByte, floor } from './text_decoder_utils.js'
  3. import index, {
  4. indexGB18030RangesCodePointFor, indexGB18030RangesPointerFor,
  5. indexCodePointFor, indexPointerFor } from './text_decoder_indexes.js'
// 11.2 gb18030
// 11.2.1 gb18030 decoder
  8. /**
  9. * @constructor
  10. * @implements {Decoder}
  11. * @param {{fatal: boolean}} options
  12. */
  13. export class GB18030Decoder {
  14. constructor(options) {
  15. const { fatal } = options
  16. this.fatal = fatal
  17. // gb18030's decoder has an associated gb18030 first, gb18030
  18. // second, and gb18030 third (all initially 0x00).
  19. this.gb18030_first = 0x00
  20. this.gb18030_second = 0x00,
  21. this.gb18030_third = 0x00
  22. }
  23. /**
  24. * @param {Stream} stream The stream of bytes being decoded.
  25. * @param {number} bite The next byte read from the stream.
  26. * @return The next code point(s) decoded, or null if not enough data exists in the input stream to decode a complete code point.
  27. */
  28. handler(stream, bite) {
  29. // 1. If byte is end-of-stream and gb18030 first, gb18030
  30. // second, and gb18030 third are 0x00, return finished.
  31. if (bite === end_of_stream && this.gb18030_first === 0x00 &&
  32. this.gb18030_second === 0x00 && this.gb18030_third === 0x00) {
  33. return finished
  34. }
  35. // 2. If byte is end-of-stream, and gb18030 first, gb18030
  36. // second, or gb18030 third is not 0x00, set gb18030 first,
  37. // gb18030 second, and gb18030 third to 0x00, and return error.
  38. if (bite === end_of_stream &&
  39. (this.gb18030_first !== 0x00 || this.gb18030_second !== 0x00 ||
  40. this.gb18030_third !== 0x00)) {
  41. this.gb18030_first = 0x00
  42. this.gb18030_second = 0x00
  43. this.gb18030_third = 0x00
  44. decoderError(this.fatal)
  45. }
  46. var code_point
  47. // 3. If gb18030 third is not 0x00, run these substeps:
  48. if (this.gb18030_third !== 0x00) {
  49. // 1. Let code point be null.
  50. code_point = null
  51. // 2. If byte is in the range 0x30 to 0x39, inclusive, set
  52. // code point to the index gb18030 ranges code point for
  53. // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
  54. // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
  55. if (inRange(bite, 0x30, 0x39)) {
  56. code_point = indexGB18030RangesCodePointFor(
  57. (((this.gb18030_first - 0x81) * 10 + this.gb18030_second - 0x30) * 126 +
  58. this.gb18030_third - 0x81) * 10 + bite - 0x30)
  59. }
  60. // 3. Let buffer be a byte sequence consisting of gb18030
  61. // second, gb18030 third, and byte, in order.
  62. var buffer = [this.gb18030_second, this.gb18030_third, bite]
  63. // 4. Set gb18030 first, gb18030 second, and gb18030 third to
  64. // 0x00.
  65. this.gb18030_first = 0x00
  66. this.gb18030_second = 0x00
  67. this.gb18030_third = 0x00
  68. // 5. If code point is null, prepend buffer to stream and
  69. // return error.
  70. if (code_point === null) {
  71. stream.prepend(buffer)
  72. return decoderError(this.fatal)
  73. }
  74. // 6. Return a code point whose value is code point.
  75. return code_point
  76. }
  77. // 4. If gb18030 second is not 0x00, run these substeps:
  78. if (this.gb18030_second !== 0x00) {
  79. // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
  80. // gb18030 third to byte and return continue.
  81. if (inRange(bite, 0x81, 0xFE)) {
  82. this.gb18030_third = bite
  83. return null
  84. }
  85. // 2. Prepend gb18030 second followed by byte to stream, set
  86. // gb18030 first and gb18030 second to 0x00, and return error.
  87. stream.prepend([this.gb18030_second, bite])
  88. this.gb18030_first = 0x00
  89. this.gb18030_second = 0x00
  90. return decoderError(this.fatal)
  91. }
  92. // 5. If gb18030 first is not 0x00, run these substeps:
  93. if (this.gb18030_first !== 0x00) {
  94. // 1. If byte is in the range 0x30 to 0x39, inclusive, set
  95. // gb18030 second to byte and return continue.
  96. if (inRange(bite, 0x30, 0x39)) {
  97. this.gb18030_second = bite
  98. return null
  99. }
  100. // 2. Let lead be gb18030 first, let pointer be null, and set
  101. // gb18030 first to 0x00.
  102. var lead = this.gb18030_first
  103. var pointer = null
  104. this.gb18030_first = 0x00
  105. // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
  106. // otherwise.
  107. var offset = bite < 0x7F ? 0x40 : 0x41
  108. // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
  109. // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
  110. // (byte − offset).
  111. if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
  112. pointer = (lead - 0x81) * 190 + (bite - offset)
  113. // 5. Let code point be null if pointer is null and the index
  114. // code point for pointer in index gb18030 otherwise.
  115. code_point = pointer === null ? null :
  116. indexCodePointFor(pointer, index('gb18030'))
  117. // 6. If code point is null and byte is an ASCII byte, prepend
  118. // byte to stream.
  119. if (code_point === null && isASCIIByte(bite))
  120. stream.prepend(bite)
  121. // 7. If code point is null, return error.
  122. if (code_point === null)
  123. return decoderError(this.fatal)
  124. // 8. Return a code point whose value is code point.
  125. return code_point
  126. }
  127. // 6. If byte is an ASCII byte, return a code point whose value
  128. // is byte.
  129. if (isASCIIByte(bite))
  130. return bite
  131. // 7. If byte is 0x80, return code point U+20AC.
  132. if (bite === 0x80)
  133. return 0x20AC
  134. // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
  135. // gb18030 first to byte and return continue.
  136. if (inRange(bite, 0x81, 0xFE)) {
  137. this.gb18030_first = bite
  138. return null
  139. }
  140. // 9. Return error.
  141. return decoderError(this.fatal)
  142. }
  143. }
// 11.2.2 gb18030 encoder
  145. /**
  146. * @implements {Encoder}
  147. */
  148. export class GB18030Encoder {
  149. /**
  150. * @param {Stream} stream Input stream.
  151. * @param {number} code_point Next code point read from the stream.
  152. * @return Byte(s) to emit.
  153. */
  154. handler(stream, code_point) {
  155. // 1. If code point is end-of-stream, return finished.
  156. if (code_point === end_of_stream)
  157. return finished
  158. // 2. If code point is an ASCII code point, return a byte whose
  159. // value is code point.
  160. if (isASCIICodePoint(code_point))
  161. return code_point
  162. // 3. If code point is U+E5E5, return error with code point.
  163. if (code_point === 0xE5E5)
  164. return encoderError(code_point)
  165. // 4. If the gbk flag is set and code point is U+20AC, return
  166. // byte 0x80.
  167. if (this.gbk_flag && code_point === 0x20AC)
  168. return 0x80
  169. // 5. Let pointer be the index pointer for code point in index
  170. // gb18030.
  171. var pointer = indexPointerFor(code_point, index('gb18030'))
  172. // 6. If pointer is not null, run these substeps:
  173. if (pointer !== null) {
  174. // 1. Let lead be floor(pointer / 190) + 0x81.
  175. var lead = floor(pointer / 190) + 0x81
  176. // 2. Let trail be pointer % 190.
  177. var trail = pointer % 190
  178. // 3. Let offset be 0x40 if trail is less than 0x3F and 0x41 otherwise.
  179. var offset = trail < 0x3F ? 0x40 : 0x41
  180. // 4. Return two bytes whose values are lead and trail + offset.
  181. return [lead, trail + offset]
  182. }
  183. // 7. If gbk flag is set, return error with code point.
  184. if (this.gbk_flag)
  185. return encoderError(code_point)
  186. // 8. Set pointer to the index gb18030 ranges pointer for code
  187. // point.
  188. pointer = indexGB18030RangesPointerFor(code_point)
  189. // 9. Let byte1 be floor(pointer / 10 / 126 / 10).
  190. var byte1 = floor(pointer / 10 / 126 / 10)
  191. // 10. Set pointer to pointer − byte1 × 10 × 126 × 10.
  192. pointer = pointer - byte1 * 10 * 126 * 10
  193. // 11. Let byte2 be floor(pointer / 10 / 126).
  194. var byte2 = floor(pointer / 10 / 126)
  195. // 12. Set pointer to pointer − byte2 × 10 × 126.
  196. pointer = pointer - byte2 * 10 * 126
  197. // 13. Let byte3 be floor(pointer / 10).
  198. var byte3 = floor(pointer / 10)
  199. // 14. Let byte4 be pointer − byte3 × 10.
  200. var byte4 = pointer - byte3 * 10
  201. // 15. Return four bytes whose values are byte1 + 0x81, byte2 +
  202. // 0x30, byte3 + 0x81, byte4 + 0x30.
  203. return [byte1 + 0x81,
  204. byte2 + 0x30,
  205. byte3 + 0x81,
  206. byte4 + 0x30]
  207. }
  208. constructor(options = {}, gbk_flag = false) {
  209. // gb18030's decoder has an associated gbk flag (initially unset).
  210. this.gbk_flag = gbk_flag
  211. }
  212. }