123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- #
- #
- # Nim's Runtime Library
- # (c) Copyright 2012 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- # Nim support for C/C++'s `wide strings`:idx:. This is part of the system
- # module! Do not import it directly!
- #when not declared(ThisIsSystem):
- # {.error: "You must not import this module explicitly".}
type
  Utf16Char* = distinct int16 ## One 16 bit code unit of a wide string.
when defined(nimv2):
  # Here the wide string is a raw pointer; `WideCStringObj` is the move-only
  # object that owns the buffer behind that pointer.
  type
    WideCString* = ptr UncheckedArray[Utf16Char]

    WideCStringObj* = object
      ## Owning wrapper around a `WideCString` buffer.
      bytes: int        # size that was requested from `createWide`
      data: WideCString # the buffer; nil for the "null" wide string

  proc `=destroy`(a: var WideCStringObj) =
    ## Releases the buffer owned by `a`, if there is one. The deallocator
    ## mirrors the allocator picked in `createWide`.
    if a.data != nil:
      when compileOption("threads"):
        deallocShared(a.data)
      else:
        dealloc(a.data)

  # Copying would produce two owners of the same buffer, so it is rejected
  # at compile time.
  proc `=`(a: var WideCStringObj; b: WideCStringObj) {.error.}

  proc `=sink`(a: var WideCStringObj; b: WideCStringObj) =
    ## Moves `b` into `a`. The buffer that `a` owned before the move is
    ## released first; without this, re-assigning a `WideCStringObj` that
    ## already holds data would leak the old allocation.
    if a.data != b.data:
      # The guard keeps a self-move from freeing the buffer it then keeps.
      `=destroy`(a)
    a.bytes = b.bytes
    a.data = b.data

  proc createWide(a: var WideCStringObj; bytes: int) =
    ## Makes `a` point at a fresh zero-filled buffer of `bytes` bytes. Any
    ## previous value of `a.data` is overwritten without being freed.
    a.bytes = bytes
    when compileOption("threads"):
      a.data = cast[typeof(a.data)](allocShared0(bytes))
    else:
      a.data = cast[typeof(a.data)](alloc0(bytes))

  # Unchecked access to single code units of the owned buffer.
  template `[]`*(a: WideCStringObj; idx: int): Utf16Char = a.data[idx]
  template `[]=`*(a: WideCStringObj; idx: int; val: Utf16Char) =
    a.data[idx] = val

  # The "no string" value: an object that owns nothing.
  template nullWide(): untyped = WideCStringObj(bytes: 0, data: nil)

  converter toWideCString*(x: WideCStringObj): WideCString {.inline.} =
    ## Exposes the raw pointer of `x`. Ownership stays with `x`.
    result = x.data

else:
  # The "no string" value is simply a nil reference here.
  template nullWide(): untyped = nil

  type
    WideCString* = ref UncheckedArray[Utf16Char]
    WideCStringObj* = WideCString

  # `L` is forwarded to `unsafeNew` as the size of the new object.
  template createWide(a; L) =
    unsafeNew(a, L)
proc ord(arg: Utf16Char): int =
  ## Returns the value of the code unit `arg`. The bits are reinterpreted as
  ## `uint16` first, so the result is never negative.
  result = cast[uint16](arg).int
proc len*(w: WideCString): int =
  ## Returns the length of a widestring, counted in 16 bit code units and
  ## without the terminator. This traverses the whole string to find the
  ## binary zero end marker!
  ##
  ## A nil `w` has the length 0.
  result = 0
  # `newWideCString(cstring(nil))` yields a nil wide string, so nil has to be
  # tolerated here instead of being dereferenced.
  if w != nil:
    while int16(w[result]) != 0'i16: inc result
const
  # U+FFFD, once as a code unit and once as a plain code point.
  UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16)
  UNI_REPL = 0xFFFD

  # Upper bounds of the code point ranges.
  UNI_MAX_BMP = 0xFFFF
  UNI_MAX_UTF16 = 0x10FFFF
  UNI_MAX_UTF32 = 0x7FFFFFFF
  UNI_MAX_LEGAL_UTF32 = 0x10FFFF

  # Surrogate ranges.
  UNI_SUR_HIGH_START = 0xD800
  UNI_SUR_HIGH_END = 0xDBFF
  UNI_SUR_LOW_START = 0xDC00
  UNI_SUR_LOW_END = 0xDFFF

  # Used to split a code point above `UNI_MAX_BMP` into a surrogate pair and
  # to join a pair again.
  halfShift = 10
  halfBase = 0x10000
  halfMask = 0x3FF
template ones(n: untyped): untyped =
  ## A value whose lowest `n` bits are set.
  ((1 shl n) - 1)
template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
  ## Decodes the UTF-8 sequence that starts at ``s[i]`` and stores the code
  ## point in `result`. If ``doInc == true`` `i` is advanced by the number of
  ## bytes that have been processed.
  ##
  ## `L` is the number of bytes in `s`. A sequence that would need bytes at
  ## or beyond index `L`, as well as a byte that cannot start a sequence,
  ## produces ``UNI_REPL`` and counts as one byte. Continuation bytes are
  ## not validated.
  bind ones
  let lead = ord(s[i])
  case lead
  of 0x00..0x7F:
    # plain ASCII
    result = lead
    when doInc: inc(i)
  of 0xC0..0xDF:
    # 110xxxxx: two byte sequence
    if L - i >= 2:
      result = ((lead and ones(5)) shl 6) or (ord(s[i+1]) and ones(6))
      when doInc: inc(i, 2)
    else:
      result = UNI_REPL
      when doInc: inc(i)
  of 0xE0..0xEF:
    # 1110xxxx: three byte sequence
    if L - i >= 3:
      result = ((lead and ones(4)) shl 12) or
               ((ord(s[i+1]) and ones(6)) shl 6) or
               (ord(s[i+2]) and ones(6))
      when doInc: inc(i, 3)
    else:
      result = UNI_REPL
      when doInc: inc(i)
  of 0xF0..0xF7:
    # 11110xxx: four byte sequence
    if L - i >= 4:
      result = ((lead and ones(3)) shl 18) or
               ((ord(s[i+1]) and ones(6)) shl 12) or
               ((ord(s[i+2]) and ones(6)) shl 6) or
               (ord(s[i+3]) and ones(6))
      when doInc: inc(i, 4)
    else:
      result = UNI_REPL
      when doInc: inc(i)
  else:
    # a continuation byte or 11111xxx: not a valid start of a sequence
    result = UNI_REPL
    when doInc: inc(i)
iterator runes(s: cstring, L: int): int =
  ## Yields the code points that `fastRuneAt` decodes from the first `L`
  ## bytes of `s`, front to back.
  var
    pos = 0
    rune: int
  while pos < L:
    # advances `pos` past the bytes it consumed
    fastRuneAt(s, pos, L, rune, true)
    yield rune
proc newWideCString*(source: cstring, L: int): WideCStringObj =
  ## Creates a zero-terminated wide string from the first `L` bytes of the
  ## UTF-8 text `source`.
  ##
  ## Code points above 0xFFFF are stored as a surrogate pair. Code points in
  ## the surrogate range and code points above 0x10FFFF are stored as U+FFFD.
  # `L * 2 + 2` bytes: room for `L` code units and the terminator.
  createWide(result, L * 2 + 2)
  var pos = 0
  for rune in runes(source, L):
    if rune > UNI_MAX_UTF16:
      # outside of what UTF-16 can express
      result[pos] = UNI_REPLACEMENT_CHAR
    elif rune > UNI_MAX_BMP:
      # needs two code units: high surrogate first, then the low one
      let offset = rune - halfBase
      result[pos] = cast[Utf16Char](
        uint16((offset shr halfShift) + UNI_SUR_HIGH_START))
      inc pos
      result[pos] = cast[Utf16Char](
        uint16((offset and halfMask) + UNI_SUR_LOW_START))
    elif rune >= UNI_SUR_HIGH_START and rune <= UNI_SUR_LOW_END:
      # a surrogate is not a character of its own
      result[pos] = UNI_REPLACEMENT_CHAR
    else:
      result[pos] = cast[Utf16Char](uint16(rune))
    inc pos
  result[pos] = Utf16Char(0)
proc newWideCString*(s: cstring): WideCStringObj =
  ## Creates a wide string from the zero-terminated UTF-8 text `s`.
  ## A nil `s` gives the null wide string.
  if s.isNil:
    result = nullWide
  else:
    result = newWideCString(s, s.len)
proc newWideCString*(s: string): WideCStringObj =
  ## Creates a wide string from all `s.len` bytes of the UTF-8 text `s`.
  let byteCount = s.len
  result = newWideCString(s, byteCount)
proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
  ## Converts the zero-terminated UTF-16 string `w` to UTF-8.
  ##
  ## * `estimate` sizes the initial capacity of the result
  ##   (``estimate + estimate shr 2`` bytes); it does not limit the output.
  ## * `replacement` is the code point written for every surrogate that is
  ##   not part of a valid high/low pair. A value outside of 0..0x10FFFF is
  ##   itself replaced by U+FFFD.
  ##
  ## A nil `w` gives the empty string.
  if w == nil:
    # `newWideCString(cstring(nil))` produces nil; do not dereference it.
    return ""
  result = newStringOfCap(estimate + estimate shr 2)

  var i = 0
  while w[i].int16 != 0'i16:
    var ch = ord(w[i])
    inc i
    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
      # Look at the unit after the high surrogate. At worst this is the
      # terminating zero, which is not consumed here.
      let ch2 = ord(w[i])
      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
        # a proper pair: combine both halves into one code point
        ch = ((ch and halfMask) shl halfShift) + (ch2 and halfMask) +
             halfBase
        inc i
      else:
        # invalid UTF-16
        ch = replacement
    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
      # invalid UTF-16
      ch = replacement

    if ch < 0 or ch > 0x10FFFF:
      # Only a bad `replacement` gets here: emit U+FFFD rather than handing
      # an out of range value to `chr`.
      result.add "\xEF\xBF\xBD"
    elif ch < 0x80:
      result.add chr(ch)
    elif ch < 0x800:
      result.add chr((ch shr 6) or 0xc0)
      result.add chr((ch and 0x3f) or 0x80)
    elif ch < 0x10000:
      result.add chr((ch shr 12) or 0xe0)
      result.add chr(((ch shr 6) and 0x3f) or 0x80)
      result.add chr((ch and 0x3f) or 0x80)
    else:
      result.add chr((ch shr 18) or 0xf0)
      result.add chr(((ch shr 12) and 0x3f) or 0x80)
      result.add chr(((ch shr 6) and 0x3f) or 0x80)
      result.add chr((ch and 0x3f) or 0x80)
proc `$`*(s: WideCString): string =
  ## Converts `s` to UTF-8, passing 80 as the `estimate` to the three
  ## argument `$`.
  `$`(s, 80)
|