cmapSubtables.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. import struct
  2. import array
  3. from lxml.etree import Element
  4. from math import floor, log2
  5. def makeTTXSubtable(tag, attrs, cmapGlyphSet):
  6. subtable = Element(tag, attrs)
  7. for g in cmapGlyphSet:
  8. if not g.alias:
  9. subtable.append(Element("map", {"code": hex(g.codepoints.seq[0]), "name": g.name() }))
  10. else:
  11. subtable.append(Element("map", {"code": hex(g.codepoints.seq[0]), "name": g.alias.name() }))
  12. return subtable
  13. def makeGlyphIDArray(glyphs):
  14. """
  15. makes a glyph
  16. """
  17. pass
  18. class cmapFormat0:
  19. """
  20. Class representing cmap subtable format 0.
  21. (Subtable representing codepoints from U+0 - U+FF.)
  22. - https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-0-byte-encoding-table
  23. """
  24. def __init__(self, glyphs, platformID, platEncID, language):
  25. # check if the glyphs are one-byte, reject them if they are not.
  26. for g in glyphs:
  27. if g.codepoints.seq[0] > int('ff', 16):
  28. raise ValueError(f"Creating cmap subtable format 0 has been rejected. A glyph whose codepoint is greater than U+FF was given. cmap Subtable Format 0 must have codepoints less than or equal to U+FF.")
  29. self.format = 0 # hardcoded
  30. self.platformID = platformID
  31. self.platEncID = platEncID
  32. self.language = language
  33. self.glyphs = glyphs
  34. def toTTX(self):
  35. return makeTTXSubtable( "cmap_format_0",
  36. { "platformID": str(self.platformID)
  37. , "platEncID": str(self.platEncID)
  38. , "language": str(self.language)
  39. }
  40. , self.glyphs
  41. )
  42. def toBytes(self):
  43. beginning = struct.pack( ">HHH"
  44. , self.format # UInt16
  45. # length (in bytes) of the subtable. (it's a static length!)
  46. , 262 #UInt16
  47. , self.language # UInt16
  48. )
  49. # initialise list with a fixed size
  50. glyphIdArray = [0x00] * 256 # MAYBE: I presume that no value is 0x00.
  51. for id, glyph in enumerate(self.glyphs):
  52. glyphIdArray[id] = glyph.codepoints.seq[0]
  53. return beginning + array.array('B', glyphIdArray)
  54. class cmapFormat4:
  55. """
  56. Class representing cmap subtable format 4.
  57. (Subtable representing codepoints from U+0 - U+FFFF.)
  58. - https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-4-segment-mapping-to-delta-values
  59. """
  60. def __init__(self, glyphs, platformID, platEncID, language):
  61. # check if the glyphs are two-byte, reject them if they are not.
  62. for g in glyphs:
  63. if g.codepoints.seq[0] > int('ffff', 16):
  64. raise ValueError(f"Creating cmap subtable format 4 has been rejected. A glyph whose codepoint is greater than U+FFFF was given. cmap Subtable Format 4 must only have codepoints less than or equal to U+FFFF.")
  65. self.format = 4 # hard-coded.
  66. self.glyphs = glyphs
  67. self.platformID = platformID
  68. self.platEncID = platEncID
  69. self.language = language
  70. def toTTX(self):
  71. return makeTTXSubtable( "cmap_format_4",
  72. { "platformID": str(self.platformID)
  73. , "platEncID": str(self.platEncID)
  74. , "language": str(self.language)
  75. }
  76. , self.glyphs
  77. )
  78. def toBytes(self):
  79. reservedPad = 0 # hardcoded
  80. endCode = []
  81. startCode = []
  82. idDelta = []
  83. idRangeOffset = []
  84. segCount = 0
  85. # generate segments
  86. for id, glyph in enumerate(self.glyphs):
  87. thisGlyphCodepoint = glyph.codepoints.seq[0]
  88. lastGlyphCodepoint = self.glyphs[id-1].codepoints.seq[0]
  89. if id == 0:
  90. segCount +=1
  91. startCode.append(thisGlyphCodepoint)
  92. else:
  93. # check if it's continuous with the last glyph.
  94. if thisGlyphCodepoint != (lastGlyphCodepoint + 1):
  95. endCode.append(lastGlyphCodepoint)
  96. startCode = thisGlyphCodepoint
  97. segCount += 1
  98. else:
  99. pass
  100. # TODO: learn how to calculate idDelta and idRangeOffset
  101. # TODO: add terminating entries to these arrays.
  102. # METADATA
  103. # Generate a bunch of metadata. these calculations are what they are (and they're tested to be correct).
  104. segCountX2 = segCount * 2
  105. searchRange = 2 * (2 ** floor(log2(39)))
  106. entrySelector = floor(log2(searchRange / 2))
  107. rangeShift = 2 * segCount - searchRange
  108. rangeShift = max(rangeShift, 0)
  109. # RANGES AND OTHER STUFF
  110. reservedPad = 0 # hard-coded
  111. startCode = []
  112. endCode = []
  113. idDelta = []
  114. currentDelta = 0 # just for the upcoming loop
  115. for g in range(0, len(self.glyphs)):
  116. if g == 0:
  117. startCode.append(self.glyphs[g].codepoints.seq[0])
  118. else:
  119. if self.glyphs[g].codepoints.seq[0] == self.glyphs[g-1].codepoints.seq[0] + 1:
  120. currentDelta += 1
  121. else:
  122. endCode.append(self.glyphs[g-1].codepoints.seq[0])
  123. idDelta.append(-currentDelta) # deltas in this should be negative
  124. currentDelta = 0
  125. beginning = struct.pack( ">HHHHHH"
  126. , self.format # UInt16
  127. # length # UInt16
  128. , self.language # UInt16
  129. , segCountX2 # UInt16
  130. , searchRange # UInt16
  131. , entrySelector # UInt16
  132. , rangeShift # UInt16
  133. )
  134. # beginning + startCode + endCode + reservedPad + idDelta
  135. # TODO: create bytes representations of these and compile.
  136. # idRangeOffset[segCount] # UInt16. Offsets into glyphIdArray or 0.
  137. # glyphIdArray[] # Array of UInt16s.
  138. return b'' # placeholder
  139. class SequentialMapGroupRecord:
  140. """
  141. Class representing a SequentialMapGroup Record in a cmap Subtable Format 12.
  142. This is only used during bytes compilation.
  143. """
  144. def __init__ (self, startCharCode, endCharCode, startGlyphID):
  145. self.startCharCode = startCharCode
  146. self.endCharCode = endCharCode
  147. self.startGlyphID = startGlyphID
  148. def toBytes(self):
  149. return struct.pack(">III"
  150. , self.startCharCode # UInt32
  151. , self.endCharCode # UInt32
  152. , self.startGlyphID # UInt32
  153. )
  154. class cmapFormat12:
  155. """
  156. Class representing cmap subtable format 12.
  157. (Subtable representing codepoints from U+0 - U+FFFFFF.)
  158. - https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-12-segmented-coverage
  159. """
  160. def __init__(self, glyphs, platformID, platEncID, language):
  161. # check if the glyphs are four-byte, reject them if they are not.
  162. for g in glyphs:
  163. if g.codepoints.seq[0] > int('ffffff', 16):
  164. raise ValueError(f"Creating cmap subtable format 12 has been rejected. A glyph whose codepoint is greater than U+FFFFFF was given. cmap Subtable Format 12 must only have codepoints less than or equal to U+FFFFFF.")
  165. self.format = 12 # hard-coded.
  166. self.glyphs = glyphs
  167. self.platformID = platformID
  168. self.platEncID = platEncID
  169. self.language = language
  170. def toTTX(self):
  171. return makeTTXSubtable( "cmap_format_12",
  172. { "platformID": str(self.platformID)
  173. , "platEncID": str(self.platEncID)
  174. , "language": str(self.language)
  175. , "format": str(self.format)
  176. , "reserved": "0"
  177. , "length": "0"
  178. , "nGroups": "0"
  179. }
  180. , self.glyphs
  181. )
  182. def toBytes(self):
  183. startCode = 0
  184. endCode = 0
  185. startGlyphID = 0
  186. sequentialMapGroup = []
  187. for g in range(len(self.glyphs)):
  188. if g == 0:
  189. startCode = self.glyphs[g].codepoints.seq[0]
  190. startCodeID = g
  191. else:
  192. if self.glyphs[g].codepoints.seq[0] != self.glyphs[g-1].codepoints.seq[0] + 1:
  193. endCode = self.glyphs[g-1].codepoints.seq[0]
  194. sequentialMapGroup.append(SequentialMapGroupRecord(startCode, endCode, startGlyphID))
  195. startCode = self.glyphs[g].codepoints.seq[0]
  196. startGlyphID = g
  197. subtableLength = 16 + 12*len(sequentialMapGroup)
  198. numGroups = len(sequentialMapGroup)
  199. beginning = struct.pack(">HHIII"
  200. , self.format # UInt16
  201. , 0 # Reserved, UInt16
  202. , subtableLength # UInt32
  203. , self.language # UInt32
  204. , numGroups # UInt32
  205. )
  206. smgBytes = b'\0'
  207. for smg in sequentialMapGroup:
  208. smgBytes += smg.toBytes()
  209. return beginning + smgBytes
  210. class cmapFormat14:
  211. """
  212. Class representing cmap subtable format 14.
  213. (Subtable indicating the usage of variation selectors.)
  214. This is only capable of implementing Variation Selector 16 (U+FE0F) right now.
  215. - https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-14-unicode-variation-sequences
  216. """
  217. def __init__(self, glyphs):
  218. self.format = 14 # hard-coded
  219. self.glyphs = glyphs
  220. self.platformID = 0 # hard-coded
  221. self.platEncID = 5 # hard-coded
  222. # platEncID 5 = cmap subtable 14 in platID 0. It just means that.
  223. # no other platID or platEncID should be used for this subtable.
  224. def toTTX(self):
  225. cmap14 = Element("cmap_format_14", { "platformID": str(self.platformID)
  226. , "platEncID": str(self.platEncID)
  227. , "format": "14"
  228. , "length": "0"
  229. , "numVarSelectorRecords": "1"
  230. })
  231. for g in self.glyphs:
  232. if not g.alias:
  233. cmap14.append(Element("map", {"uvs": "0xfe0f", "uv": hex(g.codepoints.seq[0]), "name": g.name()}))
  234. else:
  235. cmap14.append(Element("map", {"uvs": "0xfe0f", "uv": hex(g.codepoints.seq[0]), "name": g.alias.name()}))
  236. return cmap14
  237. def toBytes(self):
  238. return b'\0' # placeholder