123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- cmake_minimum_required(VERSION 2.8.12)
- set(HEX_ALPHABET "0123456789abcdef")
- function(ConvertToHex dec hex)
- while(dec GREATER 0)
- math(EXPR _val "${dec} % 16")
- math(EXPR dec "${dec} / 16")
- string(SUBSTRING ${HEX_ALPHABET} ${_val} 1 _val)
- set(_res "${_val}${_res}")
- endwhile()
- # Pad the result with the number of zeros
- # specified by the optional third argument
- if(${ARGC} EQUAL 3)
- set(padding ${ARGV2})
- string(LENGTH ${_res} _resLen)
- if(_resLen LESS ${padding})
- math(EXPR _neededPadding "${padding} - ${_resLen}")
- foreach(i RANGE 1 ${_neededPadding})
- set(_res "0${_res}")
- endforeach()
- endif()
- endif()
- set(${hex} "0x${_res}" PARENT_SCOPE)
- endfunction()
- function(ConvertFromHex hex dec)
- string(TOLOWER ${hex} hex)
- string(LENGTH "${hex}" _strlen)
- set(_res 0)
- while(_strlen GREATER 0)
- math(EXPR _res "${_res} * 16")
- string(SUBSTRING "${hex}" 0 1 NIBBLE)
- string(SUBSTRING "${hex}" 1 -1 hex)
- string(FIND ${HEX_ALPHABET} ${NIBBLE} value)
- if(value EQUAL -1)
- message(FATAL_ERROR "Invalid hex character '${NIBBLE}'")
- endif()
- math(EXPR _res "${_res} + ${value}")
- string(LENGTH "${hex}" _strlen)
- endwhile()
- set(${dec} ${_res} PARENT_SCOPE)
- endfunction()
- # Based on http://www.json.org/JSON_checker/utf8_decode.c
- function(DecodeUtf8 hexBytes codePoint)
- string(SUBSTRING ${hexBytes} 0 2 hexByte1)
- ConvertFromHex(${hexByte1} byte1)
- # Zero continuations (0 to 127)
- math(EXPR out "${byte1} & 128")
- if(out EQUAL 0)
- set(${codePoint} ${byte1} PARENT_SCOPE)
- return()
- endif()
- # One continuation (128 to 2047)
- math(EXPR out "${byte1} & 224")
- if(out EQUAL 192)
- string(SUBSTRING ${hexBytes} 2 2 hexByte2)
- ConvertFromHex(${hexByte2} byte2)
- math(EXPR result "((${byte1} & 31) << 6) | ${byte2}")
- if(result GREATER 127)
- set(${codePoint} ${result} PARENT_SCOPE)
- return()
- endif()
- else()
- # Two continuations (2048 to 55295 and 57344 to 65535)
- math(EXPR result "${byte1} & 240")
- if(result EQUAL 224)
- string(SUBSTRING ${hexBytes} 2 2 hexByte2)
- string(SUBSTRING ${hexBytes} 4 2 hexByte3)
- ConvertFromHex(${hexByte2} byte2)
- ConvertFromHex(${hexByte3} byte3)
- math(EXPR result "${byte2} | ${byte3}")
- if(result GREATER -1)
- math(EXPR result "((${byte1} & 15) << 12) | (${byte2} << 6) | ${byte3}")
- if((result GREATER 2047) AND (result LESS 55296 OR result GREATER 57343))
- set(${codePoint} ${result} PARENT_SCOPE)
- return()
- endif()
- endif()
- else()
- # Three continuations (65536 to 1114111)
- math(EXPR result "${byte1} & 248")
- if(result EQUAL 224)
- string(SUBSTRING ${hexBytes} 2 2 hexByte2)
- string(SUBSTRING ${hexBytes} 4 2 hexByte3)
- string(SUBSTRING ${hexBytes} 6 2 hexByte4)
- ConvertFromHex(${hexByte2} byte2)
- ConvertFromHex(${hexByte3} byte3)
- ConvertFromHex(${hexByte4} byte4)
- math(EXPR result "${byte2} | ${byte3} | ${byte4}")
- if(result GREATER -1)
- math(EXPR result "((c & 7) << 18) | (c1 << 12) | (c2 << 6) | c3")
- if((result GREATER 65535) AND (result LESS 1114112))
- set(${codePoint} ${result} PARENT_SCOPE)
- return()
- endif()
- endif()
- endif()
- endif()
- endif()
- message(FATAL_ERROR "Invalid UTF-8 encoding")
- endfunction()
- set(inputFile ${CMAKE_ARGV3})
- set(outputFile ${CMAKE_ARGV4})
- # Get the file contents in text and hex-encoded format because
- # CMake doesn't provide functions for converting between the two
- file(READ "${inputFile}" contents)
- file(READ "${inputFile}" hexContents HEX)
- # Convert the text contents into a list of lines by escaping
- # the list separator ';' and then replacing new line characters
- # with the list separator
- string(REGEX REPLACE ";" "\\\\;" contents ${contents})
- string(REGEX REPLACE "\n" ";" contents ${contents})
- get_filename_component(encname ${inputFile} NAME_WE)
- set(output
- "static const struct StaticTableEncoding encoding_${encname} = {\n"
- " { .decode = &decode_table },\n"
- " {")
- set(hexIndex 0)
- foreach(line ${contents})
- string(LENGTH ${line} lineLength)
- # Convert "A" to 0x41
- string(FIND ${line} "\"" beginQuote)
- if(NOT ${beginQuote} EQUAL -1)
- string(FIND ${line} "\"" endQuote REVERSE)
- if(${beginQuote} EQUAL ${endQuote})
- message(FATAL_ERROR "Line contains only one quote")
- endif()
- math(EXPR beginHexQuote "${hexIndex} + (${beginQuote} + 1)*2")
- math(EXPR endHexQuote "${hexIndex} + (${endQuote} + 1)*2")
- math(EXPR quoteLen "${endHexQuote} - ${beginHexQuote} - 1")
- string(SUBSTRING ${hexContents} ${beginHexQuote} ${quoteLen} hexQuote)
- DecodeUtf8(${hexQuote} codePoint)
- ConvertToHex(${codePoint} hexCodePoint 4)
- STRING(REGEX REPLACE "\"(.+)\"" ${hexCodePoint} line ${line})
- endif()
- # Strip comment
- string(REGEX REPLACE "[ \t\n]*#.*" "" line ${line})
- # Convert 3/1 to [0x31]
- string(REGEX REPLACE "^([0-9]+)/([0-9]+).*" "\\1;\\2" numbers ${line})
- list(GET numbers 0 upperBits)
- list(GET numbers 1 lowerBits)
- math(EXPR res "${upperBits}*16 + ${lowerBits}")
- ConvertToHex(${res} hex 2)
- string(REGEX REPLACE "^([0-9]+)/([0-9]+)" "[${hex}]" line ${line})
- # Convert U+0041 to 0x0041
- string(REPLACE "U+" "0x" line ${line})
- # Indent and append a comma
- set(line " ${line},")
- set(output "${output}\n${line}")
- # Increment the index by the number of characters in the line,
- # plus one for the new line character then multiple by two for the hex digit index
- math(EXPR hexIndex "${hexIndex} + 2*(${lineLength} + 1)")
- endforeach()
- set(output "${output}\n"
- " }\n"
- "}\;\n")
- file(WRITE ${outputFile} ${output})
|