# Copyright (C) 2010, 2013 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- import sys
- types = {
- "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
- "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
- "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
- "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]},
- "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0xffff)]},
- "digits": { "UseTable" : False, "data": [('0', '9')]},
- "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
- }
- entriesPerLine = 50
- arrays = "";
- functions = "";
- emitTables = (len(sys.argv) < 2 or sys.argv[1] != "--no-tables")
- for name, classes in types.items():
- ranges = [];
- size = 0;
- for _class in classes["data"]:
- if type(_class) == str:
- ranges.append((ord(_class), ord(_class)))
- elif type(_class) == int:
- ranges.append((_class, _class))
- else:
- (min, max) = _class;
- if type(min) == str:
- min = ord(min)
- if type(max) == str:
- max = ord(max)
- if max > 0x7f and min <= 0x7f:
- ranges.append((min, 0x7f))
- min = 0x80
- ranges.append((min,max))
- ranges.sort();
-
- if emitTables and classes["UseTable"] and (not "Inverse" in classes):
- array = ("static const char _%sData[65536] = {\n" % name);
- i = 0
- for (min,max) in ranges:
- while i < min:
- i = i + 1
- array += ('0,')
- if (i % entriesPerLine == 0) and (i != 0):
- array += ('\n')
- while i <= max:
- i = i + 1
- if (i == 65536):
- array += ("1")
- else:
- array += ('1,')
- if (i % entriesPerLine == 0) and (i != 0):
- array += ('\n')
- while i < 0xffff:
- array += ("0,")
- i = i + 1;
- if (i % entriesPerLine == 0) and (i != 0):
- array += ('\n')
- if i == 0xffff:
- array += ("0")
- array += ("\n};\n");
- array += ("DEFINE_REMOTE_VAR(const char *, regExp_%sData, &(_%sData[0]));\n\n" % (name,name));
- arrays += array
-
- # Generate createFunction:
- function = "";
- function += ("CharacterClass* %sCreate()\n" % name)
- function += ("{\n")
- if emitTables and classes["UseTable"]:
- if "Inverse" in classes:
- function += (" CharacterClass* characterClass = new CharacterClass(REMOTE_VAR_VALUE(regExp_%sData), true);\n" % (classes["Inverse"]))
- else:
- function += (" CharacterClass* characterClass = new CharacterClass(REMOTE_VAR_VALUE(regExp_%sData), false);\n" % (name))
- else:
- function += (" CharacterClass* characterClass = new CharacterClass;\n")
- for (min, max) in ranges:
- if (min == max):
- if (min > 127):
- function += (" characterClass->m_matchesUnicode.append(0x%04x);\n" % min)
- else:
- function += (" characterClass->m_matches.append(0x%02x);\n" % min)
- continue
- if (min > 127) or (max > 127):
- function += (" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (min, max))
- else:
- function += (" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (min, max))
- function += (" return characterClass;\n")
- function += ("}\n\n")
- functions += function
# Emit the generated source: tables first, then the create functions.
# With at least one command-line argument the text is written to the file
# named by the LAST argument; with none it is printed to standard output.
# NOTE(review): when "--no-tables" is the only argument it is also the last
# one, so the output lands in a file literally named "--no-tables" - confirm
# callers always pass an output path.
if len(sys.argv) > 1:
    # The with-block closes the file even if one of the writes raises.
    with open(sys.argv[-1], "w") as f:
        f.write(arrays)
        f.write(functions)
else:
    print(arrays)
    print(functions)
|