@@ -240,7 +240,7 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
# Escape for RE
L = [re.escape(v) for v in L]
- # Tokenizer for decimal numbers.
+ # Tokenizer for decimal numbers (floats).
decimal = (
br'|(?P<dec>(?:'
# Cautiously avoid matching 1 space alone or 1 trailing space.
@@ -250,18 +250,18 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
br'(?:'
br'\ *[%!\#]' # suffix
br'|\ *E\ *[-+]?(?:[0-9\ ]*[0-9])?' # or exponent, but not both
- br')?)'
+ br')?)' # suffix/exponent optional
)
# Tokenizer for uint. Up to a prefix of 6552, read up to 5 digits; from 6553 on, only 4, keeping the value <= 65529 (the highest line number).
# Used for line numbers, either leading or after GOTO/GOSUB/etc.
uint = (br'|(?P<dec>'
br'(?:0[0\ ]*)?' # leading zeros prefix
br'(?:0' # zero
- br'|[1-5](?:\ *[0-9]){,4}' # prefix 1..5999, 5 digits
- br'|6\ *[0-4](?:\ *[0-9]){,3}' # prefix 6000..6499, 5 digits
- br'|6\ *5\ *[0-4](?:\ *[0-9]){,2}' # prefix 6500..6549, 5 digits
+ br'|[1-5](?:\ *[0-9]){4}' # prefix 1..5, 5 digits
+ br'|6\ *[0-4](?:\ *[0-9]){3}' # prefix 60..64, 5 digits
+ br'|6\ *5\ *[0-4](?:\ *[0-9]){2}' # prefix 650..654, 5 digits
br'|6\ *5\ *5\ *[0-2](?:\ *[0-9])?' # prefix 6550..6552, 5 digits
- br'|[6-9](?:\ *[0-9]){,3}' # rest, 4 digits
+ br'|[1-9](?:\ *[0-9]){,3}' # rest, 1 to 4 digits
br'))'
)
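
# Illustration (not from the patch): the rewritten branches above,
# compiled stand-alone. The leading '|' is dropped so the fragment
# compiles on its own, and '\ *' models the blanks BASIC tolerates
# between digits. A prefix of 6552 or less commits to 5 digits;
# anything that would exceed 65529 falls through to the 1-to-4-digit
# branch.
import re

uint = re.compile(
    br'(?P<dec>'
    br'(?:0[0\ ]*)?'                     # leading zeros prefix
    br'(?:0'                             # zero
    br'|[1-5](?:\ *[0-9]){4}'            # 10000..59999
    br'|6\ *[0-4](?:\ *[0-9]){3}'        # 60000..64999
    br'|6\ *5\ *[0-4](?:\ *[0-9]){2}'    # 65000..65499
    br'|6\ *5\ *5\ *[0-2](?:\ *[0-9])?'  # 6550..6552 or 65500..65529
    br'|[1-9](?:\ *[0-9]){,3}'           # rest, 1 to 4 digits
    br'))')

for num, want in [(b'65529', b'65529'),  # highest legal line number
                  (b'65530', b'6553'),   # a 5th digit would overflow
                  (b'9999', b'9999')]:
    assert uint.match(num).group('dec') == want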
@@ -276,7 +276,7 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
#br'|&B(?P<bin>[01]+)' # binary numbers don't have tokens
br'%s' # decimal number
br'|(?P<str>"(?:[^"]*)(?:"|$))' # string literal
- b'|(?P<del>[\x80-\xFF])' # remove these
+ b'|(?P<del>[\x80-\xFF])' # remove those
br'|.'
br')'
)
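
# Illustration (not from the patch): a scanner built from a pattern of
# named alternatives like the one above typically dispatches on
# m.lastgroup. This cut-down pattern and loop only sketch that shape;
# the real dispatch in tokenize() is outside this diff. Note the order:
# string literals match first, so 8-bit bytes inside quotes survive,
# while stray ones hit the <del> branch and are dropped.
import re

pat = re.compile(
    br'(?P<str>"[^"]*(?:"|$))'  # string literal, passed through whole
    b'|(?P<del>[\x80-\xFF])'    # stray 8-bit byte, removed
    br'|.',                     # anything else
    re.S)

def scan(line):
    out = bytearray()
    for m in pat.finditer(line):
        if m.lastgroup != 'del':
            out += m.group()
    return bytes(out)

assert scan(b'PRINT "A\x80B"\x80') == b'PRINT "A\x80B"'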
@@ -306,9 +306,6 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
trunc_at_null = re.compile(br'\x00.*', re.S)
call_strip = re.compile(br'[^0-\x7F\ (]*')
- # Compile the BASIC to a buffer
- buf = io.BytesIO()
-
# Truncate source at \x1A (^Z)
src = re.sub(b'\x1A.*', b'', src, flags=re.S)
if use_cr:
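
# Note on the deletion above: the output buffer is no longer created
# before the first pass; the patch recreates it at the start of the
# second pass (last hunk below), which is the only place that writes
# to it. The surviving line cuts the source at the first ^Z, the
# CP/M-style EOF marker. Illustration (not from the patch):
import re

text = b'10 PRINT "A"\r\n\x1aTAIL\r\n20 END\r\n'
assert re.sub(b'\x1A.*', b'', text, flags=re.S) == b'10 PRINT "A"\r\n'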
@@ -320,8 +317,6 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
src = src.split(b'\n')
ignore = b'\r'
- filestart = buf.tell()
-
# First pass: Read the lines and tokenize them into a dict with the line
# number as the key. Handle line deletion, overwriting, etc.
PRGLines = {}
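
# Illustration (not from the patch): keying the first pass by line
# number means a number entered again overwrites the old line, and
# (per the 'del PRGLines[linenum]' in the next hunk) a bare number
# deletes it, mirroring how typing lines works in the BASIC editor.
PRGLines = {}
PRGLines[10] = b'tokens for PRINT "A"'  # 10 PRINT "A"
PRGLines[20] = b'tokens for GOTO 10'    # 20 GOTO 10
PRGLines[10] = b'tokens for PRINT "B"'  # 10 PRINT "B"  (overwrites)
del PRGLines[20]                        # 20             (deletes)
assert PRGLines == {10: b'tokens for PRINT "B"'}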
@@ -357,7 +352,7 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
del PRGLines[linenum]
continue
- lbuf = io.BytesIO()
+ lbuf = io.BytesIO() # Tokenized line buffer
tok_mode = ModeFloat
while True:
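
# Illustration (not from the patch): each source line gets a fresh
# BytesIO, and the while loop (whose body is outside this diff) appends
# token bytes to it; the finished buffer presumably ends up in
# PRGLines[linenum]. The token byte below is made up.
import io

lbuf = io.BytesIO()   # tokenized line buffer, one per source line
lbuf.write(b'\x91')   # hypothetical one-byte keyword token
lbuf.write(b' "HI"')  # literals are copied through
PRGLines = {10: lbuf.getvalue()}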
@@ -464,6 +459,8 @@ def tokenize(src, use_cr=False, type_mode=False, remove_spaces=False):
# Second pass - Write remaining lines in order
addr = 0x8001
+ buf = io.BytesIO()
+
for linenum in sorted(PRGLines.keys()):
line = PRGLines[linenum]
addr += len(line) + 4
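
# Illustration (not from the patch): what the bookkeeping above
# implies, assuming the usual MSX BASIC memory layout of a 2-byte
# next-line pointer and a 2-byte line number in front of each
# tokenized, null-terminated body; that pair of words is the '+ 4'.
# The pack and write calls are a sketch, and the token bytes are
# made up.
import io
import struct

buf = io.BytesIO()
addr = 0x8001                       # program text start address
PRGLines = {10: b'\x91 "HI"\x00',   # hypothetical tokenized bodies
            20: b'\x81\x00'}
for linenum in sorted(PRGLines.keys()):
    line = PRGLines[linenum]
    addr += len(line) + 4           # where the next line will start
    buf.write(struct.pack('<HH', addr, linenum))
    buf.write(line)
buf.write(b'\x00\x00')              # zero link pointer ends the program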