123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625 |
- #!/usr/bin/env python3
- """
- # Simple AVR disassembly postprocessor
- #
- # Copyright (C) 2012-2014 Michael Buesch <m@bues.ch>
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License along
- # with this program; if not, write to the Free Software Foundation, Inc.,
- # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- """
- import sys
- import re
- import getopt
# Format of generated address labels: "L" followed by the offset printed as
# at least four upper-case hex digits (offset 0x2AB => "L02AB").
LABEL_FMT = "L%04X"
def die(msg):
    '''Write msg plus a newline to stderr, flush it and exit with status 1.'''
    err = sys.stderr
    err.write(msg + "\n")
    err.flush()
    raise SystemExit(1)
def parseint(s):
    '''Convert a numeric string to an int.

    A string with a (case-insensitive) "0x" prefix is parsed as hexadecimal,
    anything else as decimal. int() raises ValueError on malformed input.
    '''
    is_hex = s.lower().startswith("0x")
    return int(s, 16 if is_hex else 10)
def ishex(s):
    '''Return True if s consists of hex digits only (also True for "").'''
    hexdigits = "0123456789abcdefABCDEF"
    return all(ch in hexdigits for ch in s)
def eff_linelen(s):
    '''Get effective line length (Tabs => 8 characters).

    Returns the display column reached after the last character of s:
    a tab advances to the next multiple of 8, a newline resets the
    column to 0 and every other character advances it by one.
    '''
    count = 0
    for c in s:
        if c == '\t':
            # Jump to the next tab stop. This must not fall through to the
            # generic "+1" branch, otherwise a tab counts one column too many.
            count = (count + 8) // 8 * 8
        elif c == '\n':
            count = 0
        else:
            count += 1
    return count
def pad_to_length(s, target_len):
    '''Right-pad s with spaces up to the effective length target_len.

    The length is measured with eff_linelen(). s is returned unchanged
    if it already is at least target_len long.
    '''
    missing = target_len - eff_linelen(s)
    if missing > 0:
        return s + ' ' * missing
    return s
def fix_twos_complement(val, nrBits):
    '''Reinterpret val as a signed two's complement number.

    Bit number nrBits is used as the sign bit, so val is truncated to
    nrBits + 1 bits before the sign is evaluated.
    '''
    sign_bit = 1 << nrBits
    modulus = sign_bit << 1
    truncated = val & (modulus - 1)
    if truncated & sign_bit:
        return truncated - modulus
    return truncated
class IncFile(object):
    '''A parsed INC-file.

    Attributes filled in by the constructor:
      ioaddr_map       -- I/O address => register name
      irq_map          -- IRQ vector byte address => vector name
      irq_vectors_size -- INT_VECTORS_SIZE, converted to bytes
      flash_size       -- FLASHEND + 1, converted to bytes
      flash_mask       -- flash_size - 1
    '''

    # ".equ NAME = VALUE", optionally followed by a ";" comment.
    equ_re = re.compile(r"^\s*\.equ\s+(\w+)\s*=\s*(\w+)\s*(?:;.*)?")
    # ".equ FLASHEND = VALUE", optionally followed by a ";" comment.
    flash_end_re = re.compile(r"^\s*\.equ\s+FLASHEND\s*=\s*(\w+)\s*(?:;.*)?")

    def __init__(self, inc_file_path):
        '''Read and parse the INC-file at inc_file_path.

        Terminates the program through die() if the file cannot be read
        or if FLASHEND, the I/O map or the IRQ map cannot be extracted.
        '''
        self.ioaddr_map = {}
        self.irq_map = {}
        self.irq_vectors_size = None
        self.flash_size = None

        try:
            # The context manager closes the file even if reading fails.
            with open(inc_file_path, "r") as fd:
                lines = fd.readlines()
        except IOError as e:
            die("Failed to read INC-FILE '%s': %s" % (inc_file_path, str(e)))

        in_io = False
        in_irq = False
        for line in lines:
            line = line.strip()
            # Section headers switch the parser state. The two known
            # headers are checked before the generic "; *****" banner,
            # which terminates any section.
            if "I/O REGISTER DEFINITIONS" in line:
                in_io = True
                continue
            if "INTERRUPT VECTORS" in line:
                in_irq = True
                continue
            if line.startswith("; *****"):
                in_io = False
                in_irq = False
                continue

            if in_io:
                self.__parse_iomap_entry(line)
            elif in_irq:
                self.__parse_irqmap_entry(line)
            else:
                m = self.flash_end_re.match(line)
                if m:
                    try:
                        end = int(m.group(1), 16)
                    except ValueError:
                        # Unparsable value. A missing flash size is
                        # reported after the loop.
                        continue
                    self.flash_size = (end + 1) * 2 # To bytes

        if not self.flash_size:
            die("Failed to get FLASHEND from INC-FILE")
        self.flash_mask = self.flash_size - 1
        if not self.ioaddr_map:
            die("Failed to parse I/O-map from INC-FILE")
        if not self.irq_map or not self.irq_vectors_size:
            die("Failed to parse IRQ-map from INC-FILE")
        # Make sure there always is a name for vector 0.
        if 0 not in self.irq_map:
            self.irq_map[0] = "RESET"

    # Parse one I/O map entry
    def __parse_iomap_entry(self, line):
        '''Add the ".equ NAME = ADDR" entry of line to ioaddr_map.

        Lines that are not .equ statements are ignored. ADDR is parsed
        as a hex number; die() is called if that fails.
        '''
        m = self.equ_re.match(line)
        if not m:
            return
        name, addr = m.group(1), m.group(2)
        try:
            addr = int(addr, 16)
        except ValueError:
            die("Failed to convert I/O map address: %s" % line)
        self.ioaddr_map[addr] = name

    # Parse one IRQ map entry
    def __parse_irqmap_entry(self, line):
        '''Handle one line of the interrupt vector section.

        INT_VECTORS_SIZE (decimal) sets irq_vectors_size. Any other name
        ending in "addr" is stored in irq_map under its hex address.
        Both values are doubled to convert them to bytes. Other lines
        are ignored.
        '''
        m = self.equ_re.match(line)
        if not m:
            return
        name, addr = m.group(1), m.group(2)
        if name == "INT_VECTORS_SIZE":
            try:
                self.irq_vectors_size = int(addr, 10) * 2 # To byte size
            except ValueError:
                die("Failed to parse IRQ map size: %s" % line)
            return
        if not name.endswith("addr"):
            return
        try:
            addr = int(addr, 16)
        except ValueError:
            die("Failed to convert IRQ map address: %s" % line)
        addr *= 2 # To byte address
        self.irq_map[addr] = name
class Insn(object):
    '''An AVR assembly instruction

    One instance is built from one line of the disassembly listing.

    Attributes set by the constructor:
      offset       -- instruction offset, parsed as hex from e.g. "2ab:"
      offset_label -- symbolic label that replaces the generated one
                      (None until rewrite() assigns an IRQ vector name)
      raw_bytes    -- values of the two-digit hex bytes after the offset
      insn         -- lower-cased mnemonic
      operands     -- operand strings with commas removed
      comment      -- text after the first ';' (may be empty)
      callers      -- instructions registered through add_caller()
      jmpsources   -- instructions registered through add_jmpsource()
    '''

    class StringErr(Exception): pass
    class StringIgnore(Exception): pass

    # Start of an instruction line: hex offset, colon, whitespace.
    _insn_line_re = re.compile(r'^\s*[0-9a-fA-F]+:\s+')
    # Relative operand such as ".+10" or ".-4".
    _rel_operand_re = re.compile(r"^\.([\+-][0-9]+)")
    # Plain register operand such as "r26".
    _register_re = re.compile(r"^[rR]([0-9]+)$")
    # Mnemonic => (index of the address operand, kind of address)
    _io_operand_tab = { "sts"  : (0, "mem"),
                        "lds"  : (1, "mem"),
                        "in"   : (1, "io"),
                        "out"  : (0, "io"),
                        "sbic" : (0, "io"),
                        "sbis" : (0, "io"),
                        "sbi"  : (0, "io"),
                        "cbi"  : (0, "io"), }
    # Register number => symbolic name of the pointer register half
    _special_regs_tab = { 26 : "XL",
                          27 : "XH",
                          28 : "YL",
                          29 : "YH",
                          30 : "ZL",
                          31 : "ZH", }

    def __init__(self, insn_string):
        '''Parse one listing line.

        Raises Insn.StringIgnore if the line does not start with an
        "offset:" field, and Insn.StringErr if it does but carries no
        instruction mnemonic.
        '''
        # Check whether this is an instruction line.
        if not self._insn_line_re.match(insn_string):
            raise Insn.StringIgnore()

        # Look for comments
        self.comment = ""
        if ';' in insn_string:
            i = insn_string.index(';')
            # Keep the comment (with the 0x0x breakage fixed) and
            # strip it off the instruction string.
            self.comment = insn_string[i+1:].strip().replace("0x0x", "0x")
            insn_string = insn_string[:i]

        s = insn_string.split()
        if len(s) < 2:
            raise Insn.StringErr()

        # Extract the raw bytes. The length guard keeps a line that
        # consists of raw bytes only from running off the token list.
        self.raw_bytes = []
        while len(s) > 1 and len(s[1]) == 2 and ishex(s[1]):
            self.raw_bytes.append(int(s.pop(1), 16))
        if len(s) < 2:
            # Nothing but raw bytes: there is no mnemonic to parse.
            raise Insn.StringErr()

        # Extract offset (2ab:)
        try:
            self.offset = int(s[0][:-1], 16) # Strip colon
        except ValueError:
            die("Failed to extract insn offset")
        self.offset_label = None

        # Extract insn string (jmp...)
        self.insn = s[1].lower()

        # Extract operands. Strip commas and fix 0x0x breakage.
        self.operands = [ op.replace(",", "").replace("0x0x", "0x")
                          for op in s[2:] ]

        self.callers = []
        self.jmpsources = []

    def __makeLabel(self):
        '''Return "<label>:" padded to 10 columns.'''
        lbl = self.get_offset_string() + ":"
        return pad_to_length(lbl, 10)

    def get_as_data_string(self, inc_file):
        '''Returns the full string of the instruction as data.

        Only instructions with exactly two raw bytes are supported; they
        are emitted as one ".dw" word with the second byte as the high
        byte. Anything else terminates the program through die().
        '''
        s = ""
        # Space between IRQ vectors and program
        if self.get_offset() == inc_file.irq_vectors_size:
            s += "\n"
        # Dump the bytes
        s += self.__makeLabel()
        if len(self.raw_bytes) == 2:
            s += ".dw 0x%02X%02X" % (self.raw_bytes[1],
                                     self.raw_bytes[0])
        else:
            die("Do not know how to handle raw bytes")
        return s

    def get_as_instruction_string(self, inc_file):
        '''Returns a full string of the instruction

        The result may span several lines: optional header comments that
        list the callers and IRQ jump sources, followed by the label,
        mnemonic, operands and a trailing comment.
        '''
        # Offset of the last word inside the IRQ vector table.
        max_vect = inc_file.irq_vectors_size - 2
        is_irq_handler = any(src.get_offset() <= max_vect
                             for src in self.jmpsources)
        s = ""

        # Show CALLers
        if self.callers:
            s += "\n; FUNCTION called by "
            names = []
            pfx = ""
            last = len(self.callers) - 1
            for i, caller in enumerate(self.callers):
                names.append(pfx + caller.get_offset_string())
                # Start a new comment line after every sixth caller,
                # unless it is the last one.
                if (i + 1) % 6 == 0 and i != last:
                    pfx = "\n;\t\t"
                else:
                    pfx = ""
            s += ", ".join(names)
            s += "\n"

        # Space between IRQ vectors and program
        if self.get_offset() == max_vect + 2 and not self.callers:
            s += "\n"

        # Show IRQ vector jump sources
        if is_irq_handler:
            if not self.callers:
                s += "\n"
            # This is jumped to from IRQ vectors.
            s += "; IRQ handler for "
            s += ", ".join(src.get_offset_string()
                           for src in self.jmpsources)
            s += "\n"

        # Dump the instruction string
        s += self.__makeLabel()
        s += self.get_insn()
        if self.get_operands():
            s = pad_to_length(s, 18)
            s += ", ".join(self.get_operands())

        # Add the comment string and the (R)JMP sources.
        comm = self.get_comment()
        if comm or self.jmpsources:
            notes = []
            if comm:
                notes.append(comm)
            nonirq_jmpsrcs = [ src for src in self.jmpsources
                               if src.get_offset() > max_vect ]
            if nonirq_jmpsrcs:
                notes.append("JUMPTARGET from " +
                             ", ".join(src.get_offset_string()
                                       for src in nonirq_jmpsrcs))
            # Joining avoids a dangling " / " separator when only one
            # of the two parts is present.
            s = pad_to_length(s, 35)
            s += ";" + " / ".join(notes)
        return s

    def get_offset(self):
        '''Return the numeric offset of the instruction.'''
        return self.offset

    def get_offset_label(self):
        '''Return the symbolic label, or None if there is none.'''
        return self.offset_label

    def get_offset_string(self):
        '''Return the symbolic label if set, else the generated label.'''
        label = self.get_offset_label()
        if label:
            return label
        return LABEL_FMT % self.get_offset()

    def get_insn(self):
        '''Return the mnemonic.'''
        return self.insn

    def set_insn(self, insn):
        '''Replace the mnemonic.'''
        self.insn = insn

    def get_operands(self):
        '''Return the (mutable) operand list.'''
        return self.operands

    def get_comment(self):
        '''Return the comment text.'''
        return self.comment

    def add_caller(self, insn):
        '''Register insn as an instruction that calls this one.'''
        self.callers.append(insn)

    def add_jmpsource(self, insn):
        '''Register insn as an instruction that jumps to this one.'''
        self.jmpsources.append(insn)

    def __rewrite_irq_label(self, inc_file):
        '''Name instructions inside the IRQ vector table.

        The vector name from inc_file.irq_map becomes the comment; the
        label is "L_" plus the name without a trailing "addr".
        '''
        offset = self.get_offset()
        if offset >= inc_file.irq_vectors_size:
            return
        try:
            label = inc_file.irq_map[offset]
        except KeyError:
            return
        self.comment = label
        if label.endswith("addr"):
            label = label[:-4]
        self.offset_label = "L_" + label

    def __rewrite_jmp_targets(self, inc_file):
        '''Replace the numeric target of jmp/call by a generated label.'''
        if self.get_insn() not in ("jmp", "call"):
            return
        operands = self.get_operands()
        if len(operands) != 1:
            die("Error: more than one JMP/CALL operand")
        operands[0] = LABEL_FMT % int(operands[0], 0)

    def __rewrite_rjmp_targets(self, inc_file):
        '''Replace the first relative operand (".+N"/".-N") by a label.

        The target is this offset + N + 2, wrapped with
        inc_file.flash_mask.
        '''
        operlist = self.get_operands()
        for i, oper in enumerate(operlist):
            m = self._rel_operand_re.match(oper)
            if not m:
                continue
            offs = fix_twos_complement(int(m.group(1)), 12) + 2
            offs = (self.get_offset() + offs) & inc_file.flash_mask
            operlist[i] = LABEL_FMT % offs
            break

    def __rewrite_io_addrs(self, inc_file):
        '''Replace numeric I/O addresses by names from inc_file.ioaddr_map.

        For lds/sts ("mem") operands below 0x60 the address is reduced by
        0x20 before the lookup and " + 0x20" is appended to the name.
        A "mem" operand below 0x20 terminates the program through die().
        '''
        try:
            (offset, optype) = self._io_operand_tab[self.get_insn()]
        except KeyError:
            return
        operands = self.get_operands()
        ioaddr = int(operands[offset], 0)
        if optype == "mem":
            if ioaddr < 0x20:
                # Report through die() like every other fatal error,
                # instead of print() plus the site-provided exit().
                die("Error: mem-op offset operand < 0x20")
            if ioaddr < 0x60:
                ioaddr -= 0x20
        try:
            name = inc_file.ioaddr_map[ioaddr]
        except KeyError:
            return
        # ioaddr has already been reduced here, so this is true exactly
        # for the operands that were adjusted above.
        if optype == "mem" and ioaddr < 0x60:
            name += " + 0x20"
        # Got a name for it. Reassign it.
        operands[offset] = name

    def __rewrite_special_registers(self, inc_file):
        '''Replace r26..r31 operands by XL, XH, YL, YH, ZL, ZH.'''
        operands = self.get_operands()
        for i, oper in enumerate(operands):
            m = self._register_re.match(oper)
            if not m:
                continue
            try:
                operands[i] = self._special_regs_tab[int(m.group(1))]
            except KeyError:
                continue

    def __fix_raw_words(self, inc_file):
        '''Rename the ".word" pseudo instruction to ".dw".'''
        if self.get_insn() == ".word":
            self.set_insn(".dw")

    def rewrite(self, inc_file):
        '''Rewrite the instruction to be better human readable'''
        self.__rewrite_irq_label(inc_file)
        self.__rewrite_jmp_targets(inc_file)
        self.__rewrite_rjmp_targets(inc_file)
        self.__rewrite_io_addrs(inc_file)
        self.__rewrite_special_registers(inc_file)
        self.__fix_raw_words(inc_file)
def usage():
    '''Print the command line help text to stdout.'''
    help_lines = (
        "avr-postproc [OPTIONS] INC-FILE",
        "",
        "INC-FILE is the assembly .inc file.",
        "Objdump assembly is read from stdin.",
        "Processed assembly is written to stdout.",
        "",
        "Options:",
        " -I|--infile FILEPATH Path to input file.",
        " If not specified, stdin is used.",
        " -O|--outfile FILEPATH Path to output file.",
        " If not specified, stdout is used.",
        " -s|--start OFFSET Start offset. Default 0x0.",
        " -e|--end OFFSET End offset. Default all.",
        " -d|--data-range RANGES Define a pure data range in program memory.",
        " Example: -d 0x0-0x1F -d 0x100-0x1FF",
        " Defines byte range 0h-1Fh and 100h-1FFh as data.",
        " -L|--label-file FILE Label file to pick label names from.",
        " -C|--comment-file FILE Comment file to pick comments from.",
    )
    print("\n".join(help_lines))
def main():
    '''Command line entry point.

    Parses the options, reads the INC-file and the disassembly (from
    -I or stdin), rewrites every instruction, annotates jump and call
    targets and writes the result to -O or stdout.

    Returns the process exit status (0 on success, 1 on option errors).
    Fatal errors terminate the program through die().
    '''
    # Branch/call mnemonic => (kind, index of the target operand)
    branch_insns = { "jmp" : ("type_jmp", 0),
                     "rjmp" : ("type_jmp", 0),
                     "brbs" : ("type_jmp", 1),
                     "brbc" : ("type_jmp", 1),
                     "breq" : ("type_jmp", 0),
                     "brne" : ("type_jmp", 0),
                     "brcs" : ("type_jmp", 0),
                     "brcc" : ("type_jmp", 0),
                     "brsh" : ("type_jmp", 0),
                     "brlo" : ("type_jmp", 0),
                     "brmi" : ("type_jmp", 0),
                     "brpl" : ("type_jmp", 0),
                     "brge" : ("type_jmp", 0),
                     "brlt" : ("type_jmp", 0),
                     "brhs" : ("type_jmp", 0),
                     "brhc" : ("type_jmp", 0),
                     "brts" : ("type_jmp", 0),
                     "brtc" : ("type_jmp", 0),
                     "brvs" : ("type_jmp", 0),
                     "brvc" : ("type_jmp", 0),
                     "brie" : ("type_jmp", 0),
                     "brid" : ("type_jmp", 0),
                     "call" : ("type_call", 0),
                     "rcall" : ("type_call", 0), }

    infilename = None
    outfilename = None
    start_offset = 0
    stop_offset = -1
    data_ranges = []
    label_files = []
    comment_files = []

    try:
        # The long option names match the help text printed by usage().
        # getopt accepts unique prefixes, so the shorter spellings
        # "--label" and "--comment" keep working.
        (opts, args) = getopt.getopt(sys.argv[1:],
            "hI:O:s:e:d:L:C:",
            [ "help", "infile=", "outfile=",
              "start=", "end=", "data-range=",
              "label-file=", "comment-file=", ])
    except getopt.GetoptError:
        usage()
        return 1
    for (o, v) in opts:
        if o in ("-h", "--help"):
            usage()
            return 0
        if o in ("-I", "--infile"):
            infilename = v
        if o in ("-O", "--outfile"):
            outfilename = v
        if o in ("-s", "--start"):
            try:
                start_offset = parseint(v)
            except ValueError:
                die("-s|--start is not a number")
        if o in ("-e", "--end"):
            try:
                stop_offset = parseint(v)
            except ValueError:
                die("-e|--end is not a number")
        if o in ("-d", "--data-range"):
            if not v.strip():
                continue
            try:
                start, stop = v.split('-')
                start, stop = parseint(start), parseint(stop)
                if start < 0 or stop < 0 or stop < start:
                    raise ValueError
                # Both ends of the range are inclusive.
                data_ranges.append(range(start, stop + 1))
            except ValueError:
                die("-d|--data-range invalid value")
        if o in ("-L", "--label-file"):
            label_files.append(v)
            #TODO use it
        if o in ("-C", "--comment-file"):
            comment_files.append(v)
            #TODO use it
    if len(args) != 1:
        die("INC-FILE not specified")
    inc_file_path = args[0]

    inc_file = IncFile(inc_file_path)

    if infilename:
        try:
            with open(infilename, "r") as infd:
                lines = infd.readlines()
        except IOError as e:
            die("Failed to read input file '%s':\n%s" %
                (infilename, str(e)))
    else:
        lines = sys.stdin.readlines()

    # Parse the input and rewrite the
    # instructions to include symbolic names
    insns = []
    for line in lines:
        try:
            insn = Insn(line)
        except Insn.StringIgnore:
            continue
        except Insn.StringErr:
            die("ERROR: Could not parse line \"%s\"" % line)
        if insn.get_offset() < start_offset:
            continue
        if stop_offset != -1 and insn.get_offset() > stop_offset:
            break
        insn.rewrite(inc_file)
        insns.append(insn)

    # Offset => instruction. setdefault() keeps the first instruction
    # seen for an offset, like a front-to-back search would.
    insn_by_offset = {}
    for insn in insns:
        insn_by_offset.setdefault(insn.get_offset(), insn)

    # Annotate jump sources
    for insn in insns:
        try:
            (jmptype, targetoper) = branch_insns[insn.get_insn()]
        except KeyError:
            continue
        # The target operand has been rewritten to a generated label;
        # drop its first character and parse the rest as hex.
        tgt_offset = int(insn.get_operands()[targetoper][1:], 16)
        target = insn_by_offset.get(tgt_offset)
        if target is None:
            print("; Postproc error: Instruction with "
                  "offset 0x%04X not found" % tgt_offset)
            continue
        if jmptype == "type_jmp":
            target.add_jmpsource(insn)
        else:
            target.add_caller(insn)

    # Write the output
    if outfilename:
        try:
            outfd = open(outfilename, "w")
        except IOError as e:
            die("Failed to open output file '%s':\n%s" %
                (outfilename, str(e)))
    else:
        outfd = sys.stdout
    try:
        outfd.write('.include "' + inc_file_path.split("/")[-1] + '"\n')
        outfd.write('\n')
        outfd.write('.org 0x000\n')
        outfd.write('\n')
        for insn in insns:
            if any((insn.get_offset() in r) for r in data_ranges):
                # Dump it as data.
                s = insn.get_as_data_string(inc_file)
            else:
                # Dump the instruction.
                s = insn.get_as_instruction_string(inc_file)
            if not s:
                continue
            outfd.write(s)
            outfd.write("\n")
    finally:
        # Close the output file on errors, too. stdout is left open.
        if outfilename:
            outfd.close()

    return 0
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|