#!/usr/bin/env python3
# mbuesch / avrev
"""
#  Simple AVR disassembly postprocessor
#
#  Copyright (C) 2012-2014 Michael Buesch <m@bues.ch>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""

import sys
import re
import getopt


# printf-style format that turns an instruction offset into a label name.
LABEL_FMT = "L%04X"


def die(msg):
	'''Write msg and a newline to stderr, flush it and exit with status 1.'''
	err = sys.stderr
	err.write(msg + "\n")
	err.flush()
	raise SystemExit(1)

def parseint(s):
	'''Convert a numeric string to an int.

	A leading "0x" (any case) selects base 16, everything else is
	parsed as base 10. Raises ValueError on malformed input.
	'''
	base = 16 if s.lower().startswith("0x") else 10
	return int(s, base)

def ishex(s):
	'''Return True if every character of s is a hexadecimal digit.

	An empty string yields True.
	'''
	return all(ch in "0123456789abcdefABCDEF" for ch in s)

def eff_linelen(s):
	'''Get the effective length of the last line in s.

	A tab advances the column to the next multiple of 8, a newline
	resets the column to 0 and every other character counts as one
	column.
	'''
	count = 0
	for c in s:
		if c == '\t':
			# Jump to the next tab stop. This must not fall through
			# to the "+= 1" below, otherwise a tab would count as
			# one column too many.
			count = (count + 8) // 8 * 8
		elif c == '\n':
			count = 0
		else:
			count += 1
	return count

def pad_to_length(s, target_len):
	'''Append spaces to s until its effective length is target_len.

	s is returned unchanged if it is already long enough.
	'''
	missing = target_len - eff_linelen(s)
	if missing <= 0:
		return s
	return s + ' ' * missing

def fix_twos_complement(val, nrBits):
	'''Interpret val as a two's complement number and return it signed.

	nrBits is the bit index of the sign bit, so the value is treated
	as being nrBits + 1 bits wide. Higher bits of val are discarded.
	'''
	sign_bit = 1 << nrBits
	modulus = sign_bit << 1
	wrapped = val & (modulus - 1)
	if wrapped & sign_bit:
		# Sign bit set: map into the negative half of the range.
		return wrapped - modulus
	return wrapped

class IncFile(object):
	'''A parsed INC-file.

	Attributes set by the constructor:
	ioaddr_map       -- dict: I/O register address => register name
	irq_map          -- dict: IRQ vector byte address => vector name
	irq_vectors_size -- size of the IRQ vector table in bytes
	flash_size       -- (FLASHEND + 1) * 2
	flash_mask       -- flash_size - 1
	'''

	equ_re = re.compile(r"^\s*\.equ\s+(\w+)\s*=\s*(\w+)\s*(?:;.*)?")
	flash_end_re = re.compile(r"^\s*\.equ\s+FLASHEND\s*=\s*(\w+)\s*(?:;.*)?")

	def __init__(self, inc_file_path):
		'''Read and parse the INC-file at inc_file_path.

		The program is terminated through die() if the file cannot
		be read or if FLASHEND, the I/O map or the IRQ map (including
		INT_VECTORS_SIZE) cannot be found in it.
		'''
		self.ioaddr_map = {}
		self.irq_map = {}
		self.irq_vectors_size = None
		self.flash_size = None
		in_io = False
		in_irq = False
		try:
			# The context manager closes the file in all cases.
			with open(inc_file_path, "r") as fd:
				lines = fd.readlines()
		except IOError as e:
			die("Failed to read INC-FILE '%s': %s" % (inc_file_path, str(e)))
		for line in lines:
			line = line.strip()
			# The two named section headers are checked before the
			# generic "; *****" header, which they also start with.
			# Entering one section always leaves the other one.
			if "I/O REGISTER DEFINITIONS" in line:
				in_io, in_irq = True, False
				continue
			if "INTERRUPT VECTORS" in line:
				in_io, in_irq = False, True
				continue
			if line.startswith("; *****"):
				# Any other section header ends both sections.
				in_io = False
				in_irq = False
				continue
			if in_io:
				self.__parse_iomap_entry(line)
			elif in_irq:
				self.__parse_irqmap_entry(line)
			else:
				m = self.flash_end_re.match(line)
				if m:
					try:
						end = int(m.group(1), 16)
						self.flash_size = end + 1
						self.flash_size *= 2 # To bytes
					except ValueError:
						# Not a hex number. Keep looking;
						# a missing size is fatal below.
						pass
		if not self.flash_size:
			die("Failed to get FLASHEND from INC-FILE")
		self.flash_mask = self.flash_size - 1
		if not self.ioaddr_map:
			die("Failed to parse I/O-map from INC-FILE")
		if not self.irq_map or not self.irq_vectors_size:
			die("Failed to parse IRQ-map from INC-FILE")
		# Address 0 gets the name RESET unless the file names it.
		if 0 not in self.irq_map:
			self.irq_map[0] = "RESET"

	# Parse one I/O map entry
	def __parse_iomap_entry(self, line):
		'''Add a ".equ NAME = ADDR" line to ioaddr_map.

		Lines that are not .equ definitions are ignored. ADDR is
		parsed as a hexadecimal number; die() is called if that fails.
		'''
		m = self.equ_re.match(line)
		if not m:
			return
		name, addr = m.group(1), m.group(2)
		try:
			addr = int(addr, 16)
		except ValueError:
			die("Failed to convert I/O map address: %s" % line)
		self.ioaddr_map[addr] = name

	# Parse one IRQ map entry
	def __parse_irqmap_entry(self, line):
		'''Handle one ".equ NAME = VALUE" line of the IRQ section.

		INT_VECTORS_SIZE (decimal) sets irq_vectors_size. Names ending
		in "addr" are added to irq_map with VALUE parsed as hex. All
		other lines are ignored. Both values are doubled to convert
		them to bytes.
		'''
		m = self.equ_re.match(line)
		if not m:
			return
		name, addr = m.group(1), m.group(2)
		if name == "INT_VECTORS_SIZE":
			try:
				self.irq_vectors_size = int(addr, 10)
				self.irq_vectors_size *= 2 # To byte size
			except ValueError:
				die("Failed to parse IRQ map size: %s" % line)
			return
		if not name.endswith("addr"):
			return
		try:
			addr = int(addr, 16)
		except ValueError:
			die("Failed to convert IRQ map address: %s" % line)
		addr *= 2 # To byte address
		self.irq_map[addr] = name

class Insn(object):
	'''An AVR assembly instruction parsed from one disassembly line.

	The expected line layout is
	"<hex offset>: <raw bytes...> <mnemonic> <operands...> ; <comment>".
	'''

	# Raised if an instruction line cannot be split into its parts.
	class StringErr(Exception): pass
	# Raised if the line does not start with "<hex offset>:".
	class StringIgnore(Exception): pass

	# Leading "<hex offset>:" of an instruction line.
	_insn_line_re = re.compile(r'^\s*[0-9a-fA-F]+:\s+')
	# Relative operand such as ".+4" or ".-10".
	_rel_target_re = re.compile(r"^\.([\+-][0-9]+)")
	# Plain register operand such as "r26".
	_register_re = re.compile(r"^[rR]([0-9]+)$")

	# mnemonic => (index of the address operand, operand type)
	_io_operand_tab = {
		"sts"  : (0, "mem"),
		"lds"  : (1, "mem"),
		"in"   : (1, "io"),
		"out"  : (0, "io"),
		"sbic" : (0, "io"),
		"sbis" : (0, "io"),
		"sbi"  : (0, "io"),
		"cbi"  : (0, "io"),
	}

	# register number => symbolic register name
	_special_regs_tab = {
		26 : "XL",
		27 : "XH",
		28 : "YL",
		29 : "YH",
		30 : "ZL",
		31 : "ZH",
	}

	def __init__(self, insn_string):
		'''Parse one line of disassembly.

		Raises Insn.StringIgnore if the line is not an instruction
		line and Insn.StringErr if it is one, but has no mnemonic.
		'''
		# Check whether this is an instruction line.
		if not self._insn_line_re.match(insn_string):
			raise Insn.StringIgnore()
		# Look for comments
		self.comment = ""
		if ';' in insn_string:
			# Strip the comment off at the first semicolon
			insn_string, comment = insn_string.split(';', 1)
			# Fix 0x0x breakage
			self.comment = comment.strip().replace("0x0x", "0x")
		s = insn_string.split()
		if len(s) < 2:
			raise Insn.StringErr()
		# Extract the raw bytes (two-digit hex fields after the offset).
		# The length check stops the loop on lines that consist of
		# raw bytes only.
		self.raw_bytes = []
		while len(s) > 1 and len(s[1]) == 2 and ishex(s[1]):
			self.raw_bytes.append(int(s.pop(1), 16))
		if len(s) < 2:
			# Nothing but raw bytes; there is no mnemonic.
			raise Insn.StringErr()
		# Extract offset (2ab:)
		try:
			self.offset = int(s[0][:-1], 16) # Strip colon
		except ValueError:
			die("Failed to extract insn offset")
		self.offset_label = None
		# Extract insn string (jmp...)
		self.insn = s[1].lower()
		# Extract operands. Strip commas and fix 0x0x breakage.
		self.operands = [ op.replace(",", "").replace("0x0x", "0x")
				  for op in s[2:] ]
		self.callers = []
		self.jmpsources = []

	def __makeLabel(self):
		'''Return "<label>:" padded to 10 columns.'''
		lbl = self.get_offset_string() + ":"
		lbl = pad_to_length(lbl, 10)
		return lbl

	def get_as_data_string(self, inc_file):
		'''Returns the full string of the instruction as data.

		Only instructions with exactly two raw bytes are supported.
		They are emitted as one ".dw" word with the second byte as
		the high byte. Anything else terminates through die().
		'''
		s = ""

		# Space between IRQ vectors and program
		if self.get_offset() == inc_file.irq_vectors_size:
			s += "\n"

		# Dump the bytes
		s += self.__makeLabel()
		if len(self.raw_bytes) == 2:
			s += ".dw 0x%02X%02X" % (self.raw_bytes[1],
						 self.raw_bytes[0])
		else:
			die("Do not know how to handle raw bytes")

		return s

	def get_as_instruction_string(self, inc_file):
		'''Returns a full string of the instruction.

		The string consists of optional header lines (callers, IRQ
		handler note), the label, the mnemonic, the operands and an
		optional trailing comment with the non-vector jump sources.
		'''
		max_vect = inc_file.irq_vectors_size - 2
		# Split the jump sources into those located inside the
		# IRQ vector table and all the others.
		irq_srcs = [ src for src in self.jmpsources
			     if src.get_offset() <= max_vect ]
		nonirq_srcs = [ src for src in self.jmpsources
				if src.get_offset() > max_vect ]

		s = ""

		# Show CALLers
		if self.callers:
			s += "\n; FUNCTION called by "
			c = []
			pfx = ""
			last = len(self.callers) - 1
			for i, caller in enumerate(self.callers):
				c.append(pfx + caller.get_offset_string())
				# Start a new comment line after every sixth
				# caller, unless it is the last one.
				if i != 0 and (i + 1) % 6 == 0 and i != last:
					pfx = "\n;\t\t"
				else:
					pfx = ""
			s += ", ".join(c)
			s += "\n"

		# Space between IRQ vectors and program
		if self.get_offset() == max_vect + 2 and not self.callers:
			s += "\n"

		# Show IRQ vector jump sources
		if irq_srcs:
			if not self.callers:
				s += "\n"
			# This is jumped to from IRQ vectors. Only the
			# vectors are named here; other jump sources are
			# listed in the trailing comment.
			s += "; IRQ handler for "
			s += ", ".join(src.get_offset_string()
				       for src in irq_srcs)
			s += "\n"

		# Dump the instruction string
		s += self.__makeLabel()
		s += self.get_insn()
		if self.get_operands():
			s = pad_to_length(s, 18)
			s += ", ".join(self.get_operands())

		# Add the comment string. The comment is only opened if
		# there is something to put into it.
		comm = self.get_comment()
		if comm or nonirq_srcs:
			s = pad_to_length(s, 35)
			s += ";"
		if comm:
			s += comm
			if nonirq_srcs:
				s += " / "

		# Add the (R)JMP sources
		if nonirq_srcs:
			s += "JUMPTARGET from "
			s += ", ".join(src.get_offset_string()
				       for src in nonirq_srcs)
		return s

	def get_offset(self):
		'''Return the offset of the instruction as an int.'''
		return self.offset

	def get_offset_label(self):
		'''Return the symbolic label of the offset, or None.'''
		return self.offset_label

	def get_offset_string(self):
		'''Return the symbolic label if set, else the generic label.'''
		label = self.get_offset_label()
		if label:
			return label
		return LABEL_FMT % self.get_offset()

	def get_insn(self):
		'''Return the (lower case) mnemonic.'''
		return self.insn

	def set_insn(self, insn):
		'''Replace the mnemonic.'''
		self.insn = insn

	def get_operands(self):
		'''Return the list of operand strings (not a copy).'''
		return self.operands

	def get_comment(self):
		'''Return the comment text. Empty string, if none.'''
		return self.comment

	def add_caller(self, insn):
		'''Register insn as a CALL source of this instruction.'''
		self.callers.append(insn)

	def add_jmpsource(self, insn):
		'''Register insn as a jump source of this instruction.'''
		self.jmpsources.append(insn)

	def __rewrite_irq_label(self, inc_file):
		'''Give instructions inside the IRQ vector table their name.

		The vector name replaces the comment. The label becomes
		"L_" + name with a trailing "addr" removed from the name.
		'''
		offset = self.get_offset()
		if offset >= inc_file.irq_vectors_size:
			return
		try:
			label = inc_file.irq_map[offset]
		except KeyError:
			return
		self.comment = label
		if label.endswith("addr"):
			label = label[:-4]
		label = "L_" + label
		self.offset_label = label

	def __rewrite_jmp_targets(self, inc_file):
		'''Replace the numeric target of JMP and CALL by its label.'''
		if self.get_insn() not in ("jmp", "call"):
			return
		operands = self.get_operands()
		if len(operands) != 1:
			die("Error: more than one JMP/CALL operand")
		operands[0] = LABEL_FMT % int(operands[0], 0)

	def __rewrite_rjmp_targets(self, inc_file):
		'''Replace the first relative operand (".+N"/".-N") by a label.

		The target is this offset + N + 2, wrapped to the flash size.
		'''
		operlist = self.get_operands()
		for i, oper in enumerate(operlist):
			m = self._rel_target_re.match(oper)
			if not m:
				continue
			offs = fix_twos_complement(int(m.group(1)), 12) + 2
			offs = (self.get_offset() + offs) & inc_file.flash_mask
			operlist[i] = LABEL_FMT % offs
			break

	def __rewrite_io_addrs(self, inc_file):
		'''Replace numeric I/O addresses by the register name.

		For "mem" type operands (sts, lds) below 0x60 the value 0x20
		is subtracted before the lookup and " + 0x20" is appended to
		the name. Addresses without a name are left as they are.
		'''
		try:
			(offset, optype) = self._io_operand_tab[self.get_insn()]
		except KeyError:
			return
		operands = self.get_operands()
		ioaddr = int(operands[offset], 0)
		if optype == "mem":
			if ioaddr < 0x20:
				die("Error: mem-op offset operand < 0x20")
			if ioaddr < 0x60:
				ioaddr -= 0x20
		try:
			name = inc_file.ioaddr_map[ioaddr]
		except KeyError:
			return
		# Only adjusted addresses can be below 0x60 at this point.
		if optype == "mem" and ioaddr < 0x60:
			name += " + 0x20"
		# Got a name for it. Reassign it.
		operands[offset] = name

	def __rewrite_special_registers(self, inc_file):
		'''Replace r26...r31 operands by XL, XH, YL, YH, ZL, ZH.'''
		operands = self.get_operands()
		for i, oper in enumerate(operands):
			m = self._register_re.match(oper)
			if not m:
				continue
			name = self._special_regs_tab.get(int(m.group(1)))
			if name:
				operands[i] = name

	def __fix_raw_words(self, inc_file):
		'''Rename the ".word" directive to ".dw".'''
		if self.get_insn() == ".word":
			self.set_insn(".dw")

	def rewrite(self, inc_file):
		'''Rewrite the instruction to make it more human readable.

		inc_file is the IncFile that supplies the symbolic names.
		'''
		self.__rewrite_irq_label(inc_file)
		self.__rewrite_jmp_targets(inc_file)
		self.__rewrite_rjmp_targets(inc_file)
		self.__rewrite_io_addrs(inc_file)
		self.__rewrite_special_registers(inc_file)
		self.__fix_raw_words(inc_file)


def usage():
	'''Print the command line help text to stdout.

	The long option names listed here are the ones that main()
	passes to getopt.
	'''
	print("avr-postproc [OPTIONS] INC-FILE")
	print("")
	print("INC-FILE is the assembly .inc file.")
	print("Objdump assembly is read from stdin.")
	print("Processed assembly is written to stdout.")
	print("")
	print("Options:")
	print(" -I|--infile FILEPATH     Path to input file.")
	print("                          If not specified, stdin is used.")
	print(" -O|--outfile FILEPATH    Path to output file.")
	print("                          If not specified, stdout is used.")
	print(" -s|--start OFFSET        Start offset. Default 0x0.")
	print(" -e|--end OFFSET          End offset. Default all.")
	print(" -d|--data-range RANGES   Define a pure data range in program memory.")
	print("                Example: -d 0x0-0x1F -d 0x100-0x1FF")
	print("                Defines byte range 0h-1Fh and 100h-1FFh as data.")
	# The parser accepts --label and --comment, not --label-file
	# and --comment-file.
	print(" -L|--label FILE          Label file to pick label names from.")
	print(" -C|--comment FILE        Comment file to pick comments from.")

def main():
	'''Command line entry point.

	Parses the options, loads the INC-file, reads the disassembly from
	the input file or stdin, rewrites and annotates the instructions
	and writes the result to the output file or stdout.

	Returns the exit status: 0 on success or after --help, 1 on an
	option parsing error. Fatal errors terminate through die().
	'''
	infilename = None
	outfilename = None
	start_offset = 0
	stop_offset = -1
	data_ranges = []
	label_files = []
	comment_files = []

	try:
		(opts, args) = getopt.getopt(sys.argv[1:],
			"hI:O:s:e:d:L:C:",
			[ "help", "infile=", "outfile=",
			  "start=", "end=", "data-range=",
			  "label=", "comment=", ])
	except getopt.GetoptError:
		usage()
		return 1
	for (o, v) in opts:
		if o in ("-h", "--help"):
			usage()
			return 0
		elif o in ("-I", "--infile"):
			infilename = v
		elif o in ("-O", "--outfile"):
			outfilename = v
		elif o in ("-s", "--start"):
			try:
				start_offset = parseint(v)
			except ValueError:
				die("-s|--start is not a number")
		elif o in ("-e", "--end"):
			try:
				stop_offset = parseint(v)
			except ValueError:
				die("-e|--end is not a number")
		elif o in ("-d", "--data-range"):
			if not v.strip():
				continue
			try:
				start, stop = v.split('-')
				start, stop = parseint(start), parseint(stop)
				if start < 0 or stop < 0 or stop < start:
					raise ValueError
				# The range end given by the user is inclusive.
				data_ranges.append(range(start, stop + 1))
			except ValueError:
				die("-d|--data-range invalid value")
		elif o in ("-L", "--label"):
			label_files.append(v)
			#TODO use it
		elif o in ("-C", "--comment"):
			comment_files.append(v)
			#TODO use it
	if len(args) != 1:
		die("INC-FILE not specified")

	inc_file_path = args[0]
	inc_file = IncFile(inc_file_path)

	if infilename:
		try:
			# The context manager closes the file even if
			# reading it fails.
			with open(infilename, "r") as infd:
				lines = infd.readlines()
		except IOError as e:
			die("Failed to read input file '%s':\n%s" %
			    (infilename, str(e)))
	else:
		lines = sys.stdin.readlines()
	insns = []

	# Parse the input and rewrite the
	# instructions to include symbolic names
	for line in lines:
		try:
			insn = Insn(line)
		except Insn.StringIgnore:
			continue
		except Insn.StringErr:
			die("ERROR: Could not parse line \"%s\"" % line)
		if insn.get_offset() < start_offset:
			continue
		if stop_offset != -1 and insn.get_offset() > stop_offset:
			break
		insn.rewrite(inc_file)
		insns.append(insn)

	# Index the instructions by offset once, so that each target
	# lookup is a dict access instead of a scan over all instructions.
	# setdefault() keeps the first instruction seen for an offset.
	insn_by_offset = {}
	for insn in insns:
		insn_by_offset.setdefault(insn.get_offset(), insn)

	def get_insn_by_offset(offset):
		insn = insn_by_offset.get(offset)
		if insn is None:
			print("; Postproc error: Instruction with "
			      "offset 0x%04X not found" % offset)
		return insn

	# mnemonic => (branch type, index of the target operand)
	branch_insns = { "jmp"   : ("type_jmp", 0),
			 "rjmp"  : ("type_jmp", 0),
			 "brbs"  : ("type_jmp", 1),
			 "brbc"  : ("type_jmp", 1),
			 "breq"  : ("type_jmp", 0),
			 "brne"  : ("type_jmp", 0),
			 "brcs"  : ("type_jmp", 0),
			 "brcc"  : ("type_jmp", 0),
			 "brsh"  : ("type_jmp", 0),
			 "brlo"  : ("type_jmp", 0),
			 "brmi"  : ("type_jmp", 0),
			 "brpl"  : ("type_jmp", 0),
			 "brge"  : ("type_jmp", 0),
			 "brlt"  : ("type_jmp", 0),
			 "brhs"  : ("type_jmp", 0),
			 "brhc"  : ("type_jmp", 0),
			 "brts"  : ("type_jmp", 0),
			 "brtc"  : ("type_jmp", 0),
			 "brvs"  : ("type_jmp", 0),
			 "brvc"  : ("type_jmp", 0),
			 "brie"  : ("type_jmp", 0),
			 "brid"  : ("type_jmp", 0),
			 "call"  : ("type_call", 0),
			 "rcall" : ("type_call", 0), }

	# Annotate jump sources
	for insn in insns:
		try:
			(jmptype, targetoper) = branch_insns[insn.get_insn()]
		except KeyError:
			continue
		try:
			# The rewritten target operand is a label: one
			# prefix character followed by the hex offset.
			tgt_offset = int(insn.get_operands()[targetoper][1:], 16)
		except (IndexError, ValueError):
			die("ERROR: Could not parse the branch target of "
			    "the instruction at offset 0x%04X" %
			    insn.get_offset())
		target = get_insn_by_offset(tgt_offset)
		if target:
			if jmptype == "type_jmp":
				target.add_jmpsource(insn)
			else:
				target.add_caller(insn)

	# Write the output
	if outfilename:
		try:
			outfd = open(outfilename, "w")
		except IOError as e:
			die("Failed to open output file '%s':\n%s" %
			    (outfilename, str(e)))
	else:
		outfd = sys.stdout
	try:
		outfd.write('.include "' + inc_file_path.split("/")[-1] + '"\n')
		outfd.write('\n')
		outfd.write('.org 0x000\n')
		outfd.write('\n')
		for insn in insns:
			if any((insn.get_offset() in r) for r in data_ranges):
				# Dump it as data.
				s = insn.get_as_data_string(inc_file)
			else:
				# Dump the instruction.
				s = insn.get_as_instruction_string(inc_file)
			if not s:
				continue
			outfd.write(s)
			outfd.write("\n")
	finally:
		# Close a file opened here, even if writing failed.
		# stdout is left open.
		if outfilename:
			outfd.close()
	return 0

# Run main() when executed as a script; its return value is the exit status.
if __name__ == "__main__":
	sys.exit(main())