The binutils linker is able to generate a map file when it links your binaries. This provides a lot of detail on how the functions and variables have been arranged in the program's memory space, which is crucial information when dealing with embedded devices.
Unfortunately, looking around, I didn't see any decent tools for extracting this information, so I wound up cooking up my own Python script to do it. It's very crude: it just takes a map file on standard input and dumps a report to standard output. It seems to work okay with ARM, and sort of works with AVR, but might need some more work.
import re
from sys import stdin, stdout
# Number of byte cells drawn on each row of the report.
WIDTH = 8
# Gaps between objects longer than this many bytes are summarised on one
# line instead of being drawn cell by cell.
SPARSE_SKIP = 4 * WIDTH

# All patterns are raw strings so that regex escapes such as ``\*`` and
# ``\d`` are not interpreted (and warned about) as string escapes.

# " .section.symbol" alone on a line; address and size follow on the next.
SYMBOL_ONLY_RE = re.compile(
    r'^ \.([a-zA-Z0-9]+)\.([a-zA-Z0-9_\.]+)$')
# Continuation line: 16 spaces, address, size, then free-form trailing text.
ADDR_ONLY_RE = re.compile(
    r'^ {16}(0x[0-9a-f]+) +(0x[0-9a-f]+) (.*)$')
# 16 spaces, address, 16 spaces, then a (possibly C++-qualified) symbol name.
ADDR_CXXSYM_RE = re.compile(
    r'^ {16}(0x[0-9a-f]+) {16}([a-zA-Z_][a-zA-Z0-9_:()*\[\]\.]+)$')
# " .section.symbol", address, size and trailing text all on one line.
SYMBOL_ADDR_RE = re.compile(
    r'^ \.([a-zA-Z0-9]+)\.([a-zA-Z0-9_\.]+) +(0x[0-9a-f]+) +(0x[0-9a-f]+) (.*)$')
# " *fill*", address, size and the fill pattern. The pattern is parsed as
# base 16 by on_fill(), so hex digits a-f are accepted here as well as 0-9.
FILL_RE = re.compile(
    r'^ \*fill\* +(0x[0-9a-f]+) +(0x[0-9a-f]+) +([0-9a-f]+)$')
# Memory-configuration row: region name, origin, length, r/w/x attributes.
REGION_RE = re.compile(
    r'^([a-zA-Z0-9_]+) +(0x[0-9a-f]+) (0x[0-9a-f]+) ([rwx]+)$')

# Parser state shared by the on_* handlers.
regions = []    # one dict per memory region
last = None     # most recently started symbol record, or None
objects = []    # symbol and fill records in the order they were parsed
def on_symbol_only(match):
    """Open a new symbol record from a symbol-only line.

    ``match`` is the result of matching ``SYMBOL_ONLY_RE`` against a line
    (``None`` when the line did not match). On a match, a record holding
    the section and symbol names is appended to ``objects`` and becomes
    the pending record ``last``, so that a following address line can
    complete it.

    Returns ``match`` unchanged, letting callers use the result as a
    "handled" flag.
    """
    global last
    if not match:
        return match
    section_name, symbol_name = match.groups()
    record = dict(type='symbol', section=section_name, symbol=symbol_name)
    objects.append(record)
    last = record
    return match
def on_addr_only(match):
    """Complete the pending symbol record with its address, size and origin.

    ``match`` is the result of matching ``ADDR_ONLY_RE`` (``None`` when the
    line did not match). When there is no pending record (``last`` is
    ``None``) the line is accepted but ignored. Otherwise the pending
    record must be a symbol that has not yet been given an address, size
    or location; this is enforced with assertions.

    Returns ``match`` unchanged.
    """
    pending = last
    if match and pending is not None:
        addr_text, size_text, origin = match.groups()
        assert pending['type'] == 'symbol'
        # A record may only be completed once.
        for field in ('address', 'size', 'loc'):
            assert field not in pending
        pending['address'] = int(addr_text, 16)
        pending['size'] = int(size_text, 16)
        pending['loc'] = origin
    return match
def on_symbol_addr(match):
    """Record a symbol whose name, address and size share a single line.

    ``match`` is the result of matching ``SYMBOL_ADDR_RE`` (``None`` when
    the line did not match). On a match, a complete symbol record is
    appended to ``objects`` and becomes the pending record ``last``.

    Returns ``match`` unchanged.
    """
    global last
    if not match:
        return match
    section_name, symbol_name, addr_text, size_text, origin = match.groups()
    record = dict(
        type='symbol',
        section=section_name,
        symbol=symbol_name,
        address=int(addr_text, 16),
        size=int(size_text, 16),
        loc=origin,
    )
    objects.append(record)
    last = record
    return match
def on_addr_cxxsym(match):
    """Attach an extra symbol name to the pending record at the same address.

    ``match`` is the result of matching ``ADDR_CXXSYM_RE`` (``None`` when
    the line did not match). The name is added to the pending record's
    ``'cxxsyms'`` set only when the line's address equals the record's
    address. The line is accepted but ignored when there is no pending
    record, when the addresses differ, or when the pending record has not
    been given an address yet (previously that last case raised
    ``KeyError``).

    Returns ``match`` unchanged.
    """
    if not match or last is None:
        return match
    address, cxxsym = match.groups()
    assert last['type'] == 'symbol'
    # .get(): a record opened by a symbol-only line has no 'address' until
    # its continuation line is seen; treat that as "address differs".
    if last.get('address') != int(address, base=16):
        return match
    last.setdefault('cxxsyms', set()).add(cxxsym)
    return match
def on_fill(match):
    """Record a ``*fill*`` (padding) entry.

    ``match`` is the result of matching ``FILL_RE`` (``None`` when the line
    did not match). On a match, a ``'fill'`` record carrying the address,
    size and fill data - all three parsed as base-16 integers - is
    appended to ``objects``. The pending symbol record is not touched.

    Returns ``match`` unchanged.
    """
    if not match:
        return match
    addr_text, size_text, data_text = match.groups()
    record = dict(
        type='fill',
        address=int(addr_text, 16),
        size=int(size_text, 16),
        data=int(data_text, 16),
    )
    objects.append(record)
    return match
def on_region(match):
    """Record one row of the memory-configuration table.

    ``match`` is the result of matching ``REGION_RE`` (``None`` when the
    line did not match). On a match, a record with the region name, its
    origin (stored under ``'address'``), its length (stored under
    ``'size'``) and its attribute string is appended to ``regions``.

    Returns ``match`` unchanged.
    """
    if not match:
        return match
    name, origin_text, length_text, attrs = match.groups()
    entry = dict(
        region=name,
        address=int(origin_text, 16),
        size=int(length_text, 16),
        attrs=attrs,
    )
    regions.append(entry)
    return match
# --- Parse stage -----------------------------------------------------------
# The three loops below share the same stdin iterator, so each one resumes
# where the previous one stopped.

# 1. Skip everything up to the "Memory Configuration" heading.
for line in stdin:
    if line.rstrip() == 'Memory Configuration':
        break

# 2. Collect memory regions until the memory map proper starts.
for line in stdin:
    line = line.rstrip()
    if line == 'Linker script and memory map':
        break
    try:
        on_region(REGION_RE.match(line))
    except Exception:
        print ('# Failed at line %r' % line)
        raise

# 3. Collect symbols and fills. Each handler returns a truthy match when it
#    recognised the line; any unrecognised line ends the pending record.
for line in stdin:
    line = line.rstrip()
    try:
        if on_symbol_only(SYMBOL_ONLY_RE.match(line)):
            continue
        # Entries with name, address and size on one line were previously
        # never tried, so those symbols were dropped from the report.
        if on_symbol_addr(SYMBOL_ADDR_RE.match(line)):
            continue
        if on_addr_only(ADDR_ONLY_RE.match(line)):
            continue
        if on_addr_cxxsym(ADDR_CXXSYM_RE.match(line)):
            continue
        if on_fill(FILL_RE.match(line)):
            continue
        last = None
    except Exception:
        # Show what was being processed, then let the error propagate.
        print ('Failure context:')
        print ('# last = %r' % last)
        print ('# line = %r' % line)
        raise
# --- Post-processing stage -------------------------------------------------
# Give every region an exclusive end address, then order regions by start.
for mem in regions:
    mem['end'] = mem['address'] + mem['size']
regions.sort(key=lambda mem: mem['address'])

for item in objects:
    # Freeze the collected alias names into a sorted list.
    if 'cxxsyms' in item:
        item['cxxsyms'] = sorted(item['cxxsyms'])
    # Records without both an address and a size cannot be placed.
    if 'address' not in item or 'size' not in item:
        continue
    start = item['address']
    stop = start + item['size']
    item['end'] = stop
    # The first region (in address order) that fully contains the object
    # owns it; objects contained by no region get no 'region' key.
    for mem in regions:
        if stop <= mem['end'] and start >= mem['address']:
            item['region'] = mem['region']
            break

# Records lacking an address sort ahead of everything else.
objects.sort(key=lambda item: item.get('address', -1))
# --- Report stage ------------------------------------------------------------
# For each memory region, write a map of its bytes to stdout: one cell per
# byte, WIDTH cells per row, each row prefixed with the region name and the
# address of its first cell.
# NOTE(review): the indentation of this block was lost in this copy of the
# script (and one line below has several statements fused together), so the
# nesting must be re-established before it will run. The comments describe
# what each statement does; they do not assert its nesting level.
for region in regions:
# Cursor: the next address to be drawn.
address = region['address']
# Running counter used to build the two-digit cell label of each symbol.
sym_idx = 0
# Cells still free on the current row; 0 means a new row must be started.
row_rem = 0
# Pending "label: symbol" legends, printed one per row at the row's end.
row_syms = []
# Symbol names that have already been queued in row_syms.
seen = set()
# Only the objects that were assigned to this region.
region_objects = list(filter(
lambda obj : obj.get('region') == region['region'],
objects))
# Regions that contain no objects produce no output at all.
if not region_objects:
continue
for obj in region_objects:
# Pick the cell label: symbols get their index modulo 100, fills get '--',
# any other record type gets '??'.
if obj['type'] == 'symbol':
sym = '%02d' % (sym_idx % 100)
sym_idx += 1
elif obj['type'] == 'fill':
sym = '--'
else:
sym = '??'
# Gap before the object. At the start of a row, a gap longer than
# SPARSE_SKIP is summarised on one line that ends at the SPARSE_SKIP-aligned
# address at or below the object's address; otherwise each gap byte is drawn
# as ' ..'.
while address < obj['address']: if not row_rem: if (obj['address'] - address) > SPARSE_SKIP:
end = obj['address'] - (obj['address'] % SPARSE_SKIP)
stdout.write('\n%16s 0x%08x -- 0x%08x (%d bytes)' % (
region['region'], address, end, end - address))
address = end
continue
stdout.write('\n%16s 0x%08x: ' % (region['region'], address))
row_rem = WIDTH
stdout.write(' ..')
row_rem -= 1
address += 1
# The object itself: one cell carrying its label for every byte up to its end.
while address < obj['end']:
# Row is full: finish it with the next pending legend (if any) and start a
# new row.
if not row_rem:
if row_syms:
stdout.write(' | %s\n' % row_syms.pop(0))
else:
stdout.write('\n')
stdout.write('%16s 0x%08x: ' % (region['region'], address))
row_rem = WIDTH
stdout.write(' %s' % sym)
row_rem -= 1
address += 1
# Queue the legend for a symbol the first time it is encountered.
# NOTE(review): whether this runs per drawn byte or once per object depends
# on the lost indentation - confirm against the original script.
if ('symbol' in obj) and (obj['symbol'] not in seen):
row_syms.append('%s: %s' % (sym, obj['symbol']))
seen.add(obj['symbol'])
# Row-full handling again, followed by padding of the current row with ' ..'.
# NOTE(review): presumably this runs once per region after its last object;
# the nesting cannot be confirmed from this copy.
if not row_rem:
if row_syms:
stdout.write(' | %s\n' % row_syms.pop(0))
else:
stdout.write('\n')
stdout.write('%16s 0x%08x: ' % (region['region'], address))
row_rem = WIDTH
while row_rem:
stdout.write(' ..')
row_rem -= 1
address += 1
# Summarise the span from the cursor to the end of the region, then report
# that same byte count as the remaining space.
stdout.write('\n%16s 0x%08x -- 0x%08x (%d bytes)\n' % (
region['region'], address, region['end'], region['end'] - address))
stdout.write('%16s %d bytes remaining\n' % (
region['region'], region['end'] - address))
Continue reading »