Nov 28, 2018
 

The binutils linker is able to generate a map file when it links your binaries.  This provides a lot of detail on how the functions and variables have been arranged into the program memory space, which is crucial information when dealing with embedded devices.

Unfortunately, looking around, I didn’t see any decent tools for extracting this information.  I wound up cooking up my own Python script to do this.  It’s very crude: it just takes a map file on standard input and dumps a report to standard output.  It seems to work okay with ARM, and sort of works with AVR, but might need some more work.

import re
from sys import stdin, stdout

# Number of byte cells drawn on each row of the report.
WIDTH = 8
# A gap in front of an object that is longer than this many bytes can be
# summarised on a single line instead of being drawn cell by cell.
SPARSE_SKIP = 4*WIDTH

# A line holding only ' .<section>.<symbol>'.
SYMBOL_ONLY_RE = re.compile(
        r'^ \.([a-zA-Z0-9]+)\.([a-zA-Z0-9_\.]+)$')
# Sixteen spaces, then '<address> <size> <location>'.
ADDR_ONLY_RE = re.compile(
        r'^ {16}(0x[0-9a-f]+) +(0x[0-9a-f]+) (.*)$')
# Sixteen spaces, an address, sixteen more spaces, then a symbol name.
ADDR_CXXSYM_RE = re.compile(
        r'^ {16}(0x[0-9a-f]+) {16}([a-zA-Z_][a-zA-Z0-9_:()*\[\]\.]+)$')
# ' .<section>.<symbol> <address> <size> <location>' all on one line.
SYMBOL_ADDR_RE = re.compile(
        r'^ \.([a-zA-Z0-9]+)\.([a-zA-Z0-9_\.]+) +(0x[0-9a-f]+) +(0x[0-9a-f]+) (.*)$')
# ' *fill* <address> <size> <data>'.  The data field is parsed as base 16 by
# on_fill(), so hexadecimal digits are accepted here, not just 0-9.
FILL_RE = re.compile(
        r'^ \*fill\* +(0x[0-9a-f]+) +(0x[0-9a-f]+) +([0-9a-f]+)$')
# '<name> <origin> <length> <attrs>'.  Any run of spaces is accepted between
# the columns so that padded tables match as well as single-spaced ones.
REGION_RE = re.compile(
        r'^([a-zA-Z0-9_]+) +(0x[0-9a-f]+) +(0x[0-9a-f]+) +([rwx]+)$')

# Memory regions collected by on_region().
regions = []
# The symbol record most recently started, so that follow-up lines can add to
# it.  The main parsing loop resets it to None when a line matches nothing.
last = None
# Every symbol and fill record found, in the order encountered.
objects = []

def on_symbol_only(match):
    """Start a new symbol record from a line that names only a symbol.

    ``match`` is the result of matching a line against a pattern with two
    groups (section, symbol), or None.  On a match, a record holding just
    the section and symbol is appended to ``objects`` and remembered in the
    global ``last`` so later lines can add to it.

    Returns ``match`` unchanged, so the caller can test it for truthiness.
    """
    global last
    if not match:
        return match

    section, symbol = match.groups()
    entry = dict(type='symbol', section=section, symbol=symbol)
    objects.append(entry)
    last = entry
    return match


def on_addr_only(match):
    """Attach address, size and location to the current symbol record.

    ``match`` is the result of matching a line against a pattern with three
    groups (address, size, location), or None.  Address and size are parsed
    as hexadecimal.  Nothing is stored when there is no current record
    (``last`` is None).

    Asserts that the current record is a symbol and does not already carry
    an address, size or location.

    Returns ``match`` unchanged, so the caller can test it for truthiness.
    """
    if not match:
        return match
    if last is None:
        return match

    address, size, loc = match.groups()
    assert last['type'] == 'symbol'
    for field in ('address', 'size', 'loc'):
        assert field not in last
    last.update(address=int(address, 16), size=int(size, 16), loc=loc)
    return match


def on_symbol_addr(match):
    """Record a symbol whose address, size and location share its line.

    ``match`` is the result of matching a line against a pattern with five
    groups (section, symbol, address, size, location), or None.  On a
    match, a complete symbol record is appended to ``objects`` and
    remembered in the global ``last``.  Address and size are parsed as
    hexadecimal.

    Returns ``match`` unchanged, so the caller can test it for truthiness.
    """
    global last
    if not match:
        return match

    section, symbol, address, size, loc = match.groups()
    entry = {'type': 'symbol', 'section': section, 'symbol': symbol}
    entry['address'] = int(address, 16)
    entry['size'] = int(size, 16)
    entry['loc'] = loc
    objects.append(entry)
    last = entry
    return match


def on_addr_cxxsym(match):
    """Add a symbol name to the current record when the addresses agree.

    ``match`` is the result of matching a line against a pattern with two
    groups (address, name), or None.  The name is added to the current
    record's ``cxxsyms`` set (created on first use) only if the line's
    address, parsed as hexadecimal, equals the record's address.  Nothing
    is stored when there is no current record (``last`` is None).

    Asserts that the current record is a symbol.

    Returns ``match`` unchanged, so the caller can test it for truthiness.
    """
    if not match or last is None:
        return match

    address, cxxsym = match.groups()
    assert last['type'] == 'symbol'
    if last['address'] == int(address, 16):
        last.setdefault('cxxsyms', set()).add(cxxsym)
    return match


def on_fill(match):
    """Record a fill entry.

    ``match`` is the result of matching a line against a pattern with three
    groups (address, size, data), or None.  All three fields are parsed as
    hexadecimal and stored in a ``'fill'`` record appended to ``objects``.

    Returns ``match`` unchanged, so the caller can test it for truthiness.
    """
    if match:
        address, size, data = (int(field, 16) for field in match.groups())
        objects.append(
            dict(type='fill', address=address, size=size, data=data))
    return match

def on_region(match):
    """Record one memory region.

    ``match`` is the result of matching a line against a pattern with four
    groups (name, origin, length, attributes), or None.  Origin and length
    are parsed as hexadecimal and stored under ``'address'`` and ``'size'``
    in a record appended to ``regions``.

    Returns ``match`` unchanged, so the caller can test it for truthiness.
    """
    if not match:
        return match

    name, origin, length, attrs = match.groups()
    entry = {'region': name}
    entry['address'] = int(origin, 16)
    entry['size'] = int(length, 16)
    entry['attrs'] = attrs
    regions.append(entry)
    return match


# Skip everything up to and including the 'Memory Configuration' heading.
# The comparison cannot raise, so no error handling is needed here.
for line in stdin:
    line = line.rstrip()
    if line == 'Memory Configuration':
        break


# Read the region table that follows, stopping at the heading that opens the
# memory map itself.  Lines that are not region rows are ignored.
for line in stdin:
    line = line.rstrip()

    if line == 'Linker script and memory map':
        break

    try:
        on_region(REGION_RE.match(line))
    except Exception:
        # Show the offending line, then let the error propagate.
        print('# Failed at line %r' % line)
        raise


# Parse the memory map proper.  Each line is offered to the handlers in turn;
# the first one whose pattern matches consumes the line.
for line in stdin:
    line = line.rstrip()
    try:
        if on_symbol_only(SYMBOL_ONLY_RE.match(line)):
            continue

        # Entries whose address, size and location sit on the same line as
        # the symbol.  SYMBOL_ONLY_RE is anchored at end of line, so the two
        # symbol patterns cannot both match.
        if on_symbol_addr(SYMBOL_ADDR_RE.match(line)):
            continue

        if on_addr_only(ADDR_ONLY_RE.match(line)):
            continue

        if on_addr_cxxsym(ADDR_CXXSYM_RE.match(line)):
            continue

        if on_fill(FILL_RE.match(line)):
            continue

        # Unrecognised line: later address lines must not be attached to
        # whatever symbol came before it.
        last = None
    except Exception:
        # Show what was being processed, then let the error propagate.
        print('Failure context:')
        print('# last = %r' % last)
        print('# line = %r' % line)
        raise

# Give every region an exclusive end address and order regions by start.
for region in regions:
    region['end'] = region['address'] + region['size']
regions.sort(key=lambda r: r['address'])

for obj in objects:
    # Turn the set of extra symbol names into a sorted list.
    if 'cxxsyms' in obj:
        obj['cxxsyms'] = sorted(obj['cxxsyms'])

    # Only records with both an address and a size can be placed.
    if ('address' in obj) and ('size' in obj):
        obj['end'] = obj['address'] + obj['size']

        # Tag the object with the first region that contains it entirely.
        for region in regions:
            if (region['address'] <= obj['address']
                    and obj['end'] <= region['end']):
                obj['region'] = region['region']
                break

# Records without an address sort first (key -1).
objects.sort(key=lambda o: o.get('address', -1))

# Draw one byte map per region that contains at least one object.  Each row
# shows WIDTH cells: a two-digit symbol label, '--' for fill, '??' for any
# other record type, or '..' for bytes not covered by an object.
for region in regions:
    address = region['address']     # next address to be drawn
    sym_idx = 0                     # running symbol count, used for labels
    row_rem = 0                     # cells left in the row; 0 = no open row
    row_syms = []                   # legend entries waiting for a row end
    seen = set()                    # symbol names already put in the legend

    region_objects = [
        obj for obj in objects
        if obj.get('region') == region['region']]
    if not region_objects:
        continue

    for obj in region_objects:
        if obj['type'] == 'symbol':
            # Labels wrap around after 100 symbols.
            sym = '%02d' % (sym_idx % 100)
            sym_idx += 1
        elif obj['type'] == 'fill':
            sym = '--'
        else:
            sym = '??'

        # Cover the gap between the current position and this object.
        while address < obj['address']:
            if not row_rem:
                if (obj['address'] - address) > SPARSE_SKIP:
                    # Long gap at a row start: summarise it on one line, up
                    # to the last SPARSE_SKIP-aligned address before the
                    # object, instead of drawing every byte.
                    end = obj['address'] - (obj['address'] % SPARSE_SKIP)
                    stdout.write('\n%16s 0x%08x -- 0x%08x (%d bytes)' % (
                        region['region'], address, end, end - address))
                    address = end
                    continue

                stdout.write('\n%16s 0x%08x: ' % (region['region'], address))
                row_rem = WIDTH

            stdout.write(' ..')
            row_rem -= 1
            address += 1

        # Draw the object itself, one cell per byte.
        while address < obj['end']:
            if not row_rem:
                if row_syms:
                    stdout.write(' | %s\n' % row_syms.pop(0))
                else:
                    stdout.write('\n')
                stdout.write('%16s 0x%08x: ' % (region['region'], address))
                row_rem = WIDTH

            stdout.write(' %s' % sym)
            row_rem -= 1
            address += 1
            # Queue a legend entry the first time a symbol name is drawn.
            if ('symbol' in obj) and (obj['symbol'] not in seen):
                row_syms.append('%s: %s' % (sym, obj['symbol']))
                seen.add(obj['symbol'])

            # Row complete: append one pending legend entry, if any, and
            # open the next row straight away.
            if not row_rem:
                if row_syms:
                    stdout.write(' | %s\n' % row_syms.pop(0))
                else:
                    stdout.write('\n')
                stdout.write('%16s 0x%08x: ' % (region['region'], address))
                row_rem = WIDTH

    # Pad out the last row.  This also advances `address`, so the figures
    # below are counted from the padded position.
    # NOTE(review): legend entries still queued in row_syms at this point are
    # never written.
    while row_rem:
        stdout.write(' ..')
        row_rem -= 1
        address += 1

    stdout.write('\n%16s 0x%08x -- 0x%08x (%d bytes)\n' % (
        region['region'], address, region['end'], region['end'] - address))
    stdout.write('%16s %d bytes remaining\n' % (
        region['region'], region['end'] - address))

Continue reading »