diff --git a/README.md b/README.md index 44d2e4f..70ac78b 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Installation section). ```python >>> import bap - >>> print '\n'.join(insn.asm for insn in bap.disasm("\x48\x83\xec\x08")) + >>> print '\n'.join(insn.asm for insn in bap.disasm(b"\x48\x83\xec\x08")) decl %eax subl $0x8, %esp ``` diff --git a/src/bap/adt.py b/src/bap/adt.py index bb9311f..02129d9 100755 --- a/src/bap/adt.py +++ b/src/bap/adt.py @@ -182,7 +182,10 @@ def count_authors(library): """ -from collections import Iterable,Sequence,Mapping +try: + from collections.abc import Iterable,Sequence,Mapping +except ImportError: + from collections import Iterable,Sequence,Mapping class ADT(object): """Algebraic Data Type. diff --git a/src/bap/bir.py b/src/bap/bir.py index fd28140..e33e2b2 100644 --- a/src/bap/bir.py +++ b/src/bap/bir.py @@ -2,7 +2,10 @@ """BIR - BAP Intermediate Representation""" -from collections import Sequence,Mapping +try: + from collections.abc import Sequence,Mapping +except ImportError: + from collections import Sequence,Mapping from .adt import * from .bil import * from . import noeval_parser @@ -245,7 +248,16 @@ class Attrs(Map) : class Attr(ADT) : """Attribute is a pair of attribute name and value, both represented with str""" - pass + + @property + def name(self): + """name of attribute""" + return self.arg[0] + + @property + def value(self): + """value of attribute""" + return self.arg[1] class Values(Map) : """A set of possible values, taken by a phi-node. @@ -253,7 +265,9 @@ class Values(Map) : It is a mapping from the tid of a preceeding block, to an expression that denotes a value. """ - pass + def __init__(self, *args): + super(Map, self).__init__(args) # pylint: disable=bad-super-call + self.elements = dict(args[0]) class Tid(ADT) : """Tid(id,name=None) term unique identifier. @@ -357,7 +371,15 @@ class Annotation(ADT) : Each annotation denotes an association between a memory region and some arbitrary property, denoted with an attribute. """ - pass + @property + def region(self): + """memory region""" + return self.arg[0] + + @property + def attr(self): + """memory region attribute""" + return self.arg[1] def parse_addr(str): return int(str.split(':')[0],16) diff --git a/src/bap/noeval_parser.py b/src/bap/noeval_parser.py index c22bb64..b1940a7 100755 --- a/src/bap/noeval_parser.py +++ b/src/bap/noeval_parser.py @@ -2,23 +2,32 @@ ''' Parser for ADT string from bap that does not use eval -The nieve eval-based version runs into out-of-memory conditions on large files +The naive eval-based version runs into out-of-memory conditions on large files ''' import gc import sys import time -# NOTE: uses bap.bir, but cannot import at module level (circular references) +from subprocess import check_output + +# bap.1.3 breaks the format of the following types. it prints hexes +# without prefixing them with the `0x` escape. To fix it without +# fixing bap, we will treat integers inside this parents as +# hexadecimals if there is no prefix. +BROKEN_TYPES = [ + 'Section', + 'Region' +] +# NOTE: uses bap.bir, but cannot import at module level (circular references) -def toint(string, start, end): +def toint(string, start, end, base=10): ''' Convert substring string[start:end] to integer/long without eval Note: may contain leading whitespace ''' istr = string[start:end].lstrip() - if sys.version_info > (3,): # then longs don't exist if istr.endswith('L'): istr = istr.rstrip('L') @@ -31,7 +40,7 @@ def toint(string, start, end): if istr.startswith('0x'): return of_str(istr, 16) else: - return of_str(istr) + return of_str(istr, base) def setup_progress(totalitems): ''' @@ -159,17 +168,19 @@ def _parse_end(in_c, in_s, i, objs, stk): raise ParserInputError('Mismatched input stream') j = stk[-1] parent = objs[j] + ptyp = parent['typ'] assert isinstance(parent, dict) assert parent, 'parent is empty' - assert parent['typ'] != 'int', 'parent wrong type: %r' % (parent['typ']) + assert ptyp != 'int', 'parent wrong type: %r' % (parent['typ']) assert 'children' in parent if top: # add to parent if non empty # make real int before appending if top['typ'] == 'd': # int try: - top = toint(in_s, k, i) + base = 16 if ptyp in BROKEN_TYPES else 10 + top = toint(in_s, k, i, base) except ValueError: - raise ParserInputError("Integer expected between [%d..%d)" % (top, i)) + raise ParserInputError("Integer expected between [%d..%d)" % (k, i)) parent['children'].append(top) if in_c == ',': # add blank object and move on # next obj @@ -179,7 +190,6 @@ def _parse_end(in_c, in_s, i, objs, stk): return i else: # we are ending a tuple/list/app do it # maybe handle apply (num and seq are earlier) - ptyp = parent['typ'] if ptyp == '[': if in_c != ']': raise ParserInputError('close %r and open %r mismatch' % (in_c, ptyp)) @@ -325,4 +335,3 @@ def parser(input_str, disable_gc=False, logger=None): 'format': 'adt', 'load': parser } - diff --git a/src/bap/rpc.py b/src/bap/rpc.py index 8ac0c1b..a6a5663 100644 --- a/src/bap/rpc.py +++ b/src/bap/rpc.py @@ -112,11 +112,11 @@ def load(self): if self.msg is None: self.msg = self.bap.get_resource(self.ident) if not self._name in self.msg: - if 'error' in msg: + if 'error' in self.msg: raise ServerError(response) else: msg = "Expected {0} msg but got {1}".format( - self._name, msg) + self._name, self.msg) raise RuntimeError(msg) def get(self, child): @@ -126,7 +126,7 @@ def get(self, child): class Project(Resource): def __init__(self, ident, bap): - super(Image,self).__init__('program', ident, bap) + super(Image,self).__init__('program', ident, bap) # pylint: disable=bad-super-call def load_program(self): self.program = bir.loads(self.get('program')) @@ -171,7 +171,7 @@ def load_symbols(self): def get_symbol(self, name, d=None): try: - return (s for s in self.symbols if s.name == name).next() + return next(s for s in self.symbols if s.name == name) except StopIteration: return d @@ -214,8 +214,8 @@ def __init__(self, mem, parent): def load_data(self): try: - url = (urlparse(url) for url in self.links - if urlparse(url).scheme == 'mmap').next() + url = next(urlparse(url) for url in self.links + if urlparse(url).scheme == 'mmap') qs = parse_qs(url.query) offset = int(qs['offset'][0]) with open(url.path, "rw+b") as f: @@ -266,8 +266,8 @@ def __init__(self, server={}): self.last_id = 0 for attempt in range(RETRIES): try: - self.capabilities = self.call({'init' : { - 'version' : '0.1'}}).next()['capabilities'] + self.capabilities = next(self.call({'init' : { + 'version' : '0.1'}}))['capabilities'] break except Exception: if attempt + 1 == RETRIES: @@ -278,7 +278,7 @@ def __init__(self, server={}): if not "capabilities" in self.__dict__: raise RuntimeError("Failed to connect to BAP server") self.data = {} - self.temp = NamedTemporaryFile('rw+b', prefix="bap-") + self.temp = NamedTemporaryFile('w+b', prefix="bap-") def insns(self, src, **kwargs): req = {'resource' : src} @@ -300,7 +300,7 @@ def load_file(self, name): 'url' : 'file://' + name}}) def get_resource(self, name): - return self.call({'get_resource' : name}).next() + return next(self.call({'get_resource' : name})) def load_chunk(self, data, **kwargs): kwargs.setdefault('url', self.mmap(data)) @@ -341,14 +341,13 @@ def mmap(self, data): return url def _load_resource(self, res): - rep = self.call(res).next() + rep = next(self.call(res)) if 'error' in rep: raise ServerError(rep) return Id(rep['resource']) - def jsons(r, p=0): - dec = json.JSONDecoder(encoding='utf-8') + dec = json.JSONDecoder() while True: obj,p = dec.scan_once(r.text,p) yield obj diff --git a/tests/test_low_level_interface.py b/tests/test_low_level_interface.py new file mode 100644 index 0000000..261097f --- /dev/null +++ b/tests/test_low_level_interface.py @@ -0,0 +1,13 @@ +import unittest +import bap + +class TestLowLevelInterface(unittest.TestCase): + + def test_low_level_interface(self): + asm_str = '\n'.join(insn.asm for insn in bap.disasm(b"\x48\x83\xec\x08")) + self.assertIsNotNone(asm_str) + self.assertIn("\tdecl\t%eax", asm_str) + self.assertIn("\tsubl\t$0x8, %esp", asm_str) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file