|
| 1 | +from builtins import object |
| 2 | +from collections import namedtuple |
| 3 | +import copy |
| 4 | +import hashlib |
| 5 | + |
| 6 | + |
| 7 | +def _recursive_repr(item): |
| 8 | + """Hack around python `repr` to deterministically represent dictionaries. |
| 9 | +
|
| 10 | + This is able to represent more things than json.dumps, since it does not require things to be JSON serializable |
| 11 | + (e.g. datetimes). |
| 12 | + """ |
| 13 | + if isinstance(item, basestring): |
| 14 | + result = str(item) |
| 15 | + elif isinstance(item, list): |
| 16 | + result = '[{}]'.format(', '.join([_recursive_repr(x) for x in item])) |
| 17 | + elif isinstance(item, dict): |
| 18 | + kv_pairs = ['{}: {}'.format(_recursive_repr(k), _recursive_repr(item[k])) for k in sorted(item)] |
| 19 | + result = '{' + ', '.join(kv_pairs) + '}' |
| 20 | + else: |
| 21 | + result = repr(item) |
| 22 | + return result |
| 23 | + |
| 24 | + |
| 25 | +def _get_hash(item): |
| 26 | + hasher = hashlib.sha224() |
| 27 | + repr_ = _recursive_repr(item) |
| 28 | + hasher.update(repr_.encode('utf-8')) |
| 29 | + return hasher.hexdigest() |
| 30 | + |
| 31 | + |
| 32 | +class DagNode(object): |
| 33 | + """Node in a directed-acyclic graph (DAG). |
| 34 | +
|
| 35 | + Edges: |
| 36 | + DagNodes are connected by edges. An edge connects two nodes with a label for each side: |
| 37 | + - ``upstream_node``: upstream/parent node |
| 38 | + - ``upstream_label``: label on the outgoing side of the upstream node |
| 39 | + - ``downstream_node``: downstream/child node |
| 40 | + - ``downstream_label``: label on the incoming side of the downstream node |
| 41 | +
|
| 42 | + For example, DagNode A may be connected to DagNode B with an edge labelled "foo" on A's side, and "bar" on B's |
| 43 | + side: |
| 44 | +
|
| 45 | + _____ _____ |
| 46 | + | | | | |
| 47 | + | A >[foo]---[bar]> B | |
| 48 | + |_____| |_____| |
| 49 | +
|
| 50 | + Edge labels may be integers or strings, and nodes cannot have more than one incoming edge with the same label. |
| 51 | +
|
| 52 | + DagNodes may have any number of incoming edges and any number of outgoing edges. DagNodes keep track only of |
| 53 | + their incoming edges, but the entire graph structure can be inferred by looking at the furthest downstream |
| 54 | + nodes and working backwards. |
| 55 | +
|
| 56 | + Hashing: |
| 57 | + DagNodes must be hashable, and two nodes are considered to be equivalent if they have the same hash value. |
| 58 | +
|
| 59 | + Nodes are immutable, and the hash should remain constant as a result. If a node with new contents is required, |
| 60 | + create a new node and throw the old one away. |
| 61 | +
|
| 62 | + String representation: |
| 63 | + In order for graph visualization tools to show useful information, nodes must be representable as strings. The |
| 64 | + ``repr`` operator should provide a more or less "full" representation of the node, and the ``short_repr`` |
| 65 | + property should be a shortened, concise representation. |
| 66 | +
|
| 67 | + Again, because nodes are immutable, the string representations should remain constant. |
| 68 | + """ |
| 69 | + def __hash__(self): |
| 70 | + """Return an integer hash of the node.""" |
| 71 | + raise NotImplementedError() |
| 72 | + |
| 73 | + def __eq__(self, other): |
| 74 | + """Compare two nodes; implementations should return True if (and only if) hashes match.""" |
| 75 | + raise NotImplementedError() |
| 76 | + |
| 77 | + def __repr__(self, other): |
| 78 | + """Return a full string representation of the node.""" |
| 79 | + raise NotImplementedError() |
| 80 | + |
| 81 | + @property |
| 82 | + def short_repr(self): |
| 83 | + """Return a partial/concise representation of the node.""" |
| 84 | + raise NotImplementedError() |
| 85 | + |
| 86 | + @property |
| 87 | + def incoming_edge_map(self): |
| 88 | + """Provides information about all incoming edges that connect to this node. |
| 89 | +
|
| 90 | + The edge map is a dictionary that maps an ``incoming_label`` to ``(outgoing_node, outgoing_label)``. Note that |
| 91 | + implicity, ``incoming_node`` is ``self``. See "Edges" section above. |
| 92 | + """ |
| 93 | + raise NotImplementedError() |
| 94 | + |
| 95 | + |
| 96 | +DagEdge = namedtuple('DagEdge', ['downstream_node', 'downstream_label', 'upstream_node', 'upstream_label']) |
| 97 | + |
| 98 | + |
| 99 | +class KwargReprNode(DagNode): |
| 100 | + """A DagNode that can be represented as a set of args+kwargs. |
| 101 | + """ |
| 102 | + def __get_hash(self): |
| 103 | + hashes = self.__upstream_hashes + [self.__inner_hash] |
| 104 | + hash_strs = [str(x) for x in hashes] |
| 105 | + hashes_str = ','.join(hash_strs).encode('utf-8') |
| 106 | + hash_str = hashlib.md5(hashes_str).hexdigest() |
| 107 | + return int(hash_str, base=16) |
| 108 | + |
| 109 | + def __init__(self, incoming_edge_map, name, args, kwargs): |
| 110 | + self.__incoming_edge_map = incoming_edge_map |
| 111 | + self.name = name |
| 112 | + self.args = args |
| 113 | + self.kwargs = kwargs |
| 114 | + self.__hash = self.__get_hash() |
| 115 | + |
| 116 | + @property |
| 117 | + def __upstream_hashes(self): |
| 118 | + hashes = [] |
| 119 | + for downstream_label, (upstream_node, upstream_label) in self.incoming_edge_map.items(): |
| 120 | + hashes += [hash(x) for x in [downstream_label, upstream_node, upstream_label]] |
| 121 | + return hashes |
| 122 | + |
| 123 | + @property |
| 124 | + def __inner_hash(self): |
| 125 | + props = {'args': self.args, 'kwargs': self.kwargs} |
| 126 | + return _get_hash(props) |
| 127 | + |
| 128 | + def __hash__(self): |
| 129 | + return self.__hash |
| 130 | + |
| 131 | + def __eq__(self, other): |
| 132 | + return hash(self) == hash(other) |
| 133 | + |
| 134 | + @property |
| 135 | + def short_hash(self): |
| 136 | + return '{:x}'.format(abs(hash(self)))[:12] |
| 137 | + |
| 138 | + def __repr__(self): |
| 139 | + formatted_props = ['{!r}'.format(arg) for arg in self.args] |
| 140 | + formatted_props += ['{}={!r}'.format(key, self.kwargs[key]) for key in sorted(self.kwargs)] |
| 141 | + return '{}({}) <{}>'.format(self.name, ', '.join(formatted_props), self.short_hash) |
| 142 | + |
| 143 | + @property |
| 144 | + def incoming_edges(self): |
| 145 | + edges = [] |
| 146 | + for downstream_label, (upstream_node, upstream_label) in self.incoming_edge_map.items(): |
| 147 | + downstream_node = self |
| 148 | + edges += [DagEdge(downstream_node, downstream_label, upstream_node, upstream_label)] |
| 149 | + return edges |
| 150 | + |
| 151 | + @property |
| 152 | + def incoming_edge_map(self): |
| 153 | + return self.__incoming_edge_map |
| 154 | + |
| 155 | + @property |
| 156 | + def short_repr(self): |
| 157 | + return self.name |
| 158 | + |
| 159 | + |
| 160 | +def topo_sort(start_nodes): |
| 161 | + marked_nodes = [] |
| 162 | + sorted_nodes = [] |
| 163 | + child_map = {} |
| 164 | + def visit(node, child): |
| 165 | + assert node not in marked_nodes, 'Graph is not a DAG' |
| 166 | + if child is not None: |
| 167 | + if node not in child_map: |
| 168 | + child_map[node] = [] |
| 169 | + child_map[node].append(child) |
| 170 | + if node not in sorted_nodes: |
| 171 | + marked_nodes.append(node) |
| 172 | + [visit(parent, node) for parent in node._parents] |
| 173 | + marked_nodes.remove(node) |
| 174 | + sorted_nodes.append(node) |
| 175 | + unmarked_nodes = list(copy.copy(start_nodes)) |
| 176 | + while unmarked_nodes: |
| 177 | + visit(unmarked_nodes.pop(), None) |
| 178 | + return sorted_nodes, child_map |
0 commit comments