|
| 1 | +--- |
| 2 | +group: recipe |
| 3 | +title: Tree traversal |
| 4 | +description: How to do tree traversal (also known as walking or visiting a tree) |
| 5 | +tags: |
| 6 | + - unist |
| 7 | + - tree |
| 8 | + - traverse |
| 9 | + - walk |
| 10 | + - visit |
| 11 | +author: Titus Wormer |
| 12 | +authorTwitter: wooorm |
| 13 | +published: 2019-12-23 |
| 14 | +modified: 2019-12-23 |
| 15 | +index: 1 |
| 16 | +--- |
| 17 | + |
| 18 | +## How to walk a tree |
| 19 | + |
| 20 | +### Contents |
| 21 | + |
| 22 | +* [Tree traversal](#tree-traversal) |
| 23 | +* [Set up](#set-up) |
| 24 | +* [Traverse the tree](#traverse-the-tree) |
| 25 | +* [Visiting a certain kind of node](#visiting-a-certain-kind-of-node) |
| 26 | + |
| 27 | +### Tree traversal |
| 28 | + |
| 29 | +**Tree traversal** is a common task when working with syntax trees. |
| 30 | +The term *tree* here means a node and all its *descendants* (all the nodes |
| 31 | +inside it). |
| 32 | +Traversal means stopping at every node to do something. |
| 33 | +So, tree traversal means doing something for every node in a tree. |
| 34 | + |
| 35 | +Tree traversal is often also called “walking a tree”, or “visiting a tree”. |
| 36 | + |
| 37 | +To learn more, continue reading, but when working with unist (unified’s trees) |
| 38 | +you probably need either: |
| 39 | + |
| 40 | +* [`unist-util-visit`][visit] |
| 41 | +* [`unist-util-visit-parents`][visit-parents] |
| 42 | + |
| 43 | +### Set up |
| 44 | + |
| 45 | +Glad you’re still here! |
| 46 | +Let’s say we have the following fragment of HTML, in a file `example.html`: |
| 47 | + |
| 48 | +```html |
| 49 | +<p> |
| 50 | + <!-- A comment. --> |
| 51 | + Some <strong>strong importance</strong>, <em>emphasis</em>, and a dash of |
| 52 | + <code>code</code>. |
| 53 | +</p> |
| 54 | +``` |
| 55 | + |
| 56 | +You could parse that with the following code (using [`unified`][unified] and |
| 57 | +[`rehype-parse`][rehype-parse]): |
| 58 | + |
| 59 | +```js |
| 60 | +var fs = require('fs') |
| 61 | +var unified = require('unified') |
| 62 | +var parse = require('rehype-parse') |
| 63 | + |
| 64 | +var doc = fs.readFileSync('example.html') |
| 65 | + |
| 66 | +var tree = unified() |
| 67 | + .use(parse, {fragment: true}) |
| 68 | + .parse(doc) |
| 69 | + |
| 70 | +console.log(tree) |
| 71 | +``` |
| 72 | + |
| 73 | +Which would yield (ignoring positional info for brevity): |
| 74 | + |
| 75 | +```js |
| 76 | +{ |
| 77 | + type: 'root', |
| 78 | + children: [ |
| 79 | + { |
| 80 | + type: 'element', |
| 81 | + tagName: 'p', |
| 82 | + properties: {}, |
| 83 | + children: [ |
| 84 | + {type: 'text', value: '\n '}, |
| 85 | + {type: 'comment', value: ' A comment. '}, |
| 86 | + {type: 'text', value: '\n Some '}, |
| 87 | + { |
| 88 | + type: 'element', |
| 89 | + tagName: 'strong', |
| 90 | + properties: {}, |
| 91 | + children: [{type: 'text', value: 'strong importance'}] |
| 92 | + }, |
| 93 | + {type: 'text', value: ', '}, |
| 94 | + { |
| 95 | + type: 'element', |
| 96 | + tagName: 'em', |
| 97 | + properties: {}, |
| 98 | + children: [{type: 'text', value: 'emphasis'}] |
| 99 | + }, |
| 100 | + {type: 'text', value: ', and a dash of\n '}, |
| 101 | + { |
| 102 | + type: 'element', |
| 103 | + tagName: 'code', |
| 104 | + properties: {}, |
| 105 | + children: [{type: 'text', value: 'code'}] |
| 106 | + }, |
| 107 | + {type: 'text', value: '.\n'} |
| 108 | + ] |
| 109 | + }, |
| 110 | + {type: 'text', value: '\n'} |
| 111 | + ], |
| 112 | + data: {quirksMode: false} |
| 113 | +} |
| 114 | +``` |
| 115 | + |
| 116 | +As we are all set up, we can traverse the tree. |
| 117 | + |
| 118 | +### Traverse the tree |
| 119 | + |
| 120 | +A useful utility for that is [`unist-util-visit`][visit], and it works like so: |
| 121 | + |
| 122 | +```js |
| 123 | +var visit = require('unist-util-visit') |
| 124 | + |
| 125 | +// … |
| 126 | + |
| 127 | +visit(tree, function(node) { |
| 128 | + console.log(node.type) |
| 129 | +}) |
| 130 | +``` |
| 131 | + |
| 132 | +```txt |
| 133 | +root |
| 134 | +element |
| 135 | +text |
| 136 | +comment |
| 137 | +text |
| 138 | +element |
| 139 | +text |
| 140 | +text |
| 141 | +element |
| 142 | +text |
| 143 | +text |
| 144 | +element |
| 145 | +text |
| 146 | +text |
| 147 | +text |
| 148 | +``` |
| 149 | + |
| 150 | +We traversed the entire tree, and for each node, we printed its `type`. |
| 151 | + |
| 152 | +### Visiting a certain kind of node |
| 153 | + |
| 154 | +To “visit” only a certain `type` of node, pass a test to |
| 155 | +[`unist-util-visit`][visit] like so: |
| 156 | + |
| 157 | +```js |
| 158 | +var visit = require('unist-util-visit') |
| 159 | + |
| 160 | +// … |
| 161 | + |
| 162 | +visit(tree, 'element', function(node) { |
| 163 | + console.log(node.tagName) |
| 164 | +}) |
| 165 | +``` |
| 166 | + |
| 167 | +```txt |
| 168 | +p |
| 169 | +strong |
| 170 | +em |
| 171 | +code |
| 172 | +``` |
| 173 | + |
| 174 | +You can do this yourself as well. |
| 175 | +The above works the same as: |
| 176 | + |
| 177 | +```js |
| 178 | +visit(tree, function(node) { |
| 179 | + if (node.type === 'element') { |
| 180 | + console.log(node.tagName) |
| 181 | + } |
| 182 | +}) |
| 183 | +``` |
| 184 | + |
| 185 | +But the test passed to `visit` can be more advanced, such as the following to |
| 186 | +visit different kinds of nodes. |
| 187 | + |
| 188 | +```js |
| 189 | +visit(tree, ['comment', 'text'], function(node) { |
| 190 | + console.log([node.value]) |
| 191 | +}) |
| 192 | +``` |
| 193 | + |
| 194 | +```txt |
| 195 | +[ '\n ' ] |
| 196 | +[ ' A comment. ' ] |
| 197 | +[ '\n Some ' ] |
| 198 | +[ 'strong importance' ] |
| 199 | +[ ', ' ] |
| 200 | +[ 'emphasis' ] |
| 201 | +[ ', and a dash of\n ' ] |
| 202 | +[ 'code' ] |
| 203 | +[ '.\n' ] |
| 204 | +[ '\n' ] |
| 205 | +``` |
| 206 | + |
| 207 | +Read more about [`unist-util-visit`][visit] in its readme. |
| 208 | + |
| 209 | +[visit]: https://github.com/syntax-tree/unist-util-visit |
| 210 | + |
| 211 | +[visit-parents]: https://github.com/syntax-tree/unist-util-visit-parents |
| 212 | + |
| 213 | +[unified]: https://github.com/unifiedjs/unified |
| 214 | + |
| 215 | +[rehype-parse]: https://github.com/rehypejs/rehype/tree/master/packages/rehype-parse |
0 commit comments