parse · gitbu/learn-vue@1e3afb0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1e3afb0

Browse files
author
buaiping
committed
parse
1 parent 8bb65c8 commit 1e3afb0

File tree

4 files changed

+606
-20
lines changed

4 files changed

+606
-20
lines changed

docs/vue2/compile/parse.md

Lines changed: 0 additions & 20 deletions
This file was deleted.
Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
# html-parser
2+
3+
```js
4+
/**
5+
* Not type-checking this file because it's mostly vendor code.
6+
*/
7+
8+
/*!
9+
* HTML Parser By John Resig (ejohn.org)
10+
* Modified by Juriy "kangax" Zaytsev
11+
* Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later)
12+
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
13+
*/
14+
15+
import { makeMap, no } from 'shared/util'
16+
import { isNonPhrasingTag } from 'web/compiler/util'
17+
import { unicodeRegExp } from 'core/util/lang'
18+
19+
// Regular Expressions for parsing tags and attributes
20+
const attribute = /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
21+
const dynamicArgAttribute = /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
22+
const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*`
23+
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
24+
const startTagOpen = new RegExp(`^<${qnameCapture}`)
25+
const startTagClose = /^\s*(\/?)>/
26+
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
27+
const doctype = /^<!DOCTYPE [^>]+>/i
28+
// #7298: escape - to avoid being passed as HTML comment when inlined in page
29+
const comment = /^<!\--/
30+
const conditionalComment = /^<!\[/
31+
32+
// Special Elements (can contain anything)
33+
export const isPlainTextElement = makeMap('script,style,textarea', true)
34+
const reCache = {}
35+
36+
const decodingMap = {
37+
'&lt;': '<',
38+
'&gt;': '>',
39+
'&quot;': '"',
40+
'&amp;': '&',
41+
'&#10;': '\n',
42+
'&#9;': '\t',
43+
'&#39;': "'"
44+
}
45+
const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
46+
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g
47+
48+
// #5992
49+
const isIgnoreNewlineTag = makeMap('pre,textarea', true)
50+
const shouldIgnoreFirstNewline = (tag, html) => tag && isIgnoreNewlineTag(tag) && html[0] === '\n'
51+
52+
function decodeAttr (value, shouldDecodeNewlines) {
53+
const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
54+
return value.replace(re, match => decodingMap[match])
55+
}
56+
57+
export function parseHTML (html, options) {
58+
const stack = []
59+
const expectHTML = options.expectHTML
60+
// 不是成对的标签
61+
const isUnaryTag = options.isUnaryTag || no
62+
const canBeLeftOpenTag = options.canBeLeftOpenTag || no
63+
// 指针
64+
let index = 0
65+
let last, lastTag
66+
while (html) {
67+
last = html
68+
// Make sure we're not in a plaintext content element like script/style
69+
if (!lastTag || !isPlainTextElement(lastTag)) {
70+
let textEnd = html.indexOf('<')
71+
if (textEnd === 0) {
72+
// Comment:
73+
if (comment.test(html)) {
74+
const commentEnd = html.indexOf('-->')
75+
76+
if (commentEnd >= 0) {
77+
if (options.shouldKeepComment) {
78+
options.comment(html.substring(4, commentEnd), index, index + commentEnd + 3)
79+
}
80+
advance(commentEnd + 3)
81+
continue
82+
}
83+
}
84+
85+
// http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
86+
if (conditionalComment.test(html)) {
87+
const conditionalEnd = html.indexOf(']>')
88+
89+
if (conditionalEnd >= 0) {
90+
advance(conditionalEnd + 2)
91+
continue
92+
}
93+
}
94+
95+
// Doctype:
96+
const doctypeMatch = html.match(doctype)
97+
if (doctypeMatch) {
98+
advance(doctypeMatch[0].length)
99+
continue
100+
}
101+
102+
// End tag:
103+
const endTagMatch = html.match(endTag)
104+
if (endTagMatch) {
105+
const curIndex = index
106+
advance(endTagMatch[0].length)
107+
parseEndTag(endTagMatch[1], curIndex, index)
108+
continue
109+
}
110+
111+
// Start tag:
112+
const startTagMatch = parseStartTag()
113+
if (startTagMatch) {
114+
handleStartTag(startTagMatch)
115+
if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
116+
advance(1)
117+
}
118+
continue
119+
}
120+
}
121+
122+
let text, rest, next
123+
if (textEnd >= 0) {
124+
rest = html.slice(textEnd)
125+
while (
126+
!endTag.test(rest) &&
127+
!startTagOpen.test(rest) &&
128+
!comment.test(rest) &&
129+
!conditionalComment.test(rest)
130+
) {
131+
// < in plain text, be forgiving and treat it as text
132+
next = rest.indexOf('<', 1)
133+
if (next < 0) break
134+
textEnd += next
135+
rest = html.slice(textEnd)
136+
}
137+
text = html.substring(0, textEnd)
138+
}
139+
140+
if (textEnd < 0) {
141+
text = html
142+
}
143+
144+
if (text) {
145+
advance(text.length)
146+
}
147+
148+
if (options.chars && text) {
149+
options.chars(text, index - text.length, index)
150+
}
151+
} else {
152+
let endTagLength = 0
153+
const stackedTag = lastTag.toLowerCase()
154+
const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i'))
155+
const rest = html.replace(reStackedTag, function (all, text, endTag) {
156+
endTagLength = endTag.length
157+
if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
158+
text = text
159+
.replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
160+
.replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
161+
}
162+
if (shouldIgnoreFirstNewline(stackedTag, text)) {
163+
text = text.slice(1)
164+
}
165+
if (options.chars) {
166+
options.chars(text)
167+
}
168+
return ''
169+
})
170+
index += html.length - rest.length
171+
html = rest
172+
parseEndTag(stackedTag, index - endTagLength, index)
173+
}
174+
175+
if (html === last) {
176+
options.chars && options.chars(html)
177+
if (process.env.NODE_ENV !== 'production' && !stack.length && options.warn) {
178+
options.warn(`Mal-formatted tag at end of template: "${html}"`, { start: index + html.length })
179+
}
180+
break
181+
}
182+
}
183+
184+
// Clean up any remaining tags
185+
parseEndTag()
186+
187+
function advance (n) {
188+
index += n
189+
html = html.substring(n)
190+
}
191+
192+
function parseStartTag () {
193+
const start = html.match(startTagOpen)
194+
if (start) {
195+
const match = {
196+
tagName: start[1],
197+
attrs: [],
198+
start: index
199+
}
200+
advance(start[0].length)
201+
let end, attr
202+
while (!(end = html.match(startTagClose)) && (attr = html.match(dynamicArgAttribute) || html.match(attribute))) {
203+
attr.start = index
204+
advance(attr[0].length)
205+
attr.end = index
206+
match.attrs.push(attr)
207+
}
208+
if (end) {
209+
match.unarySlash = end[1]
210+
advance(end[0].length)
211+
match.end = index
212+
return match
213+
}
214+
}
215+
}
216+
217+
function handleStartTag (match) {
218+
const tagName = match.tagName
219+
const unarySlash = match.unarySlash
220+
221+
if (expectHTML) {
222+
if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
223+
parseEndTag(lastTag)
224+
}
225+
if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
226+
parseEndTag(tagName)
227+
}
228+
}
229+
230+
const unary = isUnaryTag(tagName) || !!unarySlash
231+
232+
const l = match.attrs.length
233+
const attrs = new Array(l)
234+
for (let i = 0; i < l; i++) {
235+
const args = match.attrs[i]
236+
const value = args[3] || args[4] || args[5] || ''
237+
const shouldDecodeNewlines = tagName === 'a' && args[1] === 'href'
238+
? options.shouldDecodeNewlinesForHref
239+
: options.shouldDecodeNewlines
240+
attrs[i] = {
241+
name: args[1],
242+
value: decodeAttr(value, shouldDecodeNewlines)
243+
}
244+
if (process.env.NODE_ENV !== 'production' && options.outputSourceRange) {
245+
attrs[i].start = args.start + args[0].match(/^\s*/).length
246+
attrs[i].end = args.end
247+
}
248+
}
249+
250+
if (!unary) {
251+
stack.push({ tag: tagName, lowerCasedTag: tagName.toLowerCase(), attrs: attrs, start: match.start, end: match.end })
252+
lastTag = tagName
253+
}
254+
255+
if (options.start) {
256+
options.start(tagName, attrs, unary, match.start, match.end)
257+
}
258+
}
259+
260+
function parseEndTag (tagName, start, end) {
261+
let pos, lowerCasedTagName
262+
if (start == null) start = index
263+
if (end == null) end = index
264+
265+
// Find the closest opened tag of the same type
266+
if (tagName) {
267+
lowerCasedTagName = tagName.toLowerCase()
268+
for (pos = stack.length - 1; pos >= 0; pos--) {
269+
if (stack[pos].lowerCasedTag === lowerCasedTagName) {
270+
break
271+
}
272+
}
273+
} else {
274+
// If no tag name is provided, clean shop
275+
pos = 0
276+
}
277+
278+
if (pos >= 0) {
279+
// Close all the open elements, up the stack
280+
for (let i = stack.length - 1; i >= pos; i--) {
281+
if (process.env.NODE_ENV !== 'production' &&
282+
(i > pos || !tagName) &&
283+
options.warn
284+
) {
285+
options.warn(
286+
`tag <${stack[i].tag}> has no matching end tag.`,
287+
{ start: stack[i].start, end: stack[i].end }
288+
)
289+
}
290+
if (options.end) {
291+
options.end(stack[i].tag, start, end)
292+
}
293+
}
294+
295+
// Remove the open elements from the stack
296+
stack.length = pos
297+
lastTag = pos && stack[pos - 1].tag
298+
} else if (lowerCasedTagName === 'br') {
299+
if (options.start) {
300+
options.start(tagName, [], true, start, end)
301+
}
302+
} else if (lowerCasedTagName === 'p') {
303+
if (options.start) {
304+
options.start(tagName, [], false, start, end)
305+
}
306+
if (options.end) {
307+
options.end(tagName, start, end)
308+
}
309+
}
310+
}
311+
}
312+
313+
```
314+
315+
316+
317+
**match的内容:**
318+
319+
* tagName: 标签名
320+
321+
* start:匹配的开始位置
322+
323+
* end:匹配的结束位置
324+
325+
* attrs: 标签的属性配置内容
326+
327+
* unarySlash: 开始标签结尾处的斜杠(自闭合标签如 `<br/>` 时为 `/`,否则为空字符串)
328+
329+
330+
331+
**attrs的内容:**
332+
333+
* name: 属性的key
334+
* value:属性的value

0 commit comments

Comments
 (0)
0