/**
 * Not type-checking this file because it's mostly vendor code.
 */

/*!
 * HTML Parser By John Resig (ejohn.org)
 * Modified by Juriy "kangax" Zaytsev
 * Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later)
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 */

import { makeMap, no } from 'shared/util'
import { isNonPhrasingTag } from 'web/compiler/util'
import { unicodeRegExp } from 'core/util/lang'
import { ASTAttr, CompilerOptions } from 'types/compiler'

// Regular Expressions for parsing tags and attributes
//
// Capture groups shared by `attribute` and `dynamicArgAttribute`:
//   [1] attribute name
//   [2] the `=` sign (absent for value-less attributes)
//   [3] double-quoted value, [4] single-quoted value, [5] unquoted value
const attribute =
  /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
// Same shape as `attribute`, but the name must start with `v-xxx:`, `@`, `:`
// or `#` immediately followed by a bracketed argument, e.g. `:[key]`.
const dynamicArgAttribute =
  /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*`
// Optionally prefixed name (`prefix:name`), captured as a single group.
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
const startTagOpen = new RegExp(`^<${qnameCapture}`)
// Group [1] is the optional self-closing slash.
const startTagClose = /^\s*(\/?)>/
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
const doctype = /^<!DOCTYPE [^>]+>/i
// #7298: escape - to avoid being passed as HTML comment when inlined in page
const comment = /^<!\--/
const conditionalComment = /^<!\[/

// Special Elements (can contain anything)
export const isPlainTextElement = makeMap('script,style,textarea', true)
// Per-tag cache of the "raw text up to closing tag" regexes built in parseHTML.
const reCache = {}

// Maps each entity matched by `encodedAttr` / `encodedAttrWithNewLines` to the
// character it stands for. The keys must stay in sync with those two regexes.
const decodingMap = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&amp;': '&',
  '&#10;': '\n',
  '&#9;': '\t',
  '&#39;': "'"
}
const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g

// #5992
const isIgnoreNewlineTag = makeMap('pre,textarea', true)
// True when `tag` is one of the tags above and `html` begins with a newline.
const shouldIgnoreFirstNewline = (tag, html) =>
  tag && isIgnoreNewlineTag(tag) && html[0] === '\n'

/**
 * Replaces the encoded entities found in an attribute value with the
 * characters they represent.
 *
 * @param value - raw attribute value as captured from the template
 * @param shouldDecodeNewlines - when truthy, `&#10;` and `&#9;` are decoded
 *   in addition to `&lt;`, `&gt;`, `&quot;`, `&amp;` and `&#39;`
 * @returns the decoded value
 */
function decodeAttr(value, shouldDecodeNewlines) {
  const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
  return value.replace(re, match => decodingMap[match])
}

/**
 * Options accepted by `parseHTML`: the compiler options plus the callbacks
 * invoked as the parser walks the template.
 */
export interface HTMLParserOptions extends CompilerOptions {
  start?: (
    tag: string,
    attrs: ASTAttr[],
    unary: boolean,
    start: number,
    end: number
  ) => void
  end?: (tag: string, start: number, end: number) => void
  chars?: (text: string, start?: number, end?: number) => void
  comment?: (content: string, start: number, end: number) => void
}

/**
 * Tokenizes `html` from front to back and reports what it finds through the
 * callbacks on `options` (`start`, `end`, `chars`, `comment`).
 *
 * The input string is consumed as parsing proceeds; `index` tracks how many
 * characters have been consumed so far. Non-unary start tags are pushed onto
 * an internal stack, and any still open when the input is exhausted are
 * closed by a final `parseEndTag()` call.
 *
 * @param html - template source to parse
 * @param options - parser options and callbacks
 */
export function parseHTML(html, options: HTMLParserOptions) {
  const stack: any[] = []
  const expectHTML = options.expectHTML
  const isUnaryTag = options.isUnaryTag || no
  const canBeLeftOpenTag = options.canBeLeftOpenTag || no
  let index = 0
  let last, lastTag
  while (html) {
    // Remembered so we can detect an iteration that consumed nothing.
    last = html
    // Make sure we're not in a plaintext content element like script/style
    if (!lastTag || !isPlainTextElement(lastTag)) {
      let textEnd = html.indexOf('<')
      if (textEnd === 0) {
        // Comment:
        if (comment.test(html)) {
          const commentEnd = html.indexOf('-->')

          if (commentEnd >= 0) {
            if (options.shouldKeepComment && options.comment) {
              options.comment(
                // Skip the leading `<!--` (4 chars); `-->` is 3 chars.
                html.substring(4, commentEnd),
                index,
                index + commentEnd + 3
              )
            }
            advance(commentEnd + 3)
            continue
          }
        }

        // https://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
        if (conditionalComment.test(html)) {
          const conditionalEnd = html.indexOf(']>')

          if (conditionalEnd >= 0) {
            advance(conditionalEnd + 2)
            continue
          }
        }

        // Doctype:
        const doctypeMatch = html.match(doctype)
        if (doctypeMatch) {
          advance(doctypeMatch[0].length)
          continue
        }

        // End tag:
        const endTagMatch = html.match(endTag)
        if (endTagMatch) {
          const curIndex = index
          advance(endTagMatch[0].length)
          parseEndTag(endTagMatch[1], curIndex, index)
          continue
        }

        // Start tag:
        const startTagMatch = parseStartTag()
        if (startTagMatch) {
          handleStartTag(startTagMatch)
          if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
            advance(1)
          }
          continue
        }
      }

      let text, rest, next
      if (textEnd >= 0) {
        rest = html.slice(textEnd)
        // Extend the text run past any `<` that does not begin an end tag,
        // a start tag, a comment or a conditional comment.
        while (
          !endTag.test(rest) &&
          !startTagOpen.test(rest) &&
          !comment.test(rest) &&
          !conditionalComment.test(rest)
        ) {
          // < in plain text, be forgiving and treat it as text
          next = rest.indexOf('<', 1)
          if (next < 0) break
          textEnd += next
          rest = html.slice(textEnd)
        }
        text = html.substring(0, textEnd)
      }

      // No `<` at all: everything that is left is text.
      if (textEnd < 0) {
        text = html
      }

      if (text) {
        advance(text.length)
      }

      if (options.chars && text) {
        options.chars(text, index - text.length, index)
      }
    } else {
      // Inside a plain-text element: take everything up to its closing tag
      // as a single text chunk.
      let endTagLength = 0
      const stackedTag = lastTag.toLowerCase()
      const reStackedTag =
        reCache[stackedTag] ||
        (reCache[stackedTag] = new RegExp(
          '([\\s\\S]*?)(</' + stackedTag + '[^>]*>)',
          'i'
        ))
      // The regex has no `g` flag, so only the first "text + closing tag"
      // occurrence is removed; `rest` is whatever remains of the input.
      const rest = html.replace(reStackedTag, function (all, text, endTag) {
        endTagLength = endTag.length
        if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
          text = text
            .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
            .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
        }
        if (shouldIgnoreFirstNewline(stackedTag, text)) {
          text = text.slice(1)
        }
        if (options.chars) {
          options.chars(text)
        }
        return ''
      })
      index += html.length - rest.length
      html = rest
      parseEndTag(stackedTag, index - endTagLength, index)
    }

    // Nothing was consumed this iteration: hand the remainder to `chars`
    // and stop, otherwise the loop would never terminate.
    if (html === last) {
      options.chars && options.chars(html)
      if (__DEV__ && !stack.length && options.warn) {
        options.warn(`Mal-formatted tag at end of template: "${html}"`, {
          start: index + html.length
        })
      }
      break
    }
  }

  // Clean up any remaining tags
  parseEndTag()

  /** Consumes `n` characters from the front of `html`. */
  function advance(n) {
    index += n
    html = html.substring(n)
  }

  /**
   * Tries to read a complete start tag (name, attributes, closing `>` or
   * `/>`) from the front of `html`.
   *
   * @returns a match record `{ tagName, attrs, start, unarySlash, end }`, or
   *   `undefined` when `html` does not begin with a start tag or the tag is
   *   never closed. Note that input consumed while scanning is not restored
   *   in the unclosed case.
   */
  function parseStartTag() {
    const start = html.match(startTagOpen)
    if (start) {
      const match: any = {
        tagName: start[1],
        attrs: [],
        start: index
      }
      advance(start[0].length)
      let end, attr
      // Collect attributes until the tag's closing `>` / `/>` is reached.
      while (
        !(end = html.match(startTagClose)) &&
        (attr = html.match(dynamicArgAttribute) || html.match(attribute))
      ) {
        attr.start = index
        advance(attr[0].length)
        attr.end = index
        match.attrs.push(attr)
      }
      if (end) {
        match.unarySlash = end[1]
        advance(end[0].length)
        match.end = index
        return match
      }
    }
  }

  /**
   * Turns a match record from `parseStartTag` into an `options.start` call,
   * decoding attribute values and pushing non-unary tags onto the stack.
   */
  function handleStartTag(match) {
    const tagName = match.tagName
    const unarySlash = match.unarySlash

    if (expectHTML) {
      // An open <p> is closed implicitly by a non-phrasing tag.
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag(lastTag)
      }
      // A tag that may be left open is closed by a sibling of the same name.
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag(tagName)
      }
    }

    const unary = isUnaryTag(tagName) || !!unarySlash

    const l = match.attrs.length
    const attrs: ASTAttr[] = new Array(l)
    for (let i = 0; i < l; i++) {
      const args = match.attrs[i]
      // Whichever quoting style matched: double, single or unquoted.
      const value = args[3] || args[4] || args[5] || ''
      const shouldDecodeNewlines =
        tagName === 'a' && args[1] === 'href'
          ? options.shouldDecodeNewlinesForHref
          : options.shouldDecodeNewlines
      attrs[i] = {
        name: args[1],
        value: decodeAttr(value, shouldDecodeNewlines)
      }
      if (__DEV__ && options.outputSourceRange) {
        // args[0] includes the whitespace before the attribute name; skip it
        // so `start` points at the name. `[0]` is required here: `.length`
        // on the match array itself is always 1, not the whitespace length.
        const leadingWhitespace = args[0].match(/^\s*/)[0].length
        attrs[i].start = args.start + leadingWhitespace
        attrs[i].end = args.end
      }
    }

    if (!unary) {
      stack.push({
        tag: tagName,
        lowerCasedTag: tagName.toLowerCase(),
        attrs: attrs,
        start: match.start,
        end: match.end
      })
      lastTag = tagName
    }

    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

  /**
   * Closes the nearest open tag named `tagName` together with every tag
   * opened after it, calling `options.end` for each. Called without a tag
   * name, it closes everything that is still open.
   *
   * If no matching open tag exists, a stray `</br>` is reported as a unary
   * `<br>` start and a stray `</p>` as an empty `<p></p>` pair; any other
   * unmatched end tag is ignored.
   *
   * @param tagName - name of the tag being closed (optional)
   * @param start - start offset reported to the callbacks; defaults to `index`
   * @param end - end offset reported to the callbacks; defaults to `index`
   */
  function parseEndTag(tagName?: any, start?: any, end?: any) {
    let pos, lowerCasedTagName
    if (start == null) start = index
    if (end == null) end = index

    // Find the closest opened tag of the same type
    if (tagName) {
      lowerCasedTagName = tagName.toLowerCase()
      // Leaves pos at -1 when no open tag matches.
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerCasedTag === lowerCasedTagName) {
          break
        }
      }
    } else {
      // If no tag name is provided, clean shop
      pos = 0
    }

    if (pos >= 0) {
      // Close all the open elements, up the stack
      for (let i = stack.length - 1; i >= pos; i--) {
        // Everything above the matched tag (or every tag, during the final
        // cleanup) was never explicitly closed.
        if (__DEV__ && (i > pos || !tagName) && options.warn) {
          options.warn(`tag <${stack[i].tag}> has no matching end tag.`, {
            start: stack[i].start,
            end: stack[i].end
          })
        }
        if (options.end) {
          options.end(stack[i].tag, start, end)
        }
      }

      // Remove the open elements from the stack
      stack.length = pos
      lastTag = pos && stack[pos - 1].tag
    } else if (lowerCasedTagName === 'br') {
      if (options.start) {
        options.start(tagName, [], true, start, end)
      }
    } else if (lowerCasedTagName === 'p') {
      if (options.start) {
        options.start(tagName, [], false, start, end)
      }
      if (options.end) {
        options.end(tagName, start, end)
      }
    }
  }
}
Findings
✓ No findings reported for this file.