src/compiler/parser/html-parser.ts TYPESCRIPT 342 lines View on github.com → Search inside
/**
 * Not type-checking this file because it's mostly vendor code.
 */

/*!
 * HTML Parser By John Resig (ejohn.org)
 * Modified by Juriy "kangax" Zaytsev
 * Original code by Erik Arvidsson (MPL-1.1 OR Apache-2.0 OR GPL-2.0-or-later)
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 */

import { makeMap, no } from 'shared/util'
import { isNonPhrasingTag } from 'web/compiler/util'
import { unicodeRegExp } from 'core/util/lang'
import { ASTAttr, CompilerOptions } from 'types/compiler'

// Regular Expressions for parsing tags and attributes
// Capture groups for both attribute regexes:
//   [1] name, [2] "=", [3] double-quoted value, [4] single-quoted value,
//   [5] unquoted value.
const attribute =
  /^\s*([^\s"'<>\/=]+)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
const dynamicArgAttribute =
  /^\s*((?:v-[\w-]+:|@|:|#)\[[^=]+?\][^\s"'<>\/=]*)(?:\s*(=)\s*(?:"([^"]*)"+|'([^']*)'+|([^\s"'=<>`]+)))?/
const ncname = `[a-zA-Z_][\\-\\.0-9_a-zA-Z${unicodeRegExp.source}]*`
const qnameCapture = `((?:${ncname}\\:)?${ncname})`
const startTagOpen = new RegExp(`^<${qnameCapture}`)
const startTagClose = /^\s*(\/?)>/
const endTag = new RegExp(`^<\\/${qnameCapture}[^>]*>`)
const doctype = /^<!DOCTYPE [^>]+>/i
// #7298: escape - to avoid being passed as HTML comment when inlined in page
const comment = /^<!\--/
const conditionalComment = /^<!\[/

// Special Elements (can contain anything)
export const isPlainTextElement = makeMap('script,style,textarea', true)
// Cache of "content up to the closing tag" regexes, keyed by lower-cased tag.
const reCache = {}

// Entity -> character table used by decodeAttr.
const decodingMap = {
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&amp;': '&',
  '&#10;': '\n',
  '&#9;': '\t',
  '&#39;': "'"
}
const encodedAttr = /&(?:lt|gt|quot|amp|#39);/g
const encodedAttrWithNewLines = /&(?:lt|gt|quot|amp|#39|#10|#9);/g

// #5992
const isIgnoreNewlineTag = makeMap('pre,textarea', true)
// Truthy when `tag` is <pre>/<textarea> and `html` begins with a newline.
const shouldIgnoreFirstNewline = (tag, html) =>
  tag && isIgnoreNewlineTag(tag) && html[0] === '\n'

/**
 * Replaces the encoded entities listed in `decodingMap` inside an attribute
 * value. `&#10;` and `&#9;` are only decoded when `shouldDecodeNewlines` is
 * truthy.
 */
function decodeAttr(value, shouldDecodeNewlines) {
  const re = shouldDecodeNewlines ? encodedAttrWithNewLines : encodedAttr
  return value.replace(re, match => decodingMap[match])
}

/**
 * Compiler options plus the callbacks parseHTML invokes while scanning.
 */
export interface HTMLParserOptions extends CompilerOptions {
  start?: (
    tag: string,
    attrs: ASTAttr[],
    unary: boolean,
    start: number,
    end: number
  ) => void
  end?: (tag: string, start: number, end: number) => void
  chars?: (text: string, start?: number, end?: number) => void
  comment?: (content: string, start: number, end: number) => void
}

/**
 * Scans `html` from left to right and reports what it finds through the
 * callbacks on `options` (`start`, `end`, `chars`, `comment`). The function
 * itself returns nothing.
 *
 * @param html - Template source to scan; consumed progressively via `advance`.
 * @param options - Callbacks and parsing flags.
 */
export function parseHTML(html, options: HTMLParserOptions) {
  // Open (non-unary) elements, innermost last.
  const stack: any[] = []
  const expectHTML = options.expectHTML
  const isUnaryTag = options.isUnaryTag || no
  const canBeLeftOpenTag = options.canBeLeftOpenTag || no
  // Offset of the current `html` head within the original input.
  let index = 0
  let last, lastTag
  while (html) {
    last = html
    // Make sure we're not in a plaintext content element like script/style
    if (!lastTag || !isPlainTextElement(lastTag)) {
      let textEnd = html.indexOf('<')
      if (textEnd === 0) {
        // Comment:
        if (comment.test(html)) {
          const commentEnd = html.indexOf('-->')

          if (commentEnd >= 0) {
            if (options.shouldKeepComment && options.comment) {
              options.comment(
                html.substring(4, commentEnd),
                index,
                index + commentEnd + 3
              )
            }
            advance(commentEnd + 3)
            continue
          }
        }

        // https://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
        if (conditionalComment.test(html)) {
          const conditionalEnd = html.indexOf(']>')

          if (conditionalEnd >= 0) {
            advance(conditionalEnd + 2)
            continue
          }
        }

        // Doctype:
        const doctypeMatch = html.match(doctype)
        if (doctypeMatch) {
          advance(doctypeMatch[0].length)
          continue
        }

        // End tag:
        const endTagMatch = html.match(endTag)
        if (endTagMatch) {
          const curIndex = index
          advance(endTagMatch[0].length)
          parseEndTag(endTagMatch[1], curIndex, index)
          continue
        }

        // Start tag:
        const startTagMatch = parseStartTag()
        if (startTagMatch) {
          handleStartTag(startTagMatch)
          if (shouldIgnoreFirstNewline(startTagMatch.tagName, html)) {
            advance(1)
          }
          continue
        }
      }

      let text, rest, next
      if (textEnd >= 0) {
        rest = html.slice(textEnd)
        while (
          !endTag.test(rest) &&
          !startTagOpen.test(rest) &&
          !comment.test(rest) &&
          !conditionalComment.test(rest)
        ) {
          // < in plain text, be forgiving and treat it as text
          next = rest.indexOf('<', 1)
          if (next < 0) break
          textEnd += next
          rest = html.slice(textEnd)
        }
        text = html.substring(0, textEnd)
      }

      // No "<" at all: everything that is left is text.
      if (textEnd < 0) {
        text = html
      }

      if (text) {
        advance(text.length)
      }

      if (options.chars && text) {
        options.chars(text, index - text.length, index)
      }
    } else {
      // Inside script/style/textarea: take everything up to the matching
      // closing tag as raw text.
      let endTagLength = 0
      const stackedTag = lastTag.toLowerCase()
      const reStackedTag =
        reCache[stackedTag] ||
        (reCache[stackedTag] = new RegExp(
          '([\\s\\S]*?)(</' + stackedTag + '[^>]*>)',
          'i'
        ))
      const rest = html.replace(reStackedTag, function (all, text, endTag) {
        endTagLength = endTag.length
        if (!isPlainTextElement(stackedTag) && stackedTag !== 'noscript') {
          text = text
            .replace(/<!\--([\s\S]*?)-->/g, '$1') // #7298
            .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1')
        }
        if (shouldIgnoreFirstNewline(stackedTag, text)) {
          text = text.slice(1)
        }
        if (options.chars) {
          options.chars(text)
        }
        return ''
      })
      index += html.length - rest.length
      html = rest
      parseEndTag(stackedTag, index - endTagLength, index)
    }

    // Nothing was consumed this iteration: emit the remainder as text and
    // stop, otherwise the loop would never terminate.
    if (html === last) {
      options.chars && options.chars(html)
      if (__DEV__ && !stack.length && options.warn) {
        options.warn(`Mal-formatted tag at end of template: "${html}"`, {
          start: index + html.length
        })
      }
      break
    }
  }

  // Clean up any remaining tags
  parseEndTag()

  /** Drops the first `n` characters of `html` and moves `index` forward. */
  function advance(n) {
    index += n
    html = html.substring(n)
  }

  /**
   * Tries to consume a start tag at the head of `html`.
   * Returns a match object ({ tagName, attrs, start, unarySlash, end }) when
   * the tag is properly closed by ">" or "/>", otherwise undefined. Note that
   * input consumed before a missing closing ">" is not restored.
   */
  function parseStartTag() {
    const start = html.match(startTagOpen)
    if (start) {
      const match: any = {
        tagName: start[1],
        attrs: [],
        start: index
      }
      advance(start[0].length)
      let end, attr
      while (
        !(end = html.match(startTagClose)) &&
        (attr = html.match(dynamicArgAttribute) || html.match(attribute))
      ) {
        // Offsets are stored on the regex match array itself.
        attr.start = index
        advance(attr[0].length)
        attr.end = index
        match.attrs.push(attr)
      }
      if (end) {
        match.unarySlash = end[1]
        advance(end[0].length)
        match.end = index
        return match
      }
    }
  }

  /**
   * Turns a parseStartTag result into an `options.start` call, pushing the
   * element on `stack` when it is not unary.
   */
  function handleStartTag(match) {
    const tagName = match.tagName
    const unarySlash = match.unarySlash

    if (expectHTML) {
      // Close the open <p> when a non-phrasing tag starts inside it.
      if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
        parseEndTag(lastTag)
      }
      // Close a left-open tag when the same tag starts again.
      if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
        parseEndTag(tagName)
      }
    }

    const unary = isUnaryTag(tagName) || !!unarySlash

    const l = match.attrs.length
    const attrs: ASTAttr[] = new Array(l)
    for (let i = 0; i < l; i++) {
      const args = match.attrs[i]
      const value = args[3] || args[4] || args[5] || ''
      const shouldDecodeNewlines =
        tagName === 'a' && args[1] === 'href'
          ? options.shouldDecodeNewlinesForHref
          : options.shouldDecodeNewlines
      attrs[i] = {
        name: args[1],
        value: decodeAttr(value, shouldDecodeNewlines)
      }
      if (__DEV__ && options.outputSourceRange) {
        // Skip the whitespace that precedes the attribute name. Use the
        // length of the matched string ([0]); the match array's own length is
        // always 1 here, which mis-reported the start whenever the attribute
        // was preceded by anything other than exactly one whitespace char.
        attrs[i].start = args.start + args[0].match(/^\s*/)[0].length
        attrs[i].end = args.end
      }
    }

    if (!unary) {
      stack.push({
        tag: tagName,
        lowerCasedTag: tagName.toLowerCase(),
        attrs: attrs,
        start: match.start,
        end: match.end
      })
      lastTag = tagName
    }

    if (options.start) {
      options.start(tagName, attrs, unary, match.start, match.end)
    }
  }

  /**
   * Closes open elements.
   * - With `tagName`: closes everything up to and including the nearest open
   *   element of that name (case-insensitive).
   * - Without `tagName`: closes every element still on the stack.
   * - If no matching element is open, a stray `</br>` is reported as a unary
   *   start tag and a stray `</p>` as an empty start/end pair; any other
   *   stray end tag is ignored.
   * `start` / `end` default to the current `index`.
   */
  function parseEndTag(tagName?: any, start?: any, end?: any) {
    let pos, lowerCasedTagName
    if (start == null) start = index
    if (end == null) end = index

    // Find the closest opened tag of the same type
    if (tagName) {
      lowerCasedTagName = tagName.toLowerCase()
      for (pos = stack.length - 1; pos >= 0; pos--) {
        if (stack[pos].lowerCasedTag === lowerCasedTagName) {
          break
        }
      }
    } else {
      // If no tag name is provided, clean shop
      pos = 0
    }

    if (pos >= 0) {
      // Close all the open elements, up the stack
      for (let i = stack.length - 1; i >= pos; i--) {
        if (__DEV__ && (i > pos || !tagName) && options.warn) {
          options.warn(`tag <${stack[i].tag}> has no matching end tag.`, {
            start: stack[i].start,
            end: stack[i].end
          })
        }
        if (options.end) {
          options.end(stack[i].tag, start, end)
        }
      }

      // Remove the open elements from the stack
      stack.length = pos
      lastTag = pos && stack[pos - 1].tag
    } else if (lowerCasedTagName === 'br') {
      if (options.start) {
        options.start(tagName, [], true, start, end)
      }
    } else if (lowerCasedTagName === 'p') {
      if (options.start) {
        options.start(tagName, [], false, start, end)
      }
      if (options.end) {
        options.end(tagName, start, end)
      }
    }
  }
}

Findings

✓ No findings reported for this file.

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.