libs/core/langchain_core/utils/mustache.py PYTHON 705 lines View on github.com → Search inside
1"""Adapted from https://github.com/noahmorrison/chevron.23MIT License.4"""56from __future__ import annotations78import logging9from collections.abc import Iterator, Mapping, Sequence10from types import MappingProxyType11from typing import (12    TYPE_CHECKING,13    Any,14    Literal,15    cast,16)1718if TYPE_CHECKING:19    from typing import TypeAlias2021logger = logging.getLogger(__name__)222324Scopes: TypeAlias = list[Literal[False, 0] | Mapping[str, Any]]252627# Globals28_CURRENT_LINE = 129_LAST_TAG_LINE = None303132class ChevronError(SyntaxError):33    """Custom exception for Chevron errors."""343536#37# Helper functions38#394041def grab_literal(template: str, l_del: str) -> tuple[str, str]:42    """Parse a literal from the template.4344    Args:45        template: The template to parse.46        l_del: The left delimiter.4748    Returns:49        The literal and the template.50    """51    global _CURRENT_LINE5253    try:54        # Look for the next tag and move the template to it55        literal, template = template.split(l_del, 1)56        _CURRENT_LINE += literal.count("\n")5758    # There are no more tags in the template?59    except ValueError:60        # Then the rest of the template is a literal61        return (template, "")6263    return (literal, template)646566def l_sa_check(67    template: str,  # noqa: ARG00168    literal: str,69    is_standalone: bool,  # noqa: FBT00170) -> bool:71    """Do a preliminary check to see if a tag could be a standalone.7273    Args:74        template: The template. (Not used.)75        literal: The literal.76        is_standalone: Whether the tag is standalone.7778    Returns:79        Whether the tag could be a standalone.80    """81    # If there is a newline, or the previous tag was a standalone82    if literal.find("\n") != -1 or is_standalone:83        padding = literal.rsplit("\n", maxsplit=1)[-1]8485        # If all the characters since the last newline are spaces86        # Then the next tag could be a standalone87        # Otherwise it can't be88        return padding.isspace() or not padding89    return False909192def r_sa_check(93    template: str,94    tag_type: str,95    is_standalone: bool,  # noqa: FBT00196) -> bool:97    """Do a final check to see if a tag could be a standalone.9899    Args:100        template: The template.101        tag_type: The type of the tag.102        is_standalone: Whether the tag is standalone.103104    Returns:105        Whether the tag could be a standalone.106    """107    # Check right side if we might be a standalone108    if is_standalone and tag_type not in {"variable", "no escape"}:109        on_newline = template.split("\n", 1)110111        # If the stuff to the right of us are spaces we're a standalone112        return on_newline[0].isspace() or not on_newline[0]113114    # If we're a tag can't be a standalone115    return False116117118def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], str]:119    """Parse a tag from a template.120121    Args:122        template: The template.123        l_del: The left delimiter.124        r_del: The right delimiter.125126    Returns:127        The tag and the template.128129    Raises:130        ChevronError: If the tag is unclosed.131        ChevronError: If the set delimiter tag is unclosed.132    """133    tag_types = {134        "!": "comment",135        "#": "section",136        "^": "inverted section",137        "/": "end",138        ">": "partial",139        "=": "set delimiter?",140        "{": "no escape?",141        "&": "no escape",142    }143144    # Get the tag145    try:146        tag, template = template.split(r_del, 1)147    except ValueError as e:148        msg = f"unclosed tag at line {_CURRENT_LINE}"149        raise ChevronError(msg) from e150151    # Check for empty tags152    if not tag.strip():153        msg = f"empty tag at line {_CURRENT_LINE}"154        raise ChevronError(msg)155156    # Find the type meaning of the first character157    tag_type = tag_types.get(tag[0], "variable")158159    # If the type is not a variable160    if tag_type != "variable":161        # Then that first character is not needed162        tag = tag[1:]163164    # If we might be a set delimiter tag165    if tag_type == "set delimiter?":166        # Double check to make sure we are167        if tag.endswith("="):168            tag_type = "set delimiter"169            # Remove the equal sign170            tag = tag[:-1]171172        # Otherwise we should complain173        else:174            msg = f"unclosed set delimiter tag\nat line {_CURRENT_LINE}"175            raise ChevronError(msg)176177    elif (178        # If we might be a no html escape tag179        tag_type == "no escape?"180        # And we have a third curly brace181        # (And are using curly braces as delimiters)182        and l_del == "{{"183        and r_del == "}}"184        and template.startswith("}")185    ):186        # Then we are a no html escape tag187        template = template[1:]188        tag_type = "no escape"189190    # Strip the whitespace off the key and return191    return ((tag_type, tag.strip()), template)192193194#195# The main tokenizing function196#197198199def tokenize(200    template: str, def_ldel: str = "{{", def_rdel: str = "}}"201) -> Iterator[tuple[str, str]]:202    """Tokenize a mustache template.203204    Tokenizes a mustache template in a generator fashion, using file-like objects. It205    also accepts a string containing the template.206207    Args:208        template: a file-like object, or a string of a mustache template209        def_ldel: The default left delimiter210            (`'{{'` by default, as in spec compliant mustache)211        def_rdel: The default right delimiter212            (`'}}'` by default, as in spec compliant mustache)213214    Yields:215        Mustache tags in the form of a tuple `(tag_type, tag_key)` where `tag_type` is216            one of:217218            * literal219            * section220            * inverted section221            * end222            * partial223            * no escape224225            ...and `tag_key` is either the key or in the case of a literal tag, the226            literal itself.227228    Raises:229        ChevronError: If there is a syntax error in the template.230    """231    global _CURRENT_LINE, _LAST_TAG_LINE232    _CURRENT_LINE = 1233    _LAST_TAG_LINE = None234235    is_standalone = True236    open_sections = []237    l_del = def_ldel238    r_del = def_rdel239240    while template:241        literal, template = grab_literal(template, l_del)242243        # If the template is completed244        if not template:245            # Then yield the literal and leave246            yield ("literal", literal)247            break248249        # Do the first check to see if we could be a standalone250        is_standalone = l_sa_check(template, literal, is_standalone)251252        # Parse the tag253        tag, template = parse_tag(template, l_del, r_del)254        tag_type, tag_key = tag255256        # Special tag logic257258        # If we are a set delimiter tag259        if tag_type == "set delimiter":260            # Then get and set the delimiters261            dels = tag_key.strip().split(" ")262            l_del, r_del = dels[0], dels[-1]263264        # If we are a section tag265        elif tag_type in {"section", "inverted section"}:266            # Then open a new section267            open_sections.append(tag_key)268            _LAST_TAG_LINE = _CURRENT_LINE269270        # If we are an end tag271        elif tag_type == "end":272            # Then check to see if the last opened section273            # is the same as us274            try:275                last_section = open_sections.pop()276            except IndexError as e:277                msg = (278                    f'Trying to close tag "{tag_key}"\n'279                    "Looks like it was not opened.\n"280                    f"line {_CURRENT_LINE + 1}"281                )282                raise ChevronError(msg) from e283            if tag_key != last_section:284                # Otherwise we need to complain285                msg = (286                    f'Trying to close tag "{tag_key}"\n'287                    f'last open tag is "{last_section}"\n'288                    f"line {_CURRENT_LINE + 1}"289                )290                raise ChevronError(msg)291292        # Do the second check to see if we're a standalone293        is_standalone = r_sa_check(template, tag_type, is_standalone)294295        # Which if we are296        if is_standalone:297            # Remove the stuff before the newline298            template = template.split("\n", 1)[-1]299300            # Partials need to keep the spaces on their left301            if tag_type != "partial":302                # But other tags don't303                literal = literal.rstrip(" ")304305        # Start yielding306        # Ignore literals that are empty307        if literal:308            yield ("literal", literal)309310        # Ignore comments and set delimiters311        if tag_type not in {"comment", "set delimiter?"}:312            yield (tag_type, tag_key)313314    # If there are any open sections when we're done315    if open_sections:316        # Then we need to complain317        msg = (318            "Unexpected EOF\n"319            f'the tag "{open_sections[-1]}" was never closed\n'320            f"was opened at line {_LAST_TAG_LINE}"321        )322        raise ChevronError(msg)323324325#326# Helper functions327#328329330def _html_escape(string: str) -> str:331    """Return the HTML-escaped string with these characters escaped: `" & < >`."""332    html_codes = {333        '"': "&quot;",334        "<": "&lt;",335        ">": "&gt;",336    }337338    # & must be handled first339    string = string.replace("&", "&amp;")340    for char, code in html_codes.items():341        string = string.replace(char, code)342    return string343344345def _get_key(346    key: str,347    scopes: Scopes,348    *,349    warn: bool,350    keep: bool,351    def_ldel: str,352    def_rdel: str,353) -> Any:354    """Retrieve a value from the current scope using a dot-separated key path.355356    Traverses through nested dictionaries and lists using dot notation.357358    Supports special key `'.'` to return the current scope.359360    Args:361        key: Dot-separated key path (e.g., `'user.name'` or `'.'` for current scope).362        scopes: List of scope dictionaries to search through.363        warn: Whether to log a warning when a key is not found.364        keep: Whether to return the original template tag when key is not found.365        def_ldel: Left delimiter for template (used when keep is `True`).366        def_rdel: Right delimiter for template (used when keep is `True`).367368    Returns:369        The value found at the key path.370371            If not found, returns the original template tag when keep is `True`,372            otherwise returns an empty string.373    """374    # If the key is a dot375    if key == ".":376        # Then just return the current scope377        return scopes[0]378379    # Loop through the scopes380    for scope in scopes:381        try:382            # Return an empty string if falsy, with two exceptions383            # 0 should return 0, and False should return False384            if scope in (0, False):385                return scope386387            resolved_scope = scope388            # For every dot separated key389            for child in key.split("."):390                # Return an empty string if falsy, with two exceptions391                # 0 should return 0, and False should return False392                if resolved_scope in (0, False):393                    return resolved_scope394                # Move into the scope395                if isinstance(resolved_scope, dict):396                    try:397                        resolved_scope = resolved_scope[child]398                    except (KeyError, TypeError):399                        # Key not found - will be caught by outer try-except400                        msg = f"Key {child!r} not found in dict"401                        raise KeyError(msg) from None402                elif isinstance(resolved_scope, (list, tuple)):403                    try:404                        resolved_scope = resolved_scope[int(child)]405                    except (ValueError, IndexError, TypeError):406                        # Invalid index - will be caught by outer try-except407                        msg = f"Invalid index {child!r} for list/tuple"408                        raise IndexError(msg) from None409                else:410                    # Reject everything else for security411                    # This prevents traversing into arbitrary Python objects412                    msg = (413                        f"Cannot traverse into {type(resolved_scope).__name__}. "414                        "Mustache templates only support dict, list, and tuple. "415                        f"Got: {type(resolved_scope)}"416                    )417                    raise TypeError(msg)  # noqa: TRY301418419            try:420                # This allows for custom falsy data types421                # https://github.com/noahmorrison/chevron/issues/35422                if resolved_scope._CHEVRON_return_scope_when_falsy:  # type: ignore[union-attr] # noqa: SLF001423                    return resolved_scope424            except AttributeError:425                if resolved_scope in (0, False):426                    return resolved_scope427                return resolved_scope or ""428        except (AttributeError, KeyError, IndexError, ValueError, TypeError):429            # We couldn't find the key in the current scope430            # TypeError: Attempted to traverse into non-dict/list type431            # We'll try again on the next pass432            pass433434    # We couldn't find the key in any of the scopes435436    if warn:437        logger.warning("Could not find key '%s'", key)438439    if keep:440        return f"{def_ldel} {key} {def_rdel}"441442    return ""443444445def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str:446    """Load a partial.447448    Returns:449        The partial.450    """451    try:452        # Maybe the partial is in the dictionary453        return partials_dict[name]454    except KeyError:455        return ""456457458#459# The main rendering function460#461g_token_cache: dict[str, list[tuple[str, str]]] = {}462463EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({})464465466def render(467    template: str | list[tuple[str, str]] = "",468    data: Mapping[str, Any] = EMPTY_DICT,469    partials_dict: Mapping[str, str] = EMPTY_DICT,470    padding: str = "",471    def_ldel: str = "{{",472    def_rdel: str = "}}",473    scopes: Scopes | None = None,474    warn: bool = False,  # noqa: FBT001,FBT002475    keep: bool = False,  # noqa: FBT001,FBT002476) -> str:477    """Render a mustache template.478479    Renders a mustache template with a data scope and inline partial capability.480481    Args:482        template: A file-like object or a string containing the template.483        data: A python dictionary with your data scope.484        partials_dict: A python dictionary which will be search for partials485            before the filesystem is.486487            `{'include': 'foo'}` is the same as a file called include.mustache488            (defaults to `{}`).489        padding: This is for padding partials, and shouldn't be used490            (but can be if you really want to).491        def_ldel: The default left delimiter492493            (`'{{'` by default, as in spec compliant mustache).494        def_rdel: The default right delimiter495496            (`'}}'` by default, as in spec compliant mustache).497        scopes: The list of scopes that `get_key` will look through.498        warn: Log a warning when a template substitution isn't found in the data499        keep: Keep unreplaced tags when a substitution isn't found in the data.500501    Returns:502        A string containing the rendered template.503    """504    # If the template is a sequence but not derived from a string505    if isinstance(template, Sequence) and not isinstance(template, str):506        # Then we don't need to tokenize it507        # But it does need to be a generator508        tokens: Iterator[tuple[str, str]] = (token for token in template)509    elif template in g_token_cache:510        tokens = (token for token in g_token_cache[template])511    else:512        # Otherwise make a generator513        tokens = tokenize(template, def_ldel, def_rdel)514515    output = ""516517    if scopes is None:518        scopes = [data]519520    # Run through the tokens521    for tag, key in tokens:522        # Set the current scope523        current_scope = scopes[0]524525        # If we're an end tag526        if tag == "end":527            # Pop out of the latest scope528            del scopes[0]529530        # If the current scope is falsy and not the only scope531        elif not current_scope and len(scopes) != 1:532            if tag in {"section", "inverted section"}:533                # Set the most recent scope to a falsy value534                scopes.insert(0, False)535536        # If we're a literal tag537        elif tag == "literal":538            # Add padding to the key and add it to the output539            output += key.replace("\n", "\n" + padding)540541        # If we're a variable tag542        elif tag == "variable":543            # Add the html escaped key to the output544            thing = _get_key(545                key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel546            )547            if thing is True and key == ".":548                # if we've coerced into a boolean by accident549                # (inverted tags do this)550                # then get the un-coerced object (next in the stack)551                thing = scopes[1]552            if not isinstance(thing, str):553                thing = str(thing)554            output += _html_escape(thing)555556        # If we're a no html escape tag557        elif tag == "no escape":558            # Just lookup the key and add it559            thing = _get_key(560                key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel561            )562            if not isinstance(thing, str):563                thing = str(thing)564            output += thing565566        # If we're a section tag567        elif tag == "section":568            # Get the sections scope569            scope = _get_key(570                key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel571            )572573            # If the scope is a callable (as described in574            # https://mustache.github.io/mustache.5.html)575            if callable(scope):576                # Generate template text from tags577                text = ""578                tags: list[tuple[str, str]] = []579                for token in tokens:580                    if token == ("end", key):581                        break582583                    tags.append(token)584                    tag_type, tag_key = token585                    if tag_type == "literal":586                        text += tag_key587                    elif tag_type == "no escape":588                        text += f"{def_ldel}& {tag_key} {def_rdel}"589                    else:590                        text += "{}{} {}{}".format(591                            def_ldel,592                            {593                                "comment": "!",594                                "section": "#",595                                "inverted section": "^",596                                "end": "/",597                                "partial": ">",598                                "set delimiter": "=",599                                "no escape": "&",600                                "variable": "",601                            }[tag_type],602                            tag_key,603                            def_rdel,604                        )605606                g_token_cache[text] = tags607608                rend = scope(609                    text,610                    lambda template, data=None: render(611                        template,612                        data={},613                        partials_dict=partials_dict,614                        padding=padding,615                        def_ldel=def_ldel,616                        def_rdel=def_rdel,617                        scopes=(data and [data, *scopes]) or scopes,618                        warn=warn,619                        keep=keep,620                    ),621                )622623                output += rend624625            # If the scope is a sequence, an iterator or generator but not626            # derived from a string627            elif isinstance(scope, (Sequence, Iterator)) and not isinstance(scope, str):628                # Then we need to do some looping629630                # Gather up all the tags inside the section631                # (And don't be tricked by nested end tags with the same key)632                # TODO: This feels like it still has edge cases, no?633                tags = []634                tags_with_same_key = 0635                for token in tokens:636                    if token == ("section", key):637                        tags_with_same_key += 1638                    if token == ("end", key):639                        tags_with_same_key -= 1640                        if tags_with_same_key < 0:641                            break642                    tags.append(token)643644                # For every item in the scope645                for thing in scope:646                    # Append it as the most recent scope and render647                    new_scope = [thing, *scopes]648                    rend = render(649                        template=tags,650                        scopes=new_scope,651                        padding=padding,652                        partials_dict=partials_dict,653                        def_ldel=def_ldel,654                        def_rdel=def_rdel,655                        warn=warn,656                        keep=keep,657                    )658659                    output += rend660661            else:662                # Otherwise we're just a scope section663                scopes.insert(0, scope)664665        # If we're an inverted section666        elif tag == "inverted section":667            # Add the flipped scope to the scopes668            scope = _get_key(669                key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel670            )671            scopes.insert(0, cast("Literal[False]", not scope))672673        # If we're a partial674        elif tag == "partial":675            # Load the partial676            partial = _get_partial(key, partials_dict)677678            # Find what to pad the partial with679            left = output.rpartition("\n")[2]680            part_padding = padding681            if left.isspace():682                part_padding += left683684            # Render the partial685            part_out = render(686                template=partial,687                partials_dict=partials_dict,688                def_ldel=def_ldel,689                def_rdel=def_rdel,690                padding=part_padding,691                scopes=scopes,692                warn=warn,693                keep=keep,694            )695696            # If the partial was indented697            if left.isspace():698                # then remove the spaces from the end699                part_out = part_out.rstrip(" \t")700701            # Add the partials output to the output702            output += part_out703704    return output

Code quality findings 14

Avoid global variables; use function parameters or class attributes for better scope management
global-variable
global _CURRENT_LINE
Ensure functions have docstrings for documentation
missing-docstring
def l_sa_check(
Ensure functions have docstrings for documentation
missing-docstring
def r_sa_check(
Ensure functions have docstrings for documentation
missing-docstring
def tokenize(
Avoid global variables; use function parameters or class attributes for better scope management
global-variable
global _CURRENT_LINE, _LAST_TAG_LINE
Ensure try blocks have corresponding except or finally blocks
try-without-except
try:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(resolved_scope, dict):
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
elif isinstance(resolved_scope, (list, tuple)):
Ensure functions have docstrings for documentation
missing-docstring
def render(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if isinstance(template, Sequence) and not isinstance(template, str):
Avoid unless necessary; Python's garbage collector typically handles object deletion
unnecessary-del
del scopes[0]
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if not isinstance(thing, str):
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
if not isinstance(thing, str):
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
elif isinstance(scope, (Sequence, Iterator)) and not isinstance(scope, str):

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.