libs/core/tests/unit_tests/load/test_serializable.py PYTHON 1,175 lines View on github.com → Search inside
1import inspect2import json3import warnings4from typing import Any56import pytest7from pydantic import BaseModel, ConfigDict, Field, SecretStr89from langchain_core._api import LangChainDeprecationWarning10from langchain_core._api.deprecation import LangChainPendingDeprecationWarning11from langchain_core.documents import Document12from langchain_core.load import InitValidator, Serializable, dumpd, dumps, load, loads13from langchain_core.load.load import (14    _get_default_allowed_class_paths,15)16from langchain_core.load.serializable import _is_field_useful17from langchain_core.messages import AIMessage18from langchain_core.outputs import ChatGeneration, Generation19from langchain_core.prompts import (20    ChatPromptTemplate,21    HumanMessagePromptTemplate,22    PromptTemplate,23)24from langchain_core.runnables.history import RunnableWithMessageHistory25from langchain_core.tracers import log_stream262728class NonBoolObj:29    def __bool__(self) -> bool:30        msg = "Truthiness can't be determined"31        raise ValueError(msg)3233    def __eq__(self, other: object) -> bool:34        msg = "Equality can't be determined"35        raise ValueError(msg)3637    def __str__(self) -> str:38        return self.__class__.__name__3940    def __repr__(self) -> str:41        return self.__class__.__name__4243    __hash__ = None  # type: ignore[assignment]444546def test_simple_serialization() -> None:47    class Foo(Serializable):48        bar: int49        baz: str5051    foo = Foo(bar=1, baz="hello")52    assert dumpd(foo) == {53        "id": ["tests", "unit_tests", "load", "test_serializable", "Foo"],54        "lc": 1,55        "repr": "Foo(bar=1, baz='hello')",56        "type": "not_implemented",57    }585960def test_simple_serialization_is_serializable() -> None:61    class Foo(Serializable):62        bar: int63        baz: str6465        @classmethod66        def is_lc_serializable(cls) -> bool:67            return True6869    foo = Foo(bar=1, baz="hello")70    assert foo.lc_id() == ["tests", "unit_tests", "load", "test_serializable", "Foo"]71    assert dumpd(foo) == {72        "id": ["tests", "unit_tests", "load", "test_serializable", "Foo"],73        "kwargs": {"bar": 1, "baz": "hello"},74        "lc": 1,75        "type": "constructor",76    }777879def test_simple_serialization_secret() -> None:80    """Test handling of secrets."""8182    class Foo(Serializable):83        bar: int84        baz: str85        secret: SecretStr86        secret_2: str8788        @classmethod89        def is_lc_serializable(cls) -> bool:90            return True9192        @property93        def lc_secrets(self) -> dict[str, str]:94            return {"secret": "MASKED_SECRET", "secret_2": "MASKED_SECRET_2"}9596    foo = Foo(97        bar=1, baz="baz", secret=SecretStr("SUPER_SECRET"), secret_2="SUPER_SECRET"98    )99    assert dumpd(foo) == {100        "id": ["tests", "unit_tests", "load", "test_serializable", "Foo"],101        "kwargs": {102            "bar": 1,103            "baz": "baz",104            "secret": {"id": ["MASKED_SECRET"], "lc": 1, "type": "secret"},105            "secret_2": {"id": ["MASKED_SECRET_2"], "lc": 1, "type": "secret"},106        },107        "lc": 1,108        "type": "constructor",109    }110111112def test__is_field_useful() -> None:113    class ArrayObj:114        def __bool__(self) -> bool:115            msg = "Truthiness can't be determined"116            raise ValueError(msg)117118        def __eq__(self, other: object) -> bool:119            return self  # type: ignore[return-value]120121        __hash__ = None  # type: ignore[assignment]122123    default_x = ArrayObj()124    default_y = NonBoolObj()125126    class Foo(Serializable):127        x: ArrayObj = Field(default=default_x)128        y: NonBoolObj = Field(default=default_y)129        # Make sure works for fields without default.130        z: ArrayObj131132        model_config = ConfigDict(133            arbitrary_types_allowed=True,134        )135136    foo = Foo(x=ArrayObj(), y=NonBoolObj(), z=ArrayObj())137    assert _is_field_useful(foo, "x", foo.x)138    assert _is_field_useful(foo, "y", foo.y)139140    foo = Foo(x=default_x, y=default_y, z=ArrayObj())141    assert not _is_field_useful(foo, "x", foo.x)142    assert not _is_field_useful(foo, "y", foo.y)143144145class Foo(Serializable):146    bar: int147    baz: str148149    @classmethod150    def is_lc_serializable(cls) -> bool:151        return True152153154def test_simple_deserialization() -> None:155    foo = Foo(bar=1, baz="hello")156    assert foo.lc_id() == ["tests", "unit_tests", "load", "test_serializable", "Foo"]157    serialized_foo = dumpd(foo)158    assert serialized_foo == {159        "id": ["tests", "unit_tests", "load", "test_serializable", "Foo"],160        "kwargs": {"bar": 1, "baz": "hello"},161        "lc": 1,162        "type": "constructor",163    }164    new_foo = load(serialized_foo, allowed_objects=[Foo], valid_namespaces=["tests"])165    assert new_foo == foo166167168def test_disallowed_deserialization() -> None:169    foo = Foo(bar=1, baz="hello")170    serialized_foo = dumpd(foo)171    with pytest.raises(ValueError, match="not allowed"):172        load(serialized_foo, allowed_objects=[], valid_namespaces=["tests"])173174175class Foo2(Serializable):176    bar: int177    baz: str178179    @classmethod180    def is_lc_serializable(cls) -> bool:181        return True182183184def test_simple_deserialization_with_additional_imports() -> None:185    foo = Foo(bar=1, baz="hello")186    assert foo.lc_id() == ["tests", "unit_tests", "load", "test_serializable", "Foo"]187    serialized_foo = dumpd(foo)188    assert serialized_foo == {189        "id": ["tests", "unit_tests", "load", "test_serializable", "Foo"],190        "kwargs": {"bar": 1, "baz": "hello"},191        "lc": 1,192        "type": "constructor",193    }194    new_foo = load(195        serialized_foo,196        allowed_objects=[Foo2],197        valid_namespaces=["tests"],198        additional_import_mappings={199            ("tests", "unit_tests", "load", "test_serializable", "Foo"): (200                "tests",201                "unit_tests",202                "load",203                "test_serializable",204                "Foo2",205            )206        },207    )208    assert isinstance(new_foo, Foo2)209210211class Foo3(Serializable):212    model_config = ConfigDict(arbitrary_types_allowed=True)213214    content: str215    non_bool: NonBoolObj216217    @classmethod218    def is_lc_serializable(cls) -> bool:219        return True220221222def test_repr() -> None:223    foo = Foo3(224        content="repr",225        non_bool=NonBoolObj(),226    )227    assert repr(foo) == "Foo3(content='repr', non_bool=NonBoolObj)"228229230def test_str() -> None:231    foo = Foo3(232        content="str",233        non_bool=NonBoolObj(),234    )235    assert str(foo) == "content='str' non_bool=NonBoolObj"236237238def test_serialization_with_pydantic() -> None:239    class MyModel(BaseModel):240        x: int241        y: str242243    my_model = MyModel(x=1, y="hello")244    llm_response = ChatGeneration(245        message=AIMessage(246            content='{"x": 1, "y": "hello"}', additional_kwargs={"parsed": my_model}247        )248    )249    ser = dumpd(llm_response)250    deser = load(ser, allowed_objects=[ChatGeneration, AIMessage])251    assert isinstance(deser, ChatGeneration)252    assert deser.message.content253    assert deser.message.additional_kwargs["parsed"] == my_model.model_dump()254255256def test_serialization_with_generation() -> None:257    generation = Generation(text="hello-world")258    assert dumpd(generation)["kwargs"] == {"text": "hello-world", "type": "Generation"}259260261def test_serialization_with_ignore_unserializable_fields() -> None:262    data = {263        "messages": [264            [265                {266                    "lc": 1,267                    "type": "constructor",268                    "id": ["langchain", "schema", "messages", "AIMessage"],269                    "kwargs": {270                        "content": "Call tools to get entity details",271                        "response_metadata": {272                            "other_field": "foo",273                            "create_date": {274                                "lc": 1,275                                "type": "not_implemented",276                                "id": ["datetime", "datetime"],277                                "repr": "datetime.datetime(2025, 7, 15, 13, 14, 0, 000000, tzinfo=datetime.timezone.utc)",  # noqa: E501278                            },279                        },280                        "type": "ai",281                        "id": "00000000-0000-0000-0000-000000000000",282                    },283                },284            ]285        ]286    }287    # Load directly (no dumpd - this is already serialized data)288    deser = load(data, allowed_objects=[AIMessage], ignore_unserializable_fields=True)289    assert deser == {290        "messages": [291            [292                AIMessage(293                    id="00000000-0000-0000-0000-000000000000",294                    content="Call tools to get entity details",295                    response_metadata={296                        "other_field": "foo",297                        "create_date": None,298                    },299                )300            ]301        ]302    }303304305# Tests for dumps() function306def test_dumps_basic_serialization() -> None:307    """Test basic string serialization with `dumps()`."""308    foo = Foo(bar=42, baz="test")309    json_str = dumps(foo)310311    # Should be valid JSON312    parsed = json.loads(json_str)313    assert parsed == {314        "id": ["tests", "unit_tests", "load", "test_serializable", "Foo"],315        "kwargs": {"bar": 42, "baz": "test"},316        "lc": 1,317        "type": "constructor",318    }319320321def test_dumps_pretty_formatting() -> None:322    """Test pretty printing functionality."""323    foo = Foo(bar=1, baz="hello")324325    # Test pretty=True with default indent326    pretty_json = dumps(foo, pretty=True)327    assert "  " in pretty_json328329    # Test custom indent (4-space)330    custom_indent = dumps(foo, pretty=True, indent=4)331    assert "    " in custom_indent332333    # Verify it's still valid JSON334    parsed = json.loads(pretty_json)335    assert parsed["kwargs"]["bar"] == 1336337338def test_dumps_invalid_default_kwarg() -> None:339    """Test that passing `'default'` as kwarg raises ValueError."""340    foo = Foo(bar=1, baz="test")341342    with pytest.raises(ValueError, match="`default` should not be passed to dumps"):343        dumps(foo, default=lambda x: x)344345346def test_dumps_additional_json_kwargs() -> None:347    """Test that additional JSON kwargs are passed through."""348    foo = Foo(bar=1, baz="test")349350    compact_json = dumps(foo, separators=(",", ":"))351    assert ", " not in compact_json  # Should be compact352353    # Test sort_keys354    sorted_json = dumps(foo, sort_keys=True)355    parsed = json.loads(sorted_json)356    assert parsed == dumpd(foo)357358359def test_dumps_non_serializable_object() -> None:360    """Test `dumps()` behavior with non-serializable objects."""361362    class NonSerializable:363        def __init__(self, value: int) -> None:364            self.value = value365366    obj = NonSerializable(42)367    json_str = dumps(obj)368369    # Should create a "not_implemented" representation370    parsed = json.loads(json_str)371    assert parsed["lc"] == 1372    assert parsed["type"] == "not_implemented"373    assert "NonSerializable" in parsed["repr"]374375376def test_dumps_mixed_data_structure() -> None:377    """Test `dumps()` with complex nested data structures."""378    data = {379        "serializable": Foo(bar=1, baz="test"),380        "list": [1, 2, {"nested": "value"}],381        "primitive": "string",382    }383384    json_str = dumps(data)385    parsed = json.loads(json_str)386387    # Serializable object should be properly serialized388    assert parsed["serializable"]["type"] == "constructor"389    # Primitives should remain unchanged390    assert parsed["list"] == [1, 2, {"nested": "value"}]391    assert parsed["primitive"] == "string"392393394def test_document_normal_metadata_allowed() -> None:395    """Test that `Document` metadata without `'lc'` key works fine."""396    doc = Document(397        page_content="Hello world",398        metadata={"source": "test.txt", "page": 1, "nested": {"key": "value"}},399    )400    serialized = dumpd(doc)401402    loaded = load(serialized, allowed_objects=[Document])403    assert loaded.page_content == "Hello world"404405    expected = {"source": "test.txt", "page": 1, "nested": {"key": "value"}}406    assert loaded.metadata == expected407408409class TestEscaping:410    """Tests that escape-based serialization prevents injection attacks.411412    When user data contains an `'lc'` key, it's escaped during serialization413    (wrapped in `{"__lc_escaped__": ...}`). During deserialization, escaped414    dicts are unwrapped and returned as plain dicts - NOT instantiated as415    LC objects.416    """417418    def test_document_metadata_with_lc_key_escaped(self) -> None:419        """Test that `Document` metadata with `'lc'` key round-trips as plain dict."""420        # User data that looks like an LC constructor - should be escaped, not executed421        suspicious_metadata = {"lc": 1, "type": "constructor", "id": ["some", "module"]}422        doc = Document(page_content="test", metadata=suspicious_metadata)423424        # Serialize - should escape the metadata425        serialized = dumpd(doc)426        assert serialized["kwargs"]["metadata"] == {427            "__lc_escaped__": suspicious_metadata428        }429430        # Deserialize - should restore original metadata as plain dict431        loaded = load(serialized, allowed_objects=[Document])432        assert loaded.metadata == suspicious_metadata  # Plain dict, not instantiated433434    def test_document_metadata_with_nested_lc_key_escaped(self) -> None:435        """Test that nested `'lc'` key in `Document` metadata is escaped."""436        suspicious_nested = {"lc": 1, "type": "constructor", "id": ["some", "module"]}437        doc = Document(page_content="test", metadata={"nested": suspicious_nested})438439        serialized = dumpd(doc)440        # The nested dict with 'lc' key should be escaped441        assert serialized["kwargs"]["metadata"]["nested"] == {442            "__lc_escaped__": suspicious_nested443        }444445        loaded = load(serialized, allowed_objects=[Document])446        assert loaded.metadata == {"nested": suspicious_nested}447448    def test_document_metadata_with_lc_key_in_list_escaped(self) -> None:449        """Test that `'lc'` key in list items within `Document` metadata is escaped."""450        suspicious_item = {"lc": 1, "type": "constructor", "id": ["some", "module"]}451        doc = Document(page_content="test", metadata={"items": [suspicious_item]})452453        serialized = dumpd(doc)454        assert serialized["kwargs"]["metadata"]["items"][0] == {455            "__lc_escaped__": suspicious_item456        }457458        loaded = load(serialized, allowed_objects=[Document])459        assert loaded.metadata == {"items": [suspicious_item]}460461    def test_malicious_payload_not_instantiated(self) -> None:462        """Test that malicious LC-like structures in user data are NOT instantiated."""463        # An attacker might craft a payload with a valid AIMessage structure in metadata464        malicious_data = {465            "lc": 1,466            "type": "constructor",467            "id": ["langchain", "schema", "document", "Document"],468            "kwargs": {469                "page_content": "test",470                "metadata": {471                    # This looks like a valid LC object but is in escaped form472                    "__lc_escaped__": {473                        "lc": 1,474                        "type": "constructor",475                        "id": ["langchain_core", "messages", "ai", "AIMessage"],476                        "kwargs": {"content": "injected message"},477                    }478                },479            },480        }481482        # Even though AIMessage is allowed, the metadata should remain as dict483        loaded = load(malicious_data, allowed_objects=[Document, AIMessage])484        assert loaded.page_content == "test"485        # The metadata is the original dict (unescaped), NOT an AIMessage instance486        assert loaded.metadata == {487            "lc": 1,488            "type": "constructor",489            "id": ["langchain_core", "messages", "ai", "AIMessage"],490            "kwargs": {"content": "injected message"},491        }492        assert not isinstance(loaded.metadata, AIMessage)493494    def test_message_additional_kwargs_with_lc_key_escaped(self) -> None:495        """Test that `AIMessage` `additional_kwargs` with `'lc'` is escaped."""496        suspicious_data = {"lc": 1, "type": "constructor", "id": ["x", "y"]}497        msg = AIMessage(498            content="Hello",499            additional_kwargs={"data": suspicious_data},500        )501502        serialized = dumpd(msg)503        assert serialized["kwargs"]["additional_kwargs"]["data"] == {504            "__lc_escaped__": suspicious_data505        }506507        loaded = load(serialized, allowed_objects=[AIMessage])508        assert loaded.additional_kwargs == {"data": suspicious_data}509510    def test_message_response_metadata_with_lc_key_escaped(self) -> None:511        """Test that `AIMessage` `response_metadata` with `'lc'` is escaped."""512        suspicious_data = {"lc": 1, "type": "constructor", "id": ["x", "y"]}513        msg = AIMessage(content="Hello", response_metadata=suspicious_data)514515        serialized = dumpd(msg)516        assert serialized["kwargs"]["response_metadata"] == {517            "__lc_escaped__": suspicious_data518        }519520        loaded = load(serialized, allowed_objects=[AIMessage])521        assert loaded.response_metadata == suspicious_data522523    def test_double_escape_handling(self) -> None:524        """Test that data containing escape key itself is properly handled."""525        # User data that contains our escape key526        data_with_escape_key = {"__lc_escaped__": "some_value"}527        doc = Document(page_content="test", metadata=data_with_escape_key)528529        serialized = dumpd(doc)530        # Should be double-escaped since it looks like an escaped dict531        assert serialized["kwargs"]["metadata"] == {532            "__lc_escaped__": {"__lc_escaped__": "some_value"}533        }534535        loaded = load(serialized, allowed_objects=[Document])536        assert loaded.metadata == {"__lc_escaped__": "some_value"}537538539class TestDumpdEscapesLcKeyInPlainDicts:540    """Tests that `dumpd()` escapes `'lc'` keys in plain dict kwargs."""541542    def test_normal_message_not_escaped(self) -> None:543        """Test that normal `AIMessage` without `'lc'` key is not escaped."""544        msg = AIMessage(545            content="Hello",546            additional_kwargs={"tool_calls": []},547            response_metadata={"model": "gpt-4"},548        )549        serialized = dumpd(msg)550        assert serialized["kwargs"]["content"] == "Hello"551        # No escape wrappers for normal data552        assert "__lc_escaped__" not in str(serialized)553554    def test_document_metadata_with_lc_key_escaped(self) -> None:555        """Test that `Document` with `'lc'` key in metadata is escaped."""556        doc = Document(557            page_content="test",558            metadata={"lc": 1, "type": "constructor"},559        )560561        serialized = dumpd(doc)562        # Should be escaped, not blocked563        assert serialized["kwargs"]["metadata"] == {564            "__lc_escaped__": {"lc": 1, "type": "constructor"}565        }566567    def test_document_metadata_with_nested_lc_key_escaped(self) -> None:568        """Test that `Document` with nested `'lc'` in metadata is escaped."""569        doc = Document(570            page_content="test",571            metadata={"nested": {"lc": 1}},572        )573574        serialized = dumpd(doc)575        assert serialized["kwargs"]["metadata"]["nested"] == {576            "__lc_escaped__": {"lc": 1}577        }578579    def test_message_additional_kwargs_with_lc_key_escaped(self) -> None:580        """Test `AIMessage` with `'lc'` in `additional_kwargs` is escaped."""581        msg = AIMessage(582            content="Hello",583            additional_kwargs={"malicious": {"lc": 1}},584        )585586        serialized = dumpd(msg)587        assert serialized["kwargs"]["additional_kwargs"]["malicious"] == {588            "__lc_escaped__": {"lc": 1}589        }590591    def test_message_response_metadata_with_lc_key_escaped(self) -> None:592        """Test `AIMessage` with `'lc'` in `response_metadata` is escaped."""593        msg = AIMessage(594            content="Hello",595            response_metadata={"lc": 1},596        )597598        serialized = dumpd(msg)599        assert serialized["kwargs"]["response_metadata"] == {600            "__lc_escaped__": {"lc": 1}601        }602603    def test_fake_secret_marker_in_metadata_is_escaped(self) -> None:604        """A free-form dict shaped like a secret marker must not bypass escaping.605606        Previously the shape check accepted any value for `id`, letting a607        constructor dict nested inside `id` reach the Reviver and get608        instantiated on the way back in.609        """610        poisoned_metadata = {611            "lc": 1,612            "type": "secret",613            "id": [614                {615                    "lc": 1,616                    "type": "constructor",617                    "id": ["langchain_core", "documents", "base", "Document"],618                    "kwargs": {"page_content": "injected"},619                }620            ],621        }622        doc = Document(page_content="hello", metadata=poisoned_metadata)623624        serialized = dumpd(doc)625        # The fake marker must be wrapped in `__lc_escaped__`, not passed626        # through as if it were a real secret.627        assert serialized["kwargs"]["metadata"] == {"__lc_escaped__": poisoned_metadata}628629        # And on round-trip, the nested constructor must not be instantiated:630        # the metadata comes back as plain data, even with the most permissive631        # allowlist.632        roundtripped = load(serialized, allowed_objects="all")633        assert isinstance(roundtripped, Document)634        assert roundtripped.metadata == poisoned_metadata635        assert isinstance(roundtripped.metadata["id"][0], dict)636637638class TestInitValidator:639    """Tests for `init_validator` on `load()` and `loads()`."""640641    def test_init_validator_allows_valid_kwargs(self) -> None:642        """Test that `init_validator` returning None allows deserialization."""643        msg = AIMessage(content="Hello")644        serialized = dumpd(msg)645646        def allow_all(_class_path: tuple[str, ...], _kwargs: dict[str, Any]) -> None:647            pass  # Allow all by doing nothing648649        loaded = load(serialized, allowed_objects=[AIMessage], init_validator=allow_all)650        assert loaded == msg651652    def test_init_validator_blocks_deserialization(self) -> None:653        """Test that `init_validator` can block deserialization by raising."""654        doc = Document(page_content="test", metadata={"source": "test.txt"})655        serialized = dumpd(doc)656657        def block_metadata(658            _class_path: tuple[str, ...], kwargs: dict[str, Any]659        ) -> None:660            if "metadata" in kwargs:661                msg = "Metadata not allowed"662                raise ValueError(msg)663664        with pytest.raises(ValueError, match="Metadata not allowed"):665            load(serialized, allowed_objects=[Document], init_validator=block_metadata)666667    def test_init_validator_receives_correct_class_path(self) -> None:668        """Test that `init_validator` receives the correct class path."""669        msg = AIMessage(content="Hello")670        serialized = dumpd(msg)671672        received_class_paths: list[tuple[str, ...]] = []673674        def capture_class_path(675            class_path: tuple[str, ...], _kwargs: dict[str, Any]676        ) -> None:677            received_class_paths.append(class_path)678679        load(serialized, allowed_objects=[AIMessage], init_validator=capture_class_path)680681        assert len(received_class_paths) == 1682        assert received_class_paths[0] == (683            "langchain",684            "schema",685            "messages",686            "AIMessage",687        )688689    def test_init_validator_receives_correct_kwargs(self) -> None:690        """Test that `init_validator` receives the kwargs dict."""691        msg = AIMessage(content="Hello world", name="test_name")692        serialized = dumpd(msg)693694        received_kwargs: list[dict[str, Any]] = []695696        def capture_kwargs(697            _class_path: tuple[str, ...], kwargs: dict[str, Any]698        ) -> None:699            received_kwargs.append(kwargs)700701        load(serialized, allowed_objects=[AIMessage], init_validator=capture_kwargs)702703        assert len(received_kwargs) == 1704        assert "content" in received_kwargs[0]705        assert received_kwargs[0]["content"] == "Hello world"706        assert "name" in received_kwargs[0]707        assert received_kwargs[0]["name"] == "test_name"708709    def test_init_validator_with_loads(self) -> None:710        """Test that `init_validator` works with `loads()` function."""711        doc = Document(page_content="test", metadata={"key": "value"})712        json_str = dumps(doc)713714        def block_metadata(715            _class_path: tuple[str, ...], kwargs: dict[str, Any]716        ) -> None:717            if "metadata" in kwargs:718                msg = "Metadata not allowed"719                raise ValueError(msg)720721        with pytest.raises(ValueError, match="Metadata not allowed"):722            loads(json_str, allowed_objects=[Document], init_validator=block_metadata)723724    def test_init_validator_none_allows_all(self) -> None:725        """Test that `init_validator=None` (default) allows all kwargs."""726        msg = AIMessage(content="Hello")727        serialized = dumpd(msg)728729        # Should work without init_validator730        loaded = load(serialized, allowed_objects=[AIMessage])731        assert loaded == msg732733    def test_init_validator_type_alias_exists(self) -> None:734        """Test that `InitValidator` type alias is exported and usable."""735736        def my_validator(_class_path: tuple[str, ...], _kwargs: dict[str, Any]) -> None:737            pass738739        validator_typed: InitValidator = my_validator740        assert callable(validator_typed)741742    def test_init_validator_blocks_specific_class(self) -> None:743        """Test blocking deserialization for a specific class."""744        doc = Document(page_content="test", metadata={"source": "test.txt"})745        serialized = dumpd(doc)746747        def block_documents(748            class_path: tuple[str, ...], _kwargs: dict[str, Any]749        ) -> None:750            if class_path == ("langchain", "schema", "document", "Document"):751                msg = "Documents not allowed"752                raise ValueError(msg)753754        with pytest.raises(ValueError, match="Documents not allowed"):755            load(serialized, allowed_objects=[Document], init_validator=block_documents)756757758class TestJinja2SecurityBlocking:759    """Tests blocking Jinja2 templates by default."""760761    def test_fstring_template_allowed(self) -> None:762        """Test that f-string templates deserialize successfully."""763        # Serialized ChatPromptTemplate with f-string format764        serialized = {765            "lc": 1,766            "type": "constructor",767            "id": ["langchain", "prompts", "chat", "ChatPromptTemplate"],768            "kwargs": {769                "input_variables": ["name"],770                "messages": [771                    {772                        "lc": 1,773                        "type": "constructor",774                        "id": [775                            "langchain",776                            "prompts",777                            "chat",778                            "HumanMessagePromptTemplate",779                        ],780                        "kwargs": {781                            "prompt": {782                                "lc": 1,783                                "type": "constructor",784                                "id": [785                                    "langchain",786                                    "prompts",787                                    "prompt",788                                    "PromptTemplate",789                                ],790                                "kwargs": {791                                    "input_variables": ["name"],792                                    "template": "Hello {name}",793                                    "template_format": "f-string",794                                },795                            }796                        },797                    }798                ],799            },800        }801802        # f-string should deserialize successfully803        loaded = load(804            serialized,805            allowed_objects=[806                ChatPromptTemplate,807                HumanMessagePromptTemplate,808                PromptTemplate,809            ],810        )811        assert isinstance(loaded, ChatPromptTemplate)812        assert loaded.input_variables == ["name"]813814    def test_jinja2_template_blocked(self) -> None:815        """Test that Jinja2 templates are blocked by default."""816        # Malicious serialized payload attempting to use jinja2817        malicious_serialized = {818            "lc": 1,819            "type": "constructor",820            "id": ["langchain", "prompts", "chat", "ChatPromptTemplate"],821            "kwargs": {822                "input_variables": ["name"],823                "messages": [824                    {825                        "lc": 1,826                        "type": "constructor",827                        "id": [828                            "langchain",829                            "prompts",830                            "chat",831                            "HumanMessagePromptTemplate",832                        ],833                        "kwargs": {834                            "prompt": {835                                "lc": 1,836                                "type": "constructor",837                                "id": [838                                    "langchain",839                                    "prompts",840                                    "prompt",841                                    "PromptTemplate",842                                ],843                                "kwargs": {844                                    "input_variables": ["name"],845                                    "template": "{{ name }}",846                                    "template_format": "jinja2",847                                },848                            }849                        },850                    }851                ],852            },853        }854855        # jinja2 should be blocked by default856        with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):857            load(858                malicious_serialized,859                allowed_objects=[860                    ChatPromptTemplate,861                    HumanMessagePromptTemplate,862                    PromptTemplate,863                ],864            )865866    def test_jinja2_blocked_standalone_prompt_template(self) -> None:867        """Test blocking Jinja2 on standalone `PromptTemplate`."""868        serialized_jinja2 = {869            "lc": 1,870            "type": "constructor",871            "id": ["langchain", "prompts", "prompt", "PromptTemplate"],872            "kwargs": {873                "input_variables": ["name"],874                "template": "{{ name }}",875                "template_format": "jinja2",876            },877        }878879        serialized_fstring = {880            "lc": 1,881            "type": "constructor",882            "id": ["langchain", "prompts", "prompt", "PromptTemplate"],883            "kwargs": {884                "input_variables": ["name"],885                "template": "{name}",886                "template_format": "f-string",887            },888        }889890        # f-string should work891        loaded = load(892            serialized_fstring,893            allowed_objects=[PromptTemplate],894        )895        assert isinstance(loaded, PromptTemplate)896        assert loaded.template == "{name}"897898        # jinja2 should be blocked by default899        with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):900            load(901                serialized_jinja2,902                allowed_objects=[PromptTemplate],903            )904905    def test_jinja2_blocked_by_default(self) -> None:906        """Test that Jinja2 templates are blocked by default."""907        serialized_jinja2 = {908            "lc": 1,909            "type": "constructor",910            "id": ["langchain", "prompts", "prompt", "PromptTemplate"],911            "kwargs": {912                "input_variables": ["name"],913                "template": "{{ name }}",914                "template_format": "jinja2",915            },916        }917918        serialized_fstring = {919            "lc": 1,920            "type": "constructor",921            "id": ["langchain", "prompts", "prompt", "PromptTemplate"],922            "kwargs": {923                "input_variables": ["name"],924                "template": "{name}",925                "template_format": "f-string",926            },927        }928929        # f-string should work930        loaded = load(serialized_fstring, allowed_objects=[PromptTemplate])931        assert isinstance(loaded, PromptTemplate)932        assert loaded.template == "{name}"933934        # jinja2 should be blocked by default935        with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):936            load(serialized_jinja2, allowed_objects=[PromptTemplate])937938939class TestInitValidatorInLoad:940    """Tests that load() properly integrates with the init_validator."""941942    def test_init_validator_called(self) -> None:943        """Test init_validator fires during deserialization."""944        msg = AIMessage(content="test")945        serialized = dumpd(msg)946947        init_validator_called = []948949        def custom_init_validator(950            _class_path: tuple[str, ...], _kwargs: dict[str, Any]951        ) -> None:952            init_validator_called.append(True)953954        loaded = load(955            serialized,956            allowed_objects=[AIMessage],957            init_validator=custom_init_validator,958        )959        assert loaded == msg960        assert len(init_validator_called) == 1961962963class TestMessagesAllowlistTier:964    """Tests for the 'messages' allowlist tier."""965966    def test_messages_tier_contains_expected_types(self) -> None:967        expected = {968            "AIMessage",969            "AIMessageChunk",970            "HumanMessage",971            "HumanMessageChunk",972            "SystemMessage",973            "SystemMessageChunk",974            "ToolMessage",975            "ToolMessageChunk",976            "RemoveMessage",977        }978        paths = _get_default_allowed_class_paths("messages")979        actual = {t[-1] for t in paths}980        assert expected.issubset(actual), f"Missing: {expected - actual}"981982    def test_messages_tier_excludes_legacy_and_abstract_types(self) -> None:983        legacy = {984            "BaseMessage",985            "BaseMessageChunk",986            "ChatMessage",987            "ChatMessageChunk",988            "FunctionMessage",989            "FunctionMessageChunk",990        }991        paths = _get_default_allowed_class_paths("messages")992        actual = {t[-1] for t in paths}993        overlap = legacy & actual994        assert not overlap, f"Legacy/abstract message types in tier: {overlap}"995996    def test_messages_tier_excludes_non_message_types(self) -> None:997        non_messages = {998            "Document",999            "Generation",1000            "ChatGeneration",1001            "GenerationChunk",1002            "ChatGenerationChunk",1003            "PromptValue",1004            "StringPromptValue",1005            "ChatPromptValue",1006            "AgentAction",1007            "AgentActionMessageLog",1008            "AgentFinish",1009        }1010        paths = _get_default_allowed_class_paths("messages")1011        actual = {t[-1] for t in paths}1012        overlap = non_messages & actual1013        assert not overlap, f"Non-message types in messages tier: {overlap}"10141015    def test_messages_tier_excludes_dangerous_types(self) -> None:1016        dangerous = {1017            "ChatOpenAI",1018            "ChatAnthropic",1019            "OpenAI",1020            "PromptTemplate",1021            "ChatPromptTemplate",1022            "FewShotPromptWithTemplates",1023            "RunnableBinding",1024            "RunnableBranch",1025            "RunnableParallel",1026            "RunnableConfigurableFields",1027            "RunnableConfigurableAlternatives",1028            "DynamicRunnable",1029            "HubRunnable",1030            "OutputFixingParser",1031        }1032        paths = _get_default_allowed_class_paths("messages")1033        actual = {t[-1] for t in paths}1034        overlap = dangerous & actual1035        assert not overlap, f"Dangerous types in messages tier: {overlap}"10361037    def test_messages_tier_load_allows_message(self) -> None:1038        serialized = {1039            "lc": 1,1040            "type": "constructor",1041            "id": ["langchain", "schema", "messages", "AIMessage"],1042            "kwargs": {"content": "hello"},1043        }1044        loaded = load(serialized, allowed_objects="messages")1045        assert isinstance(loaded, AIMessage)1046        assert loaded.content == "hello"10471048    def test_messages_tier_load_blocks_prompt_template(self) -> None:1049        serialized = {1050            "lc": 1,1051            "type": "constructor",1052            "id": ["langchain", "prompts", "prompt", "PromptTemplate"],1053            "kwargs": {1054                "input_variables": ["name"],1055                "template": "{name}",1056                "template_format": "f-string",1057            },1058        }1059        with pytest.raises(ValueError, match="not allowed"):1060            load(serialized, allowed_objects="messages")10611062    def test_messages_tier_load_blocks_chat_model(self) -> None:1063        serialized = {1064            "lc": 1,1065            "type": "constructor",1066            "id": ["langchain", "chat_models", "openai", "ChatOpenAI"],1067            "kwargs": {"model": "gpt-4"},1068        }1069        with pytest.raises(ValueError, match="not allowed"):1070            load(serialized, allowed_objects="messages")107110721073class TestAllowedObjectsDeprecation:1074    """Tests for the pending-default warning emitted when `allowed_objects` is unset."""10751076    def test_unset_default_emits_pending_warning(self) -> None:1077        """load() with no allowed_objects emits pending deprecation warning."""1078        serialized = {1079            "lc": 1,1080            "type": "constructor",1081            "id": ["langchain", "schema", "messages", "AIMessage"],1082            "kwargs": {"content": "hello"},1083        }1084        with warnings.catch_warnings(record=True) as w:1085            warnings.simplefilter("always")1086            loaded = load(serialized)1087            dep_warnings = [1088                x1089                for x in w1090                if issubclass(1091                    x.category,1092                    (1093                        LangChainDeprecationWarning,1094                        LangChainPendingDeprecationWarning,1095                    ),1096                )1097            ]1098            assert len(dep_warnings) >= 11099            assert "allowed_objects" in str(dep_warnings[0].message)1100        assert isinstance(loaded, AIMessage)11011102    def test_explicit_core_no_warning(self) -> None:1103        """load() with explicit allowed_objects='core' does NOT warn."""1104        serialized = {1105            "lc": 1,1106            "type": "constructor",1107            "id": ["langchain", "schema", "messages", "AIMessage"],1108            "kwargs": {"content": "hello"},1109        }1110        with warnings.catch_warnings(record=True) as w:1111            warnings.simplefilter("always")1112            load(serialized, allowed_objects="core")1113            dep_warnings = [1114                x1115                for x in w1116                if issubclass(1117                    x.category,1118                    (1119                        LangChainDeprecationWarning,1120                        LangChainPendingDeprecationWarning,1121                    ),1122                )1123            ]1124            assert len(dep_warnings) == 011251126    def test_explicit_messages_no_deprecation_warning(self) -> None:1127        serialized = {1128            "lc": 1,1129            "type": "constructor",1130            "id": ["langchain", "schema", "messages", "AIMessage"],1131            "kwargs": {"content": "hello"},1132        }1133        with warnings.catch_warnings(record=True) as w:1134            warnings.simplefilter("always")1135            load(serialized, allowed_objects="messages")1136            dep_warnings = [1137                x for x in w if issubclass(x.category, LangChainDeprecationWarning)1138            ]1139            assert len(dep_warnings) == 011401141    def test_explicit_list_no_deprecation_warning(self) -> None:1142        serialized = {1143            "lc": 1,1144            "type": "constructor",1145            "id": ["langchain", "schema", "messages", "AIMessage"],1146            "kwargs": {"content": "hello"},1147        }1148        with warnings.catch_warnings(record=True) as w:1149            warnings.simplefilter("always")1150            load(serialized, allowed_objects=[AIMessage])1151            dep_warnings = [1152                x for x in w if issubclass(x.category, LangChainDeprecationWarning)1153            ]1154            assert len(dep_warnings) == 0115511561157class TestInternalCallSitesUseMessages:1158    """Tests that internal call sites use 'messages' tier, not 'all'."""11591160    def test_history_py_does_not_use_all(self) -> None:1161        source = inspect.getsource(RunnableWithMessageHistory)1162        assert 'allowed_objects="all"' not in source1163        assert (1164            'allowed_objects="messages"' in source1165            or "allowed_objects='messages'" in source1166        )11671168    def test_log_stream_does_not_use_all(self) -> None:1169        source = inspect.getsource(log_stream)1170        assert 'allowed_objects="all"' not in source1171        assert (1172            'allowed_objects="messages"' in source1173            or "allowed_objects='messages'" in source1174        )

Code quality findings 46

Ensure functions have docstrings for documentation
missing-docstring
def test_simple_serialization() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_simple_serialization_is_serializable() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def is_lc_serializable(cls) -> bool:
Ensure functions have docstrings for documentation
missing-docstring
def is_lc_serializable(cls) -> bool:
Ensure functions have docstrings for documentation
missing-docstring
def lc_secrets(self) -> dict[str, str]:
Ensure functions have docstrings for documentation
missing-docstring
def test__is_field_useful() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def is_lc_serializable(cls) -> bool:
Ensure functions have docstrings for documentation
missing-docstring
def test_simple_deserialization() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_disallowed_deserialization() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def is_lc_serializable(cls) -> bool:
Ensure functions have docstrings for documentation
missing-docstring
def test_simple_deserialization_with_additional_imports() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(new_foo, Foo2)
Ensure functions have docstrings for documentation
missing-docstring
def is_lc_serializable(cls) -> bool:
Ensure functions have docstrings for documentation
missing-docstring
def test_repr() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_str() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_serialization_with_pydantic() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(deser, ChatGeneration)
Ensure functions have docstrings for documentation
missing-docstring
def test_serialization_with_generation() -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_serialization_with_ignore_unserializable_fields() -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert not isinstance(loaded.metadata, AIMessage)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(roundtripped, Document)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(roundtripped.metadata["id"][0], dict)
Ensure functions have docstrings for documentation
missing-docstring
def allow_all(_class_path: tuple[str, ...], _kwargs: dict[str, Any]) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def block_metadata(
Ensure functions have docstrings for documentation
missing-docstring
def capture_class_path(
Ensure functions have docstrings for documentation
missing-docstring
def capture_kwargs(
Ensure functions have docstrings for documentation
missing-docstring
def block_metadata(
Ensure functions have docstrings for documentation
missing-docstring
def my_validator(_class_path: tuple[str, ...], _kwargs: dict[str, Any]) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def block_documents(
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(loaded, ChatPromptTemplate)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(loaded, PromptTemplate)
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(loaded, PromptTemplate)
Ensure functions have docstrings for documentation
missing-docstring
def custom_init_validator(
Ensure functions have docstrings for documentation
missing-docstring
def test_messages_tier_contains_expected_types(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_messages_tier_excludes_legacy_and_abstract_types(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_messages_tier_excludes_non_message_types(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_messages_tier_excludes_dangerous_types(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_messages_tier_load_allows_message(self) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(loaded, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_messages_tier_load_blocks_prompt_template(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_messages_tier_load_blocks_chat_model(self) -> None:
Overuse may indicate design issues; consider polymorphism
isinstance-overuse
assert isinstance(loaded, AIMessage)
Ensure functions have docstrings for documentation
missing-docstring
def test_explicit_messages_no_deprecation_warning(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_explicit_list_no_deprecation_warning(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_history_py_does_not_use_all(self) -> None:
Ensure functions have docstrings for documentation
missing-docstring
def test_log_stream_does_not_use_all(self) -> None:

Get this view in your editor

Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.