libs/core/langchain_core/caches.py · langchain-ai/langchain

"""Optional caching layer for language models.

Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).

!!! warning "Beta feature"

    This is a beta feature. Please be wary of deploying experimental code to production
    unless you've taken appropriate precautions.

A cache is useful for two reasons:

1. It can save you money by reducing the number of API calls you make to the LLM
    provider if you're often requesting the same completion multiple times.
2. It can speed up your application by reducing the number of API calls you make to the
    LLM provider.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from collections.abc import Sequence
from typing import Any

from typing_extensions import override

from langchain_core.outputs import Generation
from langchain_core.runnables import run_in_executor

RETURN_VAL_TYPE = Sequence[Generation]


class BaseCache(ABC):
    """Interface for a caching layer for LLMs and Chat models.

    The cache interface consists of the following methods:

    - lookup: Look up a value based on a prompt and `llm_string`.
    - update: Update the cache based on a prompt and `llm_string`.
    - clear: Clear the cache.

    In addition, the cache interface provides an async version of each method.

    The default implementation of the async methods is to run the synchronous
    method in an executor. It's recommended to override the async methods
    and provide async implementations to avoid unnecessary overhead.
    """

    @abstractmethod
    def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
        """Look up based on `prompt` and `llm_string`.

        A cache implementation is expected to generate a key from the 2-tuple
        of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).

                These invocation parameters are serialized into a string representation.

        Returns:
            On a cache miss, return `None`. On a cache hit, return the cached value.
                The cached value is a list of `Generation` (or subclasses).
        """

    @abstractmethod
    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on `prompt` and `llm_string`.

        The `prompt` and `llm_string` are used to generate a key for the cache. The key
        should match that of the lookup method.

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).

                These invocation parameters are serialized into a string
                representation.
            return_val: The value to be cached.

                The value is a list of `Generation` (or subclasses).
        """

    @abstractmethod
    def clear(self, **kwargs: Any) -> None:
        """Clear cache that can take additional keyword arguments."""

    async def alookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
        """Async look up based on `prompt` and `llm_string`.

        A cache implementation is expected to generate a key from the 2-tuple
        of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).

                These invocation parameters are serialized into a string
                representation.

        Returns:
            On a cache miss, return `None`. On a cache hit, return the cached value.
                The cached value is a list of `Generation` (or subclasses).
        """
        # Default: delegate to the synchronous implementation via an executor.
        return await run_in_executor(None, self.lookup, prompt, llm_string)

    async def aupdate(
        self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
    ) -> None:
        """Async update cache based on `prompt` and `llm_string`.

        The prompt and llm_string are used to generate a key for the cache.
        The key should match that of the look up method.

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).

                These invocation parameters are serialized into a string
                representation.
            return_val: The value to be cached. The value is a list of `Generation`
                (or subclasses).
        """
        # Default: delegate to the synchronous implementation via an executor.
        return await run_in_executor(None, self.update, prompt, llm_string, return_val)

    async def aclear(self, **kwargs: Any) -> None:
        """Async clear cache that can take additional keyword arguments."""
        # Default: delegate to the synchronous implementation via an executor.
        return await run_in_executor(None, self.clear, **kwargs)


class InMemoryCache(BaseCache):
    """Cache that stores things in memory.

    Entries are keyed by the `(prompt, llm_string)` tuple. When `maxsize` is set,
    the entry that was inserted first is evicted to make room for a new key.

    Example:
        ```python
        from langchain_core.caches import InMemoryCache
        from langchain_core.outputs import Generation

        # Initialize cache
        cache = InMemoryCache()

        # Update cache
        cache.update(
            prompt="What is the capital of France?",
            llm_string="model='gpt-5.4-mini'",
            return_val=[Generation(text="Paris")],
        )

        # Lookup cache
        result = cache.lookup(
            prompt="What is the capital of France?",
            llm_string="model='gpt-5.4-mini'",
        )
        # result is [Generation(text="Paris")]
        ```
    """

    def __init__(self, *, maxsize: int | None = None) -> None:
        """Initialize with empty cache.

        Args:
            maxsize: The maximum number of items to store in the cache.

                If `None`, the cache has no maximum size.

                If the cache exceeds the maximum size, the oldest items are removed.

        Raises:
            ValueError: If `maxsize` is less than or equal to `0`.
        """
        self._cache: dict[tuple[str, str], RETURN_VAL_TYPE] = {}
        if maxsize is not None and maxsize <= 0:
            msg = "maxsize must be greater than 0"
            raise ValueError(msg)
        self._maxsize = maxsize

    def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
        """Look up based on `prompt` and `llm_string`.

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

        Returns:
            On a cache miss, return `None`. On a cache hit, return the cached value.
        """
        return self._cache.get((prompt, llm_string), None)

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on `prompt` and `llm_string`.

        If the cache is full and the key is new, the oldest entry (by insertion
        order) is evicted first. Overwriting an existing key never evicts another
        entry, because the number of stored items does not change.

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
            return_val: The value to be cached.

                The value is a list of `Generation` (or subclasses).
        """
        key = (prompt, llm_string)
        if (
            self._maxsize is not None
            and key not in self._cache
            and len(self._cache) >= self._maxsize
        ):
            # Dicts preserve insertion order, so the first key is the oldest one.
            del self._cache[next(iter(self._cache))]
        self._cache[key] = return_val

    @override
    def clear(self, **kwargs: Any) -> None:
        """Clear cache."""
        self._cache = {}

    async def alookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
        """Async look up based on `prompt` and `llm_string`.

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

        Returns:
            On a cache miss, return `None`. On a cache hit, return the cached value.
        """
        # The in-memory lookup does not block, so call it directly (no executor).
        return self.lookup(prompt, llm_string)

    async def aupdate(
        self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE
    ) -> None:
        """Async update cache based on `prompt` and `llm_string`.

        Args:
            prompt: A string representation of the prompt.

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.
            return_val: The value to be cached. The value is a list of `Generation`
                (or subclasses).
        """
        # The in-memory update does not block, so call it directly (no executor).
        self.update(prompt, llm_string, return_val)

    @override
    async def aclear(self, **kwargs: Any) -> None:
        """Async clear cache."""
        self.clear()
Code quality findings 3

Ensure functions have docstrings for documentation
L125
missing-docstring
async def aupdate(
Avoid unless necessary; Python's garbage collector typically handles object deletion
L230
unnecessary-del
del self._cache[next(iter(self._cache))]
Ensure functions have docstrings for documentation
L253
missing-docstring
async def aupdate(
Code quality findings 3

Get this view in your editor