# Review note: overuse of `isinstance` checks may indicate design issues; consider
# polymorphism. Flagged statement: `if isinstance(model, str):`
"""Summarization middleware."""

import uuid
import warnings
from collections.abc import Callable, Iterable, Mapping
from functools import partial
from typing import Any, Literal, TypedDict, cast

from langchain_core.messages import (
    AIMessage,
    AnyMessage,
    MessageLikeRepresentation,
    RemoveMessage,
    ToolMessage,
)
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.utils import (
    count_tokens_approximately,
    get_buffer_string,
    trim_messages,
)
from langgraph.graph.message import (
    REMOVE_ALL_MESSAGES,
)
from langgraph.runtime import Runtime
from typing_extensions import override

from langchain.agents.middleware.types import AgentMiddleware, AgentState, ContextT, ResponseT
from langchain.chat_models import BaseChatModel, init_chat_model

TokenCounter = Callable[[Iterable[MessageLikeRepresentation]], int]

DEFAULT_SUMMARY_PROMPT = """<role>
Context Extraction Assistant
</role>

<primary_objective>
Your sole objective in this task is to extract the highest quality/most relevant context from the conversation history below.
</primary_objective>

<objective_information>
You're nearing the total number of input tokens you can accept, so you must extract the highest quality/most relevant pieces of information from your conversation history.
This context will then overwrite the conversation history presented below. Because of this, ensure the context you extract is only the most important information to continue working toward your overall goal.
</objective_information>

<instructions>
The conversation history below will be replaced with the context you extract in this step.
You want to ensure that you don't repeat any actions you've already completed, so the context you extract from the conversation history should be focused on the most important information to your overall goal.

You should structure your summary using the following sections. Each section acts as a checklist - you must populate it with relevant information or explicitly state "None" if there is nothing to report for that section:

## SESSION INTENT

What is the user's primary goal or request? What overall task are you trying to accomplish? This should be concise but complete enough to understand the purpose of the entire session.

## SUMMARY

Extract and record all of the most important context from the conversation history. Include important choices, conclusions, or strategies determined during this conversation. Include the reasoning behind key decisions. Document any rejected options and why they were not pursued.

## ARTIFACTS

What artifacts, files, or resources were created, modified, or accessed during this conversation? For file modifications, list specific file paths and briefly describe the changes made to each. This section prevents silent loss of artifact information.

## NEXT STEPS

What specific tasks remain to be completed to achieve the session intent? What should you do next?

</instructions>

The user will message you with the full message history from which you'll extract context to create a replacement. Carefully read through it all and think deeply about what information is most important to your overall goal and should be saved:

With all of this in mind, please carefully read over the entire conversation history, and extract the most important and relevant context to replace it so that you can free up space in the conversation history.
Respond ONLY with the extracted context. Do not include any additional information, or text before or after the extracted context.

<messages>
Messages to summarize:
{messages}
</messages>"""  # noqa: E501

_DEFAULT_MESSAGES_TO_KEEP = 20
_DEFAULT_TRIM_TOKEN_LIMIT = 4000
_DEFAULT_FALLBACK_MESSAGE_COUNT = 15

# Some providers tag emitted messages with a `model_provider` string that differs from
# their LangSmith `ls_provider`. The reported-token check below compares the two, so we
# accept known aliases per `ls_provider`.
_LS_PROVIDER_ALIASES: dict[str, frozenset[str]] = {
    "amazon_bedrock": frozenset({"bedrock", "bedrock_converse"}),
}


def _provider_matches(message_provider: str, model_ls_provider: str | None) -> bool:
    """Return whether a message's provider tag refers to the given model provider.

    Args:
        message_provider: The `model_provider` string found on a message.
        model_ls_provider: The model's `ls_provider` value, or `None` if unknown.

    Returns:
        `True` when the two strings are equal or when `message_provider` is a
        registered alias of `model_ls_provider` in `_LS_PROVIDER_ALIASES`; `False`
        otherwise (including when `model_ls_provider` is `None`).
    """
    if model_ls_provider is None:
        return False
    if message_provider == model_ls_provider:
        return True
    aliases = _LS_PROVIDER_ALIASES.get(model_ls_provider)
    return aliases is not None and message_provider in aliases


ContextFraction = tuple[Literal["fraction"], float]
"""Fraction of model's maximum input tokens.

Example:
    To specify 50% of the model's max input tokens:

    ```python
    ("fraction", 0.5)
    ```
"""

ContextTokens = tuple[Literal["tokens"], int]
"""Absolute number of tokens.

Example:
    To specify 3000 tokens:

    ```python
    ("tokens", 3000)
    ```
"""

ContextMessages = tuple[Literal["messages"], int]
"""Absolute number of messages.

Example:
    To specify 50 messages:

    ```python
    ("messages", 50)
    ```
"""

ContextSize = ContextFraction | ContextTokens | ContextMessages
"""Union type for context size specifications.

Can be either:

- [`ContextFraction`][langchain.agents.middleware.summarization.ContextFraction]: A
    fraction of the model's maximum input tokens.
- [`ContextTokens`][langchain.agents.middleware.summarization.ContextTokens]: An absolute
    number of tokens.
- [`ContextMessages`][langchain.agents.middleware.summarization.ContextMessages]: An
    absolute number of messages.

Depending on use with `trigger` or `keep` parameters, this type indicates either
when to trigger summarization or how much context to retain.

Example:
    ```python
    # ContextFraction
    context_size: ContextSize = ("fraction", 0.5)

    # ContextTokens
    context_size: ContextSize = ("tokens", 3000)

    # ContextMessages
    context_size: ContextSize = ("messages", 50)
    ```
"""


class TriggerClause(TypedDict, total=False):
    """Dictionary-based trigger specification for AND conditions.

    All specified thresholds in a single `TriggerClause` must be met for the clause to
    trigger summarization (AND semantics). When multiple clauses are provided in a list,
    summarization triggers if any clause is met (OR semantics).

    Example:
        ```python
        # AND: Trigger when tokens >= 4000 AND messages >= 10
        trigger_clause: TriggerClause = {"tokens": 4000, "messages": 10}

        # Use in a list for OR semantics:
        trigger_list: list[TriggerClause] = [
            {"tokens": 5000, "messages": 3},
            {"tokens": 3000, "messages": 6},
        ]
        ```
    """

    tokens: int
    """Trigger when the computed (or provider-reported) token count reaches or
    exceeds this value.
    """

    messages: int
    """Trigger when message count reaches or exceeds this value."""

    fraction: float
    """Trigger when the computed (or provider-reported) token count reaches or
    exceeds this fraction of the model's maximum input tokens.
    """


def _get_approximate_token_counter(model: BaseChatModel) -> TokenCounter:
    """Tune parameters of approximate token counter based on model type."""
    if model._llm_type.startswith("anthropic-chat"):  # noqa: SLF001
        # 3.3 was estimated in an offline experiment, comparing with Claude's token-counting
        # API: https://platform.claude.com/docs/en/build-with-claude/token-counting
        return partial(
            count_tokens_approximately, use_usage_metadata_scaling=True, chars_per_token=3.3
        )
    return partial(count_tokens_approximately, use_usage_metadata_scaling=True)


class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, ResponseT]):
    """Summarizes conversation history when token limits are approached.

    This middleware monitors message token counts and automatically summarizes older
    messages when a threshold is reached, preserving recent messages and maintaining
    context continuity by ensuring AI/Tool message pairs remain together.
    """

    def __init__(
        self,
        model: str | BaseChatModel,
        *,
        trigger: (ContextSize | TriggerClause | list[ContextSize | TriggerClause] | None) = None,
        keep: ContextSize = ("messages", _DEFAULT_MESSAGES_TO_KEEP),
        token_counter: TokenCounter = count_tokens_approximately,
        summary_prompt: str = DEFAULT_SUMMARY_PROMPT,
        trim_tokens_to_summarize: int | None = _DEFAULT_TRIM_TOKEN_LIMIT,
        **deprecated_kwargs: Any,
    ) -> None:
        """Initialize summarization middleware.

        Args:
            model: The language model to use for generating summaries.
            trigger: One or more thresholds that trigger summarization.

                Provide a single
                [`ContextSize`][langchain.agents.middleware.summarization.ContextSize]
                tuple, or a single
                [`TriggerClause`][langchain.agents.middleware.summarization.TriggerClause]
                dict, or a list mixing either form.

                A `ContextSize` tuple expresses one threshold. A `TriggerClause` dict
                expresses multiple thresholds that must *all* be met (AND). When a list is
                provided, summarization runs if *any* item is met (OR).

                !!! example

                    ```python
                    # Trigger summarization when 50 messages is reached
                    ("messages", 50)

                    # Trigger summarization when 3000 tokens is reached
                    ("tokens", 3000)

                    # Trigger summarization either when 80% of model's max input tokens
                    # is reached or when 100 messages is reached (whichever comes first)
                    [("fraction", 0.8), ("messages", 100)]

                    # Trigger when tokens >= 4000 AND messages >= 10
                    {"tokens": 4000, "messages": 10}

                    # Trigger when (tokens >= 5000 AND messages >= 3) OR
                    # (tokens >= 3000 AND messages >= 6)
                    [{"tokens": 5000, "messages": 3}, {"tokens": 3000, "messages": 6}]
                    ```

                See [`ContextSize`][langchain.agents.middleware.summarization.ContextSize]
                for more details.
            keep: Context retention policy applied after summarization.

                Provide a [`ContextSize`][langchain.agents.middleware.summarization.ContextSize]
                tuple to specify how much history to preserve.

                Defaults to keeping the most recent `20` messages.

                Does not support multiple values like `trigger`.

                !!! example

                    ```python
                    # Keep the most recent 20 messages
                    ("messages", 20)

                    # Keep the most recent 3000 tokens
                    ("tokens", 3000)

                    # Keep the most recent 30% of the model's max input tokens
                    ("fraction", 0.3)
                    ```
            token_counter: Function to count tokens in messages.
            summary_prompt: Prompt template for generating summaries.
            trim_tokens_to_summarize: Maximum tokens to keep when preparing messages for
                the summarization call.

                Pass `None` to skip trimming entirely.
        """
        # Handle deprecated parameters
        if "max_tokens_before_summary" in deprecated_kwargs:
            value = deprecated_kwargs["max_tokens_before_summary"]
            warnings.warn(
                "max_tokens_before_summary is deprecated. Use trigger=('tokens', value) instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            if trigger is None and value is not None:
                trigger = ("tokens", value)

        if "messages_to_keep" in deprecated_kwargs:
            value = deprecated_kwargs["messages_to_keep"]
            warnings.warn(
                "messages_to_keep is deprecated. Use keep=('messages', value) instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            if keep == ("messages", _DEFAULT_MESSAGES_TO_KEEP):
                keep = ("messages", value)

        super().__init__()

        if isinstance(model, str):
            model = init_chat_model(model)

        self.model = model

        self.trigger: ContextSize | TriggerClause | list[ContextSize | TriggerClause] | None = (
            self._copy_trigger(trigger)
        )

        # Canonical trigger representation: AND within a clause, OR across clauses.
        self._trigger_clauses = self._normalize_trigger(self.trigger)
        # Legacy compatibility view for private consumers that inspected the previous
        # tuple-normalized representation. LangChain behavior is driven by
        # `_trigger_clauses`, not this attribute. Remove in LangChain 2.0.
        self._trigger_conditions = self._legacy_trigger_conditions(self.trigger)

        self.keep = self._validate_context_size(keep, "keep")
        if token_counter is count_tokens_approximately:
            self.token_counter = _get_approximate_token_counter(self.model)
            self._partial_token_counter: TokenCounter = partial(  # type: ignore[call-arg]
                self.token_counter, use_usage_metadata_scaling=False
            )
        else:
            self.token_counter = token_counter
            self._partial_token_counter = token_counter
        self.summary_prompt = summary_prompt
        self.trim_tokens_to_summarize = trim_tokens_to_summarize

        requires_profile = any("fraction" in clause for clause in self._trigger_clauses)
        if self.keep[0] == "fraction":
            requires_profile = True
        if requires_profile and self._get_profile_limits() is None:
            msg = (
                "Model profile information is required to use fractional token limits, "
                "and is unavailable for the specified model. Please use absolute token "
                "counts instead, or pass "
                '`\n\nChatModel(..., profile={"max_input_tokens": ...})`.\n\n'
                "with a desired integer value of the model's maximum input tokens."
            )
            raise ValueError(msg)

    @override
    def before_model(
        self, state: AgentState[Any], runtime: Runtime[ContextT]
    ) -> dict[str, Any] | None:
        """Process messages before model invocation, potentially triggering summarization.

        Args:
            state: The agent state.
            runtime: The runtime environment.

        Returns:
            An updated state with summarized messages if summarization was performed.
        """
        messages = state["messages"]
        self._ensure_message_ids(messages)

        total_tokens = self.token_counter(messages)
        if not self._should_summarize(messages, total_tokens):
            return None

        cutoff_index = self._determine_cutoff_index(messages)

        if cutoff_index <= 0:
            return None

        messages_to_summarize, preserved_messages = self._partition_messages(messages, cutoff_index)

        summary = self._create_summary(messages_to_summarize)
        new_messages = self._build_new_messages(summary)

        return {
            "messages": [
                RemoveMessage(id=REMOVE_ALL_MESSAGES),
                *new_messages,
                *preserved_messages,
            ]
        }

    @override
    async def abefore_model(
        self, state: AgentState[Any], runtime: Runtime[ContextT]
    ) -> dict[str, Any] | None:
        """Process messages before model invocation, potentially triggering summarization.

        Args:
            state: The agent state.
            runtime: The runtime environment.

        Returns:
            An updated state with summarized messages if summarization was performed.
        """
        messages = state["messages"]
        self._ensure_message_ids(messages)

        total_tokens = self.token_counter(messages)
        if not self._should_summarize(messages, total_tokens):
            return None

        cutoff_index = self._determine_cutoff_index(messages)

        if cutoff_index <= 0:
            return None

        messages_to_summarize, preserved_messages = self._partition_messages(messages, cutoff_index)

        summary = await self._acreate_summary(messages_to_summarize)
        new_messages = self._build_new_messages(summary)

        return {
            "messages": [
                RemoveMessage(id=REMOVE_ALL_MESSAGES),
                *new_messages,
                *preserved_messages,
            ]
        }

    @staticmethod
    def _copy_trigger(
        trigger: ContextSize | TriggerClause | list[ContextSize | TriggerClause] | None,
    ) -> ContextSize | TriggerClause | list[ContextSize | TriggerClause] | None:
        """Copy mutable trigger containers so caller mutations do not affect this instance."""
        if isinstance(trigger, Mapping):
            return cast("TriggerClause", dict(trigger))
        if isinstance(trigger, list):
            return [
                cast("TriggerClause", dict(item)) if isinstance(item, Mapping) else item
                for item in trigger
            ]
        return trigger

    def _legacy_trigger_conditions(
        self,
        trigger: ContextSize | TriggerClause | list[ContextSize | TriggerClause] | None,
    ) -> list[ContextSize]:
        """Project tuple-expressible triggers to the legacy private representation."""
        if trigger is None:
            return []
        if isinstance(trigger, tuple):
            return [self._validate_context_size(trigger, "trigger")]
        if isinstance(trigger, Mapping):
            # Multi-metric (AND) clauses have no single-tuple equivalent.
            if len(trigger) != 1:
                return []
            kind, value = next(iter(trigger.items()))
            return [self._validate_context_size(cast("ContextSize", (kind, value)), "trigger")]

        conditions: list[ContextSize] = []
        for item in trigger:
            if isinstance(item, tuple):
                conditions.append(self._validate_context_size(item, "trigger"))
            elif isinstance(item, Mapping) and len(item) == 1:
                kind, value = next(iter(item.items()))
                conditions.append(
                    self._validate_context_size(cast("ContextSize", (kind, value)), "trigger")
                )
        return conditions

    def _normalize_trigger(
        self,
        trigger: (ContextSize | TriggerClause | list[ContextSize | TriggerClause] | None),
    ) -> list[TriggerClause]:
        """Normalize supported trigger inputs into list of Trigger clauses.

        - tuple ("tokens", 3000) -> [{"tokens": 3000}]
        - dict {"tokens": 4000, "messages": 10} -> [{"tokens": 4000, "messages": 10}]
        - list of either -> OR across items
        """
        if trigger is None:
            return []

        def _validate_and_convert_tuple(t: ContextSize) -> TriggerClause:
            kind, value = self._validate_context_size(t, "trigger")
            return cast("TriggerClause", {kind: value})

        def _validate_mapping(m: Mapping[str, Any]) -> TriggerClause:
            """Validate and convert a mapping to a TriggerClause.

            Type checks reject silent coercion (booleans, numeric strings, and
            fractional floats for integer metrics) so a misconfigured clause fails loudly
            at construction. Range and positivity checks are delegated to
            `_validate_context_size`, keeping a single source of truth for the rules and
            error messages shared with the tuple form.
            """
            if not m:
                msg = "trigger clause must specify at least one of 'tokens', 'messages', 'fraction'"
                raise ValueError(msg)
            out: dict[str, float | int] = {}
            for k, v in m.items():
                if k not in {"tokens", "messages", "fraction"}:
                    msg = f"Unsupported trigger metric: {k!r}"
                    raise ValueError(msg)
                # `bool` is an `int` subclass; reject it so `{"messages": True}` cannot
                # silently become a threshold of 1. Raise `ValueError` (not `TypeError`)
                # so every trigger-config error stays one catchable type.
                if isinstance(v, bool):
                    msg = f"{k} trigger value must be numeric, got {v!r}"
                    raise ValueError(msg)  # noqa: TRY004
                if k == "fraction":
                    if not isinstance(v, (int, float)):
                        msg = f"Fraction trigger values must be numeric, got {v!r}"
                        raise ValueError(msg)
                elif not isinstance(v, int):
                    # Reject floats and numeric strings rather than truncating/coercing.
                    msg = f"{k} trigger values must be integers, got {v!r}"
                    raise ValueError(msg)
                # Delegate range/positivity validation so dict and tuple forms share
                # identical rules and error messages.
                self._validate_context_size(cast("ContextSize", (k, v)), "trigger")
                out[k] = v
            return cast("TriggerClause", out)

        clauses: list[TriggerClause] = []
        # `trigger` may originate from untyped callers, so dispatch on the runtime type
        # and raise on anything unsupported.
        subject: Any = trigger
        if isinstance(subject, Mapping):
            clauses.append(_validate_mapping(subject))
        elif isinstance(subject, tuple):
            clauses.append(_validate_and_convert_tuple(cast("ContextSize", subject)))
        elif isinstance(subject, list):
            for item in subject:
                if isinstance(item, Mapping):
                    clauses.append(_validate_mapping(item))
                elif isinstance(item, tuple):
                    clauses.append(_validate_and_convert_tuple(cast("ContextSize", item)))
                else:
                    msg = f"Unsupported trigger item type: {type(item)}"
                    raise TypeError(msg)
        else:
            msg = f"Unsupported trigger type: {type(subject)}"
            raise TypeError(msg)
        return clauses

    def _should_summarize_based_on_reported_tokens(
        self, messages: list[AnyMessage], threshold: float
    ) -> bool:
        """Check if reported token usage from last AIMessage exceeds threshold."""
        last_ai_message = next(
            (msg for msg in reversed(messages) if isinstance(msg, AIMessage)),
            None,
        )
        if (  # noqa: SIM103
            isinstance(last_ai_message, AIMessage)
            and last_ai_message.usage_metadata is not None
            and (reported_tokens := last_ai_message.usage_metadata.get("total_tokens", -1))
            and reported_tokens >= threshold
            and (message_provider := last_ai_message.response_metadata.get("model_provider"))
            and _provider_matches(
                message_provider,
                self.model._get_ls_params().get("ls_provider"),  # noqa: SLF001
            )
        ):
            return True
        return False

    def _should_summarize(self, messages: list[AnyMessage], total_tokens: int) -> bool:
        """Determine whether summarization should run for the current token usage."""
        if not self._trigger_clauses:
            return False

        # OR across clauses; within a clause every metric must be satisfied (AND).
        for clause in self._trigger_clauses:
            clause_met = True
            for kind, value in clause.items():
                if kind == "messages" and len(messages) < cast("int", value):
                    clause_met = False
                    break
                if kind == "tokens":
                    threshold_tokens = cast("int", value)
                    # Trigger if total tokens exceed threshold OR reported tokens do
                    if (
                        total_tokens < threshold_tokens
                        and not self._should_summarize_based_on_reported_tokens(
                            messages, float(threshold_tokens)
                        )
                    ):
                        clause_met = False
                        break
                if kind == "fraction":
                    max_input_tokens = self._get_profile_limits()
                    if max_input_tokens is None:
                        clause_met = False
                        break
                    threshold = int(max_input_tokens * cast("float", value))
                    if threshold <= 0:
                        threshold = 1
                    if (
                        total_tokens < threshold
                        and not self._should_summarize_based_on_reported_tokens(
                            messages, float(threshold)
                        )
                    ):
                        clause_met = False
                        break
            if clause_met:
                return True
        return False

    def _determine_cutoff_index(self, messages: list[AnyMessage]) -> int:
        """Choose cutoff index respecting retention configuration."""
        kind, value = self.keep
        if kind in {"tokens", "fraction"}:
            token_based_cutoff = self._find_token_based_cutoff(messages)
            if token_based_cutoff is not None:
                return token_based_cutoff
            # None cutoff -> model profile data not available (caught in __init__ but
            # here for safety), fallback to message count
            return self._find_safe_cutoff(messages, _DEFAULT_MESSAGES_TO_KEEP)
        return self._find_safe_cutoff(messages, cast("int", value))

    def _find_token_based_cutoff(self, messages: list[AnyMessage]) -> int | None:
        """Find cutoff index based on target token retention."""
        if not messages:
            return 0

        kind, value = self.keep
        if kind == "fraction":
            max_input_tokens = self._get_profile_limits()
            if max_input_tokens is None:
                return None
            target_token_count = int(max_input_tokens * value)
        elif kind == "tokens":
            target_token_count = int(value)
        else:
            return None

        if target_token_count <= 0:
            target_token_count = 1

        if self.token_counter(messages) <= target_token_count:
            return 0

        # Use binary search to identify the earliest message index that keeps the
        # suffix within the token budget.
        left, right = 0, len(messages)
        cutoff_candidate = len(messages)
        max_iterations = len(messages).bit_length() + 1
        for _ in range(max_iterations):
            if left >= right:
                break

            mid = (left + right) // 2
            if self._partial_token_counter(messages[mid:]) <= target_token_count:
                cutoff_candidate = mid
                right = mid
            else:
                left = mid + 1

        if cutoff_candidate == len(messages):
            cutoff_candidate = left

        if cutoff_candidate >= len(messages):
            if len(messages) == 1:
                return 0
            cutoff_candidate = len(messages) - 1

        # Advance past any ToolMessages to avoid splitting AI/Tool pairs
        return self._find_safe_cutoff_point(messages, cutoff_candidate)

    def _get_profile_limits(self) -> int | None:
        """Retrieve max input token limit from the model profile."""
        try:
            profile = self.model.profile
        except AttributeError:
            return None

        if not isinstance(profile, Mapping):
            return None

        max_input_tokens = profile.get("max_input_tokens")

        if not isinstance(max_input_tokens, int):
            return None

        return max_input_tokens

    @staticmethod
    def _validate_context_size(context: ContextSize, parameter_name: str) -> ContextSize:
        """Validate context configuration tuples."""
        kind, value = context
        if kind == "fraction":
            if not 0 < value <= 1:
                msg = f"Fractional {parameter_name} values must be between 0 and 1, got {value}."
                raise ValueError(msg)
        elif kind in {"tokens", "messages"}:
            if value <= 0:
                msg = f"{parameter_name} thresholds must be greater than 0, got {value}."
                raise ValueError(msg)
        else:
            msg = f"Unsupported context size type {kind} for {parameter_name}."
            raise ValueError(msg)
        return context

    @staticmethod
    def _build_new_messages(summary: str) -> list[HumanMessage]:
        """Wrap the summary text in a single `HumanMessage` tagged as summarization output.

        Args:
            summary: The summary text to embed in the message content.

        Returns:
            A one-element list holding the summary message, marked with
            `additional_kwargs={"lc_source": "summarization"}`.
        """
        return [
            HumanMessage(
                content=f"Here is a summary of the conversation to date:\n\n{summary}",
                additional_kwargs={"lc_source": "summarization"},
            )
        ]

    @staticmethod
    def _ensure_message_ids(messages: list[AnyMessage]) -> None:
        """Ensure all messages have unique IDs for the add_messages reducer."""
        for msg in messages:
            if msg.id is None:
                msg.id = str(uuid.uuid4())

    @staticmethod
    def _partition_messages(
        conversation_messages: list[AnyMessage],
        cutoff_index: int,
    ) -> tuple[list[AnyMessage], list[AnyMessage]]:
        """Partition messages into those to summarize and those to preserve."""
        messages_to_summarize = conversation_messages[:cutoff_index]
        preserved_messages = conversation_messages[cutoff_index:]

        return messages_to_summarize, preserved_messages

    def _find_safe_cutoff(self, messages: list[AnyMessage], messages_to_keep: int) -> int:
        """Find safe cutoff point that preserves AI/Tool message pairs.

        Returns the index where messages can be safely cut without separating
        related AI and Tool messages. Returns `0` if no safe cutoff is found.

        This is aggressive with summarization - if the target cutoff lands in the
        middle of tool messages, we advance past all of them (summarizing more).
        """
        if len(messages) <= messages_to_keep:
            return 0

        target_cutoff = len(messages) - messages_to_keep
        return self._find_safe_cutoff_point(messages, target_cutoff)

    @staticmethod
    def _find_safe_cutoff_point(messages: list[AnyMessage], cutoff_index: int) -> int:
        """Find a safe cutoff point that doesn't split AI/Tool message pairs.

        If the message at `cutoff_index` is a `ToolMessage`, search backward for the
        `AIMessage` containing the corresponding `tool_calls` and adjust the cutoff to
        include it. This ensures tool call requests and responses stay together.

        Falls back to advancing forward past `ToolMessage` objects only if no matching
        `AIMessage` is found (edge case).
        """
        if cutoff_index >= len(messages) or not isinstance(messages[cutoff_index], ToolMessage):
            return cutoff_index

        # Collect tool_call_ids from consecutive ToolMessages at/after cutoff
        tool_call_ids: set[str] = set()
        idx = cutoff_index
        while idx < len(messages) and isinstance(messages[idx], ToolMessage):
            tool_msg = cast("ToolMessage", messages[idx])
            if tool_msg.tool_call_id:
                tool_call_ids.add(tool_msg.tool_call_id)
            idx += 1

        # Search backward for AIMessage with matching tool_calls
        for i in range(cutoff_index - 1, -1, -1):
            msg = messages[i]
            if isinstance(msg, AIMessage) and msg.tool_calls:
                ai_tool_call_ids = {tc.get("id") for tc in msg.tool_calls if tc.get("id")}
                if tool_call_ids & ai_tool_call_ids:
                    # Found the AIMessage - move cutoff to include it
                    return i

        # Fallback: no matching AIMessage found, advance past ToolMessages to avoid
        # orphaned tool responses
        return idx

    def _create_summary(self, messages_to_summarize: list[AnyMessage]) -> str:
        """Generate summary for the given messages.

        Args:
            messages_to_summarize: Messages to summarize.
        """
        if not messages_to_summarize:
            return "No previous conversation history."

        trimmed_messages = self._trim_messages_for_summary(messages_to_summarize)
        if not trimmed_messages:
            return "Previous conversation was too long to summarize."

        # Format messages to avoid token inflation from metadata when str() is called on
        # message objects
        formatted_messages = get_buffer_string(trimmed_messages)

        try:
            response = self.model.invoke(
                self.summary_prompt.format(messages=formatted_messages).rstrip(),
                config={"metadata": {"lc_source": "summarization"}},
            )
            return response.text.strip()
        except Exception as e:
            return f"Error generating summary: {e!s}"

    async def _acreate_summary(self, messages_to_summarize: list[AnyMessage]) -> str:
        """Generate summary for the given messages.

        Args:
            messages_to_summarize: Messages to summarize.
        """
        if not messages_to_summarize:
            return "No previous conversation history."

        trimmed_messages = self._trim_messages_for_summary(messages_to_summarize)
        if not trimmed_messages:
            return "Previous conversation was too long to summarize."

        # Format messages to avoid token inflation from metadata when str() is called on
        # message objects
        formatted_messages = get_buffer_string(trimmed_messages)

        try:
            response = await self.model.ainvoke(
                self.summary_prompt.format(messages=formatted_messages).rstrip(),
                config={"metadata": {"lc_source": "summarization"}},
            )
            return response.text.strip()
        except Exception as e:
            return f"Error generating summary: {e!s}"

    def _trim_messages_for_summary(self, messages: list[AnyMessage]) -> list[AnyMessage]:
        """Trim messages to fit within summary generation limits."""
        try:
            if self.trim_tokens_to_summarize is None:
                return messages
            return cast(
                "list[AnyMessage]",
                trim_messages(
                    messages,
                    max_tokens=self.trim_tokens_to_summarize,
                    token_counter=self.token_counter,
                    start_on="human",
                    strategy="last",
                    allow_partial=True,
                    include_system=True,
                ),
            )
        except Exception:
            # Best-effort fallback: keep only the most recent messages if trimming fails.
            return messages[-_DEFAULT_FALLBACK_MESSAGE_COUNT:]
# Same data, no extra tab — call `code_get_file` + `code_get_findings` over MCP from Claude/Cursor/Copilot.