# Review finding: ensure functions have docstrings for documentation.
# Flagged definition: `def init(self) -> dict[str, Any]:`
"""PII detection and handling middleware for agents."""

from __future__ import annotations

from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal

from langchain_core.messages import AIMessage, AnyMessage, BaseMessage, HumanMessage, ToolMessage
from langgraph.stream import StreamTransformer
from typing_extensions import override

from langchain.agents.middleware._redaction import (
    PIIDetectionError,
    PIIMatch,
    RedactionRule,
    ResolvedRedactionRule,
    apply_strategy,
    detect_credit_card,
    detect_email,
    detect_ip,
    detect_mac_address,
    detect_url,
)
from langchain.agents.middleware.types import (
    AgentMiddleware,
    AgentState,
    ContextT,
    ResponseT,
    hook_config,
)

if TYPE_CHECKING:
    from collections.abc import Callable

    from langgraph.runtime import Runtime
    from langgraph.stream._types import ProtocolEvent


_DEFAULT_STREAM_LOOKBACK = 128
"""Default trailing-buffer size for cross-delta PII detection.

The transformer always holds the last `lookback` characters in a per-content
block buffer so that PII patterns straddling delta boundaries are detected
before any text is released downstream. 128 comfortably covers the built-in
detectors (the credit-card regex tops out at 19 characters; URLs and emails
are typically well under 100) while bounding first-token latency.
"""


class _PIIStreamTransformer(StreamTransformer):
    """Mutates `content-block-delta` text on `messages` events in flight.

    Runs before built-in stream transformers so the redacted text is what
    every downstream consumer sees — both the main protocol event log and
    the `run.messages` projection that `MessagesTransformer` snapshots into.

    Holds a sliding buffer of the most recent text per (run_id, content
    block index) so PII patterns that straddle delta boundaries are caught.
    Anything older than `lookback` characters is redacted with the resolved
    rule's strategy and emitted as the new delta text; the trailing tail
    stays in the buffer until a later delta extends it past the cap or the
    block's finish event flushes the snapshot.
    """

    before_builtins: ClassVar[bool] = True
    required_stream_modes: ClassVar[tuple[str, ...]] = ("messages", "tools", "values")

    def __init__(
        self,
        scope: tuple[str, ...] = (),
        *,
        rule: ResolvedRedactionRule,
        lookback: int = _DEFAULT_STREAM_LOOKBACK,
    ) -> None:
        """Create a transformer bound to one resolved redaction rule.

        Args:
            scope: Forwarded unchanged to `StreamTransformer.__init__`.
            rule: Resolved rule supplying the `detector` and `strategy`
                applied to every string this transformer inspects.
            lookback: Number of trailing characters withheld from each
                streamed string so that a PII pattern split across deltas
                can still be matched once the rest of it arrives.
        """
        super().__init__(scope)
        self._rule = rule
        self._lookback = lookback
        # Text/reasoning deltas keyed by `(run_id, content_block_index)`.
        self._buffers: dict[tuple[str, int], str] = {}
        # Tool-output-delta buffers keyed by `tool_call_id`. Held in a
        # separate dict so `_drop_run` on the messages channel can't
        # sweep active tool-output state.
        self._tool_buffers: dict[str, str] = {}

    def init(self) -> dict[str, Any]:
        """Return the projection contributed by this transformer.

        Returns:
            An empty dict: this transformer mutates events in place rather
            than building a derived view, so it has no projection.
        """
        return {}

    def process(self, event: ProtocolEvent) -> bool:
        """Dispatch a protocol event to the handler for its channel.

        Events on the `messages`, `tools` and `values` channels are
        redacted in place; events on any other channel are left untouched.

        Args:
            event: The protocol event; mutated in place when redaction
                applies.

        Returns:
            Always `True`. NOTE(review): presumably this tells the stream
            machinery to keep forwarding the event — confirm against
            `StreamTransformer.process`.

        Raises:
            PIIDetectionError: Propagated from `apply_strategy` when the
                rule's strategy is `block` and PII is detected.
        """
        method = event["method"]
        if method == "messages":
            return self._process_messages_event(event)
        if method == "tools":
            return self._process_tools_event(event)
        if method == "values":
            return self._process_values_event(event)
        return True

    def _process_values_event(self, event: ProtocolEvent) -> bool:
        """Redact the state snapshot on the `values` channel.

        State snapshots emitted between nodes carry the full state dict,
        which typically includes the messages list. Walking the snapshot
        with `_redact_value` returns a fresh structure where every
        message has a redacted copy of its content — the original
        objects in graph state remain intact for the state-level
        enforcer (`apply_to_tool_results` via `before_model`) to act on
        independently when the agent loops back.
        """
        data = event["params"].get("data")
        if data is None:
            return True
        event["params"]["data"] = self._redact_value(data)
        return True

    def _process_messages_event(self, event: ProtocolEvent) -> bool:
        """Redact a `messages` channel event in place.

        Handles two payload shapes inside the `(payload, metadata)` tuple:
        a full `BaseMessage` (replaced with a redacted copy) and a
        content-block event dict (`content-block-delta`,
        `content-block-finish`, `message-finish`, `error`). Anything else
        passes through unchanged.
        """
        params = event["params"]
        data = params.get("data")
        if not isinstance(data, tuple) or len(data) != 2:  # noqa: PLR2004
            return True
        payload, metadata = data

        # Legacy `(BaseMessage, metadata)` shape: the langgraph→langchain
        # integration emits this when a model only implements `_generate`
        # (or when its `_astream` falls back), producing a single event
        # carrying the full message rather than streamed content-block
        # deltas. Swap in a redacted copy so the consumer sees scrubbed
        # text on the wire while the original stays intact in graph state
        # for `after_model` to act on independently. Under `block`,
        # `_redact_base_message` raises `PIIDetectionError` via
        # `apply_strategy` before we get here.
        if isinstance(payload, BaseMessage):
            redacted = self._redact_base_message(payload)
            if redacted is not payload:
                params["data"] = (redacted, metadata)
            return True

        if not isinstance(payload, dict):
            return True
        kind = payload.get("event")
        run_id = str(metadata.get("run_id") or "") if metadata else ""

        if kind == "content-block-delta":
            self._mutate_delta(payload, run_id)
        elif kind == "content-block-finish":
            self._finalize_block(payload, run_id)
        elif kind in {"message-finish", "error"}:
            self._drop_run(run_id)
        return True

    def _process_tools_event(self, event: ProtocolEvent) -> bool:
        """Redact a `tools` channel event in place.

        Covers `tool-started.input`, `tool-output-delta.delta`,
        `tool-finished.output` and `tool-error.message`. The buffered
        tool-output tail for the call is released on `tool-finished` and
        `tool-error`.
        """
        data = event["params"].get("data")
        if not isinstance(data, dict):
            return True
        kind = data.get("event")
        tool_call_id = data.get("tool_call_id")
        # Use the tool_call_id as buffer key when present; fall back to
        # the empty-string slot for the rare malformed/custom emitter case
        # (the buffer becomes shared but at least redaction runs). The
        # same key is released on finish/error so a stale tail from one
        # id-less tool call can't be prepended to the next one's output.
        buffer_key = tool_call_id if isinstance(tool_call_id, str) else ""

        if kind == "tool-started":
            # Tool inputs may be a dict (multi-arg tools), a string
            # (single-arg tools — `BaseTool._parse_input` passes the
            # raw string through), or a list (array-input tools).
            # `_redact_value` handles all three uniformly.
            if "input" in data:
                data["input"] = self._redact_value(data["input"])
        elif kind == "tool-output-delta":
            self._mutate_tool_output_delta(data, buffer_key)
        elif kind == "tool-finished":
            if "output" in data:
                data["output"] = self._redact_value(data["output"])
            self._tool_buffers.pop(buffer_key, None)
        elif kind == "tool-error":
            msg = data.get("message")
            if isinstance(msg, str) and msg:
                matches = self._rule.detector(msg)
                if matches:
                    data["message"] = apply_strategy(msg, matches, self._rule.strategy)
            self._tool_buffers.pop(buffer_key, None)

        return True

    def _redact_with_lookback(self, held: str, incoming: str) -> tuple[str, str]:
        """Redact `held + incoming` and split it at the lookback boundary.

        Detection runs on the full accumulated text before splitting.
        Detecting only on the about-to-emit prefix would miss matches
        that straddle the lookback boundary — the detector needs a
        complete hit, so a truncated prefix would fail to match and the
        partial PII would leak on the wire.

        Args:
            held: Tail withheld from earlier deltas of the same stream.
            incoming: Text carried by the current delta.

        Returns:
            `(emit, tail)`: `emit` is the text that may be released now,
            `tail` is the last `lookback` characters to keep buffered.

        Raises:
            PIIDetectionError: From `apply_strategy` under
                `strategy="block"`, failing the run as soon as PII
                arrives rather than buffering until a state-level hook
                raises later.
        """
        combined = held + incoming
        matches = self._rule.detector(combined)
        if matches:
            combined = apply_strategy(combined, matches, self._rule.strategy)
        emit_end = max(0, len(combined) - self._lookback)
        return combined[:emit_end], combined[emit_end:]

    def _mutate_tool_output_delta(self, data: dict[str, Any], tool_call_id: str) -> None:
        """Redact a `tool-output-delta` payload.

        String deltas go through the same lookback machinery as
        text-deltas, keyed by `tool_call_id` in the disjoint
        `_tool_buffers` dict so `_drop_run` on the messages channel
        can't sweep active tool-output state.

        Structured deltas (dict/list) walk recursively without
        buffering — they don't have a position-stable shape across
        deltas to buffer against.
        """
        delta = data.get("delta")
        if isinstance(delta, str):
            held = self._tool_buffers.get(tool_call_id, "")
            emitted, tail = self._redact_with_lookback(held, delta)
            self._tool_buffers[tool_call_id] = tail
            data["delta"] = emitted
        elif isinstance(delta, (dict, list)):
            data["delta"] = self._redact_value(delta)

    def _redact_tool_call_list(self, calls: list[Any] | None) -> tuple[list[Any], bool]:
        """Walk a list of tool-call (or invalid-tool-call) dicts.

        Returns `(new_list, changed)`. Each element's `args` is run
        through `_redact_value` regardless of its type — `tool_call.args`
        is a dict, `invalid_tool_call.args` is a raw JSON string, and
        `_redact_value` handles both shapes uniformly. If nothing
        changed, returns the input list and `changed=False`.
        """
        if not calls:
            return calls or [], False
        new_calls: list[Any] = []
        changed = False
        for tc in calls:
            if isinstance(tc, dict) and "args" in tc and tc["args"] is not None:
                redacted = self._redact_value(tc["args"])
                if redacted != tc["args"]:
                    # Copy before editing so the caller's dict is untouched.
                    new_tc = dict(tc)
                    new_tc["args"] = redacted
                    new_calls.append(new_tc)
                    changed = True
                    continue
            new_calls.append(tc)
        if not changed:
            return calls, False
        return new_calls, True

    def _redact_value(self, value: Any) -> Any:
        """Recursively redact PII in string leaves of a nested structure.

        Returns a new value where every `str` leaf that contains PII has
        been replaced. Non-string leaves and the structure itself are
        preserved. Under `block`, `apply_strategy` raises
        `PIIDetectionError` instead of returning.

        `BaseMessage` payloads (typically `ToolMessage` from
        `tool-finished.output`, or any message reached via the `values`
        channel) return a fresh copy with `.content` redacted plus
        `AIMessage.tool_calls[*].args` / `invalid_tool_calls[*].args`
        walked. The original object stays intact for state-level
        enforcers (`after_model`, `before_model` with
        `apply_to_tool_results`) to act on independently.

        Scope mirrors the pre-streaming state-level surfaces:
        `.content` (string or list-of-content-blocks) and `tool_calls`
        args. Other message attributes (`additional_kwargs`,
        `response_metadata`, `ToolMessage.artifact`) are intentionally
        not walked here — they aren't scrubbed in graph state by the
        existing hooks, so scrubbing them on the wire would create
        a wire/state divergence.
        """
        if isinstance(value, str):
            if not value:
                return value
            matches = self._rule.detector(value)
            if not matches:
                return value
            # `apply_strategy` raises `PIIDetectionError` under `block`
            # — the run fails immediately rather than buffering until a
            # state-level hook can raise.
            return apply_strategy(value, matches, self._rule.strategy)
        if isinstance(value, BaseMessage):
            return self._redact_base_message(value)
        if isinstance(value, dict):
            return {k: self._redact_value(v) for k, v in value.items()}
        if isinstance(value, list):
            return [self._redact_value(v) for v in value]
        if isinstance(value, tuple):
            return tuple(self._redact_value(v) for v in value)
        return value

    def _redact_base_message(self, value: BaseMessage) -> BaseMessage:
        """Return `value` with PII-carrying surfaces redacted.

        A fresh copy is returned when anything changed; the same object
        is returned when no PII was found.
        """
        update: dict[str, Any] = {}

        content = value.content
        if isinstance(content, str) and content:
            matches = self._rule.detector(content)
            if matches:
                update["content"] = apply_strategy(content, matches, self._rule.strategy)
        elif isinstance(content, list) and content:
            # Structured content-blocks shape:
            # `[{"type": "text", "text": "..."}, {"type": "tool_call", ...}, ...]`.
            redacted_content = self._redact_value(content)
            if redacted_content != content:
                update["content"] = redacted_content

        # `AIMessage.tool_calls` and `.invalid_tool_calls` carry PII in
        # `args` independently of `.content`. `tool_call.args` is a
        # dict; `invalid_tool_call.args` is a raw JSON string —
        # `_redact_value` handles both shapes via the recursion.
        if isinstance(value, AIMessage):
            new_tc_list, tc_changed = self._redact_tool_call_list(value.tool_calls)
            if tc_changed:
                update["tool_calls"] = new_tc_list
            new_inv_list, inv_changed = self._redact_tool_call_list(value.invalid_tool_calls)
            if inv_changed:
                update["invalid_tool_calls"] = new_inv_list

        if not update:
            return value
        return value.model_copy(update=update)

    def _mutate_delta(self, payload: dict[str, Any], run_id: str) -> None:
        """Redact the `delta` of a `content-block-delta` event in place.

        `text-delta` and `reasoning-delta` go through the lookback
        buffer; `block-delta` events carrying tool-call chunks have
        their cumulative `args` string redacted. Every other delta type
        passes through unchanged.
        """
        delta = payload.get("delta")
        if not isinstance(delta, dict):
            return
        delta_type = delta.get("type")
        if delta_type == "text-delta":
            self._mutate_string_field_delta(delta, payload, run_id, "text")
            return
        if delta_type == "reasoning-delta":
            # Reasoning content (chain-of-thought from extended-thinking
            # models) is a real PII surface — models echo back
            # user-supplied data or synthesize it from context. Run the
            # same lookback machinery as text-delta against the
            # `reasoning` field. Block indices are unique within a
            # message regardless of block type, so the buffer key
            # `(run_id, index)` is naturally disjoint from text-delta keys.
            self._mutate_string_field_delta(delta, payload, run_id, "reasoning")
            return
        if delta_type == "block-delta":
            fields = delta.get("fields")
            if isinstance(fields, dict) and fields.get("type") in {
                "tool_call_chunk",
                "server_tool_call_chunk",
            }:
                self._mutate_tool_call_chunk_delta(fields)
        # Other delta types (`data-delta`, vendor block types) pass
        # through. The pre-streaming middleware scrubbed `.content` text
        # on state messages only; binary payloads and provider-specific
        # block shapes are out of scope for parity with that surface.

    def _mutate_string_field_delta(
        self,
        delta: dict[str, Any],
        payload: dict[str, Any],
        run_id: str,
        field: str,
    ) -> None:
        """Apply the lookback-buffer redaction to a string field on a delta.

        Shared by `text-delta` (`field="text"`) and `reasoning-delta`
        (`field="reasoning"`). Buffer is keyed by `(run_id, block_index)`;
        block indices are unique within a message so different block
        types share the same key space without collision.
        """
        text = delta.get(field)
        if not isinstance(text, str) or not text:
            return
        index = payload.get("index")
        if not isinstance(index, int):
            return

        key = (run_id, index)
        held = self._buffers.get(key, "")
        emitted, tail = self._redact_with_lookback(held, text)
        self._buffers[key] = tail
        delta[field] = emitted

    def _mutate_tool_call_chunk_delta(self, fields: dict[str, Any]) -> None:
        """Redact cumulative tool-call args with lookback withholding.

        Each `tool_call_chunk` `block-delta` event carries the full
        accumulated args string (verified against `_compat_bridge.py`
        — `delta_source = current` for these block types — and against
        the consumer-side `_merge_block_delta_into_store`, which
        replaces wholesale rather than appends).

        Detection runs on the full cumulative args so any complete PII
        anywhere in the string is redacted before emission. Lookback
        withholding then trims the trailing `lookback` characters
        from what reaches the consumer — those characters might be the
        start of a partial PII match that completes in a future
        cumulative delta. The trimmed tail surfaces at `content-block-
        finish` where `_finalize_block` redacts the parsed args dict.

        For args that fit within the lookback window (the typical case),
        this withholds the entire args string during streaming — the
        redacted args dict appears only at finalize. For args that
        exceed the lookback window, the safe prefix streams incrementally
        as the cumulative state grows. PII that appears more than
        the lookback window characters from the cumulative tail in a
        delta where it hasn't yet completed can still surface in the
        emit prefix — same residual exposure as PII longer than
        the lookback window on the text path. The `content-block-finish`
        snapshot redaction is the backstop.
        """
        args = fields.get("args")
        if not isinstance(args, str) or not args:
            return

        matches = self._rule.detector(args)
        if matches:
            # `apply_strategy` raises `PIIDetectionError` under
            # `strategy="block"` — the run fails the moment a complete
            # PII pattern surfaces in the cumulative args string.
            args = apply_strategy(args, matches, self._rule.strategy)

        # No buffer is kept here: every chunk already carries the whole
        # args string, so only the trailing window is withheld.
        emit_end = max(0, len(args) - self._lookback)
        fields["args"] = args[:emit_end]

    def _finalize_block(self, payload: dict[str, Any], run_id: str) -> None:
        """Redact a `content-block-finish` snapshot and release its buffer.

        Handles `text`, `reasoning` and tool-call content blocks; other
        block types are left unchanged. The `(run_id, index)` buffer is
        dropped once the snapshot has been processed.
        """
        index = payload.get("index")
        if not isinstance(index, int):
            return
        key = (run_id, index)
        # The finalized block carries the model's original concatenation
        # of deltas, not what we emitted on the wire. Re-run detection over
        # its full text so the snapshot matches the redacted stream.
        content = payload.get("content")
        if isinstance(content, dict):
            ctype = content.get("type")
            if ctype == "text":
                self._finalize_string_field(content, "text")
            elif ctype == "reasoning":
                self._finalize_string_field(content, "reasoning")
            elif (
                ctype in {"tool_call", "server_tool_call", "invalid_tool_call"}
                and "args" in content
                and content["args"] is not None
            ):
                # `tool_call` / `server_tool_call` args are dicts;
                # `invalid_tool_call.args` is the raw unparsed JSON
                # string. `_redact_value` handles both shapes.
                content["args"] = self._redact_value(content["args"])
        self._buffers.pop(key, None)

    def _finalize_string_field(self, content: dict[str, Any], field: str) -> None:
        """Re-redact a string content-block field on `content-block-finish`.

        Used for `text` and `reasoning` content blocks. Under
        `strategy="block"` `apply_strategy` raises `PIIDetectionError`,
        failing the run immediately.
        """
        text = content.get(field)
        if not isinstance(text, str) or not text:
            return
        matches = self._rule.detector(text)
        if not matches:
            return
        content[field] = apply_strategy(text, matches, self._rule.strategy)

    def _drop_run(self, run_id: str) -> None:
        """Discard every text/reasoning buffer belonging to `run_id`.

        `content-block-finish` should have already released the buffers
        on normal completion, but message-finish / error paths need an
        explicit sweep so abandoned blocks don't accumulate in long-lived
        processes. Tool-output buffers live in `_tool_buffers` and are
        not touched here.
        """
        # Collect keys first: deleting while iterating the dict would raise.
        stale = [key for key in self._buffers if key[0] == run_id]
        for key in stale:
            del self._buffers[key]

    def finalize(self) -> None:
        """Clear all buffered text and tool-output tails."""
        self._buffers.clear()
        self._tool_buffers.clear()

    def fail(self, err: BaseException) -> None:  # noqa: ARG002
        """Clear all buffered text and tool-output tails; `err` is not inspected."""
        self._buffers.clear()
        self._tool_buffers.clear()


class PIIMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, ResponseT]):
    """Detect and handle Personally Identifiable Information (PII) in conversations.

    This middleware detects common PII types and applies configurable strategies
    to handle them. It can detect emails, credit cards, IP addresses, MAC addresses, and
    URLs in both user input and agent output.

    Built-in PII types:

    - `email`: Email addresses
    - `credit_card`: Credit card numbers (validated with Luhn algorithm)
    - `ip`: IP addresses (validated with stdlib)
    - `mac_address`: MAC addresses
    - `url`: URLs (both `http`/`https` and bare URLs)

    Strategies:

    - `block`: Raise an exception when PII is detected
    - `redact`: Replace PII with `[REDACTED_TYPE]` placeholders
    - `mask`: Partially mask PII (e.g., `****-****-****-1234` for credit card)
    - `hash`: Replace PII with deterministic hash (e.g., `<email_hash:a1b2c3d4>`)

    Strategy Selection Guide:

    | Strategy | Preserves Identity? | Best For                                |
    | -------- | ------------------- | --------------------------------------- |
    | `block`  | N/A                 | Avoid PII completely                    |
    | `redact` | No                  | General compliance, log sanitization    |
    | `mask`   | No                  | Human readability, customer service UIs |
    | `hash`   | Yes (pseudonymous)  | Analytics, debugging                    |

    Example:
        ```python
        from langchain.agents.middleware import PIIMiddleware
        from langchain.agents import create_agent

        # Redact all emails in user input
        agent = create_agent(
            "openai:gpt-5.5",
            middleware=[
                PIIMiddleware("email", strategy="redact"),
            ],
        )

        # Use different strategies for different PII types
        agent = create_agent(
            "openai:gpt-5.5",
            middleware=[
                PIIMiddleware("credit_card", strategy="mask"),
                PIIMiddleware("url", strategy="redact"),
                PIIMiddleware("ip", strategy="hash"),
            ],
        )

        # Custom PII type with regex
        agent = create_agent(
            "openai:gpt-5.5",
            middleware=[
                PIIMiddleware("api_key", detector=r"sk-[a-zA-Z0-9]{32}", strategy="block"),
            ],
        )
        ```
    """

    def __init__(
        self,
        # From a typing point of view, the literals are covered by 'str'.
        # Nonetheless, we escape PYI051 to keep hints and autocompletion for the caller.
        pii_type: Literal["email", "credit_card", "ip", "mac_address", "url"] | str,  # noqa: PYI051
        *,
        strategy: Literal["block", "redact", "mask", "hash"] = "redact",
        detector: Callable[[str], list[PIIMatch]] | str | None = None,
        apply_to_input: bool = True,
        apply_to_output: bool = False,
        apply_to_tool_results: bool = False,
    ) -> None:
        """Initialize the PII detection middleware.

        Args:
            pii_type: Type of PII to detect.

                Can be a built-in type (`email`, `credit_card`, `ip`, `mac_address`,
                `url`) or a custom type name.
            strategy: How to handle detected PII.

                Options:

                * `block`: Raise `PIIDetectionError` when PII is detected
                * `redact`: Replace with `[REDACTED_TYPE]` placeholders
                * `mask`: Partially mask PII (show last few characters)
                * `hash`: Replace with deterministic hash (format: `<type_hash:digest>`)

            detector: Custom detector function or regex pattern.

                * If `Callable`: Function that takes content string and returns
                    list of `PIIMatch` objects
                * If `str`: Regex pattern to match PII
                * If `None`: Uses built-in detector for the `pii_type`
            apply_to_input: Whether to check user messages before model call.
            apply_to_output: Whether to check AI messages after model call.

                When `True`, a stream transformer is also installed so
                that every wire surface of an agent run is redacted in
                flight:

                * Streamed AI text deltas (`content-block-delta` of type
                    `text-delta`)
                * Streamed tool-call arguments (`content-block-delta`
                    with `tool_call_chunk` / `server_tool_call_chunk`
                    fields, plus the finalized `tool_call` content block
                    on `content-block-finish`)
                * Tool execution events on the `tools` channel
                    (`tool-started.input`, `tool-output-delta`,
                    `tool-finished.output`, `tool-error.message`)
                * State snapshots on the `values` channel — message
                    lists are walked and each message's `.content` is
                    redacted on a fresh copy (state itself stays intact
                    for `before_model` / `after_model` to act on
                    independently)

                State-level redaction via `after_model` (and
                `before_model` with `apply_to_tool_results`) remains the
                canonical enforcer; the streaming transformer ensures
                consumers reading `astream_events(version="v3")` or
                `run.messages` / `run.tool_calls` / `run.values` never
                see PII on the wire.
            apply_to_tool_results: Whether to check tool result messages after tool execution.

                When `True`, the same stream transformer described under
                `apply_to_output` is installed, even if `apply_to_output`
                is `False`.

        Raises:
            ValueError: If `pii_type` is not built-in and no detector is provided.
        """
        super().__init__()

        self.apply_to_input = apply_to_input
        self.apply_to_output = apply_to_output
        self.apply_to_tool_results = apply_to_tool_results

        self._resolved_rule: ResolvedRedactionRule = RedactionRule(
            pii_type=pii_type,
            strategy=strategy,
            detector=detector,
        ).resolve()
        self.pii_type = self._resolved_rule.pii_type
        self.strategy = self._resolved_rule.strategy
        self.detector = self._resolved_rule.detector

        # Stream transformer scrubs the streamed surface of the same
        # messages that the state-level hooks scrub in graph state.
        # Installed whenever any output-side scrubbing is enabled —
        # `apply_to_output` covers AI messages (text, tool-call args,
        # reasoning), `apply_to_tool_results` covers tool execution
        # (the `tools` channel + ToolMessage content on `values` and
        # `messages`). For `block` the transformer raises
        # `PIIDetectionError` directly from its event handler the
        # moment a complete PII pattern is detected, failing the run
        # via langgraph's `StreamMux.afail` path. The state-level
        # `after_model` / `before_model` hooks remain a backstop for
        # non-streaming consumers.
        if self.apply_to_output or self.apply_to_tool_results:
            self.transformers = (
                partial(
                    _PIIStreamTransformer,
                    rule=self._resolved_rule,
                ),
            )

    @property
    def name(self) -> str:
        """Name of the middleware."""
        return f"{self.__class__.__name__}[{self.pii_type}]"

    def _process_content(self, content: str) -> tuple[str, list[PIIMatch]]:
        """Apply the configured redaction rule to the provided content."""
        matches = self.detector(content)
        if not matches:
            return content, []
        sanitized = apply_strategy(content, matches, self.strategy)
        return sanitized, matches

    @hook_config(can_jump_to=["end"])
    @override
    def before_model(
        self,
        state: AgentState[Any],
        runtime: Runtime[ContextT],
    ) -> dict[str, Any] | None:
        """Check user messages and tool results for PII before model invocation.

        Args:
            state: The current agent state.
            runtime: The langgraph runtime.

        Returns:
            Updated state with PII handled according to strategy, or `None` if no PII
                detected.

        Raises:
            PIIDetectionError: If PII is detected and strategy is `'block'`.
        """
        if not self.apply_to_input and not self.apply_to_tool_results:
            return None

        messages = state["messages"]
        if not messages:
            return None

        new_messages = list(messages)
        any_modified = False

        # Check user input if enabled
        if self.apply_to_input:
            # Get last user message
            last_user_msg = None
            last_user_idx = None
            for i in range(len(messages) - 1, -1, -1):
                if isinstance(messages[i], HumanMessage):
                    last_user_msg = messages[i]
                    last_user_idx = i
                    break

            if last_user_idx is not None and last_user_msg and last_user_msg.content:
                # Detect PII in message content
                content = str(last_user_msg.content)
                new_content, matches = self._process_content(content)

                if matches:
                    updated_message: AnyMessage = HumanMessage(
                        content=new_content,
                        id=last_user_msg.id,
                        name=last_user_msg.name,
                    )

                    new_messages[last_user_idx] = updated_message
                    any_modified = True

        # Check tool results if enabled
        if self.apply_to_tool_results:
            # Find the last AIMessage, then process all `ToolMessage` objects after it
            last_ai_idx = None
            for i in range(len(messages) - 1, -1, -1):
                if isinstance(messages[i], AIMessage):
                    last_ai_idx = i
                    break

            if last_ai_idx is not None:
                # Get all tool messages after the last AI message
                for i in range(last_ai_idx + 1, len(messages)):
                    msg = messages[i]
                    if isinstance(msg, ToolMessage):
                        tool_msg = msg
                        if not tool_msg.content:
                            continue

                        content = str(tool_msg.content)
                        new_content, matches = self._process_content(content)

                        if not matches:
                            continue

                        # Create updated tool message
                        updated_message = ToolMessage(
                            content=new_content,
                            id=tool_msg.id,
                            name=tool_msg.name,
                            tool_call_id=tool_msg.tool_call_id,
                        )

                        new_messages[i] = updated_message
                        any_modified = True

        if any_modified:
            return {"messages": new_messages}

        return None

    @hook_config(can_jump_to=["end"])
    async def abefore_model(
        self,
        state: AgentState[Any],
        runtime: Runtime[ContextT],
    ) -> dict[str, Any] | None:
        """Async check user messages and tool results for PII before model invocation.

        Args:
            state: The current agent state.
            runtime: The langgraph runtime.

        Returns:
            Updated state with PII handled according to strategy, or `None` if no PII
                detected.

        Raises:
            PIIDetectionError: If PII is detected and strategy is `'block'`.
        """
        return self.before_model(state, runtime)

    @override
    def after_model(
        self,
        state: AgentState[Any],
        runtime: Runtime[ContextT],
    ) -> dict[str, Any] | None:
        """Check AI messages for PII after model invocation.

        Args:
            state: The current agent state.
            runtime: The langgraph runtime.

        Returns:
            Updated state with PII handled according to strategy, or None if no PII
                detected.

        Raises:
            PIIDetectionError: If PII is detected and strategy is `'block'`.
        """
        if not self.apply_to_output:
            return None

        messages = state["messages"]
        if not messages:
            return None

        # Get last AI message
        last_ai_msg = None
        last_ai_idx = None
        for i in range(len(messages) - 1, -1, -1):
            msg = messages[i]
            if isinstance(msg, AIMessage):
                last_ai_msg = msg
                last_ai_idx = i
                break

        if last_ai_idx is None or not last_ai_msg or not last_ai_msg.content:
            return None

        # Detect PII in message content
        content = str(last_ai_msg.content)
        new_content, matches = self._process_content(content)

        if not matches:
            return None

        # Create updated message
        updated_message = AIMessage(
            content=new_content,
            id=last_ai_msg.id,
            name=last_ai_msg.name,
            tool_calls=last_ai_msg.tool_calls,
        )

        # Return updated messages
        new_messages = list(messages)
        new_messages[last_ai_idx] = updated_message

        return {"messages": new_messages}

    async def aafter_model(
        self,
        state: AgentState[Any],
        runtime: Runtime[ContextT],
    ) -> dict[str, Any] | None:
        """Async check AI messages for PII after model invocation.

        Args:
            state: The current agent state.
            runtime: The langgraph runtime.

        Returns:
            Updated state with PII handled according to strategy, or None if no PII
                detected.

        Raises:
            PIIDetectionError: If PII is detected and strategy is `'block'`.
        """
        return self.after_model(state, runtime)


__all__ = [
    "PIIDetectionError",
    "PIIMatch",
    "PIIMiddleware",
    "detect_credit_card",
    "detect_email",
    "detect_ip",
    "detect_mac_address",
    "detect_url",
]
# Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.