Overuse of `isinstance` checks may indicate design issues; consider polymorphism.
if isinstance(addr, ipaddress.IPv4Address):
"""SSRF protection policy with IP validation and DNS-aware URL checking."""

import asyncio
import dataclasses
import ipaddress
import os
import socket
import urllib.parse

from langchain_core._security._exceptions import SSRFBlockedError

# ---------------------------------------------------------------------------
# Blocklist constants
# ---------------------------------------------------------------------------

_BLOCKED_IPV4_NETWORKS: tuple[ipaddress.IPv4Network, ...] = tuple(
    ipaddress.IPv4Network(n)
    for n in (
        "10.0.0.0/8",  # RFC 1918 - private class A
        "172.16.0.0/12",  # RFC 1918 - private class B
        "192.168.0.0/16",  # RFC 1918 - private class C
        "127.0.0.0/8",  # RFC 1122 - loopback
        "169.254.0.0/16",  # RFC 3927 - link-local
        "0.0.0.0/8",  # RFC 1122 - "this network"
        "100.64.0.0/10",  # RFC 6598 - shared/CGN address space
        "192.0.0.0/24",  # RFC 6890 - IETF protocol assignments
        "192.0.2.0/24",  # RFC 5737 - TEST-NET-1 (documentation)
        "198.18.0.0/15",  # RFC 2544 - benchmarking
        "198.51.100.0/24",  # RFC 5737 - TEST-NET-2 (documentation)
        "203.0.113.0/24",  # RFC 5737 - TEST-NET-3 (documentation)
        "224.0.0.0/4",  # RFC 5771 - multicast
        "240.0.0.0/4",  # RFC 1112 - reserved for future use
        "255.255.255.255/32",  # RFC 919 - limited broadcast
    )
)

_BLOCKED_IPV6_NETWORKS: tuple[ipaddress.IPv6Network, ...] = tuple(
    ipaddress.IPv6Network(n)
    for n in (
        "::1/128",  # RFC 4291 - loopback
        "fc00::/7",  # RFC 4193 - unique local addresses (ULA)
        "fe80::/10",  # RFC 4291 - link-local
        "ff00::/8",  # RFC 4291 - multicast
        "::ffff:0:0/96",  # RFC 4291 - IPv4-mapped IPv6 addresses
        "::0.0.0.0/96",  # RFC 4291 - IPv4-compatible IPv6 (deprecated)
        "64:ff9b::/96",  # RFC 6052 - NAT64 well-known prefix
        "64:ff9b:1::/48",  # RFC 8215 - NAT64 discovery prefix
    )
)

_CLOUD_METADATA_IPS: frozenset[str] = frozenset(
    {
        "169.254.169.254",  # AWS, GCP, Azure, DigitalOcean, Oracle Cloud
        "169.254.170.2",  # AWS ECS task metadata
        "169.254.170.23",  # AWS EKS Pod Identity Agent
        "100.100.100.200",  # Alibaba Cloud metadata
        "fd00:ec2::254",  # AWS EC2 IMDSv2 over IPv6 (Nitro instances)
        "fd00:ec2::23",  # AWS EKS Pod Identity Agent (IPv6)
        "fe80::a9fe:a9fe",  # OpenStack Nova metadata (IPv6 link-local)
    }
)

# Network ranges that are always blocked when block_cloud_metadata=True,
# independent of block_private_ips. The entire link-local range is used by
# cloud metadata services across providers.
_CLOUD_METADATA_NETWORKS: tuple[
    ipaddress.IPv4Network | ipaddress.IPv6Network, ...
] = (ipaddress.IPv4Network("169.254.0.0/16"),)

_CLOUD_METADATA_HOSTNAMES: frozenset[str] = frozenset(
    {
        "metadata.google.internal",
        "metadata.amazonaws.com",
        "metadata",
        "instance-data",
    }
)

_LOCALHOST_NAMES: frozenset[str] = frozenset(
    {
        "localhost",
        "localhost.localdomain",
        "host.docker.internal",
    }
)

_K8S_SUFFIX = ".svc.cluster.local"

_LOOPBACK_IPV4 = ipaddress.IPv4Network("127.0.0.0/8")
_LOOPBACK_IPV6 = ipaddress.IPv6Address("::1")

# RFC 1122 "this network"; treated as a localhost address by the
# block_localhost check. Built once here instead of on every lookup.
_THIS_NETWORK_IPV4 = ipaddress.IPv4Network("0.0.0.0/8")

# NAT64 well-known prefixes
_NAT64_PREFIX = ipaddress.IPv6Network("64:ff9b::/96")
_NAT64_DISCOVERY_PREFIX = ipaddress.IPv6Network("64:ff9b:1::/48")


# ---------------------------------------------------------------------------
# SSRFPolicy
# ---------------------------------------------------------------------------


@dataclasses.dataclass(frozen=True)
class SSRFPolicy:
    """Immutable policy controlling which URLs/IPs are considered safe."""

    # URL schemes accepted by validate_url_sync (compared lower-cased).
    allowed_schemes: frozenset[str] = frozenset({"http", "https"})
    # Gates the built-in _BLOCKED_IPV4_NETWORKS / _BLOCKED_IPV6_NETWORKS lists.
    block_private_ips: bool = True
    # Gates _LOCALHOST_NAMES plus the 127.0.0.0/8, 0.0.0.0/8 and ::1 checks.
    block_localhost: bool = True
    # Gates the cloud metadata hostnames, IPs and network ranges.
    block_cloud_metadata: bool = True
    # Gates the ".svc.cluster.local" hostname suffix check.
    block_k8s_internal: bool = True
    # Hostnames that skip hostname, IP and DNS checks (the scheme is still
    # checked). Matched case-insensitively.
    allowed_hosts: frozenset[str] = frozenset()
    # Extra networks that are blocked regardless of block_private_ips.
    additional_blocked_cidrs: tuple[
        ipaddress.IPv4Network | ipaddress.IPv6Network, ...
    ] = ()


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _extract_embedded_ipv4(
    addr: ipaddress.IPv6Address,
) -> ipaddress.IPv4Address | None:
    """Extract an embedded IPv4 from IPv4-mapped or NAT64 IPv6 addresses.

    Returns:
        The embedded IPv4 address, or None when *addr* is neither
        IPv4-mapped nor inside one of the NAT64 prefixes.
    """
    # Check ipv4_mapped first (covers ::ffff:x.x.x.x)
    mapped = addr.ipv4_mapped
    if mapped is not None:
        return mapped

    # Check NAT64 prefixes — embedded IPv4 is in the last 4 bytes
    if addr in _NAT64_PREFIX or addr in _NAT64_DISCOVERY_PREFIX:
        return ipaddress.IPv4Address(addr.packed[-4:])

    return None


def _ip_in_blocked_networks(
    addr: ipaddress.IPv4Address | ipaddress.IPv6Address,
    policy: SSRFPolicy,
) -> str | None:
    """Return a reason string if *addr* falls in a blocked range, else None.

    Checks run in this order: built-in private ranges (only when
    ``block_private_ips``), ``additional_blocked_cidrs``, loopback (only when
    ``block_localhost``), then cloud metadata IPs and networks (only when
    ``block_cloud_metadata``).
    """
    # NOTE: if profiling shows this is a hot path, consider memoising with
    # @functools.lru_cache (key on (addr, id(policy))).

    # A zoned IPv6 address ("::1%1", "fe80::a9fe:a9fe%eth0") compares unequal
    # to its unzoned form and renders with the "%zone" suffix, which would
    # defeat the equality and string-set checks below. Drop the zone first.
    if isinstance(addr, ipaddress.IPv6Address) and addr.scope_id is not None:
        addr = ipaddress.IPv6Address(int(addr))

    is_v4 = isinstance(addr, ipaddress.IPv4Address)
    builtin_networks = _BLOCKED_IPV4_NETWORKS if is_v4 else _BLOCKED_IPV6_NETWORKS
    cidr_type = ipaddress.IPv4Network if is_v4 else ipaddress.IPv6Network

    if policy.block_private_ips and any(addr in net for net in builtin_networks):
        return "private IP range"

    # User-supplied CIDRs apply even when private IPs are allowed; only
    # networks of the same address family as *addr* are considered.
    if any(
        isinstance(cidr, cidr_type) and addr in cidr
        for cidr in policy.additional_blocked_cidrs
    ):
        return "blocked CIDR"

    # Loopback check — independent of block_private_ips so that
    # block_localhost=True still catches 127.x.x.x / ::1 even when
    # private IPs are allowed.
    if policy.block_localhost:
        if is_v4:
            if addr in _LOOPBACK_IPV4 or addr in _THIS_NETWORK_IPV4:
                return "localhost address"
        elif addr == _LOOPBACK_IPV6:
            return "localhost address"

    # Cloud metadata check — IP set *and* network ranges (e.g. 169.254.0.0/16).
    # Independent of block_private_ips so that allow_private=True still blocks
    # cloud metadata endpoints.
    if policy.block_cloud_metadata:
        if str(addr) in _CLOUD_METADATA_IPS:
            return "cloud metadata endpoint"
        if any(addr in net for net in _CLOUD_METADATA_NETWORKS):
            return "cloud metadata endpoint"

    return None


# ---------------------------------------------------------------------------
# Public validation functions
# ---------------------------------------------------------------------------


def validate_resolved_ip(ip_str: str, policy: SSRFPolicy) -> None:
    """Validate a resolved IP address against the SSRF policy.

    IPv4-mapped and NAT64 IPv6 addresses are checked as the IPv4 address
    they embed.

    Raises:
        SSRFBlockedError: If *ip_str* is not a valid IP address or the
            address is blocked by *policy*.
    """
    try:
        addr = ipaddress.ip_address(ip_str)
    except ValueError as exc:
        raise SSRFBlockedError("invalid IP address") from exc

    if isinstance(addr, ipaddress.IPv6Address):
        inner = _extract_embedded_ipv4(addr)
        if inner is not None:
            addr = inner

    reason = _ip_in_blocked_networks(addr, policy)
    if reason is not None:
        raise SSRFBlockedError(reason)


def validate_hostname(hostname: str, policy: SSRFPolicy) -> None:
    """Validate a hostname against the SSRF policy.

    Matching is case-insensitive and ignores trailing dots.

    Raises:
        SSRFBlockedError: If the hostname is blocked.
    """
    # A trailing dot only marks the name as absolute in DNS, so "localhost."
    # names the same host as "localhost". Strip it so it cannot be used to
    # slip past the blocklists.
    lower = hostname.lower().rstrip(".")

    if policy.block_localhost and lower in _LOCALHOST_NAMES:
        raise SSRFBlockedError("localhost address")

    if policy.block_cloud_metadata and lower in _CLOUD_METADATA_HOSTNAMES:
        raise SSRFBlockedError("cloud metadata endpoint")

    if policy.block_k8s_internal and lower.endswith(_K8S_SUFFIX):
        raise SSRFBlockedError("Kubernetes internal DNS")


def _effective_allowed_hosts(policy: SSRFPolicy) -> frozenset[str]:
    """Return allowed_hosts, augmented for local environments.

    When the ``LANGCHAIN_ENV`` environment variable starts with ``"local"``,
    ``"localhost"`` and ``"testserver"`` are added to the policy's hosts.
    """
    if os.environ.get("LANGCHAIN_ENV", "").startswith("local"):
        return policy.allowed_hosts | frozenset({"localhost", "testserver"})
    return policy.allowed_hosts


async def validate_url(url: str, policy: SSRFPolicy = SSRFPolicy()) -> None:
    """Validate a URL against the SSRF policy, including DNS resolution.

    This is the primary entry-point for async code paths. It delegates
    scheme/hostname/allowed-hosts checks to `validate_url_sync`, then
    resolves DNS and validates every resolved IP.

    The default *policy* instance is shared between calls; this is safe
    because `SSRFPolicy` is frozen.

    Raises:
        SSRFBlockedError: If the URL violates the policy or DNS resolution
            fails.
        ValueError: Propagated unchanged from `urllib.parse` for malformed
            URLs (for example an out-of-range port).
    """
    parsed = urllib.parse.urlparse(url)
    hostname = parsed.hostname or ""

    validate_url_sync(url, policy)

    # Explicitly allowed hosts skip DNS resolution and IP validation.
    allowed = {h.lower() for h in _effective_allowed_hosts(policy)}
    if hostname.lower() in allowed:
        return

    scheme = (parsed.scheme or "").lower()
    port = parsed.port or (443 if scheme == "https" else 80)
    try:
        # getaddrinfo blocks, so run it in a worker thread.
        addrinfo = await asyncio.to_thread(
            socket.getaddrinfo, hostname, port, type=socket.SOCK_STREAM
        )
    except socket.gaierror as exc:
        msg = "DNS resolution failed"
        raise SSRFBlockedError(msg) from exc

    # Every resolved address must pass; one blocked address rejects the URL.
    for _family, _type, _proto, _canonname, sockaddr in addrinfo:
        validate_resolved_ip(str(sockaddr[0]), policy)


def validate_url_sync(url: str, policy: SSRFPolicy = SSRFPolicy()) -> None:
    """Synchronous URL validation (no DNS resolution).

    Suitable for Pydantic validators and other sync contexts. Checks scheme
    and hostname patterns only - use `validate_url` for full DNS-aware checking.
    A hostname that is an IP literal is validated as an IP address.

    Raises:
        SSRFBlockedError: If the URL violates the policy.
        ValueError: Propagated unchanged from `urllib.parse` for URLs it
            cannot parse.
    """
    parsed = urllib.parse.urlparse(url)

    scheme = (parsed.scheme or "").lower()
    if scheme not in policy.allowed_schemes:
        msg = f"scheme '{scheme}' not allowed"
        raise SSRFBlockedError(msg)

    hostname = parsed.hostname
    if not hostname:
        msg = "missing hostname"
        raise SSRFBlockedError(msg)

    allowed = _effective_allowed_hosts(policy)
    if hostname.lower() in {h.lower() for h in allowed}:
        return

    # Only the parse sits inside the try, so a ValueError raised by the
    # validators themselves is never mistaken for "not an IP literal".
    try:
        ipaddress.ip_address(hostname)
    except ValueError:
        is_ip_literal = False
    else:
        is_ip_literal = True

    if is_ip_literal:
        validate_resolved_ip(hostname, policy)
    else:
        validate_hostname(hostname, policy)
Same data, no extra tab — call code_get_file + code_get_findings over MCP from Claude/Cursor/Copilot.