From 17a94787261c6310de28dffe65446e0a45d1935c Mon Sep 17 00:00:00 2001 From: HS-Gray <1978489546@qq.com> Date: Thu, 30 Apr 2026 14:23:13 +0800 Subject: [PATCH 1/2] =?UTF-8?q?python=20sdk=E6=94=AF=E6=8C=81=E5=9F=9F?= =?UTF-8?q?=E5=90=8D=E5=AE=B9=E7=81=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tencentcloud/common/abstract_client.py | 83 +++- tencentcloud/common/domain_failover.py | 216 ++++++++++ tencentcloud/common/http/request.py | 11 +- tencentcloud/common/profile/client_profile.py | 35 +- tests/unit/test_dns_failures.py | 273 +++++++++++++ tests/unit/test_domain_failover.py | 380 ++++++++++++++++++ 6 files changed, 990 insertions(+), 8 deletions(-) create mode 100644 tencentcloud/common/domain_failover.py create mode 100644 tests/unit/test_dns_failures.py create mode 100644 tests/unit/test_domain_failover.py diff --git a/tencentcloud/common/abstract_client.py b/tencentcloud/common/abstract_client.py index 698f52f29..292580b8c 100644 --- a/tencentcloud/common/abstract_client.py +++ b/tencentcloud/common/abstract_client.py @@ -41,6 +41,9 @@ from tencentcloud.common.profile.client_profile import ClientProfile, RegionBreakerProfile from tencentcloud.common.sign import Sign from tencentcloud.common.circuit_breaker import CircuitBreaker +from tencentcloud.common.domain_failover import ( + DomainFailoverManager, _classify_exception, is_failover_triggered, +) from tencentcloud.common.retry import NoopRetryer warnings.filterwarnings("ignore", module="tencentcloud", category=UserWarning) @@ -89,6 +92,10 @@ def __init__(self, credential, region, profile=None): if self.profile.region_breaker_profile is None: self.profile.region_breaker_profile = RegionBreakerProfile() self.circuit_breaker = CircuitBreaker(self.profile.region_breaker_profile) + + # 域名级容灾管理器(默认启用,.com → .com.cn → .cn) + self.domain_failover = DomainFailoverManager(self.profile.domain_failover_profile) + if self.profile.request_client: self.request_client = self._sdkVersion + "; " + self.profile.request_client else: @@ -427,16 +434,80 @@ def _call(self, action, params, options=None, headers=None): headers["X-TC-TraceId"] = str(uuid.uuid4()) if not self.profile.disable_region_breaker: return self._call_with_region_breaker(action, params, options, headers) - req = RequestInternal(self._get_endpoint(options=options), - self.profile.httpProfile.reqMethod, - self._requestPath, - header=headers) - self._build_req_inter(action, params, req, options) + # apigw_endpoint 由用户显式指定,跳过域名切换 if self.profile.httpProfile.apigw_endpoint: + req = RequestInternal(self._get_endpoint(options=options), + self.profile.httpProfile.reqMethod, + self._requestPath, + header=headers) + self._build_req_inter(action, params, req, options) req.host = self.profile.httpProfile.apigw_endpoint req.header["Host"] = req.host - return self.request.send_request(req) + return self.request.send_request(req) + + origin_endpoint = self._get_endpoint(options=options) + # 未启用域名容灾:按原逻辑一次性请求 + if not self.domain_failover.enabled: + req = RequestInternal(origin_endpoint, + self.profile.httpProfile.reqMethod, + self._requestPath, + header=headers) + self._build_req_inter(action, params, req, options) + return self.request.send_request(req) + + return self._call_with_domain_failover(origin_endpoint, action, params, options, headers) + + def _call_with_domain_failover(self, origin_endpoint, action, params, options, headers): + """按候选域名顺序串行尝试,首次可切换异常即切到下一个候选。 + + 每个候选都携带独立的断路器;任何一次成功都会重置对应候选的失败计数。 + 全部候选失败,抛出最后一次的 TencentCloudSDKException(异常链保留)。 + """ + usable = self.domain_failover.iter_available_candidates(origin_endpoint) + last_err = None + + for idx, (cand_host, breaker, generation) in enumerate(usable): + # 每个候选都需要重新构造 req 并重新签名(因为 Host 变了,TC3 签名里 + # `host:` 也要跟着变)。注意 headers 是外部传入的字典,为避免签名残留 + # 污染下个候选,这里深拷贝一份。 + cand_headers = dict(headers) + req = RequestInternal(cand_host, + self.profile.httpProfile.reqMethod, + self._requestPath, + header=cand_headers) + self._build_req_inter(action, params, req, options) + # 覆写 Host,确保即便老签名版本 (HmacSHA1/256) 没设 Host 也能生效 + req.header["Host"] = cand_host + + # ProxyConnection.request_host 会在请求时作为 setdefault("Host") 的兜底; + # 为确保 HTTP 层也看到正确的 Host,这里一并同步(不影响 rootDomain 配置)。 + prev_request_host = self.request.conn.request_host + self.request.conn.request_host = cand_host + try: + resp = self.request.send_request(req) + breaker.after_requests(generation, True) + return resp + except TencentCloudSDKException as e: + kind = _classify_exception(e) + if is_failover_triggered(kind): + # 触发切换:反馈失败并尝试下一个候选 + breaker.after_requests(generation, False) + last_err = e + logger.debug( + "domain_failover: candidate=%s kind=%s err=%s, try next", + cand_host, kind, e) + continue + # 非网络类异常:不切换,直接抛;不影响断路器计数(避免业务错误污染) + raise + finally: + self.request.conn.request_host = prev_request_host + + # 全部候选失败:抛出最后一次的异常(异常链已经通过 `raise ... from e` 保留) + if last_err is not None: + raise last_err + # 理论上走不到这里 + raise TencentCloudSDKException("ClientNetworkError", "all failover candidates failed") def call(self, action, params, options=None, headers=None): diff --git a/tencentcloud/common/domain_failover.py b/tencentcloud/common/domain_failover.py new file mode 100644 index 000000000..8012cf6d3 --- /dev/null +++ b/tencentcloud/common/domain_failover.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2026 Tencent Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +""" +域名级容灾切换模块。 + +当 SDK 发起的请求命中 DNS/TCP/TLS 类故障(详见 `tests/dns_failure_test/ +DNS_FAILURE_SDK_EXCEPTION_ANALYSIS.md`)时,本模块按"主域名 → .com.cn → +.cn"的顺序串行重试,并为每个候选域名维护一个独立的 CircuitBreaker。 + +规则: + - *.tencentcloudapi.com -> *.tencentcloudapi.com.cn -> *.tencentcloudapi.cn + - *.{region}.tencentcloudapi.com -> *.{region}.tencentcloudapi.com.cn -> *.{region}.tencentcloudapi.cn + - *.internal.tencentcloudapi.com -> 按通用规则切换 + - *.intl.tencentcloudapi.com -> 不切换(国际站) +""" +import json +import logging +import socket +import threading + +try: + import ssl as _ssl +except ImportError: # pragma: no cover + _ssl = None + +from tencentcloud.common.circuit_breaker import CircuitBreaker +from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException + +logger = logging.getLogger("tencentcloud_sdk_common") + +# 主域名根 → 备份候选根(按优先级排列) +_FAILOVER_SUFFIX_RULES = [ + ("tencentcloudapi.com", ["tencentcloudapi.com.cn", "tencentcloudapi.cn"]), +] + +# 国际站域名后缀:严格匹配,不做切换 +_INTL_SUFFIX = ".intl.tencentcloudapi.com" + + +def _classify_exception(exc): + """沿 __cause__ / __context__ 链识别原始异常类型,返回可触发域名切换的 kind。 + + 返回值: + - "DNS_NXDOMAIN" / "DNS_TIMEOUT" -> A 类 DNS 故障 + - "TCP_CONN_REFUSED" -> B 类 连接被拒 + - "TCP_READ_TIMEOUT" -> B 类 读超时 + - "TLS_ERROR" -> C 类 证书错误 + - "JSON_DECODE_ERROR" -> C 类 JSON 解析失败(不切换) + - None -> 非网络类异常(不切换) + """ + # 业务方法层的 JSONDecodeError 包装 + if isinstance(exc, TencentCloudSDKException) and exc.get_code() == "JSONDecodeError": + return "JSON_DECODE_ERROR" + + # 沿异常链找到原始异常 + raw = None + if isinstance(exc, TencentCloudSDKException): + raw = exc.__cause__ or exc.__context__ + else: + raw = exc + if raw is None: + return None + + # 走到链末端 + root = raw + seen = set() + while True: + nxt = getattr(root, "__cause__", None) or getattr(root, "__context__", None) + if nxt is None or id(nxt) in seen: + break + seen.add(id(root)) + root = nxt + + # 延迟导入 requests,避免影响未使用 http 的调用路径 + try: + import requests + req_conn_err = requests.exceptions.ConnectionError + req_read_timeout = requests.exceptions.ReadTimeout + req_connect_timeout = requests.exceptions.ConnectTimeout + req_ssl_error = requests.exceptions.SSLError + except ImportError: # pragma: no cover + req_conn_err = req_read_timeout = req_connect_timeout = req_ssl_error = () + + # TLS 错误 + if req_ssl_error and isinstance(raw, req_ssl_error): + return "TLS_ERROR" + if _ssl is not None and isinstance(root, _ssl.SSLError): + return "TLS_ERROR" + + # 读超时 + if req_read_timeout and isinstance(raw, req_read_timeout): + return "TCP_READ_TIMEOUT" + if isinstance(root, socket.timeout): + return "TCP_READ_TIMEOUT" + + # 连接超时 + if req_connect_timeout and isinstance(raw, req_connect_timeout): + return "TCP_READ_TIMEOUT" + + # 连接被拒(包括 DNS 返回 0.0.0.0 / 被劫持到无服务 IP) + if isinstance(root, ConnectionRefusedError): + return "TCP_CONN_REFUSED" + + # DNS 解析失败 + if isinstance(root, socket.gaierror): + errno = getattr(root, "errno", None) + # EAI_AGAIN = -3 on glibc, 11002 on Windows → 多为 DNS 超时 + if errno in (socket.EAI_AGAIN, -3, 11002): + return "DNS_TIMEOUT" + return "DNS_NXDOMAIN" + + # 其他 ConnectionError(兜底也触发切换,避免漏判) + if req_conn_err and isinstance(raw, req_conn_err): + return "DNS_NXDOMAIN" + + return None + + +def is_failover_triggered(kind): + """kind 是否触发域名切换。JSON_DECODE_ERROR 和 None 均不触发。""" + return kind in ("DNS_NXDOMAIN", "DNS_TIMEOUT", + "TCP_CONN_REFUSED", "TCP_READ_TIMEOUT", "TLS_ERROR") + + +def _split_host_suffix(host): + """将 host 按 "tencentcloudapi.com" 等已知后缀拆分为 (prefix, matched_suffix)。 + 若未命中任何受支持后缀则返回 (None, None)。 + """ + if not host: + return None, None + for suffix, _ in _FAILOVER_SUFFIX_RULES: + if host == suffix or host.endswith("." + suffix): + prefix = host[: -len(suffix)] # 含结尾的 '.'(或空串) + return prefix, suffix + return None, None + + +def build_candidates(host): + """根据原始 host 构造候选域名序列,首项始终是 host 自身。 + + 若 host 命中 `*.intl.tencentcloudapi.com`,则返回 `[host]`(不切换)。 + 若 host 未命中任何受支持后缀(比如用户自定义 endpoint / ip),也返回 `[host]`。 + """ + if not host: + return [host] + + # 国际站不切换 + if host == _INTL_SUFFIX.lstrip(".") or host.endswith(_INTL_SUFFIX): + return [host] + + prefix, suffix = _split_host_suffix(host) + if suffix is None: + return [host] + + candidates = [host] + for alt in dict(_FAILOVER_SUFFIX_RULES)[suffix]: + candidates.append(prefix + alt) + return candidates + + +class DomainFailoverManager(object): + """按候选域名维度维护断路器的容器。 + + 生命周期:AbstractClient 持有一个实例;每个候选域名首次出现时动态 + 创建 CircuitBreaker。不同 client 实例间不共享(与现有 region_breaker + 的作用域一致)。 + """ + + def __init__(self, profile): + """:param profile: `DomainFailoverProfile`""" + self._profile = profile + self._breakers = {} + self._lock = threading.Lock() + + @property + def enabled(self): + return self._profile is not None and self._profile.enabled + + def get_breaker(self, host): + with self._lock: + br = self._breakers.get(host) + if br is None: + br = CircuitBreaker(self._profile.breaker_setting) + self._breakers[host] = br + return br + + def iter_available_candidates(self, host): + """按顺序返回 (candidate_host, breaker, generation)。 + + - 若断路器为 OPEN,则跳过该候选;若全部 OPEN,则降级为"仍然尝试主域名" + 以避免流量全部被拒(与现有 region_breaker 行为一致)。 + - 调用方负责调用 breaker.after_requests(generation, success) 回写结果。 + """ + candidates = build_candidates(host) + usable = [] + for c in candidates: + br = self.get_breaker(c) + generation, need_skip = br.before_requests() + if need_skip: + logger.debug("domain_failover: skip %s (breaker open)", c) + continue + usable.append((c, br, generation)) + + if not usable: + # 全部断路器都 OPEN,这种情况也要给一次机会,选择主域名 + br = self.get_breaker(candidates[0]) + generation, _ = br.before_requests() + usable.append((candidates[0], br, generation)) + return usable diff --git a/tencentcloud/common/http/request.py b/tencentcloud/common/http/request.py index dc4581eb8..40ddcc3a1 100644 --- a/tencentcloud/common/http/request.py +++ b/tencentcloud/common/http/request.py @@ -114,8 +114,17 @@ def send_request(self, req_inter): http_resp = self._request(req_inter) self.request_size = self.conn.request_length return http_resp + except TencentCloudSDKException: + # 已经是 SDK 异常(例如 _request 里抛出的 ClientParamsError),原样抛出,避免二次包装 + raise except Exception as e: - raise TencentCloudSDKException("ClientNetworkError", str(e)) + # 保留原有的 ClientNetworkError 外壳以兼容重试机制(StandardRetryer 依赖该错误码), + # 同时使用 PEP 3134 标准的 `raise ... from e` 建立异常链,上层可通过 + # e.__cause__ 直接拿到原始异常(如 requests.exceptions.ConnectionError / + # ReadTimeout / SSLError 等),并沿 __cause__ 继续追到末端 socket.gaierror、 + # ConnectionRefusedError、socket.timeout、CertificateError 等,从而对 + # DNS/网络故障做精细化容灾判断。 + raise TencentCloudSDKException("ClientNetworkError", str(e)) from e class RequestInternal(object): diff --git a/tencentcloud/common/profile/client_profile.py b/tencentcloud/common/profile/client_profile.py index 6802133ce..8ed29f533 100644 --- a/tencentcloud/common/profile/client_profile.py +++ b/tencentcloud/common/profile/client_profile.py @@ -34,11 +34,14 @@ class ClientProfile(object): :type request_client: str :param retryer: Custom retry configuration. :type retryer: :class:`tencentcloud.common.retry.StandardRetryer` + :param domain_failover_profile: The domain-level failover profile. + :type domain_failover_profile: :class:`DomainFailoverProfile` """ unsignedPayload = False def __init__(self, signMethod=None, httpProfile=None, language="zh-CN", - disable_region_breaker=True, region_breaker_profile=None, request_client=None, retryer=None): + disable_region_breaker=True, region_breaker_profile=None, request_client=None, retryer=None, + domain_failover_profile=None): self.httpProfile = HttpProfile() if httpProfile is None else httpProfile self.signMethod = "TC3-HMAC-SHA256" if signMethod is None else signMethod valid_language = ["zh-CN", "en-US"] @@ -59,6 +62,36 @@ def __init__(self, signMethod=None, httpProfile=None, language="zh-CN", self.retryer = retryer + # 域名级容灾(*.tencentcloudapi.com → .com.cn → .cn) + self.domain_failover_profile = domain_failover_profile or DomainFailoverProfile() + + +class DomainFailoverProfile(object): + """域名级容灾切换配置。 + + 当请求发生 DNS / TCP / TLS 类故障时,自动按 ``tencentcloudapi.com → + tencentcloudapi.com.cn → tencentcloudapi.cn`` 的顺序串行切换域名重试。 + + - 对 ``*.intl.tencentcloudapi.com`` (严格后缀匹配)不切换。 + - 切换仅在当次请求的 ``ApiRequest.host`` 上生效,不会改写 ``HttpProfile.rootDomain``。 + - 每个候选域名都有独立的 ``CircuitBreaker``,失败到达阈值后在 ``timeout`` 时间内 + 跳过该候选,避免反复撞墙。 + + :param enabled: 是否启用,默认 ``True``。设置为 ``False`` 可完全关闭本机制。 + :type enabled: bool + :param breaker_setting: 候选域名级断路器的阈值配置,未显式指定时使用默认 ``RegionBreakerProfile`` + 的阈值(max_fail_num=5, timeout=60s 等),详见 :class:`RegionBreakerProfile`。 + :type breaker_setting: :class:`RegionBreakerProfile` + """ + + def __init__(self, enabled=True, breaker_setting=None): + self.enabled = enabled + # 复用 RegionBreakerProfile 的阈值字段(max_fail_num / max_fail_percent / + # window_interval / timeout / max_requests),断路器会从中读取配置 + if breaker_setting is None: + breaker_setting = RegionBreakerProfile() + self.breaker_setting = breaker_setting + class RegionBreakerProfile(object): """RegionBreaker profile. diff --git a/tests/unit/test_dns_failures.py b/tests/unit/test_dns_failures.py new file mode 100644 index 000000000..25804b967 --- /dev/null +++ b/tests/unit/test_dns_failures.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- +""" +DNS 故障场景对 Tencent Cloud Python SDK 异常表现的测试脚本 + +参考文档: + https://iwiki.woa.com/p/4019996855 《2.3 SDK 视角:DNS 故障的异常表现》 + +本脚本针对文档中列出的 A/B/C 三大类、共 11 种 DNS 故障场景,构造等价的本地/网络 +模拟条件,通过调用 SDK 的 CVM.DescribeRegions 接口触发异常,并打印: + 1) 最外层 SDK 抛出的 TencentCloudSDKException(code + message) + 2) SDK 捕获到的"原始异常"的真实类型与 repr(通过 PEP 3134 标准异常链 + e.__cause__ 获取;若该层未用 ``raise ... from``,则回退到 e.__context__) + 3) 耗时,以辅助判断故障类型 + +运行方式(不依赖真实腾讯云账号,SecretId/SecretKey 可任意填,域名错误时请求根本 +不会到达签名校验阶段): + python tests/dns_failure_test/test_dns_failures.py +""" +from __future__ import print_function + +import os +import socket +import sys +import time +import traceback + +# 强制使用工程根目录下的 SDK +_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _PROJECT_ROOT not in sys.path: + sys.path.insert(0, _PROJECT_ROOT) + +from tencentcloud.common import credential +from tencentcloud.common.profile.client_profile import ClientProfile +from tencentcloud.common.profile.http_profile import HttpProfile +from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException +from tencentcloud.cvm.v20170312 import cvm_client, models + + +def make_client(endpoint=None, req_timeout=10, disable_region_breaker=True, scheme=None): + cred = credential.Credential("AKIDFakeIdForDnsTest", "FakeKeyForDnsTest") + http_profile = HttpProfile() + http_profile.reqTimeout = req_timeout + if scheme: + http_profile.scheme = scheme + if endpoint: + http_profile.endpoint = endpoint + client_profile = ClientProfile() + client_profile.httpProfile = http_profile + client_profile.disable_region_breaker = disable_region_breaker + return cvm_client.CvmClient(cred, "ap-guangzhou", client_profile) + + +def run_case(title, case_fn): + print("\n" + "=" * 80) + print("[CASE] " + title) + print("=" * 80) + t0 = time.time() + try: + case_fn() + print("!!! 本次调用没有触发异常,用例不符合预期 !!!") + except TencentCloudSDKException as e: + cost = time.time() - t0 + # 通过 Python 标准异常链获取原始异常:优先 __cause__(raise ... from e), + # 回退 __context__(except 块内裸 raise 时 CPython 自动记录)。 + raw = e.__cause__ or e.__context__ + print("SDK Exception code : %s" % e.code) + print("SDK Exception message : %s" % e.message) + if raw is not None: + raw_type = "%s.%s" % (type(raw).__module__, type(raw).__name__) + print("Raw exception type : %s" % raw_type) + print("Raw exception repr : %r" % (raw,)) + # 进一步遍历 __cause__/__context__ 链,展示 requests → urllib3 → socket 的完整传递 + chain = [] + cur = raw + seen = set() + while cur is not None and id(cur) not in seen: + seen.add(id(cur)) + chain.append("%s.%s: %s" % (type(cur).__module__, type(cur).__name__, cur)) + cur = getattr(cur, "__cause__", None) or getattr(cur, "__context__", None) + if len(chain) > 1: + print("Raw exception chain :") + for i, line in enumerate(chain): + print(" [%d] %s" % (i, line)) + else: + print("Raw exception : (无 __cause__/__context__)") + print("耗时 : %.3fs" % cost) + except Exception as e: + cost = time.time() - t0 + print("未被 SDK 包装的异常: %s: %s (%.3fs)" % (type(e).__name__, e, cost)) + traceback.print_exc() + + +def call_describe_regions(client): + req = models.DescribeRegionsRequest() + client.DescribeRegions(req) + + +# ========== A 类:DNS 解析失败 ========== + +def case_a_operator_nxdomain(): + client = make_client(endpoint="cvm.definitely-not-exist-nxdomain.invalid") + call_describe_regions(client) + + +def _patch_gai_with(fn): + import socket as _socket + real_gai = _socket.getaddrinfo + + def wrapper(host, *args, **kwargs): + if host and ("tencentcloudapi.com" in host or host.startswith("cvm.")): + return fn(host, *args, **kwargs) + return real_gai(host, *args, **kwargs) + + _socket.getaddrinfo = wrapper + return real_gai + + +def _restore_gai(real_gai): + socket.getaddrinfo = real_gai + + +def case_a_public_dns_timeout(): + def fake(host, *args, **kwargs): + time.sleep(2) + raise socket.gaierror(-3, "Temporary failure in name resolution") + real = _patch_gai_with(fake) + try: + client = make_client(req_timeout=10) + call_describe_regions(client) + finally: + _restore_gai(real) + + +def case_a_root_server_ddos(): + case_a_public_dns_timeout() + + +def case_a_tld_ns_tampered(): + case_a_operator_nxdomain() + + +def case_a_authoritative_dns_ddos(): + def fake(host, *args, **kwargs): + raise socket.gaierror(-2, "Name or service not known") + real = _patch_gai_with(fake) + try: + client = make_client(req_timeout=10) + call_describe_regions(client) + finally: + _restore_gai(real) + + +def case_a_gslb_misconfig(): + case_a_authoritative_dns_ddos() + + +# ========== B 类:连接层异常 ========== + +def case_b_operator_return_zero_ip(): + def fake(host, *args, **kwargs): + return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', ("0.0.0.0", 443))] + real = _patch_gai_with(fake) + try: + client = make_client(req_timeout=5) + call_describe_regions(client) + finally: + _restore_gai(real) + + +def case_b_hijack_port_closed(): + def fake(host, *args, **kwargs): + return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', ("127.0.0.1", 1))] + real = _patch_gai_with(fake) + try: + client = make_client(req_timeout=5) + call_describe_regions(client) + finally: + _restore_gai(real) + + +def case_b_hijack_no_response(): + def fake(host, *args, **kwargs): + # 192.0.2.1 是 RFC 5737 TEST-NET-1 保留地址,公网不可路由 + return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', ("192.0.2.1", 443))] + real = _patch_gai_with(fake) + try: + client = make_client(req_timeout=3) + call_describe_regions(client) + finally: + _restore_gai(real) + + +# ========== C 类:应用层异常 ========== + +def case_c_hijack_tls_mismatch(): + try: + baidu_ip = socket.gethostbyname("www.baidu.com") + except Exception: + print("[SKIP] 无法解析 www.baidu.com,跳过此用例") + return + + def fake(host, *args, **kwargs): + return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', (baidu_ip, 443))] + real = _patch_gai_with(fake) + try: + client = make_client(req_timeout=10) + call_describe_regions(client) + finally: + _restore_gai(real) + + +def case_c_hijack_http_unexpected_body(): + import threading + try: + from http.server import BaseHTTPRequestHandler, HTTPServer + except ImportError: + from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer + + class Handler(BaseHTTPRequestHandler): + def do_POST(self): + length = int(self.headers.get('Content-Length', 0)) + self.rfile.read(length) + self.send_response(200) + self.send_header('Content-Type', 'text/plain') + self.end_headers() + self.wfile.write(b"hello world (not a tencent cloud api response)") + + def do_GET(self): + self.send_response(200) + self.send_header('Content-Type', 'text/plain') + self.end_headers() + self.wfile.write(b"hello world (not a tencent cloud api response)") + + def log_message(self, fmt, *a): + return + + srv = HTTPServer(("127.0.0.1", 0), Handler) + port = srv.server_address[1] + t = threading.Thread(target=srv.serve_forever) + t.daemon = True + t.start() + try: + client = make_client(endpoint="127.0.0.1:%d" % port, scheme="http", + req_timeout=5) + call_describe_regions(client) + finally: + srv.shutdown() + + +CASES = [ + ("A-3 operator DNS blocking (NXDOMAIN)", case_a_operator_nxdomain), + ("A-3 public DNS down (timeout)", case_a_public_dns_timeout), + ("A-4 root server DDoS (timeout)", case_a_root_server_ddos), + ("A-5 TLD NS tampered (NXDOMAIN)", case_a_tld_ns_tampered), + ("A-6 authoritative DNS DDoS (SERVFAIL)", case_a_authoritative_dns_ddos), + ("A-6 GSLB misconfig (NXDOMAIN)", case_a_gslb_misconfig), + + ("B-3 operator blocking (return 0.0.0.0)", case_b_operator_return_zero_ip), + ("B-5 hijack (port closed)", case_b_hijack_port_closed), + ("B-5 hijack (no response/timeout)", case_b_hijack_no_response), + + ("C-5 hijack (HTTPS cert mismatch)", case_c_hijack_tls_mismatch), + ("C-5 hijack (HTTP 200 unexpected body)", case_c_hijack_http_unexpected_body), +] + + +def main(): + for title, fn in CASES: + run_case(title, fn) + + +if __name__ == "__main__": + main() diff --git a/tests/unit/test_domain_failover.py b/tests/unit/test_domain_failover.py new file mode 100644 index 000000000..e815b3e21 --- /dev/null +++ b/tests/unit/test_domain_failover.py @@ -0,0 +1,380 @@ +# -*- coding: utf-8 -*- +""" +域名切换容灾(Domain Failover)功能的测试脚本。 + +覆盖: + 1. 候选域名生成(build_candidates)的白盒验证。 + 2. 异常识别(_classify_exception)对 6 类故障的分类是否准确。 + 3. 主域名故障时是否能按 [主 → .com.cn → .cn] 顺序切换。 + 4. `.intl.` 域名是否严格不切换。 + 5. 自定义 endpoint / IP 是否不受影响。 + 6. 断路器是否在连续失败后跳过坏候选。 + 7. JSON_DECODE_ERROR 场景是否**不**触发切换。 + +运行方式: + python tests/dns_failure_test/test_domain_failover.py +""" +from __future__ import print_function + +import json +import os +import socket +import sys +import threading +import time +import traceback + +# 强制使用工程根目录下的 SDK,而非系统全局安装的旧版本 +_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +if _PROJECT_ROOT not in sys.path: + sys.path.insert(0, _PROJECT_ROOT) + +try: + from http.server import BaseHTTPRequestHandler, HTTPServer +except ImportError: # py2 + from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer + +from tencentcloud.common import credential +from tencentcloud.common.profile.client_profile import ClientProfile, DomainFailoverProfile, RegionBreakerProfile +from tencentcloud.common.profile.http_profile import HttpProfile +from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException +from tencentcloud.common.domain_failover import ( + build_candidates, _classify_exception, is_failover_triggered, +) +from tencentcloud.cvm.v20170312 import cvm_client, models + + +# --------------------------------------------------------------------------- # +# 1) 白盒测试:候选域名生成 +# --------------------------------------------------------------------------- # + +def test_build_candidates(): + print("\n" + "=" * 80) + print("[UNIT] build_candidates") + print("=" * 80) + cases = [ + ("cvm.tencentcloudapi.com", + ["cvm.tencentcloudapi.com", "cvm.tencentcloudapi.com.cn", "cvm.tencentcloudapi.cn"]), + ("cvm.ap-guangzhou.tencentcloudapi.com", + ["cvm.ap-guangzhou.tencentcloudapi.com", + "cvm.ap-guangzhou.tencentcloudapi.com.cn", + "cvm.ap-guangzhou.tencentcloudapi.cn"]), + ("cvm.internal.tencentcloudapi.com", + ["cvm.internal.tencentcloudapi.com", + "cvm.internal.tencentcloudapi.com.cn", + "cvm.internal.tencentcloudapi.cn"]), + ("cvm.intl.tencentcloudapi.com", ["cvm.intl.tencentcloudapi.com"]), + ("cvm.ap-guangzhou.intl.tencentcloudapi.com", + ["cvm.ap-guangzhou.intl.tencentcloudapi.com"]), + ("custom.example.com", ["custom.example.com"]), + ("127.0.0.1:8080", ["127.0.0.1:8080"]), + ] + passed = 0 + for host, expected in cases: + got = build_candidates(host) + ok = (got == expected) + print((" [OK] " if ok else " [FAIL] ") + host) + if not ok: + print(" expected=%s" % expected) + print(" got =%s" % got) + passed += int(ok) + print("Summary: %d/%d passed" % (passed, len(cases))) + + +# --------------------------------------------------------------------------- # +# 2) 白盒测试:_classify_exception +# --------------------------------------------------------------------------- # + +def test_classify_exception(): + print("\n" + "=" * 80) + print("[UNIT] _classify_exception") + print("=" * 80) + import requests + + def _wrap(raw_exc): + """模拟网络层包装:TencentCloudSDKException('ClientNetworkError', ...) from e""" + try: + raise raw_exc + except Exception as e: + try: + raise TencentCloudSDKException("ClientNetworkError", str(e)) from e + except TencentCloudSDKException as tce: + return tce + + def _chain(inner, outer_cls): + """构造 outer(inner) 的链(outer 捕获 inner 再抛自己),用于模拟 requests 对 + 底层异常的包装。""" + try: + try: + raise inner + except Exception: + raise outer_cls(str(inner)) + except outer_cls as e: + return e + + cases = [ + # DNS NXDOMAIN/SERVFAIL → errno=-2 + (_wrap(_chain(socket.gaierror(-2, "Name or service not known"), + requests.exceptions.ConnectionError)), "DNS_NXDOMAIN"), + # DNS 超时 → errno=-3 + (_wrap(_chain(socket.gaierror(-3, "Temporary failure in name resolution"), + requests.exceptions.ConnectionError)), "DNS_TIMEOUT"), + # TCP Connection refused + (_wrap(_chain(ConnectionRefusedError(111, "Connection refused"), + requests.exceptions.ConnectionError)), "TCP_CONN_REFUSED"), + # 读超时 + (_wrap(_chain(socket.timeout("timed out"), + requests.exceptions.ReadTimeout)), "TCP_READ_TIMEOUT"), + # TLS 错误 + (_wrap(_chain(Exception("cert mismatch"), + requests.exceptions.SSLError)), "TLS_ERROR"), + # JSON 解析失败:业务方法层会包装成 TencentCloudSDKException(code="JSONDecodeError",...) + (TencentCloudSDKException("JSONDecodeError", "Expecting value: line 1 column 1 (char 0)"), + "JSON_DECODE_ERROR"), + ] + + passed = 0 + for exc, expected in cases: + got = _classify_exception(exc) + ok = (got == expected) + print((" [OK] " if ok else " [FAIL] ") + + "%s expected=%s got=%s" % (type(exc).__name__, expected, got)) + passed += int(ok) + print("Summary: %d/%d passed" % (passed, len(cases))) + + # 验证 is_failover_triggered 的口径 + assert is_failover_triggered("DNS_NXDOMAIN") is True + assert is_failover_triggered("DNS_TIMEOUT") is True + assert is_failover_triggered("TCP_CONN_REFUSED") is True + assert is_failover_triggered("TCP_READ_TIMEOUT") is True + assert is_failover_triggered("TLS_ERROR") is True + assert is_failover_triggered("JSON_DECODE_ERROR") is False + assert is_failover_triggered(None) is False + print(" [OK] is_failover_triggered semantics") + + +# --------------------------------------------------------------------------- # +# 3) 集成测试:主域名 DNS 失败 → 验证切换轨迹 +# --------------------------------------------------------------------------- # + +def make_client(endpoint, req_timeout=5, enabled=True, scheme=None): + cred = credential.Credential("AKIDFakeIdForDomainFailoverTest", + "FakeKeyForDomainFailoverTest") + http_profile = HttpProfile() + http_profile.reqTimeout = req_timeout + if scheme: + http_profile.scheme = scheme + http_profile.endpoint = endpoint + client_profile = ClientProfile() + client_profile.httpProfile = http_profile + client_profile.disable_region_breaker = True # 避免干扰 + client_profile.domain_failover_profile = DomainFailoverProfile(enabled=enabled) + return cvm_client.CvmClient(cred, "ap-guangzhou", client_profile) + + +class _GaiPatcher(object): + """记录哪些 host 被解析,可以控制哪些 host 返回 NXDOMAIN、哪些走真实解析。""" + + def __init__(self, resolvable_hosts=None, fake_ip=None): + """:param resolvable_hosts: 集合,在这个集合中的 host 会"解析成功" + 返回 fake_ip 或本地 127.0.0.1。 + :param fake_ip: 解析后的 IP 地址;None 时使用 127.0.0.1。 + """ + self.resolvable = set(resolvable_hosts or []) + self.fake_ip = fake_ip or "127.0.0.1" + self.resolved_log = [] + self._real = None + + def __enter__(self): + self._real = socket.getaddrinfo + + def fake(host, *args, **kwargs): + if host and ("tencentcloudapi" in host): + self.resolved_log.append(host) + if host in self.resolvable: + return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', (self.fake_ip, 443))] + raise socket.gaierror(-2, "Name or service not known") + return self._real(host, *args, **kwargs) + + socket.getaddrinfo = fake + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + socket.getaddrinfo = self._real + + +def run_case(title, fn): + print("\n" + "=" * 80) + print("[CASE] " + title) + print("=" * 80) + t0 = time.time() + try: + fn() + except Exception as e: + print("!!! 用例抛出未捕获的异常: %s: %s" % (type(e).__name__, e)) + traceback.print_exc() + print("耗时: %.3fs" % (time.time() - t0)) + + +def case_all_dns_fail_primary_and_backup(): + """所有 3 个候选都 DNS 失败:期望 SDK 抛异常,但日志中记录尝试了全部 3 个。""" + client = make_client("cvm.tencentcloudapi.com", req_timeout=3) + with _GaiPatcher() as p: + try: + client.DescribeRegions(models.DescribeRegionsRequest()) + print("!!! 未抛异常,不符合预期") + except TencentCloudSDKException as e: + pass + print("resolved hosts sequence:") + for h in p.resolved_log: + print(" - %s" % h) + expected = ["cvm.tencentcloudapi.com", + "cvm.tencentcloudapi.com.cn", + "cvm.tencentcloudapi.cn"] + actually_tried = [h for h in p.resolved_log if h in expected] + # 可能每个 host 因重试被解析多次,这里用"唯一化后包含 3 项"断言顺序 + uniq = [] + for h in actually_tried: + if h not in uniq: + uniq.append(h) + ok = (uniq == expected) + print("顺序校验: " + ("OK" if ok else "FAIL, got=%s" % uniq)) + + +def case_primary_fail_backup_ok(): + """主域名 DNS 失败,第二候选被解析到本地 HTTP server(我们让它返回假的非 JSON + → 会抛 JSONDecodeError,不触发再切换;但能证明已经切到了第二候选)。""" + # 启动本地 HTTP server + class Handler(BaseHTTPRequestHandler): + def do_POST(self): + length = int(self.headers.get('Content-Length', 0)) + self.rfile.read(length) + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(b'{"Response": {"Error": {"Code":"InternalFake","Message":"mocked"}, "RequestId":"mock-rid"}}') + + def do_GET(self): + self.do_POST() + + def log_message(self, fmt, *a): + return + + srv = HTTPServer(("127.0.0.1", 0), Handler) + port = srv.server_address[1] + t = threading.Thread(target=srv.serve_forever) + t.daemon = True + t.start() + try: + # 用 http + 端口,让 2 级候选走本地 server;同时让主域名 DNS 失败 + # 注意:build_candidates 基于 host 的后缀;我们用 "cvm.tencentcloudapi.com" + # 作为 endpoint,第二候选是 "cvm.tencentcloudapi.com.cn" + client = make_client("cvm.tencentcloudapi.com", req_timeout=3, scheme="http") + # 把 cvm.tencentcloudapi.com 做成解析失败,把 com.cn 解析到本地端口 + # 需要改监听地址使 HTTP client 能连上。用一个 Patch:主域名失败,第二域名成功。 + # 但由于 requests 会用 DNS 解析 host,我们把 com.cn 解析到 127.0.0.1, + # 然后另外劫持 urlparse / port:直接在 URL 路径里拼端口是最简方式。 + # 这里退而求其次:仅验证"切换发生",通过 _GaiPatcher 的 resolved_log 观察。 + with _GaiPatcher(resolvable_hosts={"cvm.tencentcloudapi.com.cn"}, + fake_ip="127.0.0.1") as p: + # 由于解析到了 127.0.0.1:443(非 HTTP server 端口),会连接拒绝 → 第二候选 + # 也失败,继续到第三候选 cvm.tencentcloudapi.cn → 再 DNS 失败 + try: + client.DescribeRegions(models.DescribeRegionsRequest()) + except TencentCloudSDKException as e: + pass + print("resolved hosts sequence:") + for h in p.resolved_log: + print(" - %s" % h) + # 期望至少尝试了前两个候选 + tried = set(p.resolved_log) + ok_primary = "cvm.tencentcloudapi.com" in tried + ok_backup = "cvm.tencentcloudapi.com.cn" in tried + print("主域名被尝试: %s" % ok_primary) + print("第二候选被尝试: %s" % ok_backup) + finally: + srv.shutdown() + + +def case_intl_no_failover(): + """intl 域名必须不做切换。""" + client = make_client("cvm.intl.tencentcloudapi.com", req_timeout=3) + with _GaiPatcher() as p: + try: + client.DescribeRegions(models.DescribeRegionsRequest()) + except TencentCloudSDKException: + pass + uniq_tried = [h for h in dict.fromkeys(p.resolved_log)] + print("resolved hosts: %s" % uniq_tried) + ok = (uniq_tried == ["cvm.intl.tencentcloudapi.com"]) + print("仅尝试主域名(intl 不切换): " + ("OK" if ok else "FAIL")) + + +def case_custom_endpoint_no_failover(): + """非 tencentcloudapi 后缀的 endpoint 不切换。""" + client = make_client("custom.example.invalid", req_timeout=3) + try: + client.DescribeRegions(models.DescribeRegionsRequest()) + except TencentCloudSDKException as e: + raw = e.__cause__ or e.__context__ + # 应只有一个候选被尝试(没法直接看 log,但可通过断路器的 get_breaker 数量判断) + breakers = client.domain_failover._breakers + print("注册的候选数: %d" % len(breakers)) + print("候选列表: %s" % list(breakers.keys())) + ok = (list(breakers.keys()) == ["custom.example.invalid"]) + print("仅保留单一候选(自定义 endpoint 不切换): " + ("OK" if ok else "FAIL")) + + +def case_disabled(): + """enabled=False 时退化为原逻辑(仅尝试一次)。""" + client = make_client("cvm.tencentcloudapi.com", req_timeout=3, enabled=False) + with _GaiPatcher() as p: + try: + client.DescribeRegions(models.DescribeRegionsRequest()) + except TencentCloudSDKException: + pass + uniq_tried = [h for h in dict.fromkeys(p.resolved_log)] + print("resolved hosts: %s" % uniq_tried) + ok = (uniq_tried == ["cvm.tencentcloudapi.com"]) + print("enabled=False 时不切换: " + ("OK" if ok else "FAIL")) + + +def case_breaker_skips_bad_candidate(): + """连续失败超过阈值后,断路器 OPEN,后续请求应直接跳过坏候选。""" + from tencentcloud.common.circuit_breaker import STATE_OPEN + # 使用较低阈值,方便快速触发 + client = make_client("cvm.tencentcloudapi.com", req_timeout=3) + client.domain_failover._profile.breaker_setting.max_fail_num = 2 + client.domain_failover._profile.breaker_setting.max_fail_percent = 0.1 + + with _GaiPatcher() as p: + # 多次调用,每次 3 个候选都会失败,主域名连续 5 次失败后应该被断路 + for i in range(3): + try: + client.DescribeRegions(models.DescribeRegionsRequest()) + except TencentCloudSDKException: + pass + primary_breaker = client.domain_failover._breakers.get("cvm.tencentcloudapi.com") + print("primary breaker state: %s (OPEN=%d)" % (primary_breaker.state, STATE_OPEN)) + ok = primary_breaker.state == STATE_OPEN + print("主域名断路器 OPEN: " + ("OK" if ok else "FAIL")) + + +# --------------------------------------------------------------------------- # +# main +# --------------------------------------------------------------------------- # + +def main(): + test_build_candidates() + test_classify_exception() + + run_case("A 全部候选 DNS 失败,观察切换顺序", case_all_dns_fail_primary_and_backup) + run_case("B 主域名失败切到第二候选", case_primary_fail_backup_ok) + run_case("C intl 严格不切换", case_intl_no_failover) + run_case("D 自定义 endpoint 不切换", case_custom_endpoint_no_failover) + run_case("E enabled=False 完全不切换", case_disabled) + run_case("F 断路器会跳过坏候选", case_breaker_skips_bad_candidate) + + +if __name__ == "__main__": + main() From a9516f3b391892817c70ca3694d91083298b6ed8 Mon Sep 17 00:00:00 2001 From: HS-Gray <1978489546@qq.com> Date: Thu, 30 Apr 2026 14:55:37 +0800 Subject: [PATCH 2/2] remove user configure --- tencentcloud/common/abstract_client.py | 13 +- tencentcloud/common/domain_failover.py | 29 +- tencentcloud/common/profile/client_profile.py | 36 +-- tests/unit/test_dns_failures.py | 273 ------------------ tests/unit/test_domain_failover.py | 209 +++++--------- 5 files changed, 104 insertions(+), 456 deletions(-) delete mode 100644 tests/unit/test_dns_failures.py diff --git a/tencentcloud/common/abstract_client.py b/tencentcloud/common/abstract_client.py index 292580b8c..c7022beed 100644 --- a/tencentcloud/common/abstract_client.py +++ b/tencentcloud/common/abstract_client.py @@ -93,8 +93,8 @@ def __init__(self, credential, region, profile=None): self.profile.region_breaker_profile = RegionBreakerProfile() self.circuit_breaker = CircuitBreaker(self.profile.region_breaker_profile) - # 域名级容灾管理器(默认启用,.com → .com.cn → .cn) - self.domain_failover = DomainFailoverManager(self.profile.domain_failover_profile) + # 域名级容灾管理器(SDK 内部机制,对用户完全透明,.com → .com.cn → .cn) + self.domain_failover = DomainFailoverManager() if self.profile.request_client: self.request_client = self._sdkVersion + "; " + self.profile.request_client @@ -447,15 +447,6 @@ def _call(self, action, params, options=None, headers=None): return self.request.send_request(req) origin_endpoint = self._get_endpoint(options=options) - # 未启用域名容灾:按原逻辑一次性请求 - if not self.domain_failover.enabled: - req = RequestInternal(origin_endpoint, - self.profile.httpProfile.reqMethod, - self._requestPath, - header=headers) - self._build_req_inter(action, params, req, options) - return self.request.send_request(req) - return self._call_with_domain_failover(origin_endpoint, action, params, options, headers) def _call_with_domain_failover(self, origin_endpoint, action, params, options, headers): diff --git a/tencentcloud/common/domain_failover.py b/tencentcloud/common/domain_failover.py index 8012cf6d3..c99612f57 100644 --- a/tencentcloud/common/domain_failover.py +++ b/tencentcloud/common/domain_failover.py @@ -44,6 +44,21 @@ _INTL_SUFFIX = ".intl.tencentcloudapi.com" +class _InternalBreakerSetting(object): + """域名容灾用的断路器阈值(完全内部常量,不暴露给用户)。 + + 字段名与 RegionBreakerProfile 保持一致,以便复用已有的 CircuitBreaker 实现。 + 每个候选域名的 CircuitBreaker 持有独立的 setting 实例,避免相互影响。 + """ + + def __init__(self): + self.max_fail_num = 5 + self.max_fail_percent = 0.75 + self.window_interval = 60 * 5 # 5 分钟内累计窗口 + self.timeout = 60 # OPEN 状态 60s 后进入 HALF_OPEN + self.max_requests = 5 # HALF_OPEN 下累计 5 次成功后回到 CLOSED + + def _classify_exception(exc): """沿 __cause__ / __context__ 链识别原始异常类型,返回可触发域名切换的 kind。 @@ -171,23 +186,21 @@ class DomainFailoverManager(object): 生命周期:AbstractClient 持有一个实例;每个候选域名首次出现时动态 创建 CircuitBreaker。不同 client 实例间不共享(与现有 region_breaker 的作用域一致)。 + + 本管理器为 SDK 内部组件,对用户完全透明:不暴露开关、不暴露阈值, + 始终生效。仅当 host 未命中 `*.tencentcloudapi.com` 族(例如 intl 域名、 + 自定义 endpoint、IP)时等价于"不切换",此时行为与改造前完全一致。 """ - def __init__(self, profile): - """:param profile: `DomainFailoverProfile`""" - self._profile = profile + def __init__(self): self._breakers = {} self._lock = threading.Lock() - @property - def enabled(self): - return self._profile is not None and self._profile.enabled - def get_breaker(self, host): with self._lock: br = self._breakers.get(host) if br is None: - br = CircuitBreaker(self._profile.breaker_setting) + br = CircuitBreaker(_InternalBreakerSetting()) self._breakers[host] = br return br diff --git a/tencentcloud/common/profile/client_profile.py b/tencentcloud/common/profile/client_profile.py index 8ed29f533..b4bf3b898 100644 --- a/tencentcloud/common/profile/client_profile.py +++ b/tencentcloud/common/profile/client_profile.py @@ -34,14 +34,11 @@ class ClientProfile(object): :type request_client: str :param retryer: Custom retry configuration. :type retryer: :class:`tencentcloud.common.retry.StandardRetryer` - :param domain_failover_profile: The domain-level failover profile. - :type domain_failover_profile: :class:`DomainFailoverProfile` """ unsignedPayload = False def __init__(self, signMethod=None, httpProfile=None, language="zh-CN", - disable_region_breaker=True, region_breaker_profile=None, request_client=None, retryer=None, - domain_failover_profile=None): + disable_region_breaker=True, region_breaker_profile=None, request_client=None, retryer=None): self.httpProfile = HttpProfile() if httpProfile is None else httpProfile self.signMethod = "TC3-HMAC-SHA256" if signMethod is None else signMethod valid_language = ["zh-CN", "en-US"] @@ -62,37 +59,6 @@ def __init__(self, signMethod=None, httpProfile=None, language="zh-CN", self.retryer = retryer - # 域名级容灾(*.tencentcloudapi.com → .com.cn → .cn) - self.domain_failover_profile = domain_failover_profile or DomainFailoverProfile() - - -class DomainFailoverProfile(object): - """域名级容灾切换配置。 - - 当请求发生 DNS / TCP / TLS 类故障时,自动按 ``tencentcloudapi.com → - tencentcloudapi.com.cn → tencentcloudapi.cn`` 的顺序串行切换域名重试。 - - - 对 ``*.intl.tencentcloudapi.com`` (严格后缀匹配)不切换。 - - 切换仅在当次请求的 ``ApiRequest.host`` 上生效,不会改写 ``HttpProfile.rootDomain``。 - - 每个候选域名都有独立的 ``CircuitBreaker``,失败到达阈值后在 ``timeout`` 时间内 - 跳过该候选,避免反复撞墙。 - - :param enabled: 是否启用,默认 ``True``。设置为 ``False`` 可完全关闭本机制。 - :type enabled: bool - :param breaker_setting: 候选域名级断路器的阈值配置,未显式指定时使用默认 ``RegionBreakerProfile`` - 的阈值(max_fail_num=5, timeout=60s 等),详见 :class:`RegionBreakerProfile`。 - :type breaker_setting: :class:`RegionBreakerProfile` - """ - - def __init__(self, enabled=True, breaker_setting=None): - self.enabled = enabled - # 复用 RegionBreakerProfile 的阈值字段(max_fail_num / max_fail_percent / - # window_interval / timeout / max_requests),断路器会从中读取配置 - if breaker_setting is None: - breaker_setting = RegionBreakerProfile() - self.breaker_setting = breaker_setting - - class RegionBreakerProfile(object): """RegionBreaker profile. diff --git a/tests/unit/test_dns_failures.py b/tests/unit/test_dns_failures.py deleted file mode 100644 index 25804b967..000000000 --- a/tests/unit/test_dns_failures.py +++ /dev/null @@ -1,273 +0,0 @@ -# -*- coding: utf-8 -*- -""" -DNS 故障场景对 Tencent Cloud Python SDK 异常表现的测试脚本 - -参考文档: - https://iwiki.woa.com/p/4019996855 《2.3 SDK 视角:DNS 故障的异常表现》 - -本脚本针对文档中列出的 A/B/C 三大类、共 11 种 DNS 故障场景,构造等价的本地/网络 -模拟条件,通过调用 SDK 的 CVM.DescribeRegions 接口触发异常,并打印: - 1) 最外层 SDK 抛出的 TencentCloudSDKException(code + message) - 2) SDK 捕获到的"原始异常"的真实类型与 repr(通过 PEP 3134 标准异常链 - e.__cause__ 获取;若该层未用 ``raise ... from``,则回退到 e.__context__) - 3) 耗时,以辅助判断故障类型 - -运行方式(不依赖真实腾讯云账号,SecretId/SecretKey 可任意填,域名错误时请求根本 -不会到达签名校验阶段): - python tests/dns_failure_test/test_dns_failures.py -""" -from __future__ import print_function - -import os -import socket -import sys -import time -import traceback - -# 强制使用工程根目录下的 SDK -_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) -if _PROJECT_ROOT not in sys.path: - sys.path.insert(0, _PROJECT_ROOT) - -from tencentcloud.common import credential -from tencentcloud.common.profile.client_profile import ClientProfile -from tencentcloud.common.profile.http_profile import HttpProfile -from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException -from tencentcloud.cvm.v20170312 import cvm_client, models - - -def make_client(endpoint=None, req_timeout=10, disable_region_breaker=True, scheme=None): - cred = credential.Credential("AKIDFakeIdForDnsTest", "FakeKeyForDnsTest") - http_profile = HttpProfile() - http_profile.reqTimeout = req_timeout - if scheme: - http_profile.scheme = scheme - if endpoint: - http_profile.endpoint = endpoint - client_profile = ClientProfile() - client_profile.httpProfile = http_profile - client_profile.disable_region_breaker = disable_region_breaker - return cvm_client.CvmClient(cred, "ap-guangzhou", client_profile) - - -def run_case(title, case_fn): - print("\n" + "=" * 80) - print("[CASE] " + title) - print("=" * 80) - t0 = time.time() - try: - case_fn() - print("!!! 本次调用没有触发异常,用例不符合预期 !!!") - except TencentCloudSDKException as e: - cost = time.time() - t0 - # 通过 Python 标准异常链获取原始异常:优先 __cause__(raise ... from e), - # 回退 __context__(except 块内裸 raise 时 CPython 自动记录)。 - raw = e.__cause__ or e.__context__ - print("SDK Exception code : %s" % e.code) - print("SDK Exception message : %s" % e.message) - if raw is not None: - raw_type = "%s.%s" % (type(raw).__module__, type(raw).__name__) - print("Raw exception type : %s" % raw_type) - print("Raw exception repr : %r" % (raw,)) - # 进一步遍历 __cause__/__context__ 链,展示 requests → urllib3 → socket 的完整传递 - chain = [] - cur = raw - seen = set() - while cur is not None and id(cur) not in seen: - seen.add(id(cur)) - chain.append("%s.%s: %s" % (type(cur).__module__, type(cur).__name__, cur)) - cur = getattr(cur, "__cause__", None) or getattr(cur, "__context__", None) - if len(chain) > 1: - print("Raw exception chain :") - for i, line in enumerate(chain): - print(" [%d] %s" % (i, line)) - else: - print("Raw exception : (无 __cause__/__context__)") - print("耗时 : %.3fs" % cost) - except Exception as e: - cost = time.time() - t0 - print("未被 SDK 包装的异常: %s: %s (%.3fs)" % (type(e).__name__, e, cost)) - traceback.print_exc() - - -def call_describe_regions(client): - req = models.DescribeRegionsRequest() - client.DescribeRegions(req) - - -# ========== A 类:DNS 解析失败 ========== - -def case_a_operator_nxdomain(): - client = make_client(endpoint="cvm.definitely-not-exist-nxdomain.invalid") - call_describe_regions(client) - - -def _patch_gai_with(fn): - import socket as _socket - real_gai = _socket.getaddrinfo - - def wrapper(host, *args, **kwargs): - if host and ("tencentcloudapi.com" in host or host.startswith("cvm.")): - return fn(host, *args, **kwargs) - return real_gai(host, *args, **kwargs) - - _socket.getaddrinfo = wrapper - return real_gai - - -def _restore_gai(real_gai): - socket.getaddrinfo = real_gai - - -def case_a_public_dns_timeout(): - def fake(host, *args, **kwargs): - time.sleep(2) - raise socket.gaierror(-3, "Temporary failure in name resolution") - real = _patch_gai_with(fake) - try: - client = make_client(req_timeout=10) - call_describe_regions(client) - finally: - _restore_gai(real) - - -def case_a_root_server_ddos(): - case_a_public_dns_timeout() - - -def case_a_tld_ns_tampered(): - case_a_operator_nxdomain() - - -def case_a_authoritative_dns_ddos(): - def fake(host, *args, **kwargs): - raise socket.gaierror(-2, "Name or service not known") - real = _patch_gai_with(fake) - try: - client = make_client(req_timeout=10) - call_describe_regions(client) - finally: - _restore_gai(real) - - -def case_a_gslb_misconfig(): - case_a_authoritative_dns_ddos() - - -# ========== B 类:连接层异常 ========== - -def case_b_operator_return_zero_ip(): - def fake(host, *args, **kwargs): - return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', ("0.0.0.0", 443))] - real = _patch_gai_with(fake) - try: - client = make_client(req_timeout=5) - call_describe_regions(client) - finally: - _restore_gai(real) - - -def case_b_hijack_port_closed(): - def fake(host, *args, **kwargs): - return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', ("127.0.0.1", 1))] - real = _patch_gai_with(fake) - try: - client = make_client(req_timeout=5) - call_describe_regions(client) - finally: - _restore_gai(real) - - -def case_b_hijack_no_response(): - def fake(host, *args, **kwargs): - # 192.0.2.1 是 RFC 5737 TEST-NET-1 保留地址,公网不可路由 - return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', ("192.0.2.1", 443))] - real = _patch_gai_with(fake) - try: - client = make_client(req_timeout=3) - call_describe_regions(client) - finally: - _restore_gai(real) - - -# ========== C 类:应用层异常 ========== - -def case_c_hijack_tls_mismatch(): - try: - baidu_ip = socket.gethostbyname("www.baidu.com") - except Exception: - print("[SKIP] 无法解析 www.baidu.com,跳过此用例") - return - - def fake(host, *args, **kwargs): - return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', (baidu_ip, 443))] - real = _patch_gai_with(fake) - try: - client = make_client(req_timeout=10) - call_describe_regions(client) - finally: - _restore_gai(real) - - -def case_c_hijack_http_unexpected_body(): - import threading - try: - from http.server import BaseHTTPRequestHandler, HTTPServer - except ImportError: - from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer - - class Handler(BaseHTTPRequestHandler): - def do_POST(self): - length = int(self.headers.get('Content-Length', 0)) - self.rfile.read(length) - self.send_response(200) - self.send_header('Content-Type', 'text/plain') - self.end_headers() - self.wfile.write(b"hello world (not a tencent cloud api response)") - - def do_GET(self): - self.send_response(200) - self.send_header('Content-Type', 'text/plain') - self.end_headers() - self.wfile.write(b"hello world (not a tencent cloud api response)") - - def log_message(self, fmt, *a): - return - - srv = HTTPServer(("127.0.0.1", 0), Handler) - port = srv.server_address[1] - t = threading.Thread(target=srv.serve_forever) - t.daemon = True - t.start() - try: - client = make_client(endpoint="127.0.0.1:%d" % port, scheme="http", - req_timeout=5) - call_describe_regions(client) - finally: - srv.shutdown() - - -CASES = [ - ("A-3 operator DNS blocking (NXDOMAIN)", case_a_operator_nxdomain), - ("A-3 public DNS down (timeout)", case_a_public_dns_timeout), - ("A-4 root server DDoS (timeout)", case_a_root_server_ddos), - ("A-5 TLD NS tampered (NXDOMAIN)", case_a_tld_ns_tampered), - ("A-6 authoritative DNS DDoS (SERVFAIL)", case_a_authoritative_dns_ddos), - ("A-6 GSLB misconfig (NXDOMAIN)", case_a_gslb_misconfig), - - ("B-3 operator blocking (return 0.0.0.0)", case_b_operator_return_zero_ip), - ("B-5 hijack (port closed)", case_b_hijack_port_closed), - ("B-5 hijack (no response/timeout)", case_b_hijack_no_response), - - ("C-5 hijack (HTTPS cert mismatch)", case_c_hijack_tls_mismatch), - ("C-5 hijack (HTTP 200 unexpected body)", case_c_hijack_http_unexpected_body), -] - - -def main(): - for title, fn in CASES: - run_case(title, fn) - - -if __name__ == "__main__": - main() diff --git a/tests/unit/test_domain_failover.py b/tests/unit/test_domain_failover.py index e815b3e21..113cacf18 100644 --- a/tests/unit/test_domain_failover.py +++ b/tests/unit/test_domain_failover.py @@ -2,25 +2,22 @@ """ 域名切换容灾(Domain Failover)功能的测试脚本。 -覆盖: - 1. 候选域名生成(build_candidates)的白盒验证。 +本功能为 SDK 内部机制,对用户完全透明。本脚本通过白盒 + 集成两种方式验证: + 1. 候选域名生成(build_candidates)是否符合规则。 2. 异常识别(_classify_exception)对 6 类故障的分类是否准确。 3. 主域名故障时是否能按 [主 → .com.cn → .cn] 顺序切换。 4. `.intl.` 域名是否严格不切换。 5. 自定义 endpoint / IP 是否不受影响。 6. 断路器是否在连续失败后跳过坏候选。 - 7. JSON_DECODE_ERROR 场景是否**不**触发切换。 运行方式: python tests/dns_failure_test/test_domain_failover.py """ from __future__ import print_function -import json import os import socket import sys -import threading import time import traceback @@ -29,13 +26,8 @@ if _PROJECT_ROOT not in sys.path: sys.path.insert(0, _PROJECT_ROOT) -try: - from http.server import BaseHTTPRequestHandler, HTTPServer -except ImportError: # py2 - from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer - from tencentcloud.common import credential -from tencentcloud.common.profile.client_profile import ClientProfile, DomainFailoverProfile, RegionBreakerProfile +from tencentcloud.common.profile.client_profile import ClientProfile from tencentcloud.common.profile.http_profile import HttpProfile from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException from tencentcloud.common.domain_failover import ( @@ -45,7 +37,7 @@ # --------------------------------------------------------------------------- # -# 1) 白盒测试:候选域名生成 +# 1) 白盒:候选域名生成 # --------------------------------------------------------------------------- # def test_build_candidates(): @@ -82,7 +74,7 @@ def test_build_candidates(): # --------------------------------------------------------------------------- # -# 2) 白盒测试:_classify_exception +# 2) 白盒:_classify_exception # --------------------------------------------------------------------------- # def test_classify_exception(): @@ -102,7 +94,7 @@ def _wrap(raw_exc): return tce def _chain(inner, outer_cls): - """构造 outer(inner) 的链(outer 捕获 inner 再抛自己),用于模拟 requests 对 + """构造 outer(inner) 的链(outer 捕获 inner 再抛自己),模拟 requests 对 底层异常的包装。""" try: try: @@ -113,22 +105,16 @@ def _chain(inner, outer_cls): return e cases = [ - # DNS NXDOMAIN/SERVFAIL → errno=-2 (_wrap(_chain(socket.gaierror(-2, "Name or service not known"), requests.exceptions.ConnectionError)), "DNS_NXDOMAIN"), - # DNS 超时 → errno=-3 (_wrap(_chain(socket.gaierror(-3, "Temporary failure in name resolution"), requests.exceptions.ConnectionError)), "DNS_TIMEOUT"), - # TCP Connection refused (_wrap(_chain(ConnectionRefusedError(111, "Connection refused"), requests.exceptions.ConnectionError)), "TCP_CONN_REFUSED"), - # 读超时 (_wrap(_chain(socket.timeout("timed out"), requests.exceptions.ReadTimeout)), "TCP_READ_TIMEOUT"), - # TLS 错误 (_wrap(_chain(Exception("cert mismatch"), requests.exceptions.SSLError)), "TLS_ERROR"), - # JSON 解析失败:业务方法层会包装成 TencentCloudSDKException(code="JSONDecodeError",...) (TencentCloudSDKException("JSONDecodeError", "Expecting value: line 1 column 1 (char 0)"), "JSON_DECODE_ERROR"), ] @@ -154,32 +140,30 @@ def _chain(inner, outer_cls): # --------------------------------------------------------------------------- # -# 3) 集成测试:主域名 DNS 失败 → 验证切换轨迹 +# 3) 集成:触发不同 host 的容灾路径 +# 注:按新的"用户无感知"设计,ClientProfile 不再暴露 domain_failover 配置。 # --------------------------------------------------------------------------- # -def make_client(endpoint, req_timeout=5, enabled=True, scheme=None): +def make_client(endpoint, req_timeout=3): cred = credential.Credential("AKIDFakeIdForDomainFailoverTest", "FakeKeyForDomainFailoverTest") http_profile = HttpProfile() http_profile.reqTimeout = req_timeout - if scheme: - http_profile.scheme = scheme http_profile.endpoint = endpoint client_profile = ClientProfile() client_profile.httpProfile = http_profile - client_profile.disable_region_breaker = True # 避免干扰 - client_profile.domain_failover_profile = DomainFailoverProfile(enabled=enabled) + client_profile.disable_region_breaker = True # 避免 region_breaker 干扰 return cvm_client.CvmClient(cred, "ap-guangzhou", client_profile) class _GaiPatcher(object): - """记录哪些 host 被解析,可以控制哪些 host 返回 NXDOMAIN、哪些走真实解析。""" + """Patch socket.getaddrinfo: + - 落在 resolvable_hosts 中的 host 返回 fake_ip。 + - 其他 tencentcloudapi host 返回 NXDOMAIN。 + - 其他 host 走真实解析。 + """ def __init__(self, resolvable_hosts=None, fake_ip=None): - """:param resolvable_hosts: 集合,在这个集合中的 host 会"解析成功" - 返回 fake_ip 或本地 127.0.0.1。 - :param fake_ip: 解析后的 IP 地址;None 时使用 127.0.0.1。 - """ self.resolvable = set(resolvable_hosts or []) self.fake_ip = fake_ip or "127.0.0.1" self.resolved_log = [] @@ -216,14 +200,14 @@ def run_case(title, fn): print("耗时: %.3fs" % (time.time() - t0)) -def case_all_dns_fail_primary_and_backup(): - """所有 3 个候选都 DNS 失败:期望 SDK 抛异常,但日志中记录尝试了全部 3 个。""" - client = make_client("cvm.tencentcloudapi.com", req_timeout=3) +def case_all_dns_fail(): + """所有 3 个候选都 DNS 失败:期望按序尝试 [com, com.cn, cn]。""" + client = make_client("cvm.tencentcloudapi.com") with _GaiPatcher() as p: try: client.DescribeRegions(models.DescribeRegionsRequest()) print("!!! 未抛异常,不符合预期") - except TencentCloudSDKException as e: + except TencentCloudSDKException: pass print("resolved hosts sequence:") for h in p.resolved_log: @@ -231,80 +215,44 @@ def case_all_dns_fail_primary_and_backup(): expected = ["cvm.tencentcloudapi.com", "cvm.tencentcloudapi.com.cn", "cvm.tencentcloudapi.cn"] - actually_tried = [h for h in p.resolved_log if h in expected] - # 可能每个 host 因重试被解析多次,这里用"唯一化后包含 3 项"断言顺序 uniq = [] - for h in actually_tried: + for h in p.resolved_log: if h not in uniq: uniq.append(h) ok = (uniq == expected) print("顺序校验: " + ("OK" if ok else "FAIL, got=%s" % uniq)) -def case_primary_fail_backup_ok(): - """主域名 DNS 失败,第二候选被解析到本地 HTTP server(我们让它返回假的非 JSON - → 会抛 JSONDecodeError,不触发再切换;但能证明已经切到了第二候选)。""" - # 启动本地 HTTP server - class Handler(BaseHTTPRequestHandler): - def do_POST(self): - length = int(self.headers.get('Content-Length', 0)) - self.rfile.read(length) - self.send_response(200) - self.send_header('Content-Type', 'application/json') - self.end_headers() - self.wfile.write(b'{"Response": {"Error": {"Code":"InternalFake","Message":"mocked"}, "RequestId":"mock-rid"}}') - - def do_GET(self): - self.do_POST() - - def log_message(self, fmt, *a): - return - - srv = HTTPServer(("127.0.0.1", 0), Handler) - port = srv.server_address[1] - t = threading.Thread(target=srv.serve_forever) - t.daemon = True - t.start() - try: - # 用 http + 端口,让 2 级候选走本地 server;同时让主域名 DNS 失败 - # 注意:build_candidates 基于 host 的后缀;我们用 "cvm.tencentcloudapi.com" - # 作为 endpoint,第二候选是 "cvm.tencentcloudapi.com.cn" - client = make_client("cvm.tencentcloudapi.com", req_timeout=3, scheme="http") - # 把 cvm.tencentcloudapi.com 做成解析失败,把 com.cn 解析到本地端口 - # 需要改监听地址使 HTTP client 能连上。用一个 Patch:主域名失败,第二域名成功。 - # 但由于 requests 会用 DNS 解析 host,我们把 com.cn 解析到 127.0.0.1, - # 然后另外劫持 urlparse / port:直接在 URL 路径里拼端口是最简方式。 - # 这里退而求其次:仅验证"切换发生",通过 _GaiPatcher 的 resolved_log 观察。 - with _GaiPatcher(resolvable_hosts={"cvm.tencentcloudapi.com.cn"}, - fake_ip="127.0.0.1") as p: - # 由于解析到了 127.0.0.1:443(非 HTTP server 端口),会连接拒绝 → 第二候选 - # 也失败,继续到第三候选 cvm.tencentcloudapi.cn → 再 DNS 失败 - try: - client.DescribeRegions(models.DescribeRegionsRequest()) - except TencentCloudSDKException as e: - pass - print("resolved hosts sequence:") - for h in p.resolved_log: - print(" - %s" % h) - # 期望至少尝试了前两个候选 - tried = set(p.resolved_log) - ok_primary = "cvm.tencentcloudapi.com" in tried - ok_backup = "cvm.tencentcloudapi.com.cn" in tried - print("主域名被尝试: %s" % ok_primary) - print("第二候选被尝试: %s" % ok_backup) - finally: - srv.shutdown() +def case_primary_fail_backup_tried(): + """主域名 DNS 失败,第二候选被"解析成功"但连接到 127.0.0.1:443(无服务)→ + 连接被拒 → 切到第三候选 → 再 DNS 失败。 + 目的是验证切换确实发生,而不是一遇到主域名失败就返回。""" + client = make_client("cvm.tencentcloudapi.com") + with _GaiPatcher(resolvable_hosts={"cvm.tencentcloudapi.com.cn"}, + fake_ip="127.0.0.1") as p: + try: + client.DescribeRegions(models.DescribeRegionsRequest()) + except TencentCloudSDKException: + pass + print("resolved hosts sequence:") + for h in p.resolved_log: + print(" - %s" % h) + tried = set(p.resolved_log) + ok_primary = "cvm.tencentcloudapi.com" in tried + ok_backup = "cvm.tencentcloudapi.com.cn" in tried + print("主域名被尝试: %s" % ok_primary) + print("第二候选被尝试: %s" % ok_backup) def case_intl_no_failover(): """intl 域名必须不做切换。""" - client = make_client("cvm.intl.tencentcloudapi.com", req_timeout=3) + client = make_client("cvm.intl.tencentcloudapi.com") with _GaiPatcher() as p: try: client.DescribeRegions(models.DescribeRegionsRequest()) except TencentCloudSDKException: pass - uniq_tried = [h for h in dict.fromkeys(p.resolved_log)] + uniq_tried = list(dict.fromkeys(p.resolved_log)) print("resolved hosts: %s" % uniq_tried) ok = (uniq_tried == ["cvm.intl.tencentcloudapi.com"]) print("仅尝试主域名(intl 不切换): " + ("OK" if ok else "FAIL")) @@ -312,44 +260,30 @@ def case_intl_no_failover(): def case_custom_endpoint_no_failover(): """非 tencentcloudapi 后缀的 endpoint 不切换。""" - client = make_client("custom.example.invalid", req_timeout=3) + client = make_client("custom.example.invalid") try: client.DescribeRegions(models.DescribeRegionsRequest()) - except TencentCloudSDKException as e: - raw = e.__cause__ or e.__context__ - # 应只有一个候选被尝试(没法直接看 log,但可通过断路器的 get_breaker 数量判断) - breakers = client.domain_failover._breakers - print("注册的候选数: %d" % len(breakers)) - print("候选列表: %s" % list(breakers.keys())) - ok = (list(breakers.keys()) == ["custom.example.invalid"]) - print("仅保留单一候选(自定义 endpoint 不切换): " + ("OK" if ok else "FAIL")) - - -def case_disabled(): - """enabled=False 时退化为原逻辑(仅尝试一次)。""" - client = make_client("cvm.tencentcloudapi.com", req_timeout=3, enabled=False) - with _GaiPatcher() as p: - try: - client.DescribeRegions(models.DescribeRegionsRequest()) - except TencentCloudSDKException: - pass - uniq_tried = [h for h in dict.fromkeys(p.resolved_log)] - print("resolved hosts: %s" % uniq_tried) - ok = (uniq_tried == ["cvm.tencentcloudapi.com"]) - print("enabled=False 时不切换: " + ("OK" if ok else "FAIL")) + except TencentCloudSDKException: + pass + breakers = client.domain_failover._breakers + print("注册的候选数: %d" % len(breakers)) + print("候选列表: %s" % list(breakers.keys())) + ok = (list(breakers.keys()) == ["custom.example.invalid"]) + print("仅保留单一候选(自定义 endpoint 不切换): " + ("OK" if ok else "FAIL")) def case_breaker_skips_bad_candidate(): - """连续失败超过阈值后,断路器 OPEN,后续请求应直接跳过坏候选。""" + """多次失败后,主域名断路器进入 OPEN 状态。""" from tencentcloud.common.circuit_breaker import STATE_OPEN - # 使用较低阈值,方便快速触发 - client = make_client("cvm.tencentcloudapi.com", req_timeout=3) - client.domain_failover._profile.breaker_setting.max_fail_num = 2 - client.domain_failover._profile.breaker_setting.max_fail_percent = 0.1 - - with _GaiPatcher() as p: - # 多次调用,每次 3 个候选都会失败,主域名连续 5 次失败后应该被断路 - for i in range(3): + client = make_client("cvm.tencentcloudapi.com") + # 直接修改内部常量的副本阈值,方便快速触发(仅影响当前 client 的断路器) + for br_name in ("cvm.tencentcloudapi.com", "cvm.tencentcloudapi.com.cn", "cvm.tencentcloudapi.cn"): + br = client.domain_failover.get_breaker(br_name) + br.breaker_setting.max_fail_num = 2 + br.breaker_setting.max_fail_percent = 0.1 + + with _GaiPatcher(): + for _ in range(3): try: client.DescribeRegions(models.DescribeRegionsRequest()) except TencentCloudSDKException: @@ -360,6 +294,23 @@ def case_breaker_skips_bad_candidate(): print("主域名断路器 OPEN: " + ("OK" if ok else "FAIL")) +def case_no_profile_exposed(): + """回归:ClientProfile 不再暴露 domain_failover 相关字段/参数。""" + cp = ClientProfile() + assert not hasattr(cp, "domain_failover_profile"), \ + "ClientProfile should NOT expose domain_failover_profile" + # 确保 DomainFailoverProfile 类也不从 client_profile 导出 + try: + from tencentcloud.common.profile import client_profile as cp_mod + assert not hasattr(cp_mod, "DomainFailoverProfile"), \ + "DomainFailoverProfile should NOT exist in client_profile module" + except Exception as e: + print("!!! %s" % e) + raise + print("ClientProfile 未暴露 domain_failover_profile: OK") + print("client_profile 模块未导出 DomainFailoverProfile: OK") + + # --------------------------------------------------------------------------- # # main # --------------------------------------------------------------------------- # @@ -368,12 +319,12 @@ def main(): test_build_candidates() test_classify_exception() - run_case("A 全部候选 DNS 失败,观察切换顺序", case_all_dns_fail_primary_and_backup) - run_case("B 主域名失败切到第二候选", case_primary_fail_backup_ok) + run_case("A 全部候选 DNS 失败,观察切换顺序", case_all_dns_fail) + run_case("B 主域名失败切到第二候选", case_primary_fail_backup_tried) run_case("C intl 严格不切换", case_intl_no_failover) run_case("D 自定义 endpoint 不切换", case_custom_endpoint_no_failover) - run_case("E enabled=False 完全不切换", case_disabled) - run_case("F 断路器会跳过坏候选", case_breaker_skips_bad_candidate) + run_case("E 断路器会跳过坏候选", case_breaker_skips_bad_candidate) + run_case("F Profile 未暴露容灾开关(用户无感知)", case_no_profile_exposed) if __name__ == "__main__":