
调用外部大模型 API 时，什么网络抖动、限流、服务端 500……简直是家常便饭。如果代码一遇到错误就直接把堆栈甩给用户，体验基本为零。本文就从最基础的异常捕获讲起，一路讲到重试、熔断降级，帮大家把这套防御体系搭完整。

一、先认清“敌人”：常见错误类型

面对错误，首先要搞清楚是什么错，才能对症下药。我用一张表把常见情况归好类：

| HTTP 状态码 | 错误类型 | 处理方式 |
| --- | --- | --- |
| 401 | API Key 无效或过期 | 报警，不重试 |
| 403 | 无权限（未开通某模型） | 报警，不重试 |
| 429 | 限流（请求太频繁） | 等待后重试，或切备用模型 |
| 500/502/503 | 服务端错误 | 重试 1-3 次 |
| 超时 | 网络或模型响应太慢 | 重试或降级 |
| 余额不足 | API 账户额度用完 | 报警，切备用 |

Spring AI 中，这些 HTTP 错误会被包装成 HttpClientErrorException 或 HttpServerErrorException 的子类，结构清晰，分支处理起来很方便。

二、基础异常处理：别再用一个大 catch 吞一切

此方法不优雅，了解下即可。最基础的一层：根据异常类型分别处理，别用一个 catch (Exception e) 把所有情况都混在一起。

RestController public class ChatController { private final ChatClient chatClient; public ChatController(ChatClient chatClient) { this.chatClient chatClient; } GetMapping(/chat) public ResponseEntityString chat(RequestParam String prompt) { try { String response chatClient.call(prompt); return ResponseEntity.ok(response); } catch (HttpClientErrorException.Unauthorized e) { // 401告警 log.error(API Key 无效, e); return ResponseEntity.status(401).body(认证失败请检查 API Key); } catch (HttpClientErrorException.Forbidden e) { // 403 log.error(无权限访问, e); return ResponseEntity.status(403).body(无权使用该模型); } catch (HttpClientErrorException.TooManyRequests e) { // 429重试或切备用 log.warn(触发限流); return ResponseEntity.status(429).body(请求太频繁请稍后重试); } catch (HttpServerErrorException e) { // 5xx重试 log.error(服务端错误, e); return ResponseEntity.status(502).body(AI 服务暂时不可用); } } }

但这只能做到“有错就报”。对于 429 和 5xx 这类偶发性错误，更优雅的做法是等一等再试。

三、重试机制：给请求多一次机会

方案一：Spring Retry（推荐，声明式重试）

引入依赖：

dependency groupIdorg.springframework.retry/groupId artifactIdspring-retry/artifactId /dependency dependency groupIdorg.springframework/groupId artifactIdspring-aspects/artifactId /dependency

启动类加 @EnableRetry：

SpringBootApplication EnableRetry public class AiApplication { //... 
}

在需要重试的 Service 方法上加 @Retryable，失败后全部由 @Recover 兜底：

Service public class AiService { private final ChatClient chatClient; public AiService(ChatClient chatClient) { this.chatClient chatClient; } Retryable( retryFor {HttpServerErrorException.class, HttpClientErrorException.TooManyRequests.class}, maxAttempts 3, backoff Backoff(delay 1000, multiplier 2) ) public String callWithRetry(String prompt) { return chatClient.call(prompt); } Recover public String recover(Exception e, String prompt) { log.error(重试全部失败降级处理, e); return AI 服务暂时不可用请稍后重试; } }

Controller 只需调用 Service 即可：

GetMapping(/chat-retry) public ResponseEntityString chatRetry(RequestParam String prompt) { String result aiService.callWithRetry(prompt); return ResponseEntity.ok(result); }

方案二：手动重试（处理 Retry-After 头）

某些 API 在返回 429 时，响应头里会有 Retry-After，告诉你需要等多少秒。@Retryable 无法直接读取响应头，这时手动循环更合适：

package com.jichi.springaialibaba.service; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.chat.client.ChatClient; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.stereotype.Service; import org.springframework.web.client.HttpClientErrorException; import org.springframework.web.client.HttpServerErrorException; Service public class ManualRetryChatService { private static final Logger log LoggerFactory.getLogger(ManualRetryChatService.class); private final ChatClient chatClient; public ManualRetryChatService(Qualifier(primaryChatClient) ChatClient chatClient) { this.chatClient chatClient; } public String chatWithManualRetry(String message) { int maxAttempts 3; long delayMs 1000; for (int attempt 1; attempt maxAttempts; attempt) { try { return chatClient.prompt() .user(message) .call() .content(); } catch (HttpClientErrorException.TooManyRequests e) { if (attempt maxAttempts) { throw new RuntimeException(请求频率超限请稍后再试, e); } // 优先读响应头里的 Retry-After没有就用默认等待时间 String retryAfter e.getResponseHeaders() ! null ? 
e.getResponseHeaders().getFirst(Retry-After) : null; long waitMs retryAfter ! null ? Long.parseLong(retryAfter) * 1000 : delayMs; log.warn(触发限流等待 {}ms 后重试第 {}/{} 次, waitMs, attempt, maxAttempts); sleep(waitMs); } catch (HttpServerErrorException e) { if (attempt maxAttempts) throw new RuntimeException(AI 服务异常, e); log.warn(服务端错误{}ms 后重试第 {}/{} 次, delayMs, attempt, maxAttempts); sleep(delayMs); delayMs * 2; // 指数退避 } catch (Exception e) { throw new RuntimeException(AI 调用失败, e); } } return AI 服务暂时不可用; } private void sleep(long ms) { try { Thread.sleep(ms); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(重试被中断, e); } } }

对应的 Controller：

package com.jichi.springaialibaba.controller; import com.jichi.springaialibaba.service.ManualRetryChatService; import org.springframework.web.bind.annotation.*; RestController RequestMapping(/api/manual-retry) public class ManualRetryChatController { private final ManualRetryChatService manualRetryChatService; public ManualRetryChatController(ManualRetryChatService manualRetryChatService) { this.manualRetryChatService manualRetryChatService; } GetMapping public String chat(RequestParam String message) { return manualRetryChatService.chatWithManualRetry(message); } }

四、熔断降级：避免雪崩效应

重试只能应对偶发问题。如果模型 API 持续不可用（比如连续 5 分钟），每次还重试 3 次，会严重拖慢整个系统。这时候需要熔断器——错误率超过阈值就直接快速失败，不再调用真实 API，等一段时间后自动尝试恢复。Resilience4j 是 Spring 生态中的首选。

引入依赖：

dependency groupIdio.github.resilience4j/groupId artifactIdresilience4j-spring-boot3/artifactId version2.3.0/version /dependency

配置熔断器：

resilience4j: circuitbreaker: instances: aiService: sliding-window-size: 10 # 统计最近 10 次调用 failure-rate-threshold: 50 # 失败率超过 50% 触发熔断 wait-duration-in-open-state: 30s # 熔断后等待 30s 再尝试半开 permitted-number-of-calls-in-half-open-state: 3 # 半开状态测试 3 次 retry: instances: aiService: max-attempts: 3 wait-duration: 1s retry-exceptions: - org.springframework.web.client.HttpServerErrorException

package com.jichi.springaialibaba.service; import 
io.github.resilience4j.circuitbreaker.annotation.CircuitBreaker; import io.github.resilience4j.retry.annotation.Retry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.chat.client.ChatClient; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.stereotype.Service; Service public class ResilientChatService { private static final Logger log LoggerFactory.getLogger(ResilientChatService.class); private final ChatClient primaryChatClient; private final ChatClient backupChatClient; public ResilientChatService( Qualifier(primaryChatClient) ChatClient primaryChatClient, Qualifier(backupChatClient) ChatClient backupChatClient) { this.primaryChatClient primaryChatClient; this.backupChatClient backupChatClient; } /** * 先重试重试都失败后触发熔断熔断后走降级方法 */ CircuitBreaker(name aiService, fallbackMethod fallbackChat) Retry(name aiService) public String chat(String message) { return primaryChatClient.prompt() .user(message) .call() .content(); } /** * 降级方法主模型熔断时切换到备用模型 * 签名必须和原方法一致最后加一个 Throwable 参数 */ public String fallbackChat(String message, Throwable throwable) { log.warn(主模型不可用{}切换备用模型, throwable.getMessage()); try { return backupChatClient.prompt() .user(message) .call() .content(); } catch (Exception e) { log.error(备用模型也不可用, e); return AI 服务暂时不可用请稍后重试。如有紧急需求请联系客服。; } } }

对应的 Controller：

package com.jichi.springaialibaba.controller; import com.jichi.springaialibaba.service.ResilientChatService; import org.springframework.web.bind.annotation.*; RestController RequestMapping(/api/resilient) public class ResilientChatController { private final ResilientChatService resilientChatService; public ResilientChatController(ResilientChatService resilientChatService) { this.resilientChatService resilientChatService; } GetMapping public String chat(RequestParam String message) { return resilientChatService.chat(message); } }

五、全局异常处理：让 Controller 清爽起来

前面我们在 Controller 里写了不少 try-catch，重复又难看。用 @RestControllerAdvice 统一拦截 AI 
相关异常返回统一的错误结构。package com.jichi.springaialibaba.exception; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ExceptionHandler; import org.springframework.web.bind.annotation.RestControllerAdvice; import org.springframework.web.client.HttpClientErrorException; import org.springframework.web.client.HttpServerErrorException; import java.util.concurrent.TimeoutException; RestControllerAdvice public class AiExceptionHandler { private static final Logger log LoggerFactory.getLogger(AiExceptionHandler.class); record ErrorResponse(String code, String message) {} ExceptionHandler(HttpClientErrorException.Unauthorized.class) public ResponseEntityErrorResponse handleUnauthorized(HttpClientErrorException.Unauthorized e) { log.error(API Key 无效, e); return ResponseEntity.status(HttpStatus.UNAUTHORIZED) .body(new ErrorResponse(UNAUTHORIZED, API Key 无效或已过期)); } ExceptionHandler(HttpClientErrorException.Forbidden.class) public ResponseEntityErrorResponse handleForbidden(HttpClientErrorException.Forbidden e) { log.error(权限不足, e); return ResponseEntity.status(HttpStatus.FORBIDDEN) .body(new ErrorResponse(FORBIDDEN, 无权访问该模型)); } ExceptionHandler(HttpClientErrorException.TooManyRequests.class) public ResponseEntityErrorResponse handleRateLimit(HttpClientErrorException.TooManyRequests e) { return ResponseEntity.status(HttpStatus.TOO_MANY_REQUESTS) .body(new ErrorResponse(RATE_LIMIT, 请求过于频繁请稍后再试)); } ExceptionHandler(HttpServerErrorException.class) public ResponseEntityErrorResponse handleServerError(HttpServerErrorException e) { log.error(AI 服务端错误, e); return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE) .body(new ErrorResponse(AI_SERVICE_ERROR, AI 服务暂时不可用)); } ExceptionHandler(TimeoutException.class) public ResponseEntityErrorResponse handleTimeout(TimeoutException e) { return ResponseEntity.status(HttpStatus.GATEWAY_TIMEOUT) 
.body(new ErrorResponse(TIMEOUT, AI 响应超时请重试)); } }

这样一来，Controller 里只需要专注业务逻辑，异常全被统一处理，代码干净多了。

六、总结

一套完整的 AI 调用防御体系应该分层考虑：

- 基础异常处理：分支响应不同类型错误
- 重试：偶发性错误的即时补偿（Spring Retry 声明式 / 手动读取 Retry-After）
- 熔断降级：长期故障下的快速失败（Resilience4j）
- 超时控制：避免线程阻塞
- 全局异常拦截：统一对外输出

把这套组合拳加到你的项目里，AI 调用就能从“一碰就碎”变成“稳如老狗”。鸡哥博客里还会持续更新更多实战细节，欢迎继续关注。