1

2026-01-08 12:56:00 +08:00
parent 1561c054b7
commit 37b0e5f8e6
13 changed files with 258 additions and 140 deletions
--- a/prisma/services/deepThink/expert.ts
+++ b/prisma/services/deepThink/expert.ts
@@ -1,5 +1,6 @@
 import { ModelOption, ExpertResult } from '../../types';
 import { getExpertSystemInstruction } from './prompts';
+import { withRetry } from '../utils/retry';

 export const streamExpertResponse = async (
  ai: any,
@@ -10,7 +11,10 @@ export const streamExpertResponse = async (
  signal: AbortSignal,
  onChunk: (text: string, thought: string) => void
 ): Promise<void> => {
-  const streamResult = await ai.models.generateContentStream({
+  // We wrap the stream initiation in retry. 
+  // If the stream is successfully established but fails during iteration, 
+  // we catch that separately.
+  const streamResult = await withRetry(() => ai.models.generateContentStream({
    model: model,
    contents: expert.prompt,
    config: {
@@ -21,23 +25,30 @@ export const streamExpertResponse = async (
          includeThoughts: true 
      }
    }
-  });
+  }));

-  for await (const chunk of streamResult) {
-     if (signal.aborted) break;
+  try {
+    for await (const chunk of streamResult) {
+       if (signal.aborted) break;

-     let chunkText = "";
-     let chunkThought = "";
+       let chunkText = "";
+       let chunkThought = "";

-     if (chunk.candidates?.[0]?.content?.parts) {
-         for (const part of chunk.candidates[0].content.parts) {
-             if (part.thought) {
-                 chunkThought += (part.text || "");
-             } else if (part.text) {
-                 chunkText += part.text;
-             }
-         }
-         onChunk(chunkText, chunkThought);
-     }
+       if (chunk.candidates?.[0]?.content?.parts) {
+           for (const part of chunk.candidates[0].content.parts) {
+               if (part.thought) {
+                   chunkThought += (part.text || "");
+               } else if (part.text) {
+                   chunkText += part.text;
+               }
+           }
+           onChunk(chunkText, chunkThought);
+       }
+    }
+  } catch (streamError) {
+    console.error(`Stream interrupted for expert ${expert.role}:`, streamError);
+    // We don't retry mid-stream automatically here to avoid complex state management,
+    // but the initial connection is protected by withRetry.
+    throw streamError;
  }
 };
--- a/prisma/services/deepThink/manager.ts
+++ b/prisma/services/deepThink/manager.ts
@@ -2,6 +2,7 @@ import { Type } from "@google/genai";
 import { ModelOption, AnalysisResult, ExpertResult, ReviewResult } from '../../types';
 import { cleanJsonString } from '../../utils';
 import { MANAGER_SYSTEM_PROMPT, MANAGER_REVIEW_SYSTEM_PROMPT } from './prompts';
+import { withRetry } from '../utils/retry';

 export const executeManagerAnalysis = async (
  ai: any,
@@ -31,32 +32,36 @@ export const executeManagerAnalysis = async (
    required: ["thought_process", "experts"]
  };

-  const analysisResp = await ai.models.generateContent({
-    model: model,
-    contents: `Context:\n${context}\n\nCurrent Query: "${query}"`,
-    config: {
-      systemInstruction: MANAGER_SYSTEM_PROMPT,
-      responseMimeType: "application/json",
-      responseSchema: managerSchema,
-      thinkingConfig: { 
-         includeThoughts: true, 
-         thinkingBudget: budget 
-      }
-    }
-  });
-
-  const rawText = analysisResp.text || '{}';
-  const cleanText = cleanJsonString(rawText);
-  
  try {
+    const analysisResp = await withRetry(() => ai.models.generateContent({
+      model: model,
+      contents: `Context:\n${context}\n\nCurrent Query: "${query}"`,
+      config: {
+        systemInstruction: MANAGER_SYSTEM_PROMPT,
+        responseMimeType: "application/json",
+        responseSchema: managerSchema,
+        thinkingConfig: { 
+           includeThoughts: true, 
+           thinkingBudget: budget 
+        }
+      }
+    }));
+
+    const rawText = analysisResp.text || '{}';
+    const cleanText = cleanJsonString(rawText);
+    
    const analysisJson = JSON.parse(cleanText) as AnalysisResult;
    if (!analysisJson.experts || !Array.isArray(analysisJson.experts)) {
       throw new Error("Invalid schema structure");
    }
    return analysisJson;
  } catch (e) {
-    console.error("JSON Parse Error:", e, rawText);
-    return { thought_process: "Direct processing.", experts: [] };
+    console.error("Manager Analysis Error:", e);
+    // Return a fallback so the process doesn't completely die if planning fails
+    return { 
+      thought_process: "Direct processing fallback due to analysis error.", 
+      experts: [] 
+    };
  }
 };

@@ -97,28 +102,27 @@ export const executeManagerReview = async (

  const content = `User Query: "${query}"\n\nCurrent Expert Outputs:\n${expertOutputs}`;

-  const resp = await ai.models.generateContent({
-    model: model,
-    contents: content,
-    config: {
-      systemInstruction: MANAGER_REVIEW_SYSTEM_PROMPT,
-      responseMimeType: "application/json",
-      responseSchema: reviewSchema,
-      thinkingConfig: { 
-         includeThoughts: true, 
-         thinkingBudget: budget 
-      }
-    }
-  });
-
-  const rawText = resp.text || '{}';
-  const cleanText = cleanJsonString(rawText);
-
  try {
+    const resp = await withRetry(() => ai.models.generateContent({
+      model: model,
+      contents: content,
+      config: {
+        systemInstruction: MANAGER_REVIEW_SYSTEM_PROMPT,
+        responseMimeType: "application/json",
+        responseSchema: reviewSchema,
+        thinkingConfig: { 
+           includeThoughts: true, 
+           thinkingBudget: budget 
+        }
+      }
+    }));
+
+    const rawText = resp.text || '{}';
+    const cleanText = cleanJsonString(rawText);
    return JSON.parse(cleanText) as ReviewResult;
  } catch (e) {
-    console.error("Review JSON Parse Error:", e);
-    // Fallback: Assume satisfied if JSON fails to avoid infinite loops due to format errors
-    return { satisfied: true, critique: "JSON Error, proceeding to synthesis." };
+    console.error("Review Error:", e);
+    // Fallback: Assume satisfied if JSON or API fails to avoid infinite loops
+    return { satisfied: true, critique: "Processing Error, proceeding to synthesis." };
  }
-};
+};
--- a/prisma/services/deepThink/synthesis.ts
+++ b/prisma/services/deepThink/synthesis.ts
@@ -1,5 +1,6 @@
 import { ModelOption, ExpertResult } from '../../types';
 import { getSynthesisPrompt } from './prompts';
+import { withRetry } from '../utils/retry';

 export const streamSynthesisResponse = async (
  ai: any,
@@ -13,7 +14,7 @@ export const streamSynthesisResponse = async (
 ): Promise<void> => {
  const prompt = getSynthesisPrompt(historyContext, query, expertResults);

-  const synthesisStream = await ai.models.generateContentStream({
+  const synthesisStream = await withRetry(() => ai.models.generateContentStream({
    model: model,
    contents: prompt,
    config: {
@@ -22,23 +23,28 @@ export const streamSynthesisResponse = async (
          includeThoughts: true
      } 
    }
-  });
+  }));

-  for await (const chunk of synthesisStream) {
-    if (signal.aborted) break;
+  try {
+    for await (const chunk of synthesisStream) {
+      if (signal.aborted) break;

-    let chunkText = "";
-    let chunkThought = "";
+      let chunkText = "";
+      let chunkThought = "";

-    if (chunk.candidates?.[0]?.content?.parts) {
-        for (const part of chunk.candidates[0].content.parts) {
-            if (part.thought) {
-                chunkThought += (part.text || "");
-            } else if (part.text) {
-                chunkText += part.text;
-            }
-        }
-        onChunk(chunkText, chunkThought);
+      if (chunk.candidates?.[0]?.content?.parts) {
+          for (const part of chunk.candidates[0].content.parts) {
+              if (part.thought) {
+                  chunkThought += (part.text || "");
+              } else if (part.text) {
+                  chunkText += part.text;
+              }
+          }
+          onChunk(chunkText, chunkThought);
+      }
    }
+  } catch (streamError) {
+    console.error("Synthesis stream interrupted:", streamError);
+    throw streamError;
  }
 };
--- a/prisma/services/utils/retry.ts
+++ b/prisma/services/utils/retry.ts
@@ -0,0 +1,50 @@
+/**
+ * Retry Utility for API calls
+ * Implements exponential backoff and handles transient errors (429, 5xx).
+ */
+
+export async function withRetry<T>(
+  fn: () => Promise<T>,
+  maxRetries: number = 3,
+  initialDelay: number = 1500
+): Promise<T> {
+  let lastError: any;
+  
+  for (let attempt = 1; attempt <= maxRetries; attempt++) {
+    try {
+      return await fn();
+    } catch (error: any) {
+      lastError = error;
+      
+      // Determine if the error is transient
+      // 429: Too Many Requests
+      // 5xx: Server Errors
+      // Network failures (no status)
+      const status = error?.status || error?.response?.status;
+      const message = error?.message || "";
+      
+      const isRateLimit = status === 429;
+      const isServerError = status >= 500 && status < 600;
+      const isNetworkError = !status;
+      const isTransient = isRateLimit || isServerError || isNetworkError;
+
+      // If we reached max retries or the error isn't transient, throw immediately
+      if (attempt === maxRetries || !isTransient) {
+        console.error(`[Prisma] Final attempt ${attempt} failed:`, error);
+        throw error;
+      }
+
+      // Calculate delay with exponential backoff: 1.5s, 3s, 6s...
+      const delay = initialDelay * Math.pow(2, attempt - 1);
+      
+      console.warn(
+        `[Prisma] API call failed (Attempt ${attempt}/${maxRetries}). ` +
+        `Status: ${status || 'Network Error'}. Retrying in ${delay}ms...`
+      );
+      
+      await new Promise(resolve => setTimeout(resolve, delay));
+    }
+  }
+
+  throw lastError || new Error("Maximum retries reached without success");
+}