1

2026-01-12 17:45:36 +08:00
parent 586cf48a5a
commit bd297716b0
6 changed files with 41 additions and 89 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/prisma/services/deepThink/expert.ts
+++ b/prisma/services/deepThink/expert.ts
@@ -44,8 +44,7 @@ export const streamExpertResponse = async (
        systemInstruction: getExpertSystemInstruction(expert.role, expert.description, context),
        temperature: expert.temperature,
        thinkingConfig: {
-          thinkingBudget: budget,
-          includeThoughts: true
+          thinkingBudget: budget
        }
      }
    }));
@@ -54,19 +53,8 @@ export const streamExpertResponse = async (
      for await (const chunk of (streamResult as any)) {
        if (signal.aborted) break;

-        let chunkText = "";
-        let chunkThought = "";
-
-        if (chunk.candidates?.[0]?.content?.parts) {
-          for (const part of chunk.candidates[0].content.parts) {
-            if (part.thought) {
-              chunkThought += (part.text || "");
-            } else if (part.text) {
-              chunkText += part.text;
-            }
-          }
-          onChunk(chunkText, chunkThought);
-        }
+        const chunkText = chunk.text || "";
+        onChunk(chunkText, "");
      }
    } catch (streamError) {
      console.error(`Stream interrupted for expert ${expert.role}:`, streamError);
--- a/prisma/services/deepThink/manager.ts
+++ b/prisma/services/deepThink/manager.ts
@@ -68,7 +68,6 @@ export const executeManagerAnalysis = async (
          responseMimeType: "application/json",
          responseSchema: managerSchema,
          thinkingConfig: {
-           includeThoughts: true,
           thinkingBudget: budget
        }
        }
@@ -193,7 +192,6 @@ export const executeManagerReview = async (
          responseMimeType: "application/json",
          responseSchema: reviewSchema,
          thinkingConfig: {
-           includeThoughts: true,
           thinkingBudget: budget
        }
        }
--- a/prisma/services/deepThink/openaiClient.ts
+++ b/prisma/services/deepThink/openaiClient.ts
@@ -56,8 +56,7 @@ export const generateContent = async (
  const requestOptions: any = {
    model: config.model,
    messages,
-    // Clamp temperature to 1.0 max for compatibility with strict providers (NVIDIA, vLLM, etc.)
-    temperature: typeof config.temperature === 'number' ? Math.min(config.temperature, 1.0) : undefined,
+    temperature: config.temperature,
  };

  if (config.responseFormat === 'json_object') {
@@ -66,15 +65,7 @@ export const generateContent = async (

  try {
    const response = await withRetry(() => ai.chat.completions.create(requestOptions));
-    const message = response.choices[0]?.message;
-    const content = message?.content || '';
-    
-    // Check for native reasoning_content field (DeepSeek/NVIDIA style)
-    const reasoningContent = (message as any)?.reasoning_content;
-
-    if (reasoningContent && config.thinkingConfig?.includeThoughts) {
-       return { text: content, thought: reasoningContent };
-    }
+    const content = response.choices[0]?.message?.content || '';

    if (config.thinkingConfig?.includeThoughts) {
      const { thought, text } = parseThinkingTokens(content);
@@ -109,8 +100,7 @@ export async function* generateContentStream(
  const requestOptions: any = {
    model: config.model,
    messages,
-    // Clamp temperature to 1.0 max for compatibility with strict providers
-    temperature: typeof config.temperature === 'number' ? Math.min(config.temperature, 1.0) : undefined,
+    temperature: config.temperature,
    stream: true,
  };

@@ -121,56 +111,44 @@ export async function* generateContentStream(
  let currentThought = '';

  for await (const chunk of (stream as any)) {
-    const delta = chunk.choices[0]?.delta;
+    const delta = chunk.choices[0]?.delta?.content || '';
+
    if (!delta) continue;

-    const content = delta.content || '';
-    // Check for native reasoning_content field (DeepSeek/NVIDIA style)
-    const reasoning = delta.reasoning_content || '';
+    accumulatedText += delta;

-    // If native reasoning field exists, emit it immediately
-    if (reasoning && config.thinkingConfig?.includeThoughts) {
-      yield { text: '', thought: reasoning };
-    }
+    if (config.thinkingConfig?.includeThoughts) {
+      if (delta.includes('<thinking>')) {
+        inThinking = true;
+        continue;
+      }

-    if (content) {
-      accumulatedText += content;
+      if (inThinking) {
+        if (delta.includes('</thinking>')) {
+          inThinking = false;
+          const parts = delta.split('</thinking>', 2);
+          currentThought += parts[0];

-      if (config.thinkingConfig?.includeThoughts) {
-        // Fallback to tag parsing if reasoning_content wasn't provided but tags exist
-        if (content.includes('<thinking>')) {
-          inThinking = true;
-          continue;
-        }
+          if (currentThought.trim()) {
+            yield { text: '', thought: currentThought };
+            currentThought = '';
+          }

-        if (inThinking) {
-          if (content.includes('</thinking>')) {
-            inThinking = false;
-            const parts = content.split('</thinking>', 2);
-            currentThought += parts[0];
-
-            if (currentThought.trim()) {
-              yield { text: '', thought: currentThought };
-              currentThought = '';
-            }
-
-            if (parts[1]) {
-              yield { text: parts[1], thought: '' };
-            }
-          } else {
-            currentThought += content;
-            // Emit thought chunks periodically so it doesn't hang
-            if (currentThought.length > 50) {
-              yield { text: '', thought: currentThought };
-              currentThought = '';
-            }
+          if (parts[1]) {
+            yield { text: parts[1], thought: '' };
          }
        } else {
-          yield { text: content, thought: '' };
+          currentThought += delta;
+          if (currentThought.length > 100) {
+            yield { text: '', thought: currentThought };
+            currentThought = '';
+          }
        }
      } else {
-        yield { text: content, thought: '' };
+        yield { text: delta, thought: '' };
      }
+    } else {
+      yield { text: delta, thought: '' };
    }
  }

--- a/prisma/services/deepThink/prompts.ts
+++ b/prisma/services/deepThink/prompts.ts
@@ -5,11 +5,11 @@ export const MANAGER_SYSTEM_PROMPT = `You are the "Dynamic Planning Engine". You

 Your job is to create SUPPLEMENTARY experts

-For each expert, you must assign a specific 'temperature' (0.0 to 1.0) based on the nature of their task:
+For each expert, you must assign a specific 'temperature' (0.0 to 2.0) based on the nature of their task:

-*   High temperature (0.7 - 1.0) - For creative, brainstorming, or open-ended tasks.
-*   Low temperature (0.0 - 0.3) - For code, math, logic, or factual tasks.
-*   Medium temperature (0.3 - 0.7) - For balanced analysis and general explanation.`;
+*   High temperature (1.0 - 2.0) 
+*   Low temperature (0.0 - 0.4) 
+*   Medium temperature (0.4 - 1.0)`;

 export const MANAGER_REVIEW_SYSTEM_PROMPT = `
 You are the "Quality Assurance & Orchestration Engine". 
--- a/prisma/services/deepThink/synthesis.ts
+++ b/prisma/services/deepThink/synthesis.ts
@@ -44,8 +44,7 @@ export const streamSynthesisResponse = async (
      contents: contents,
      config: {
        thinkingConfig: {
-          thinkingBudget: budget,
-          includeThoughts: true
+          thinkingBudget: budget
        }
      }
    }));
@@ -53,20 +52,9 @@ export const streamSynthesisResponse = async (
    try {
      for await (const chunk of (synthesisStream as any)) {
        if (signal.aborted) break;
-
-        let chunkText = "";
-        let chunkThought = "";
-
-        if (chunk.candidates?.[0]?.content?.parts) {
-          for (const part of chunk.candidates[0].content.parts) {
-            if (part.thought) {
-              chunkThought += (part.text || "");
-            } else if (part.text) {
-              chunkText += part.text;
-            }
-          }
-          onChunk(chunkText, chunkThought);
-        }
+        
+        const chunkText = chunk.text || "";
+        onChunk(chunkText, "");
      }
    } catch (streamError) {
      console.error("Synthesis stream interrupted:", streamError);