diff --git a/.DS_Store b/.DS_Store
index bea1c96..de2c147 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/prisma/services/deepThink/expert.ts b/prisma/services/deepThink/expert.ts
index 39031c3..a9128b7 100644
--- a/prisma/services/deepThink/expert.ts
+++ b/prisma/services/deepThink/expert.ts
@@ -44,8 +44,7 @@ export const streamExpertResponse = async (
       systemInstruction: getExpertSystemInstruction(expert.role, expert.description, context),
       temperature: expert.temperature,
       thinkingConfig: {
-        thinkingBudget: budget,
-        includeThoughts: true
+        thinkingBudget: budget
       }
     }
   }));
@@ -54,19 +53,8 @@ export const streamExpertResponse = async (
     for await (const chunk of (streamResult as any)) {
       if (signal.aborted) break;

-      let chunkText = "";
-      let chunkThought = "";
-
-      if (chunk.candidates?.[0]?.content?.parts) {
-        for (const part of chunk.candidates[0].content.parts) {
-          if (part.thought) {
-            chunkThought += (part.text || "");
-          } else if (part.text) {
-            chunkText += part.text;
-          }
-        }
-        onChunk(chunkText, chunkThought);
-      }
+      const chunkText = chunk.text || "";
+      onChunk(chunkText, "");
     }
   } catch (streamError) {
     console.error(`Stream interrupted for expert ${expert.role}:`, streamError);
diff --git a/prisma/services/deepThink/manager.ts b/prisma/services/deepThink/manager.ts
index c7f71b9..a1f612b 100644
--- a/prisma/services/deepThink/manager.ts
+++ b/prisma/services/deepThink/manager.ts
@@ -68,7 +68,6 @@ export const executeManagerAnalysis = async (
       responseMimeType: "application/json",
       responseSchema: managerSchema,
       thinkingConfig: {
-        includeThoughts: true,
         thinkingBudget: budget
       }
     }
@@ -193,7 +192,6 @@ export const executeManagerReview = async (
       responseMimeType: "application/json",
       responseSchema: reviewSchema,
       thinkingConfig: {
-        includeThoughts: true,
         thinkingBudget: budget
       }
     }
diff --git a/prisma/services/deepThink/openaiClient.ts b/prisma/services/deepThink/openaiClient.ts
index 2034a7d..5f7c9a3 100644
--- a/prisma/services/deepThink/openaiClient.ts
+++ b/prisma/services/deepThink/openaiClient.ts
@@ -56,8 +56,7 @@ export const generateContent = async (
   const requestOptions: any = {
     model: config.model,
     messages,
-    // Clamp temperature to 1.0 max for compatibility with strict providers (NVIDIA, vLLM, etc.)
-    temperature: typeof config.temperature === 'number' ? Math.min(config.temperature, 1.0) : undefined,
+    temperature: config.temperature,
   };

   if (config.responseFormat === 'json_object') {
@@ -66,15 +65,7 @@ export const generateContent = async (
   try {
     const response = await withRetry(() => ai.chat.completions.create(requestOptions));
-    const message = response.choices[0]?.message;
-    const content = message?.content || '';
-
-    // Check for native reasoning_content field (DeepSeek/NVIDIA style)
-    const reasoningContent = (message as any)?.reasoning_content;
-
-    if (reasoningContent && config.thinkingConfig?.includeThoughts) {
-      return { text: content, thought: reasoningContent };
-    }
+    const content = response.choices[0]?.message?.content || '';

     if (config.thinkingConfig?.includeThoughts) {
       const { thought, text } = parseThinkingTokens(content);
@@ -109,8 +100,7 @@ export async function* generateContentStream(
   const requestOptions: any = {
     model: config.model,
     messages,
-    // Clamp temperature to 1.0 max for compatibility with strict providers
-    temperature: typeof config.temperature === 'number' ? Math.min(config.temperature, 1.0) : undefined,
+    temperature: config.temperature,
     stream: true,
   };
@@ -121,56 +111,44 @@ export async function* generateContentStream(
   let currentThought = '';

   for await (const chunk of (stream as any)) {
-    const delta = chunk.choices[0]?.delta;
+    const delta = chunk.choices[0]?.delta?.content || '';
+    if (!delta) continue;

-    const content = delta.content || '';
-    // Check for native reasoning_content field (DeepSeek/NVIDIA style)
-    const reasoning = delta.reasoning_content || '';
+    accumulatedText += delta;

-    // If native reasoning field exists, emit it immediately
-    if (reasoning && config.thinkingConfig?.includeThoughts) {
-      yield { text: '', thought: reasoning };
-    }
-
-    if (content) {
-      accumulatedText += content;
-
-      if (config.thinkingConfig?.includeThoughts) {
-        // Fallback to tag parsing if reasoning_content wasn't provided but tags exist
-        if (content.includes('<think>')) {
-          inThinking = true;
-          continue;
-        }
-
-        if (inThinking) {
-          if (content.includes('</think>')) {
-            inThinking = false;
-            const parts = content.split('</think>', 2);
-            currentThought += parts[0];
-
-            if (currentThought.trim()) {
-              yield { text: '', thought: currentThought };
-              currentThought = '';
-            }
-
-            if (parts[1]) {
-              yield { text: parts[1], thought: '' };
-            }
-          } else {
-            currentThought += content;
-            // Emit thought chunks periodically so it doesn't hang
-            if (currentThought.length > 50) {
-              yield { text: '', thought: currentThought };
-              currentThought = '';
-            }
-          }
-        } else {
-          yield { text: content, thought: '' };
-        }
-      } else {
-        yield { text: content, thought: '' };
-      }
-    }
+    if (config.thinkingConfig?.includeThoughts) {
+      if (delta.includes('<think>')) {
+        inThinking = true;
+        continue;
+      }
+
+      if (inThinking) {
+        if (delta.includes('</think>')) {
+          inThinking = false;
+          const parts = delta.split('</think>', 2);
+          currentThought += parts[0];
+
+          if (currentThought.trim()) {
+            yield { text: '', thought: currentThought };
+            currentThought = '';
+          }
+
+          if (parts[1]) {
+            yield { text: parts[1], thought: '' };
+          }
+        } else {
+          currentThought += delta;
+          if (currentThought.length > 100) {
+            yield { text: '', thought: currentThought };
+            currentThought = '';
+          }
+        }
+      } else {
+        yield { text: delta, thought: '' };
+      }
+    } else {
+      yield { text: delta, thought: '' };
+    }
   }
diff --git a/prisma/services/deepThink/prompts.ts b/prisma/services/deepThink/prompts.ts
index 7e944ee..1da24df 100644
--- a/prisma/services/deepThink/prompts.ts
+++ b/prisma/services/deepThink/prompts.ts
@@ -5,11 +5,11 @@ export const MANAGER_SYSTEM_PROMPT = `You are the "Dynamic Planning Engine". You

 Your job is to create SUPPLEMENTARY experts

-For each expert, you must assign a specific 'temperature' (0.0 to 1.0) based on the nature of their task:
+For each expert, you must assign a specific 'temperature' (0.0 to 2.0) based on the nature of their task:

-* High temperature (0.7 - 1.0) - For creative, brainstorming, or open-ended tasks.
-* Low temperature (0.0 - 0.3) - For code, math, logic, or factual tasks.
-* Medium temperature (0.3 - 0.7) - For balanced analysis and general explanation.`;
+* High temperature (1.0 - 2.0)
+* Low temperature (0.0 - 0.4)
+* Medium temperature (0.4 - 1.0)`;

 export const MANAGER_REVIEW_SYSTEM_PROMPT = `
 You are the "Quality Assurance & Orchestration Engine".
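Note on the simplified generateContentStream above: with the native reasoning_content path removed, the <think>-tag scanner is now the only mechanism left for separating reasoning from answer text. Below is a minimal standalone sketch of that scanner, useful for unit-testing the parsing in isolation; the splitThinkingDeltas name, the StreamPart type, and the plain-string delta input are illustrative assumptions, not part of this diff.

// Sketch: the <think>-tag splitter generateContentStream now relies on,
// lifted into a standalone generator. Like the diff above, it assumes an
// opening or closing tag arrives whole within a single delta.
type StreamPart = { text: string; thought: string };

async function* splitThinkingDeltas(
  deltas: AsyncIterable<string>,
  includeThoughts: boolean
): AsyncGenerator<StreamPart> {
  let inThinking = false;
  let currentThought = '';

  for await (const delta of deltas) {
    if (!delta) continue;

    if (!includeThoughts) {
      yield { text: delta, thought: '' };
      continue;
    }

    if (delta.includes('<think>')) {
      // Any answer text sharing this delta with the open tag is dropped,
      // mirroring the `continue` in the diff above.
      inThinking = true;
      continue;
    }

    if (!inThinking) {
      yield { text: delta, thought: '' };
      continue;
    }

    if (delta.includes('</think>')) {
      inThinking = false;
      const parts = delta.split('</think>', 2);
      currentThought += parts[0];
      if (currentThought.trim()) {
        yield { text: '', thought: currentThought };
        currentThought = '';
      }
      if (parts[1]) {
        yield { text: parts[1], thought: '' }; // answer text after the close tag
      }
    } else {
      currentThought += delta;
      // Flush periodically so long reasoning runs still stream to the UI.
      if (currentThought.length > 100) {
        yield { text: '', thought: currentThought };
        currentThought = '';
      }
    }
  }

  // Standalone addition: flush a trailing thought if the stream ends mid-block.
  if (currentThought.trim()) {
    yield { text: '', thought: currentThought };
  }
}

One caveat worth flagging for review: a provider that splits a tag across delta boundaries (e.g. "<thi" + "nk>") will defeat the includes() checks in both this sketch and the code in the diff, leaking tag fragments into the answer text.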
diff --git a/prisma/services/deepThink/synthesis.ts b/prisma/services/deepThink/synthesis.ts
index 699d2e9..a1dd913 100644
--- a/prisma/services/deepThink/synthesis.ts
+++ b/prisma/services/deepThink/synthesis.ts
@@ -44,8 +44,7 @@ export const streamSynthesisResponse = async (
     contents: contents,
     config: {
       thinkingConfig: {
-        thinkingBudget: budget,
-        includeThoughts: true
+        thinkingBudget: budget
       }
     }
   }));
@@ -53,20 +52,9 @@ export const streamSynthesisResponse = async (
   try {
     for await (const chunk of (synthesisStream as any)) {
       if (signal.aborted) break;
-
-      let chunkText = "";
-      let chunkThought = "";
-
-      if (chunk.candidates?.[0]?.content?.parts) {
-        for (const part of chunk.candidates[0].content.parts) {
-          if (part.thought) {
-            chunkThought += (part.text || "");
-          } else if (part.text) {
-            chunkText += part.text;
-          }
-        }
-        onChunk(chunkText, chunkThought);
-      }
+
+      const chunkText = chunk.text || "";
+      onChunk(chunkText, "");
     }
   } catch (streamError) {
     console.error("Synthesis stream interrupted:", streamError);
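On the Gemini side (expert.ts and synthesis.ts), dropping includeThoughts removes the need to walk candidates[0].content.parts: the SDK's aggregated chunk.text accessor is sufficient, and the thought channel passed to onChunk is always empty. A minimal sketch of the consumption pattern both call sites now share; the pumpGeminiStream helper and the reduced chunk shape are assumptions standing in for the @google/genai types.

// Sketch: the simplified Gemini stream loop now used by both
// streamExpertResponse and streamSynthesisResponse.
type GeminiChunk = { text?: string };

const pumpGeminiStream = async (
  stream: AsyncIterable<GeminiChunk>,
  signal: AbortSignal,
  onChunk: (text: string, thought: string) => void
): Promise<void> => {
  for await (const chunk of stream) {
    if (signal.aborted) break; // honor caller-side cancellation
    // With includeThoughts removed, the thought argument is always empty.
    onChunk(chunk.text || "", "");
  }
};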