diff --git a/.DS_Store b/.DS_Store index cc36e27..bea1c96 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/prisma/config.ts b/prisma/config.ts index 78ce52b..5f0d9bd 100644 --- a/prisma/config.ts +++ b/prisma/config.ts @@ -14,12 +14,6 @@ export const MODELS: { value: ModelOption; label: string; desc: string; provider desc: 'Deep reasoning, complex tasks, higher intelligence.', provider: 'google' }, - { - value: 'deepseek-reasoner', - label: 'DeepSeek R1', - desc: 'State-of-the-art open reasoning model.', - provider: 'deepseek' - }, { value: 'custom', label: 'Custom Model', diff --git a/prisma/services/deepThink/openaiClient.ts b/prisma/services/deepThink/openaiClient.ts index f45f9e6..2034a7d 100644 --- a/prisma/services/deepThink/openaiClient.ts +++ b/prisma/services/deepThink/openaiClient.ts @@ -56,7 +56,8 @@ export const generateContent = async ( const requestOptions: any = { model: config.model, messages, - temperature: config.temperature, + // Clamp temperature to 1.0 max for compatibility with strict providers (NVIDIA, vLLM, etc.) + temperature: typeof config.temperature === 'number' ? 
Math.min(config.temperature, 1.0) : undefined, }; if (config.responseFormat === 'json_object') { @@ -68,11 +69,11 @@ export const generateContent = async ( const message = response.choices[0]?.message; const content = message?.content || ''; - // Check for DeepSeek native reasoning field + // Check for native reasoning_content field (DeepSeek/NVIDIA style) const reasoningContent = (message as any)?.reasoning_content; if (reasoningContent && config.thinkingConfig?.includeThoughts) { - return { text: content, thought: reasoningContent }; + return { text: content, thought: reasoningContent }; } if (config.thinkingConfig?.includeThoughts) { @@ -108,7 +109,8 @@ export async function* generateContentStream( const requestOptions: any = { model: config.model, messages, - temperature: config.temperature, + // Clamp temperature to 1.0 max for compatibility with strict providers + temperature: typeof config.temperature === 'number' ? Math.min(config.temperature, 1.0) : undefined, stream: true, }; @@ -119,51 +121,56 @@ let currentThought = ''; for await (const chunk of (stream as any)) { - const delta = chunk.choices[0]?.delta?.content || ''; - // Support DeepSeek native reasoning field - const reasoningDelta = (chunk.choices[0]?.delta as any)?.reasoning_content || ''; - - // If we have native reasoning content, yield it immediately as thought - if (reasoningDelta) { - yield { text: '', thought: reasoningDelta }; - } - + const delta = chunk.choices[0]?.delta; if (!delta) continue; - accumulatedText += delta; + const content = delta.content || ''; + // Check for native reasoning_content field (DeepSeek/NVIDIA style) + const reasoning = delta.reasoning_content || ''; - if (config.thinkingConfig?.includeThoughts) { - if (delta.includes('<think>')) { - inThinking = true; - continue; - } + // If native reasoning field exists, emit it immediately + if (reasoning && config.thinkingConfig?.includeThoughts) { + yield { text: '', thought: reasoning 
}; } - if (inThinking) { - if (delta.includes('</think>')) { - inThinking = false; - const parts = delta.split('</think>', 2); - currentThought += parts[0]; + if (content) { + accumulatedText += content; - if (currentThought.trim()) { - yield { text: '', thought: currentThought }; - currentThought = ''; - } + if (config.thinkingConfig?.includeThoughts) { + // Fallback to tag parsing if reasoning_content wasn't provided but tags exist + if (content.includes('<think>')) { + inThinking = true; + continue; + } - if (parts[1]) { - yield { text: parts[1], thought: '' }; + if (inThinking) { + if (content.includes('</think>')) { + inThinking = false; + const parts = content.split('</think>', 2); + currentThought += parts[0]; + + if (currentThought.trim()) { + yield { text: '', thought: currentThought }; + currentThought = ''; + } + + if (parts[1]) { + yield { text: parts[1], thought: '' }; + } + } else { + currentThought += content; + // Emit thought chunks periodically so it doesn't hang + if (currentThought.length > 50) { + yield { text: '', thought: currentThought }; + currentThought = ''; + } } } else { - currentThought += delta; - if (currentThought.length > 100) { - yield { text: '', thought: currentThought }; - currentThought = ''; - } + yield { text: content, thought: '' }; } } else { - yield { text: delta, thought: '' }; + yield { text: content, thought: '' }; } - } else { - yield { text: delta, thought: '' }; } } diff --git a/prisma/services/deepThink/prompts.ts b/prisma/services/deepThink/prompts.ts index 1da24df..7e944ee 100644 --- a/prisma/services/deepThink/prompts.ts +++ b/prisma/services/deepThink/prompts.ts @@ -5,11 +5,11 @@ export const MANAGER_SYSTEM_PROMPT = `You are the "Dynamic Planning Engine". 
You Your job is to create SUPPLEMENTARY experts -For each expert, you must assign a specific 'temperature' (0.0 to 2.0) based on the nature of their task: +For each expert, you must assign a specific 'temperature' (0.0 to 1.0) based on the nature of their task: -* High temperature (1.0 - 2.0) -* Low temperature (0.0 - 0.4) -* Medium temperature (0.4 - 1.0)`; +* High temperature (0.7 - 1.0) - For creative, brainstorming, or open-ended tasks. +* Low temperature (0.0 - 0.3) - For code, math, logic, or factual tasks. +* Medium temperature (0.3 - 0.7) - For balanced analysis and general explanation.`; export const MANAGER_REVIEW_SYSTEM_PROMPT = ` You are the "Quality Assurance & Orchestration Engine".