diff --git a/prisma/services/deepThink/expert.ts b/prisma/services/deepThink/expert.ts
index 39031c3..a9128b7 100644
--- a/prisma/services/deepThink/expert.ts
+++ b/prisma/services/deepThink/expert.ts
@@ -44,8 +44,7 @@ export const streamExpertResponse = async (
systemInstruction: getExpertSystemInstruction(expert.role, expert.description, context),
temperature: expert.temperature,
thinkingConfig: {
- thinkingBudget: budget,
- includeThoughts: true
+ thinkingBudget: budget
}
}
}));
@@ -54,19 +53,8 @@ export const streamExpertResponse = async (
for await (const chunk of (streamResult as any)) {
if (signal.aborted) break;
- let chunkText = "";
- let chunkThought = "";
-
- if (chunk.candidates?.[0]?.content?.parts) {
- for (const part of chunk.candidates[0].content.parts) {
- if (part.thought) {
- chunkThought += (part.text || "");
- } else if (part.text) {
- chunkText += part.text;
- }
- }
- onChunk(chunkText, chunkThought);
- }
+ const chunkText = chunk.text || "";
+ onChunk(chunkText, "");
}
} catch (streamError) {
console.error(`Stream interrupted for expert ${expert.role}:`, streamError);
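Annotation: the simplified loop above leans on the `@google/genai` streaming API, where each chunk exposes a `.text` accessor that joins the chunk's text parts, so the manual candidates/parts walk is no longer needed. A minimal consumption sketch follows; `ai`, `onChunk`, and the model id are illustrative stand-ins, not the repo's actual wiring:

```ts
import { GoogleGenAI } from "@google/genai";

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

async function streamPlain(
  prompt: string,
  onChunk: (text: string, thought: string) => void,
  signal: AbortSignal
): Promise<void> {
  const stream = await ai.models.generateContentStream({
    model: "gemini-2.5-flash", // illustrative model id
    contents: prompt,
    // includeThoughts removed: thought parts are no longer requested
    config: { thinkingConfig: { thinkingBudget: 1024 } },
  });
  for await (const chunk of stream) {
    if (signal.aborted) break;
    // chunk.text aggregates the chunk's text parts, so manual
    // candidates/parts iteration is no longer needed
    onChunk(chunk.text ?? "", "");
  }
}
```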
diff --git a/prisma/services/deepThink/manager.ts b/prisma/services/deepThink/manager.ts
index c7f71b9..a1f612b 100644
--- a/prisma/services/deepThink/manager.ts
+++ b/prisma/services/deepThink/manager.ts
@@ -68,7 +68,6 @@ export const executeManagerAnalysis = async (
responseMimeType: "application/json",
responseSchema: managerSchema,
thinkingConfig: {
- includeThoughts: true,
thinkingBudget: budget
}
}
@@ -193,7 +192,6 @@ export const executeManagerReview = async (
responseMimeType: "application/json",
responseSchema: reviewSchema,
thinkingConfig: {
- includeThoughts: true,
thinkingBudget: budget
}
}
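Annotation: after this change the manager calls request structured JSON plus a thinking budget, with `includeThoughts` dropped. A hedged sketch of the resulting call shape, assuming the `@google/genai` structured-output API; `managerSchema` here is an illustrative stand-in for the real schema and the model id is hypothetical:

```ts
import { GoogleGenAI, Type } from "@google/genai";

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Illustrative stand-in for the real managerSchema
const managerSchema = {
  type: Type.OBJECT,
  properties: {
    experts: {
      type: Type.ARRAY,
      items: {
        type: Type.OBJECT,
        properties: {
          role: { type: Type.STRING },
          temperature: { type: Type.NUMBER },
        },
      },
    },
  },
};

async function planExperts(prompt: string, budget: number) {
  const response = await ai.models.generateContent({
    model: "gemini-2.5-flash", // illustrative
    contents: prompt,
    config: {
      responseMimeType: "application/json",
      responseSchema: managerSchema,
      // includeThoughts dropped; only the budget remains
      thinkingConfig: { thinkingBudget: budget },
    },
  });
  return JSON.parse(response.text ?? "{}");
}
```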
diff --git a/prisma/services/deepThink/openaiClient.ts b/prisma/services/deepThink/openaiClient.ts
index 2034a7d..5f7c9a3 100644
--- a/prisma/services/deepThink/openaiClient.ts
+++ b/prisma/services/deepThink/openaiClient.ts
@@ -56,8 +56,7 @@ export const generateContent = async (
const requestOptions: any = {
model: config.model,
messages,
- // Clamp temperature to 1.0 max for compatibility with strict providers (NVIDIA, vLLM, etc.)
- temperature: typeof config.temperature === 'number' ? Math.min(config.temperature, 1.0) : undefined,
+ temperature: config.temperature,
};
if (config.responseFormat === 'json_object') {
@@ -66,15 +65,7 @@ export const generateContent = async (
try {
const response = await withRetry(() => ai.chat.completions.create(requestOptions));
- const message = response.choices[0]?.message;
- const content = message?.content || '';
-
- // Check for native reasoning_content field (DeepSeek/NVIDIA style)
- const reasoningContent = (message as any)?.reasoning_content;
-
- if (reasoningContent && config.thinkingConfig?.includeThoughts) {
- return { text: content, thought: reasoningContent };
- }
+ const content = response.choices[0]?.message?.content || '';
if (config.thinkingConfig?.includeThoughts) {
const { thought, text } = parseThinkingTokens(content);
@@ -109,8 +100,7 @@ export async function* generateContentStream(
const requestOptions: any = {
model: config.model,
messages,
- // Clamp temperature to 1.0 max for compatibility with strict providers
- temperature: typeof config.temperature === 'number' ? Math.min(config.temperature, 1.0) : undefined,
+ temperature: config.temperature,
stream: true,
};
@@ -121,56 +111,44 @@ export async function* generateContentStream(
let currentThought = '';
for await (const chunk of (stream as any)) {
- const delta = chunk.choices[0]?.delta;
+ const delta = chunk.choices[0]?.delta?.content || '';
+
if (!delta) continue;
- const content = delta.content || '';
- // Check for native reasoning_content field (DeepSeek/NVIDIA style)
- const reasoning = delta.reasoning_content || '';
+ accumulatedText += delta;
- // If native reasoning field exists, emit it immediately
- if (reasoning && config.thinkingConfig?.includeThoughts) {
- yield { text: '', thought: reasoning };
- }
+ if (config.thinkingConfig?.includeThoughts) {
+ if (delta.includes('<think>')) {
+ inThinking = true;
+ continue;
+ }
- if (content) {
- accumulatedText += content;
+ if (inThinking) {
+ if (delta.includes('</think>')) {
+ inThinking = false;
+ const parts = delta.split('</think>', 2);
+ currentThought += parts[0];
- if (config.thinkingConfig?.includeThoughts) {
- // Fallback to tag parsing if reasoning_content wasn't provided but tags exist
- if (content.includes('<think>')) {
- inThinking = true;
- continue;
- }
+ if (currentThought.trim()) {
+ yield { text: '', thought: currentThought };
+ currentThought = '';
+ }
- if (inThinking) {
- if (content.includes('</think>')) {
- inThinking = false;
- const parts = content.split('</think>', 2);
- currentThought += parts[0];
-
- if (currentThought.trim()) {
- yield { text: '', thought: currentThought };
- currentThought = '';
- }
-
- if (parts[1]) {
- yield { text: parts[1], thought: '' };
- }
- } else {
- currentThought += content;
- // Emit thought chunks periodically so it doesn't hang
- if (currentThought.length > 50) {
- yield { text: '', thought: currentThought };
- currentThought = '';
- }
+ if (parts[1]) {
+ yield { text: parts[1], thought: '' };
}
} else {
- yield { text: content, thought: '' };
+ currentThought += delta;
+ if (currentThought.length > 100) {
+ yield { text: '', thought: currentThought };
+ currentThought = '';
+ }
}
} else {
- yield { text: content, thought: '' };
+ yield { text: delta, thought: '' };
}
+ } else {
+ yield { text: delta, thought: '' };
}
}
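Annotation: the reworked stream loop reverts to pure `<think>`-tag parsing, dropping the native `reasoning_content` path. A standalone sketch of that state machine, mirroring the diff's logic; like the diff's `includes`/`split` approach, it assumes each tag arrives whole inside a single delta, and it flushes buffered thought text past a threshold so the consumer is not left hanging:

```ts
type StreamPart = { text: string; thought: string };

function* parseThinkDeltas(deltas: Iterable<string>): Generator<StreamPart> {
  let inThinking = false;
  let thought = "";
  for (const delta of deltas) {
    if (!inThinking && delta.includes("<think>")) {
      // Enter thinking mode; text in the same delta is dropped,
      // matching the diff's `continue` behavior
      inThinking = true;
      continue;
    }
    if (inThinking) {
      if (delta.includes("</think>")) {
        inThinking = false;
        const [head, tail] = delta.split("</think>", 2);
        thought += head;
        if (thought.trim()) yield { text: "", thought };
        thought = "";
        // Anything after the closing tag is regular answer text
        if (tail) yield { text: tail, thought: "" };
      } else {
        thought += delta;
        if (thought.length > 100) {
          // Flush periodically so the UI does not stall
          yield { text: "", thought };
          thought = "";
        }
      }
    } else {
      yield { text: delta, thought: "" };
    }
  }
}
```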
diff --git a/prisma/services/deepThink/prompts.ts b/prisma/services/deepThink/prompts.ts
index 7e944ee..1da24df 100644
--- a/prisma/services/deepThink/prompts.ts
+++ b/prisma/services/deepThink/prompts.ts
@@ -5,11 +5,11 @@ export const MANAGER_SYSTEM_PROMPT = `You are the "Dynamic Planning Engine". You
Your job is to create SUPPLEMENTARY experts
-For each expert, you must assign a specific 'temperature' (0.0 to 1.0) based on the nature of their task:
+For each expert, you must assign a specific 'temperature' (0.0 to 2.0) based on the nature of their task:
-* High temperature (0.7 - 1.0) - For creative, brainstorming, or open-ended tasks.
-* Low temperature (0.0 - 0.3) - For code, math, logic, or factual tasks.
-* Medium temperature (0.3 - 0.7) - For balanced analysis and general explanation.`;
+* High temperature (1.0 - 2.0) - For creative, brainstorming, or open-ended tasks.
+* Low temperature (0.0 - 0.4) - For code, math, logic, or factual tasks.
+* Medium temperature (0.4 - 1.0) - For balanced analysis and general explanation.`;
export const MANAGER_REVIEW_SYSTEM_PROMPT = `
You are the "Quality Assurance & Orchestration Engine".
diff --git a/prisma/services/deepThink/synthesis.ts b/prisma/services/deepThink/synthesis.ts
index 699d2e9..a1dd913 100644
--- a/prisma/services/deepThink/synthesis.ts
+++ b/prisma/services/deepThink/synthesis.ts
@@ -44,8 +44,7 @@ export const streamSynthesisResponse = async (
contents: contents,
config: {
thinkingConfig: {
- thinkingBudget: budget,
- includeThoughts: true
+ thinkingBudget: budget
}
}
}));
@@ -53,20 +52,9 @@ export const streamSynthesisResponse = async (
try {
for await (const chunk of (synthesisStream as any)) {
if (signal.aborted) break;
-
- let chunkText = "";
- let chunkThought = "";
-
- if (chunk.candidates?.[0]?.content?.parts) {
- for (const part of chunk.candidates[0].content.parts) {
- if (part.thought) {
- chunkThought += (part.text || "");
- } else if (part.text) {
- chunkText += part.text;
- }
- }
- onChunk(chunkText, chunkThought);
- }
+
+ const chunkText = chunk.text || "";
+ onChunk(chunkText, "");
}
} catch (streamError) {
console.error("Synthesis stream interrupted:", streamError);