Files
insta-recipe/src/lib/server/parser.ts
Giancarmine Salucci 0b9f598c7d
Some checks failed
Build & Push Docker Image / test-and-build (push) Failing after 38s
fix(parser): handle thinking models in recipe detection
Increase max_tokens from 10 to 1024 for detection so thinking
models have room to reason. Also fall back to reasoning_content
if content is empty, since some local models (e.g. Gemma 4
thinking variants) put their answer there.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-12 21:11:50 +02:00

242 lines
7.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { createLLM, checkModelAvailability, isModelLoaded } from './llm';
import { zodResponseFormat } from 'openai/helpers/zod';
import { z } from 'zod';
import { RECIPE_DETECTION_PROMPT, RECIPE_EXTRACTION_PROMPT } from './prompts/recipe-extraction';
import { logError } from './utils/logger';
import type { ProgressCallback } from './extraction';
const RecipeSchema = z.object({
name: z.string(),
servings: z.number().nullable(),
description: z.string().nullable(),
ingredients: z
.array(
z.object({
item: z.string(),
amount: z.string(),
unit: z.string()
})
)
.nullable(),
steps: z.array(z.string()).nullable(),
image: z.string().nullable().optional()
});
export type Recipe = z.infer<typeof RecipeSchema>;
/**
* Detect if the text contains a recipe using binary classification
* @param text - The text to analyze
* @returns True if a recipe is detected, false otherwise
*/
export async function detectRecipe(text: string): Promise<boolean> {
try {
const { client, model } = createLLM();
console.log('[LLM] Starting recipe detection...');
console.log('[LLM] Model:', model);
console.log('[LLM] Text length:', text.length);
const detectionResponse = await client.chat.completions.create({
model,
messages: [
{
role: 'system',
content: RECIPE_DETECTION_PROMPT
},
{
role: 'user',
content: `Does this text contain a recipe?\n\n${text}`
}
],
// 1024 gives thinking models room to reason before answering
max_tokens: 1024,
temperature: 0
});
const msg = detectionResponse.choices[0].message;
// Some local models (e.g. Gemma thinking variants) return the answer in
// reasoning_content instead of content when max_tokens is tight.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const reasoning: string = (msg as any).reasoning_content ?? '';
const detectionResult = (msg.content ?? reasoning).toLowerCase();
console.log('[LLM] Detection response:', detectionResult);
return detectionResult.includes('yes');
} catch (e) {
logError('[LLM] Recipe detection error', e);
// Check if this is a model-related error
const errorMessage = (e as Error).message || '';
const isModelError =
errorMessage.includes('400') &&
(errorMessage.toLowerCase().includes('model') || errorMessage.toLowerCase().includes('load'));
if (isModelError) {
const { model } = createLLM();
const modelCheck = await checkModelAvailability(model);
if (!modelCheck.available) {
throw new Error(modelCheck.message || `Model "${model}" is not available`);
}
}
throw new Error(`Failed to detect recipe: ${(e as Error).message}`);
}
}
/**
* Extract recipe data from text using LLM structured output
* @param text - The text containing the recipe
* @returns Parsed recipe object
*/
export async function parseRecipe(text: string): Promise<Recipe> {
try {
const { client, model } = createLLM();
console.log('[LLM] Starting recipe parsing...');
console.log('[LLM] Model:', model);
const completion = await client.beta.chat.completions.parse({
model,
messages: [
{
role: 'system',
content: RECIPE_EXTRACTION_PROMPT
},
{
role: 'user',
content: `Extract the recipe from this text:\n\n${text}`
}
],
response_format: zodResponseFormat(RecipeSchema, 'recipe'),
temperature: 0.3
});
const recipe = completion.choices[0].message.parsed;
console.log('[LLM] Parse response:', recipe?.name);
if (!recipe || !recipe.name) {
throw new Error('Failed to extract recipe - missing name');
}
return recipe;
} catch (e) {
logError('[LLM] Recipe parsing error', e);
// Check if this is a model-related error
const errorMessage = (e as Error).message || '';
const isModelError =
errorMessage.includes('400') &&
(errorMessage.toLowerCase().includes('model') || errorMessage.toLowerCase().includes('load'));
if (isModelError) {
const { model } = createLLM();
const modelCheck = await checkModelAvailability(model);
if (!modelCheck.available) {
throw new Error(modelCheck.message || `Model "${model}" is not available`);
}
}
// If structured output fails, try standard completion
if (
(e as any).message?.includes('response_format') ||
(e as any).message?.includes('structured output')
) {
console.warn('[LLM] Falling back to standard completion');
return await parseRecipeWithStandardCompletion(text);
}
throw new Error(`Failed to parse recipe: ${(e as Error).message}`);
}
}
/**
* Complete workflow: detect recipe and parse if found.
*
* Emits a `model_loading` progress event (if a callback is supplied) when the
* configured llama-swap model is not yet warm — the first request after idle
* blocks for several seconds while llama-swap loads the model into VRAM.
*
* @param text - The text to analyze
* @param progressCallback - Optional callback for surfacing cold-load state
* @returns Parsed recipe object if detected, null otherwise
*/
export async function extractRecipe(
text: string,
progressCallback?: ProgressCallback
): Promise<Recipe | null> {
if (progressCallback) {
const { model } = createLLM();
const warm = await isModelLoaded(model);
if (!warm) {
progressCallback({
type: 'model_loading',
message: `Inference server cold — loading ${model} into VRAM (530s)...`,
data: { model },
timestamp: new Date().toISOString()
});
}
}
const isRecipe = await detectRecipe(text);
if (!isRecipe) {
return null;
}
return parseRecipe(text);
}
/**
* Fallback parser using standard completion (no structured output)
* Used when the model doesn't support beta.chat.completions.parse()
*/
async function parseRecipeWithStandardCompletion(text: string): Promise<Recipe> {
const { client, model } = createLLM();
console.log('[LLM] Using standard completion fallback');
const completion = await client.chat.completions.create({
model,
messages: [
{
role: 'system',
content: `You are a recipe extractor. Return ONLY valid JSON matching this schema:
{
"name": "recipe name in Italian",
"servings": number or null,
"description": "description in Italian or null",
"ingredients": [{"item": "ingredient name", "amount": "quantity", "unit": "SI unit"}],
"steps": ["First step", "Second step", ...]
}
Convert all measurements to SI units (g, mL, °C).
Translate everything to Italian.
Extract ONLY what's in the text.`
},
{
role: 'user',
content: `Extract the recipe from this text:\n\n${text}`
}
],
max_tokens: 2000,
temperature: 0.3
});
const jsonResponse = completion.choices[0].message.content;
if (!jsonResponse) {
throw new Error('Empty response from LLM');
}
console.log('[LLM] Standard completion raw response:', jsonResponse.substring(0, 200));
// Parse and validate JSON (remove code fences if present)
const cleanedJson = jsonResponse.replace(/```json\n?|```\n?/g, '').trim();
const parsedData = JSON.parse(cleanedJson);
const recipe = RecipeSchema.parse(parsedData);
console.log('[LLM] Standard completion parsed recipe:', recipe.name);
return recipe;
}