Some checks failed
Build & Push Docker Image / test-and-build (push) Failing after 38s
Increase max_tokens from 10 to 1024 for detection so thinking models have room to reason. Also fall back to reasoning_content if content is empty, since some local models (e.g. Gemma 4 thinking variants) put their answer there. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
242 lines
7.0 KiB
TypeScript
242 lines
7.0 KiB
TypeScript
import { createLLM, checkModelAvailability, isModelLoaded } from './llm';
|
||
import { zodResponseFormat } from 'openai/helpers/zod';
|
||
import { z } from 'zod';
|
||
import { RECIPE_DETECTION_PROMPT, RECIPE_EXTRACTION_PROMPT } from './prompts/recipe-extraction';
|
||
import { logError } from './utils/logger';
|
||
import type { ProgressCallback } from './extraction';
|
||
|
||
/**
 * Shape of a single ingredient entry: every field is a required string.
 */
const IngredientSchema = z.object({
  item: z.string(),
  amount: z.string(),
  unit: z.string()
});

/**
 * Zod schema describing a recipe as returned by the LLM.
 *
 * Only `name` is a required non-null value. `servings`, `description`,
 * `ingredients` and `steps` must be present but may be `null`; `image`
 * may be `null` or omitted entirely.
 */
const RecipeSchema = z.object({
  name: z.string(),
  servings: z.number().nullable(),
  description: z.string().nullable(),
  ingredients: z.array(IngredientSchema).nullable(),
  steps: z.array(z.string()).nullable(),
  image: z.string().nullable().optional()
});

/** Static type inferred from {@link RecipeSchema}. */
export type Recipe = z.infer<typeof RecipeSchema>;
|
||
|
||
/**
 * Detect whether the text contains a recipe using a yes/no LLM classification.
 *
 * The model's answer is read from `message.content`; when that is missing,
 * empty or whitespace-only, the non-standard `reasoning_content` field is used
 * instead. The text is classified as a recipe when the lower-cased answer
 * contains the substring "yes".
 *
 * @param text - The text to analyze
 * @returns True if a recipe is detected, false otherwise
 * @throws Error when the request fails. If the failure looks like a model
 *   problem (message contains "400" and "model"/"load") and the model is
 *   reported unavailable, the availability message is thrown; otherwise the
 *   error is wrapped as `Failed to detect recipe: …`.
 */
export async function detectRecipe(text: string): Promise<boolean> {
  try {
    const { client, model } = createLLM();

    console.log('[LLM] Starting recipe detection...');
    console.log('[LLM] Model:', model);
    console.log('[LLM] Text length:', text.length);

    const detectionResponse = await client.chat.completions.create({
      model,
      messages: [
        {
          role: 'system',
          content: RECIPE_DETECTION_PROMPT
        },
        {
          role: 'user',
          content: `Does this text contain a recipe?\n\n${text}`
        }
      ],
      // 1024 gives thinking models room to reason before answering
      max_tokens: 1024,
      temperature: 0
    });

    const msg = detectionResponse.choices[0]?.message;
    if (!msg) {
      // Surfaces as a clear "Failed to detect recipe: …" instead of a TypeError.
      throw new Error('LLM returned no choices');
    }

    // Some local models (e.g. Gemma thinking variants) return the answer in
    // reasoning_content and leave content empty.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const rawReasoning: unknown = (msg as any).reasoning_content;
    const reasoning = typeof rawReasoning === 'string' ? rawReasoning : '';

    // `??` would only fall back on null/undefined; an empty or whitespace-only
    // string must also trigger the fallback, so test for real content instead.
    const content = msg.content ?? '';
    const detectionResult = (content.trim() !== '' ? content : reasoning).toLowerCase();
    console.log('[LLM] Detection response:', detectionResult);

    return detectionResult.includes('yes');
  } catch (e) {
    logError('[LLM] Recipe detection error', e);

    // Check if this is a model-related error
    const errorMessage = (e as Error).message || '';
    const isModelError =
      errorMessage.includes('400') &&
      (errorMessage.toLowerCase().includes('model') || errorMessage.toLowerCase().includes('load'));

    if (isModelError) {
      const { model } = createLLM();
      const modelCheck = await checkModelAvailability(model);
      if (!modelCheck.available) {
        throw new Error(modelCheck.message || `Model "${model}" is not available`);
      }
    }

    throw new Error(`Failed to detect recipe: ${(e as Error).message}`);
  }
}
|
||
|
||
/**
 * Extract structured recipe data from text via the LLM's structured-output
 * (`response_format`) API.
 *
 * On failure the error is logged, then handled in this order:
 * 1. if the message looks model-related ("400" plus "model"/"load") and the
 *    model is reported unavailable, an availability error is thrown;
 * 2. if the message mentions `response_format` or `structured output`, the
 *    plain-completion fallback parser is used;
 * 3. otherwise the error is rethrown as `Failed to parse recipe: …`.
 *
 * @param text - The text containing the recipe
 * @returns Parsed recipe object
 */
export async function parseRecipe(text: string): Promise<Recipe> {
  try {
    const llm = createLLM();

    console.log('[LLM] Starting recipe parsing...');
    console.log('[LLM] Model:', llm.model);

    const messages = [
      {
        role: 'system' as const,
        content: RECIPE_EXTRACTION_PROMPT
      },
      {
        role: 'user' as const,
        content: `Extract the recipe from this text:\n\n${text}`
      }
    ];

    const completion = await llm.client.beta.chat.completions.parse({
      model: llm.model,
      messages,
      response_format: zodResponseFormat(RecipeSchema, 'recipe'),
      temperature: 0.3
    });

    const parsed = completion.choices[0].message.parsed;
    console.log('[LLM] Parse response:', parsed?.name);

    // A recipe without a name is treated as a failed extraction.
    if (!parsed?.name) {
      throw new Error('Failed to extract recipe - missing name');
    }

    return parsed;
  } catch (e) {
    logError('[LLM] Recipe parsing error', e);

    // Normalised message used for all substring checks below.
    const failureText = (e as Error).message || '';

    // Check if this is a model-related error
    let looksLikeModelError = false;
    if (failureText.includes('400')) {
      const lowered = failureText.toLowerCase();
      looksLikeModelError = lowered.includes('model') || lowered.includes('load');
    }

    if (looksLikeModelError) {
      const { model } = createLLM();
      const availability = await checkModelAvailability(model);
      if (!availability.available) {
        throw new Error(availability.message || `Model "${model}" is not available`);
      }
    }

    // If structured output fails, try standard completion
    const structuredOutputUnsupported =
      failureText.includes('response_format') || failureText.includes('structured output');
    if (structuredOutputUnsupported) {
      console.warn('[LLM] Falling back to standard completion');
      return await parseRecipeWithStandardCompletion(text);
    }

    throw new Error(`Failed to parse recipe: ${(e as Error).message}`);
  }
}
|
||
|
||
/**
 * End-to-end workflow: classify the text, and parse it only when it is a recipe.
 *
 * When a progress callback is supplied, the configured model's load state is
 * checked first and a `model_loading` event is emitted if it is not yet
 * loaded, so callers can surface the cold-start delay of the first request.
 *
 * @param text - The text to analyze
 * @param progressCallback - Optional callback for surfacing cold-load state
 * @returns Parsed recipe object if detected, null otherwise
 */
export async function extractRecipe(
  text: string,
  progressCallback?: ProgressCallback
): Promise<Recipe | null> {
  if (progressCallback) {
    const { model } = createLLM();
    const alreadyLoaded = await isModelLoaded(model);

    if (!alreadyLoaded) {
      progressCallback({
        type: 'model_loading',
        message: `Inference server cold — loading ${model} into VRAM (5–30s)...`,
        data: { model },
        timestamp: new Date().toISOString()
      });
    }
  }

  // Detection gates the parse call.
  const containsRecipe = await detectRecipe(text);
  return containsRecipe ? parseRecipe(text) : null;
}
|
||
|
||
/**
 * Fallback parser that uses a plain chat completion (no structured output).
 *
 * The model is asked to return raw JSON; Markdown code fences are stripped
 * from the reply, which is then JSON-parsed and validated against
 * `RecipeSchema`.
 *
 * @param text - The text containing the recipe
 * @returns Parsed and schema-validated recipe object
 * @throws Error('Empty response from LLM') when the reply has no content;
 *   JSON and schema validation errors propagate unchanged.
 */
async function parseRecipeWithStandardCompletion(text: string): Promise<Recipe> {
  const llm = createLLM();

  console.log('[LLM] Using standard completion fallback');

  // Inline prompt spelling out the expected JSON shape for the model.
  const systemPrompt = `You are a recipe extractor. Return ONLY valid JSON matching this schema:
{
"name": "recipe name in Italian",
"servings": number or null,
"description": "description in Italian or null",
"ingredients": [{"item": "ingredient name", "amount": "quantity", "unit": "SI unit"}],
"steps": ["First step", "Second step", ...]
}

Convert all measurements to SI units (g, mL, °C).
Translate everything to Italian.
Extract ONLY what's in the text.`;

  const completion = await llm.client.chat.completions.create({
    model: llm.model,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: `Extract the recipe from this text:\n\n${text}` }
    ],
    max_tokens: 2000,
    temperature: 0.3
  });

  const rawReply = completion.choices[0].message.content;
  if (!rawReply) {
    throw new Error('Empty response from LLM');
  }

  // Only the first 200 characters are logged.
  console.log('[LLM] Standard completion raw response:', rawReply.substring(0, 200));

  // Parse and validate JSON (remove code fences if present)
  const withoutFences = rawReply.replace(/```json\n?|```\n?/g, '').trim();
  const validated = RecipeSchema.parse(JSON.parse(withoutFences));

  console.log('[LLM] Standard completion parsed recipe:', validated.name);

  return validated;
}
|