# TRUEREF-0007 — Embedding Generation & Vector Storage

**Priority:** P1
**Status:** Pending
**Depends On:** TRUEREF-0001, TRUEREF-0005
**Blocks:** TRUEREF-0008

---

## Overview

Implement a pluggable embedding generation system that produces vector representations of snippets and stores them in SQLite. Supports multiple embedding backends (OpenAI-compatible API, local ONNX models via transformers.js). When no embedding provider is configured, the system gracefully falls back to FTS5-only search.

---

## Acceptance Criteria

- [ ] Pluggable `EmbeddingProvider` interface with at least two implementations
- [ ] `OpenAIEmbeddingProvider` — works with OpenAI, Azure OpenAI, Ollama, any OpenAI-compatible endpoint
- [ ] `LocalEmbeddingProvider` — uses `@xenova/transformers` with a bundled ONNX model (optional dep)
- [ ] `NoopEmbeddingProvider` — returns null, enables graceful FTS5-only mode
- [ ] `EmbeddingService` that batches embedding requests and stores results
- [ ] Embeddings stored as `Float32Array` blobs in `snippet_embeddings` table
- [ ] Embedding provider configured via settings (stored in `settings` table)
- [ ] `GET /api/v1/settings/embedding` — get current embedding config
- [ ] `PUT /api/v1/settings/embedding` — set embedding provider configuration
- [ ] Unit tests for provider abstraction and storage logic

---

## Provider Interface

```typescript
// src/lib/server/embeddings/provider.ts

export interface EmbeddingVector {
|
|
values: Float32Array;
|
|
dimensions: number;
|
|
model: string;
|
|
}
|
|
|
|
export interface EmbeddingProvider {
|
|
readonly name: string;
|
|
readonly dimensions: number;
|
|
readonly model: string;
|
|
|
|
embed(texts: string[]): Promise<EmbeddingVector[]>;
|
|
isAvailable(): Promise<boolean>;
|
|
}
```

---

## OpenAI-Compatible Provider

```typescript
export interface OpenAIProviderConfig {
|
|
baseUrl: string; // e.g. "https://api.openai.com/v1" or "http://localhost:11434/v1"
|
|
apiKey: string;
|
|
model: string; // e.g. "text-embedding-3-small", "nomic-embed-text"
|
|
dimensions?: number; // override for models that support it (e.g. text-embedding-3-small)
|
|
maxBatchSize?: number; // default: 100
|
|
}
|
|
|
|
export class OpenAIEmbeddingProvider implements EmbeddingProvider {
|
|
constructor(private config: OpenAIProviderConfig) {}
|
|
|
|
async embed(texts: string[]): Promise<EmbeddingVector[]> {
|
|
// Batch into groups of maxBatchSize
|
|
const batches = chunk(texts, this.config.maxBatchSize ?? 100);
|
|
const allEmbeddings: EmbeddingVector[] = [];
|
|
|
|
for (const batch of batches) {
|
|
const response = await fetch(`${this.config.baseUrl}/embeddings`, {
|
|
method: 'POST',
|
|
headers: {
|
|
Authorization: `Bearer ${this.config.apiKey}`,
|
|
'Content-Type': 'application/json'
|
|
},
|
|
body: JSON.stringify({
|
|
model: this.config.model,
|
|
input: batch,
|
|
dimensions: this.config.dimensions
|
|
})
|
|
});
|
|
|
|
if (!response.ok) {
|
|
throw new EmbeddingError(`API error: ${response.status}`);
|
|
}
|
|
|
|
const data = await response.json();
|
|
for (const item of data.data) {
|
|
allEmbeddings.push({
|
|
values: new Float32Array(item.embedding),
|
|
dimensions: item.embedding.length,
|
|
model: this.config.model
|
|
});
|
|
}
|
|
}
|
|
|
|
return allEmbeddings;
|
|
}
|
|
}
```

---

## Local Provider (Optional Dependency)

```typescript
// Uses @xenova/transformers — only loaded if installed
export class LocalEmbeddingProvider implements EmbeddingProvider {
|
|
private pipeline: unknown = null;
|
|
|
|
readonly name = 'local';
|
|
readonly model = 'Xenova/all-MiniLM-L6-v2'; // 384-dim, fast, small
|
|
readonly dimensions = 384;
|
|
|
|
async embed(texts: string[]): Promise<EmbeddingVector[]> {
|
|
if (!this.pipeline) {
|
|
const { pipeline } = await import('@xenova/transformers');
|
|
this.pipeline = await pipeline('feature-extraction', this.model);
|
|
}
|
|
|
|
const results: EmbeddingVector[] = [];
|
|
for (const text of texts) {
|
|
const output = await (this.pipeline as Function)(text, {
|
|
pooling: 'mean',
|
|
normalize: true
|
|
});
|
|
results.push({
|
|
values: new Float32Array(output.data),
|
|
dimensions: this.dimensions,
|
|
model: this.model
|
|
});
|
|
}
|
|
return results;
|
|
}
|
|
|
|
async isAvailable(): Promise<boolean> {
|
|
try {
|
|
await import('@xenova/transformers');
|
|
return true;
|
|
} catch {
|
|
return false;
|
|
}
|
|
}
|
|
}
```

---

## Embedding Service

```typescript
export class EmbeddingService {
|
|
constructor(
|
|
private db: BetterSQLite3.Database,
|
|
private provider: EmbeddingProvider
|
|
) {}
|
|
|
|
async embedSnippets(
|
|
snippetIds: string[],
|
|
onProgress?: (done: number, total: number) => void
|
|
): Promise<void> {
|
|
const snippets = this.db
|
|
.prepare(
|
|
`SELECT id, content, type FROM snippets WHERE id IN (${snippetIds.map(() => '?').join(',')})`
|
|
)
|
|
.all(...snippetIds) as Snippet[];
|
|
|
|
// Prepare text for embedding: combine title + content
|
|
const texts = snippets.map((s) =>
|
|
[s.title, s.breadcrumb, s.content].filter(Boolean).join('\n').slice(0, 2048)
|
|
);
|
|
|
|
const BATCH_SIZE = 50;
|
|
const insert = this.db.prepare(`
|
|
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, model, dimensions, embedding, created_at)
|
|
VALUES (?, ?, ?, ?, unixepoch())
|
|
`);
|
|
|
|
for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
|
|
const batch = snippets.slice(i, i + BATCH_SIZE);
|
|
const batchTexts = texts.slice(i, i + BATCH_SIZE);
|
|
|
|
const embeddings = await this.provider.embed(batchTexts);
|
|
|
|
const insertMany = this.db.transaction(() => {
|
|
for (let j = 0; j < batch.length; j++) {
|
|
const snippet = batch[j];
|
|
const embedding = embeddings[j];
|
|
insert.run(
|
|
snippet.id,
|
|
embedding.model,
|
|
embedding.dimensions,
|
|
Buffer.from(embedding.values.buffer)
|
|
);
|
|
}
|
|
});
|
|
insertMany();
|
|
|
|
onProgress?.(Math.min(i + BATCH_SIZE, snippets.length), snippets.length);
|
|
}
|
|
}
|
|
}
```

---

## Provider Configuration

Stored in the `settings` table as JSON:

```typescript
export interface EmbeddingConfig {
|
|
provider: 'openai' | 'local' | 'none';
|
|
openai?: {
|
|
baseUrl: string;
|
|
apiKey: string;
|
|
model: string;
|
|
dimensions?: number;
|
|
};
|
|
}
|
|
|
|
// Settings key: 'embedding_config'
|
|
```
|
|
|
|
### API Endpoints
|
|
|
|
`GET /api/v1/settings/embedding`
|
|
|
|
```json
|
|
{
|
|
"provider": "openai",
|
|
"openai": {
|
|
"baseUrl": "https://api.openai.com/v1",
|
|
"model": "text-embedding-3-small",
|
|
"dimensions": 1536
|
|
}
|
|
}
|
|
```
|
|
|
|
`PUT /api/v1/settings/embedding` — same shape, validates provider connectivity before saving.
|
|
|
|
---
|
|
|
|
## Blob Storage Format
|
|
|
|
Embeddings are stored as raw `Float32Array` binary blobs:
|
|
|
|
```typescript
|
|
// Store
|
|
const buffer = Buffer.from(float32Array.buffer);
|
|
|
|
// Retrieve
|
|
const float32Array = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4);
|
|
```
|
|
|
|
---
|
|
|
|
## Files to Create
|
|
|
|
- `src/lib/server/embeddings/provider.ts` — interface + noop
|
|
- `src/lib/server/embeddings/openai.provider.ts`
|
|
- `src/lib/server/embeddings/local.provider.ts`
|
|
- `src/lib/server/embeddings/embedding.service.ts`
|
|
- `src/lib/server/embeddings/factory.ts` — create provider from config
|
|
- `src/routes/api/v1/settings/embedding/+server.ts`
|
|
- `src/lib/server/embeddings/embedding.service.test.ts`
|