feat(TRUEREF-0007): implement pluggable embedding generation and vector storage
Add EmbeddingProvider interface with OpenAI-compatible, local (optional @xenova/transformers via dynamic import), and Noop (FTS5-only fallback) implementations. EmbeddingService batches requests and persists Float32Array blobs to snippet_embeddings. GET/PUT /api/v1/settings/embedding endpoints read and write embedding config from the settings table.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
61
src/lib/server/embeddings/local.provider.ts
Normal file
61
src/lib/server/embeddings/local.provider.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* LocalEmbeddingProvider — uses @xenova/transformers (optional dependency).
|
||||
*
|
||||
* @xenova/transformers is NOT installed by default. This provider uses a
|
||||
* dynamic import so the module is only required at runtime when the local
|
||||
* provider is actually configured. If the package is absent, isAvailable()
|
||||
* returns false and embed() throws a clear error.
|
||||
*/
|
||||
|
||||
import { EmbeddingError, type EmbeddingProvider, type EmbeddingVector } from './provider.js';
|
||||
|
||||
export class LocalEmbeddingProvider implements EmbeddingProvider {
|
||||
readonly name = 'local';
|
||||
readonly model = 'Xenova/all-MiniLM-L6-v2';
|
||||
readonly dimensions = 384;
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
private pipeline: ((text: string, options: Record<string, unknown>) => Promise<any>) | null =
|
||||
null;
|
||||
|
||||
async embed(texts: string[]): Promise<EmbeddingVector[]> {
|
||||
if (!this.pipeline) {
|
||||
let transformers: { pipeline: Function };
|
||||
try {
|
||||
// Dynamic import — only succeeds when @xenova/transformers is installed.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
transformers = (await import('@xenova/transformers' as any)) as {
|
||||
pipeline: Function;
|
||||
};
|
||||
} catch {
|
||||
throw new EmbeddingError(
|
||||
'@xenova/transformers is not installed. Install it to use the local embedding provider.'
|
||||
);
|
||||
}
|
||||
this.pipeline = await transformers.pipeline('feature-extraction', this.model);
|
||||
}
|
||||
|
||||
const results: EmbeddingVector[] = [];
|
||||
for (const text of texts) {
|
||||
const output = await this.pipeline!(text, {
|
||||
pooling: 'mean',
|
||||
normalize: true
|
||||
});
|
||||
results.push({
|
||||
values: new Float32Array(output.data),
|
||||
dimensions: this.dimensions,
|
||||
model: this.model
|
||||
});
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
async isAvailable(): Promise<boolean> {
|
||||
try {
|
||||
await import('@xenova/transformers' as never);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user