orama: test different parameters #407
Some checks failed
docs-build-deployment / Run deploy (push) Has been cancelled
docs-build / Run build (push) Has been cancelled

for now PT15 seems to generate the best results for the exampleSite
This commit is contained in:
Sören Weber 2024-10-27 12:38:35 +01:00
parent 6a20332b51
commit f986a7dc40
No known key found for this signature in database
GPG key ID: BEC6D55545451B6D
2 changed files with 50 additions and 11 deletions

View file

@ -1 +1 @@
7.1.1+57b73a5f47d69d695fba57ec966ae7dd25400a66
7.1.1+6a20332b518af28c3c59ab45eced9979246ff0a6

View file

@ -1,10 +1,9 @@
import {
create,
search as oramaSearch,
insertMultiple,
} from "https://cdn.jsdelivr.net/npm/@orama/orama@latest/+esm";
// "https://unpkg.com/browse/@orama/orama@latest/dist/esm/index.js";
// https://cdn.jsdelivr.net/npm/@orama/orama@3.0.1/dist/esm/index.js
import { create, search as oramaSearch, insertMultiple } from "https://cdn.jsdelivr.net/npm/@orama/orama@latest/+esm";
import { pluginQPS } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-qps@latest/+esm'
import { pluginPT15 } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-pt15@latest/+esm'
//import { pluginEmbeddings } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-embeddings@latest/+esm'
//import * as tf from 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-core';
//import 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-webgl';
//import { createTokenizer } from '@orama/tokenizers/japanese'
@ -14,15 +13,38 @@ let searchEngine = null;
async function init() {
async function initIndex( index ){
/*
const embeddings = await pluginEmbeddings({
embeddings: {
// Property used to store generated embeddings. Must be defined in the schema.
defaultProperty: 'embeddings',
onInsert: {
// Generate embeddings at insert-time.
// Turn off if you're inserting documents with embeddings already generated.
generate: true,
// Properties to use for generating embeddings at insert time.
// These properties will be concatenated and used to generate embeddings.
properties: ['description'],
verbose: true,
}
}
});
*/
searchEngine = await create({
schema: {
title: 'string',
content: 'string',
uri: 'string',
uri: 'string',
breadcrumb: 'string',
description: 'string',
tags: 'string[]',
// embeddings: 'vector[1]'
},
plugins: [
// embeddings,
// pluginQPS()
pluginPT15()
],
/*
defaultLanguage: 'french',
components: {
@ -53,8 +75,25 @@ async function init() {
}
/**
 * Query the module-level Orama index (`searchEngine`) for `term`.
 *
 * Logs the term and each hit's score/uri to the console, which is debugging
 * output used while comparing search parameters.
 *
 * @param {string} term - The search string; also split on single spaces to
 *   build the `matches` list returned with every hit.
 * @returns {Promise<Array<{matches: string[], page: object}>>} One entry per
 *   hit, where `page` is the hit's stored document and `matches` holds the
 *   full term followed by its space-separated parts.
 */
async function search( term ){
    const searchResponse = await oramaSearch(searchEngine, {
        // mode: 'hybrid', // vector search seems not to work
        term: term,
        properties: '*',
        threshold: 0, // only show results where all keywords were found
        limit: 99,
        boost: { // doesn't seem to make a difference in score
            tags: 1.8,
            title: 1.5,
            // Key must match the schema property name `description`; the
            // previous spelling `descriptoin` matched no indexed property.
            description: 1.3,
            breadcrumb: 1.2,
        },
        // distinctOn: 'title', // just to filter out changelog/releasenotes if having the same title
        // exact: true, // not for PT15
        // tolerance: 1, // not for PT15
    });

    // Debug output: one line for the term, then one line per hit.
    console.log( "new term", term )
    searchResponse.hits.forEach( hit => console.log(hit.score, hit.document.uri) );

    return searchResponse.hits.map( hit => ({ matches: [ term, ...term.split(' ') ], page: hit.document }) );
}
export { init, search };