orama: test different parameters #407
Some checks failed
docs-build-deployment / Run deploy (push) Has been cancelled
docs-build / Run build (push) Has been cancelled

For now, PT15 seems to generate the best results for the exampleSite.
This commit is contained in:
Sören Weber 2024-10-27 12:38:35 +01:00
parent 6a20332b51
commit f986a7dc40
No known key found for this signature in database
GPG key ID: BEC6D55545451B6D
2 changed files with 50 additions and 11 deletions

View file

@ -1 +1 @@
7.1.1+57b73a5f47d69d695fba57ec966ae7dd25400a66 7.1.1+6a20332b518af28c3c59ab45eced9979246ff0a6

View file

@ -1,10 +1,9 @@
import { import { create, search as oramaSearch, insertMultiple } from "https://cdn.jsdelivr.net/npm/@orama/orama@latest/+esm";
create, import { pluginQPS } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-qps@latest/+esm'
search as oramaSearch, import { pluginPT15 } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-pt15@latest/+esm'
insertMultiple, //import { pluginEmbeddings } from 'https://cdn.jsdelivr.net/npm/@orama/plugin-embeddings@latest/+esm'
} from "https://cdn.jsdelivr.net/npm/@orama/orama@latest/+esm"; //import * as tf from 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-core';
// "https://unpkg.com/browse/@orama/orama@latest/dist/esm/index.js"; //import 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-webgl';
// https://cdn.jsdelivr.net/npm/@orama/orama@3.0.1/dist/esm/index.js
//import { createTokenizer } from '@orama/tokenizers/japanese' //import { createTokenizer } from '@orama/tokenizers/japanese'
@ -14,15 +13,38 @@ let searchEngine = null;
async function init() { async function init() {
async function initIndex( index ){ async function initIndex( index ){
/*
const embeddings = await pluginEmbeddings({
embeddings: {
// Property used to store generated embeddings. Must be defined in the schema.
defaultProperty: 'embeddings',
onInsert: {
// Generate embeddings at insert-time.
// Turn off if you're inserting documents with embeddings already generated.
generate: true,
// Properties to use for generating embeddings at insert time.
// These properties will be concatenated and used to generate embeddings.
properties: ['description'],
verbose: true,
}
}
});
*/
searchEngine = await create({ searchEngine = await create({
schema: { schema: {
title: 'string', title: 'string',
content: 'string', content: 'string',
uri: 'string', uri: 'string',
breadcrumb: 'string', breadcrumb: 'string',
description: 'string', description: 'string',
tags: 'string[]', tags: 'string[]',
// embeddings: 'vector[1]'
}, },
plugins: [
// embeddings,
// pluginQPS()
pluginPT15()
],
/* /*
defaultLanguage: 'french', defaultLanguage: 'french',
components: { components: {
@ -53,8 +75,25 @@ async function init() {
} }
/**
 * Queries the module-level Orama index and converts the hits into result entries.
 *
 * @param {string} term - Search phrase to look up.
 * @returns {Promise<Array<{matches: string[], page: Object}>>} One entry per hit.
 *   `matches` contains the full phrase followed by its space-separated parts;
 *   `page` is the document stored in the index for that hit.
 */
async function search( term ){
    const searchResponse = await oramaSearch(searchEngine, {
        // mode: 'hybrid', // vector search seems not to work
        term: term,
        properties: '*',
        threshold: 0, // only show results where all keywords were found
        limit: 99,
        boost: { // doesn't seem to make a difference in score
            tags: 1.8,
            title: 1.5,
            // Key must match the schema property name exactly; a misspelled
            // key never applies to the intended field.
            description: 1.3,
            breadcrumb: 1.2,
        },
        // distinctOn: 'title', // just to filter out changelog/releasenotes if having the same title
        // exact: true, // not for PT15
        // tolerance: 1, // not for PT15
    });

    // Diagnostic output: lists the score and URI of every hit for the current term.
    console.log( "new term", term )
    searchResponse.hits.forEach( hit => console.log(hit.score, hit.document.uri) );

    // The split does not depend on the hit, so do it once; each result still
    // receives its own fresh `matches` array.
    const keywords = term.split(' ');
    return searchResponse.hits.map( hit => ({ matches: [ term, ...keywords ], page: hit.document }) );
}
export { init, search }; export { init, search };