From b44434ce9957d65f7a7052bfa2db201395d3c960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6ren=20Weber?= Date: Fri, 25 Oct 2024 17:48:04 +0200 Subject: [PATCH] search: refactor to swap search library #407 see lunr-adapter.js to see how other search engines can be integrated search: fix comments #407 search: adapter docs #407 search: fix triggered search #407 --- .../_relearn_searchform/views/article.html | 2 +- layouts/partials/search.html | 2 +- layouts/partials/version.txt | 2 +- static/js/lunr-adapter.js | 136 ++++++++++++ static/js/search.js | 206 ++++++------------ static/js/theme.js | 4 +- 6 files changed, 210 insertions(+), 142 deletions(-) create mode 100644 static/js/lunr-adapter.js diff --git a/layouts/_relearn_searchform/views/article.html b/layouts/_relearn_searchform/views/article.html index f37e040cdb..ed7283d6a1 100644 --- a/layouts/_relearn_searchform/views/article.html +++ b/layouts/_relearn_searchform/views/article.html @@ -6,7 +6,7 @@ {{ partial "heading-pre.html" . }}{{ partial "heading.html" . }}{{ partial "heading-post.html" . }} -
+
diff --git a/layouts/partials/search.html b/layouts/partials/search.html index d0f776bc5d..c03d156113 100644 --- a/layouts/partials/search.html +++ b/layouts/partials/search.html @@ -69,5 +69,5 @@ {{- $file := (printf "js/lunr/lunr.%s.min.js" .) }} {{- end }} - + {{- end }} \ No newline at end of file diff --git a/layouts/partials/version.txt b/layouts/partials/version.txt index ef09838cb2..56a25dcae8 100644 --- a/layouts/partials/version.txt +++ b/layouts/partials/version.txt @@ -1 +1 @@ -7.1.1 \ No newline at end of file +7.1.1+28fce6b04c414523280c53ee02f9f3a94d9d23da \ No newline at end of file diff --git a/static/js/lunr-adapter.js b/static/js/lunr-adapter.js new file mode 100644 index 0000000000..e4dbf23c8c --- /dev/null +++ b/static/js/lunr-adapter.js @@ -0,0 +1,136 @@ +/* + +# Adapter Interface + +The search adapter needs to provide the following functions that are called from search.js + +## init() + +Initialize the search engine and the search index + +### Parameters + +none + +### Returns + +none + +### Remarks + +Once successfully completed, needs to call + + ```` + window.relearn.isSearchEngineReady = true; + window.relearn.executeInitialSearch(); + ```` + +## search() + +Performs the search and returs found results. + +### Parameters + +term: string // the search term that was typed in by the user + +### Returns + +Must return an array of found pages, sorted with the most relevant page first. + +Each array item needs the following layout: + +```` +{ + index: string, // optional, id of the page in the search index + matches: string[], // optional, TODO: have to find out what it does + page: { + breadcrumb: string, + title: string, + uri: string, + content: string, + tags: string[] + } +} +```` +*/ + +let lunrIndex, pagesIndex; + +function init() { + function initLunrIndex( index ){ + pagesIndex = index; + // Set up Lunr by declaring the fields we use + // Also provide their boost level for the ranking + lunrIndex = lunr(function() { + this.use(lunr.multiLanguage.apply(null, contentLangs)); + this.ref('index'); + this.field('title', { + boost: 15 + }); + this.field('tags', { + boost: 10 + }); + this.field('content', { + boost: 5 + }); + + this.pipeline.remove(lunr.stemmer); + this.searchPipeline.remove(lunr.stemmer); + + // Feed Lunr with each file and let index them + pagesIndex.forEach(function(page, idx) { + page.index = idx; + this.add(page); + }, this); + }); + + window.relearn.isSearchEngineReady = true; + window.relearn.executeInitialSearch(); + } + + if( window.index_js_url ){ + var js = document.createElement("script"); + js.src = index_js_url; + js.setAttribute("async", ""); + js.onload = function(){ + initLunrIndex(relearn_searchindex); + }; + js.onerror = function(e){ + console.error('Error getting Hugo index file'); + }; + document.head.appendChild(js); + } +} + +function search(term) { + function searchPatterns(word) { + // for short words high amounts of typos doesn't make sense + // for long words we allow less typos because this largly increases search time + var typos = [ + { len: -1, typos: 1 }, + { len: 60, typos: 2 }, + { len: 40, typos: 3 }, + { len: 20, typos: 4 }, + { len: 16, typos: 3 }, + { len: 12, typos: 2 }, + { len: 8, typos: 1 }, + { len: 4, typos: 0 }, + ]; + return [ + word + '^100', + word + '*^10', + '*' + word + '^10', + word + '~' + typos.reduce( function( a, c, i ){ return word.length < c.len ? c : a; } ).typos + '^1' + ]; + } + + // Find the item in our index corresponding to the Lunr one to have more info + // Remove Lunr special search characters: https://lunrjs.com/guides/searching.html + term = term.replace( /[*:^~+-]/g, ' ' ); + var searchTerm = lunr.tokenizer( term ).reduce( function(a,token){return a.concat(searchPatterns(token.str))}, []).join(' '); + return !searchTerm || !lunrIndex ? [] : lunrIndex.search(searchTerm).map(function(result) { + return { index: result.ref, matches: Object.keys(result.matchData.metadata), page: pagesIndex[ result.ref ] }; + }); +} + +export { init, search }; diff --git a/static/js/search.js b/static/js/search.js index 461984c0a2..359f06fcb0 100644 --- a/static/js/search.js +++ b/static/js/search.js @@ -1,56 +1,29 @@ +import { init, search } from './lunr-adapter.js'; + +(function(){ + window.relearn = window.relearn || {}; -window.relearn.runInitialSearch = function(){ - if( window.relearn.isSearchInit && window.relearn.isLunrInit ){ +window.relearn.executeInitialSearch = +function executeInitialSearch(){ + if( window.relearn.isSearchInterfaceReady && window.relearn.isSearchEngineReady ){ var input = document.querySelector('#R-search-by-detail'); if( !input ){ return; } var value = input.value; - searchDetail( value ); + executeSearch( value ); } } -var lunrIndex, pagesIndex; - -function initLunrIndex( index ){ - pagesIndex = index; - // Set up Lunr by declaring the fields we use - // Also provide their boost level for the ranking - lunrIndex = lunr(function() { - this.use(lunr.multiLanguage.apply(null, contentLangs)); - this.ref('index'); - this.field('title', { - boost: 15 - }); - this.field('tags', { - boost: 10 - }); - this.field('content', { - boost: 5 - }); - - this.pipeline.remove(lunr.stemmer); - this.searchPipeline.remove(lunr.stemmer); - - // Feed Lunr with each file and let LUnr actually index them - pagesIndex.forEach(function(page, idx) { - page.index = idx; - this.add(page); - }, this); - }); - - window.relearn.isLunrInit = true; - window.relearn.runInitialSearch(); -} - -function triggerSearch(){ +window.relearn.executeTriggeredSearch = +function executeTriggeredSearch(){ var input = document.querySelector('#R-search-by-detail'); if( !input ){ return; } var value = input.value; - searchDetail( value ); + executeSearch( value ); // add a new entry to the history after the user // changed the term; this does not reload the page @@ -70,7 +43,7 @@ function triggerSearch(){ } } -window.addEventListener( 'popstate', function ( event ){ +function executeHistorySearch( event ){ // restart search if browsed through history if( event.state ){ var state = window.history.state || {}; @@ -91,90 +64,25 @@ window.addEventListener( 'popstate', function ( event ){ // recreate the last search results and eventually // restore the previous scrolling position - searchDetail( search ); + executeSearch( search ); } } } -}); - -var input = document.querySelector('#R-search-by-detail'); -if( input ){ - input.addEventListener( 'keydown', function(event) { - // if we are pressing ESC in the searchdetail our focus will - // be stolen by the other event handlers, so we have to refocus - // here after a short while - if (event.key == "Escape") { - setTimeout( function(){ input.focus(); }, 0 ); - } - }); } -function initLunrJs() { - // new way to load our search index - if( window.index_js_url ){ - var js = document.createElement("script"); - js.src = index_js_url; - js.setAttribute("async", ""); - js.onload = function(){ - initLunrIndex(relearn_searchindex); - }; - js.onerror = function(e){ - console.error('Error getting Hugo index file'); - }; - document.head.appendChild(js); - } -} +function executeSearch( value ) { + var input = document.querySelector('#R-search-by-detail'); + function resolvePlaceholders( s, args ) { + var args = args || []; + // use replace to iterate over the string + // select the match and check if the related argument is present + // if yes, replace the match with the argument + return s.replace(/{([0-9]+)}/g, function (match, index) { + // check if the argument is present + return typeof args[index] == 'undefined' ? match : args[index]; + }); + }; -/** - * Trigger a search in Lunr and transform the result - * - * @param {String} term - * @return {Array} results - */ -function search(term) { - // Find the item in our index corresponding to the Lunr one to have more info - // Remove Lunr special search characters: https://lunrjs.com/guides/searching.html - term = term.replace( /[*:^~+-]/g, ' ' ); - var searchTerm = lunr.tokenizer( term ).reduce( function(a,token){return a.concat(searchPatterns(token.str))}, []).join(' '); - return !searchTerm || !lunrIndex ? [] : lunrIndex.search(searchTerm).map(function(result) { - return { index: result.ref, matches: Object.keys(result.matchData.metadata) } - }); -} - -function searchPatterns(word) { - // for short words high amounts of typos doesn't make sense - // for long words we allow less typos because this largly increases search time - var typos = [ - { len: -1, typos: 1 }, - { len: 60, typos: 2 }, - { len: 40, typos: 3 }, - { len: 20, typos: 4 }, - { len: 16, typos: 3 }, - { len: 12, typos: 2 }, - { len: 8, typos: 1 }, - { len: 4, typos: 0 }, - ]; - return [ - word + '^100', - word + '*^10', - '*' + word + '^10', - word + '~' + typos.reduce( function( a, c, i ){ return word.length < c.len ? c : a; } ).typos + '^1' - ]; -} - - -function resolvePlaceholders( s, args ) { - var args = args || []; - // use replace to iterate over the string - // select the match and check if the related argument is present - // if yes, replace the match with the argument - return s.replace(/{([0-9]+)}/g, function (match, index) { - // check if the argument is present - return typeof args[index] == 'undefined' ? match : args[index]; - }); -}; - -function searchDetail( value ) { var results = document.querySelector('#R-searchresults'); var hint = document.querySelector('.searchhint'); hint.innerText = ''; @@ -183,12 +91,15 @@ function searchDetail( value ) { if( a.length ){ hint.innerText = resolvePlaceholders( window.T_N_results_found, [ value, a.length ] ); a.forEach( function(item){ - var page = pagesIndex[item.index]; - var numContextWords = 10; - var contextPattern = '(?:\\S+ +){0,' + numContextWords + '}\\S*\\b(?:' + - item.matches.map( function(match){return match.replace(/\W/g, '\\$&')} ).join('|') + - ')\\b\\S*(?: +\\S+){0,' + numContextWords + '}'; - var context = page.content.match(new RegExp(contextPattern, 'i')); + var page = item.page; + var context = []; + if( item.matches ){ + var numContextWords = 10; + var contextPattern = '(?:\\S+ +){0,' + numContextWords + '}\\S*\\b(?:' + + item.matches.map( function(match){return match.replace(/\W/g, '\\$&')} ).join('|') + + ')\\b\\S*(?: +\\S+){0,' + numContextWords + '}'; + context = page.content.match(new RegExp(contextPattern, 'i')); + } var divsuggestion = document.createElement('a'); divsuggestion.className = 'autocomplete-suggestion'; divsuggestion.setAttribute('data-term', value); @@ -233,9 +144,7 @@ function searchDetail( value ) { } } -initLunrJs(); - -function startSearch(){ +function initSearchAfterLoad(){ var input = document.querySelector('#R-search-by-detail'); if( input ){ var state = window.history.state || {}; @@ -244,22 +153,25 @@ function startSearch(){ window.history.replaceState( state, '', window.location ); } - var searchList = new autoComplete({ + new autoComplete({ /* selector for the search box element */ selectorToInsert: 'search:has(.searchbox)', selector: '#R-search-by', /* source is the callback to perform the search */ - source: function(term, response) { - response(search(term)); + source: function( term, response ) { + response( search( term ) ); }, /* renderItem displays individual search results */ - renderItem: function(item, term) { - var page = pagesIndex[item.index]; - var numContextWords = 2; - var contextPattern = '(?:\\S+ +){0,' + numContextWords + '}\\S*\\b(?:' + - item.matches.map( function(match){return match.replace(/\W/g, '\\$&')} ).join('|') + - ')\\b\\S*(?: +\\S+){0,' + numContextWords + '}'; - var context = page.content.match(new RegExp(contextPattern, 'i')); + renderItem: function( item, term ) { + var page = item.page; + var context = []; + if( item.matches ){ + var numContextWords = 2; + var contextPattern = '(?:\\S+ +){0,' + numContextWords + '}\\S*\\b(?:' + + item.matches.map( function(match){return match.replace(/\W/g, '\\$&')} ).join('|') + + ')\\b\\S*(?: +\\S+){0,' + numContextWords + '}'; + context = page.content.match(new RegExp(contextPattern, 'i')); + } var divsuggestion = document.createElement('div'); divsuggestion.className = 'autocomplete-suggestion'; divsuggestion.setAttribute('data-term', term); @@ -286,4 +198,24 @@ function startSearch(){ }); }; -ready( startSearch ); +function initSearch(){ + init(); + + window.addEventListener( 'popstate', executeHistorySearch ); + + var input = document.querySelector('#R-search-by-detail'); + if( input ){ + input.addEventListener( 'keydown', function(event) { + // if we are pressing ESC in the searchdetail our focus will + // be stolen by the other event handlers, so we have to refocus + // here after a short while + if (event.key == "Escape") { + setTimeout( function(){ input.focus(); }, 0 ); + } + }); + } + ready( initSearchAfterLoad ); +} + +initSearch(); +})(); diff --git a/static/js/theme.js b/static/js/theme.js index fe1f906fe7..e241d2ce91 100644 --- a/static/js/theme.js +++ b/static/js/theme.js @@ -1592,8 +1592,8 @@ function initSearch() { }); } - window.relearn.isSearchInit = true; - window.relearn.runInitialSearch && window.relearn.runInitialSearch(); + window.relearn.isSearchInterfaceReady = true; + window.relearn.executeInitialSearch && window.relearn.executeInitialSearch(); } function updateTheme( detail ){