mirror of https://github.com/jackyzha0/quartz.git (synced 2025-12-19 10:54:06 -06:00)

revert: redundant changes

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

parent 68682a8fe3, commit eb8a4cce18
@@ -1,14 +1,7 @@
 import FlexSearch, { DefaultDocumentSearchResults, Id } from "flexsearch"
-import type { ContentDetails } from "../../plugins/emitters/contentIndex"
+import { ContentDetails } from "../../plugins/emitters/contentIndex"
 import { SemanticClient, type SemanticResult } from "./semantic.inline"
-import {
-  registerEscapeHandler,
-  removeAllChildren,
-  highlight,
-  tokenizeTerm,
-  encode,
-  fetchCanonical,
-} from "./util"
+import { registerEscapeHandler, removeAllChildren, fetchCanonical } from "./util"
 import { FullSlug, normalizeRelativeURLs, resolveRelative } from "../../util/path"
 
 interface Item {
@@ -61,6 +54,71 @@ type SimilarityResult = { item: Item; similarity: number }
 let chunkMetadata: Record<string, { parentSlug: string; chunkId: number }> = {}
 let manifestIds: string[] = []
 
+const contextWindowWords = 30
+const tokenizeTerm = (term: string) => {
+  const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
+  const tokenLen = tokens.length
+  if (tokenLen > 1) {
+    for (let i = 1; i < tokenLen; i++) {
+      tokens.push(tokens.slice(0, i + 1).join(" "))
+    }
+  }
+
+  return tokens.sort((a, b) => b.length - a.length) // always highlight longest terms first
+}
+
+function highlight(searchTerm: string, text: string, trim?: boolean) {
+  const tokenizedTerms = tokenizeTerm(searchTerm)
+  let tokenizedText = text.split(/\s+/).filter((t) => t !== "")
+
+  let startIndex = 0
+  let endIndex = tokenizedText.length - 1
+  if (trim) {
+    const includesCheck = (tok: string) =>
+      tokenizedTerms.some((term) => tok.toLowerCase().startsWith(term.toLowerCase()))
+    const occurrencesIndices = tokenizedText.map(includesCheck)
+
+    let bestSum = 0
+    let bestIndex = 0
+    for (let i = 0; i < Math.max(tokenizedText.length - contextWindowWords, 0); i++) {
+      const window = occurrencesIndices.slice(i, i + contextWindowWords)
+      const windowSum = window.reduce((total, cur) => total + (cur ? 1 : 0), 0)
+      if (windowSum >= bestSum) {
+        bestSum = windowSum
+        bestIndex = i
+      }
+    }
+
+    startIndex = Math.max(bestIndex - contextWindowWords, 0)
+    endIndex = Math.min(startIndex + 2 * contextWindowWords, tokenizedText.length - 1)
+    tokenizedText = tokenizedText.slice(startIndex, endIndex)
+  }
+
+  const slice = tokenizedText
+    .map((tok) => {
+      // see if this tok is prefixed by any search terms
+      for (const searchTok of tokenizedTerms) {
+        if (tok.toLowerCase().includes(searchTok.toLowerCase())) {
+          const regex = new RegExp(searchTok.toLowerCase(), "gi")
+          return tok.replace(regex, `<span class="highlight">$&</span>`)
+        }
+      }
+      return tok
+    })
+    .join(" ")
+
+  return `${startIndex === 0 ? "" : "..."}${slice}${
+    endIndex === tokenizedText.length - 1 ? "" : "..."
+  }`
+}
+
+// To be used with search and everything else with flexsearch
+const encoder = (str: string) =>
+  str
+    .toLowerCase()
+    .split(/\s+/)
+    .filter((token) => token.length > 0)
+
 /**
  * Get parent document slug for a chunk ID
  */
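For reference, a minimal sketch of what the re-inlined helpers return, on hypothetical inputs (illustrative only, not part of the commit):

tokenizeTerm("graph view")
// => ["graph view", "graph", "view"] (the query plus its multi-word prefixes, longest first)

highlight("graph", "the graph view shows backlinks")
// => 'the <span class="highlight">graph</span> view shows backlinks'

With trim set, highlight additionally clips the text to roughly a 2 * contextWindowWords word window around the densest cluster of matching tokens and marks trimmed ends with "...".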
@@ -125,7 +183,7 @@ function aggregateChunkResults(
 // Initialize the FlexSearch Document instance with the appropriate configuration
 const index = new FlexSearch.Document<Item>({
   tokenize: "forward",
-  encode,
+  encode: encoder,
   document: {
     id: "id",
     tag: "tags",
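The encode option change follows from the revert: with the encode export gone from ./util (removed below), the object shorthand no longer resolves, so the locally defined encoder is bound explicitly. The encoder simply lowercases and splits on whitespace; a hypothetical call:

encoder("Digital Garden Notes")
// => ["digital", "garden", "notes"]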
@@ -44,68 +44,3 @@ export async function fetchCanonical(url: URL): Promise<Response> {
   const [_, redirect] = text.match(canonicalRegex) ?? []
   return redirect ? fetch(`${new URL(redirect, url)}`) : res
 }
-
-const contextWindowWords = 30
-export const tokenizeTerm = (term: string) => {
-  const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
-  const tokenLen = tokens.length
-  if (tokenLen > 1) {
-    for (let i = 1; i < tokenLen; i++) {
-      tokens.push(tokens.slice(0, i + 1).join(" "))
-    }
-  }
-
-  return tokens.sort((a, b) => b.length - a.length) // always highlight longest terms first
-}
-
-export function highlight(searchTerm: string, text: string, trim?: boolean) {
-  const tokenizedTerms = tokenizeTerm(searchTerm)
-  let tokenizedText = text.split(/\s+/).filter((t) => t !== "")
-
-  let startIndex = 0
-  let endIndex = tokenizedText.length - 1
-  if (trim) {
-    const includesCheck = (tok: string) =>
-      tokenizedTerms.some((term) => tok.toLowerCase().startsWith(term.toLowerCase()))
-    const occurrencesIndices = tokenizedText.map(includesCheck)
-
-    let bestSum = 0
-    let bestIndex = 0
-    for (let i = 0; i < Math.max(tokenizedText.length - contextWindowWords, 0); i++) {
-      const window = occurrencesIndices.slice(i, i + contextWindowWords)
-      const windowSum = window.reduce((total, cur) => total + (cur ? 1 : 0), 0)
-      if (windowSum >= bestSum) {
-        bestSum = windowSum
-        bestIndex = i
-      }
-    }
-
-    startIndex = Math.max(bestIndex - contextWindowWords, 0)
-    endIndex = Math.min(startIndex + 2 * contextWindowWords, tokenizedText.length - 1)
-    tokenizedText = tokenizedText.slice(startIndex, endIndex)
-  }
-
-  const slice = tokenizedText
-    .map((tok) => {
-      // see if this tok is prefixed by any search terms
-      for (const searchTok of tokenizedTerms) {
-        if (tok.toLowerCase().includes(searchTok.toLowerCase())) {
-          const regex = new RegExp(searchTok.toLowerCase(), "gi")
-          return tok.replace(regex, `<span class="highlight">$&</span>`)
-        }
-      }
-      return tok
-    })
-    .join(" ")
-
-  return `${startIndex === 0 ? "" : "..."}${slice}${
-    endIndex === tokenizedText.length - 1 ? "" : "..."
-  }`
-}
-
-// To be used with search and everything else with flexsearch
-export const encode = (str: string) =>
-  str
-    .toLowerCase()
-    .split(/\s+/)
-    .filter((token) => token.length > 0)