MiniSearch / server /webSearchService.ts
github-actions[bot]
Sync from https://github.com/felladrin/MiniSearch
9692fe7
import { basename } from "node:path";
import debug from "debug";
import { convert as convertHtmlToPlainText } from "html-to-text";
import { strip as stripEmojis } from "node-emoji";
const fileName = basename(import.meta.url);
const printMessage = debug(fileName);
printMessage.enabled = true;
const SERVICE_HOST = "127.0.0.1";
const SERVICE_PORT = 8888;
const SERVICE_BASE_URL = `http://${SERVICE_HOST}:${SERVICE_PORT}`;
interface CircuitBreakerState {
failures: number;
lastFailureTime: number;
isOpen: boolean;
}
const circuitBreaker: CircuitBreakerState = {
failures: 0,
lastFailureTime: 0,
isOpen: false,
};
const CIRCUIT_BREAKER_THRESHOLD = 5;
const CIRCUIT_BREAKER_TIMEOUT = 60000;
const MAX_RETRIES = 3;
const BASE_RETRY_DELAY = 1000;
type SearchType = "text" | "images";
interface SearxngSearchResult {
title: string;
url: string;
content?: string;
category?: string;
template?: string;
engine?: string;
img_src?: string;
iframe_src?: string;
thumbnail?: string;
thumbnail_src?: string;
}
interface SearxngSearchResponse {
results?: SearxngSearchResult[];
}
const defaultSearchParams = {
lang: "auto",
safesearch: "1",
format: "json",
} as const;
export async function startWebSearchService() {
printMessage("Preparing service...");
const maxAttempts = 30;
const checkInterval = 1000;
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
const isReady = await getWebSearchStatus();
if (isReady) {
printMessage("Service ready!");
return;
}
if (attempt === maxAttempts) {
printMessage("Service not available!");
return;
}
await new Promise((resolve) => setTimeout(resolve, checkInterval));
}
}
async function testSearch(): Promise<boolean> {
try {
const results = await performSearch("test", "text");
return Array.isArray(results) && results.length > 0;
} catch (error) {
printMessage(
"Test search failed:",
error instanceof Error ? error.message : error,
);
return false;
}
}
export async function getWebSearchStatus() {
try {
const response = await fetch(`${SERVICE_BASE_URL}/healthz`);
const responseText = await response.text();
if (responseText.trim() !== "OK") return false;
return await testSearch();
} catch {
return false;
}
}
function buildSearchUrl(query: string, searchType: SearchType) {
const params = new URLSearchParams(defaultSearchParams);
params.set("q", query);
params.set("categories", searchType === "text" ? "general" : "images,videos");
return `${SERVICE_BASE_URL}/search?${params.toString()}`;
}
function recordFailure() {
circuitBreaker.failures++;
circuitBreaker.lastFailureTime = Date.now();
if (circuitBreaker.failures >= CIRCUIT_BREAKER_THRESHOLD) {
circuitBreaker.isOpen = true;
printMessage(
`Circuit breaker opened after ${circuitBreaker.failures} failures`,
);
}
}
async function performSearch(
query: string,
searchType: SearchType,
): Promise<SearxngSearchResult[]> {
if (circuitBreaker.isOpen) {
const timeSinceLastFailure = Date.now() - circuitBreaker.lastFailureTime;
if (timeSinceLastFailure < CIRCUIT_BREAKER_TIMEOUT) {
throw new Error(
"Circuit breaker is open - SearXNG service temporarily unavailable",
);
}
circuitBreaker.isOpen = false;
circuitBreaker.failures = 0;
printMessage("Circuit breaker reset");
}
const searchUrl = buildSearchUrl(query, searchType);
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
if (attempt > 0) {
const delay = BASE_RETRY_DELAY * 2 ** (attempt - 1);
printMessage(
`SearXNG returned 500, retrying in ${delay}ms (attempt ${attempt}/${MAX_RETRIES})`,
);
await new Promise((resolve) => setTimeout(resolve, delay));
}
const response = await fetch(searchUrl, {
headers: { Accept: "application/json" },
}).catch((error: unknown) => {
recordFailure();
throw error;
});
if (!response.ok) {
if (response.status === 500 && attempt < MAX_RETRIES) {
continue;
}
recordFailure();
throw new Error(`SearXNG request failed with status ${response.status}`);
}
circuitBreaker.failures = 0;
circuitBreaker.isOpen = false;
const data = (await response.json()) as SearxngSearchResponse;
const results = Array.isArray(data.results) ? data.results : [];
if (results.length === 0) {
printMessage(`No results returned from SearXNG for query: ${query}`);
}
return results;
}
recordFailure();
throw new Error(
`SearXNG request failed with status 500 after ${MAX_RETRIES} retries`,
);
}
async function processSearchResults(
query: string,
searchType: SearchType,
limit: number,
) {
const results = await performSearch(query, searchType);
const deduplicatedResults = deduplicateResults(results);
if (searchType === "text") {
const textualResults = await Promise.all(
deduplicatedResults.slice(0, limit).map(processTextualResult),
);
return filterNullResults(textualResults);
}
const graphicalResults = await Promise.all(
deduplicatedResults.slice(0, limit).map(processGraphicalResult),
);
return filterNullResults(graphicalResults);
}
export async function fetchSearXNG(
query: string,
searchType: SearchType,
limit = 30,
) {
try {
return await processSearchResults(query, searchType, limit);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
printMessage(`Search failed: ${errorMessage}`);
return [];
}
}
async function processGraphicalResult(result: SearxngSearchResult) {
const thumbnailSource =
result.category === "videos" ? result.thumbnail : result.thumbnail_src;
const sourceUrl =
result.category === "videos"
? result.iframe_src || result.url
: result.img_src;
try {
return [result.title, result.url, thumbnailSource, sourceUrl] as [
title: string,
url: string,
thumbnailSource: string,
sourceUrl: string,
];
} catch (error) {
console.warn(
`Failed to process ${result.category} result: ${result.url}`,
error instanceof Error ? error.message : error,
);
return null;
}
}
function processSnippet(snippet: string): string {
const processedSnippet = stripEmojis(
convertHtmlToPlainText(snippet, { wordwrap: false }).trim(),
{ preserveSpaces: true },
);
if (processedSnippet.startsWith("[data:image")) return "";
return processedSnippet;
}
async function processTextualResult(result: SearxngSearchResult) {
try {
if (!result.content) return null;
const title = convertHtmlToPlainText(result.title, {
wordwrap: false,
}).trim();
const snippet = processSnippet(result.content);
if (!title || !snippet) return null;
return [title, snippet, result.url] as [
title: string,
content: string,
url: string,
];
} catch (error) {
console.warn(
`Failed to process textual result: ${result.url}`,
error instanceof Error ? error.message : error,
);
return null;
}
}
function deduplicateResults(
results: SearxngSearchResult[],
): SearxngSearchResult[] {
const urls = new Set<string>();
return results.filter((result) => {
if (urls.has(result.url)) return false;
urls.add(result.url);
return true;
});
}
function filterNullResults<T>(results: (T | null)[]): T[] {
return results.filter((result): result is T => result !== null);
}