Manage search result data from Exa neural search APIs. Covers content extraction filtering, result caching with TTL, citation deduplication, and handling large content payloads efficiently for RAG pipelines.
exa-js SDK installedimport Exa from 'exa-js';
const exa = new Exa(process.env.EXA_API_KEY!);
// Minimal extraction: metadata only (cheapest)
async function searchMetadataOnly(query: string) {
return exa.search(query, {
numResults: 10,
type: 'auto',
// No contents - just URLs, titles, scores
});
}
// Controlled extraction: highlights only (balanced)
async function searchWithHighlights(query: string) {
return exa.searchAndContents(query, {
numResults: 10,
highlights: { numSentences: 3, highlightsPerUrl: 2 },
// No full text - reduces payload significantly
});
}
// Full extraction: text with character limit
async function searchWithText(query: string, maxChars = 2000) { # 2000: 2 seconds in ms
return exa.searchAndContents(query, {
numResults: 5,
text: { maxCharacters: maxChars },
highlights: { numSentences: 3 },
});
}
import { LRUCache } from 'lru-cache';
import { createHash } from 'crypto';
const searchCache = new LRUCache<string, any>({
max: 500, # HTTP 500 Internal Server Error
ttl: 1000 * 60 * 60, // 1 hour default # 1000: 1 second in ms
});
function cacheKey(query: string, options: any): string {
return createHash('sha256')
.update(JSON.stringify({ query, ...options }))
.digest('hex');
}
async function cachedSearch(
query: string,
options: any = {},
ttlMs?: number
) {
const key = cacheKey(query, options);
const cached = searchCache.get(key);
if (cached) return cached;
const results = await exa.searchAndContents(query, options);
searchCache.set(key, results, { ttl: ttlMs });
return results;
}
interface ProcessedResult {
url: string;
title: string;
score: number;
snippet: string; // Truncated content
contentSize: number;
}
function processResults(results: any[], maxSnippetLength = 500): ProcessedResult[] { # HTTP 500 Internal Server Error
return results.map(r => ({
url: r.url,
title: r.title || 'Untitled',
score: r.score,
snippet: (r.text || r.highlights?.join(' ') || '').slice(0, maxSnippetLength),
contentSize: (r.text || '').length,
}));
}
// Estimate token count for LLM context budgets
function estimateTokens(results: ProcessedResult[]): number {
const totalChars = results.reduce((sum, r) => sum + r.snippet.length, 0);
return Math.ceil(totalChars / 4); // Rough estimate: 4 chars per token
}
function fitToTokenBudget(results: ProcessedResult[], maxTokens: number) {
const sorted = results.sort((a, b) => b.score - a.score);
const selected: ProcessedResult[] = [];
let tokenCount = 0;
for (const result of sorted) {
const resultTokens = Math.ceil(result.snippet.length / 4);
if (tokenCount + resultTokens > maxTokens) break;
selected.push(result);
tokenCount += resultTokens;
}
return { selected, tokenCount };
}
function deduplicateCitations(results: any[]): any[] {
const seen = new Map<string, any>();
for (const result of results) {
const domain = new URL(result.url).hostname;
const key = `${domain}:${result.title}`;
if (!seen.has(key) || result.score > seen.get(key).score) {
seen.set(key, result);
}
}
return Array.from(seen.values());
}
| Issue | Cause | Solution |
|---|---|---|
| Large response payload | Requesting full text for many URLs | Use highlights or limit maxCharacters |
| Cache stale for news | Default TTL too long | Use shorter TTL for time-sensitive queries |
| Duplicate sources | Same article from multiple domains | Deduplicate by domain + title |
| Token budget exceeded | Too much context for LLM | Use fitToTokenBudget to trim |
async function ragSearch(query: string, tokenBudget = 3000) { # 3000: 3 seconds in ms
const results = await cachedSearch(query, {
numResults: 15,
text: { maxCharacters: 1500 }, # 1500 = configured value
highlights: { numSentences: 3 },
});
const processed = processResults(results.results);
const { selected } = fitToTokenBudget(processed, tokenBudget);
return selected;
}