Manage search query data and results from the Perplexity Sonar API. This covers query sanitization, citation validation, result caching with freshness policies, and conversation context management for research workflows.
/**
 * Replaces personal data and overly specific identifiers in a search query
 * with bracketed placeholders before the text leaves the process.
 *
 * @param query - Raw query text as entered by the user.
 * @returns The query with every matched span replaced by its placeholder.
 */
function sanitizeQuery(query: string): string {
  // Applied top to bottom; each rule sees the output of the previous one.
  const redactions: ReadonlyArray<readonly [RegExp, string]> = [
    // Personal data
    [/\b[\w.+-]+@[\w-]+\.[\w.]+\b/g, '[email]'],
    [/\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/g, '[phone]'],
    [/\b\d{3}-\d{2}-\d{4}\b/g, '[ssn]'],
    // Overly specific identifiers
    [/\b(user|customer|account)\s*#?\s*\d+\b/gi, '[ID]'],
    [/\b[A-Z0-9]{20,}\b/g, '[TOKEN]'],
  ];

  return redactions.reduce(
    (text, [pattern, placeholder]) => text.replace(pattern, placeholder),
    query,
  );
}
/**
 * Sends a single-turn query to the `sonar` model after passing it through
 * sanitizeQuery, so the raw text is never what gets submitted.
 *
 * @param rawQuery - Unsanitized query text.
 * @returns Whatever `perplexity.chat.completions.create` resolves to.
 */
async function safeSearch(rawQuery: string) {
  return await perplexity.chat.completions.create({
    model: 'sonar',
    messages: [{ role: 'user', content: sanitizeQuery(rawQuery) }],
  });
}
/** A citation URL pulled out of response text, together with the host it points to. */
interface ValidatedCitation {
// The citation URL.
url: string;
// Host name of the URL; the extraction code in this file uses 'unknown' when the URL cannot be parsed.
domain: string;
// Validity flag. extractAndValidateCitations derives it from URL parsing only, not from a network request.
isAccessible: boolean;
// Optional page title; not populated by any code visible in this file.
title?: string;
}
/**
 * Pulls http(s) URLs out of response text and returns one citation per
 * distinct, parseable URL.
 *
 * Trailing sentence punctuation is stripped BEFORE de-duplication and parsing.
 * That way "https://a.com/x." and "https://a.com/x" collapse into a single
 * entry, and `domain` is always derived from the same string that is returned
 * as `url`.
 *
 * @param responseText - Free text that may contain URLs.
 * @returns Citations in order of first appearance. Validation is syntactic
 *          only (the URL parses); no network request is made.
 */
function extractAndValidateCitations(responseText: string): ValidatedCitation[] {
  const urlPattern = /https?:\/\/[^\s\])"]+/g;
  const trailingPunctuation = /[.,;:]+$/;

  const candidates = (responseText.match(urlPattern) ?? []).map(raw =>
    raw.replace(trailingPunctuation, ''),
  );

  const citations: ValidatedCitation[] = [];
  for (const url of new Set(candidates)) {
    try {
      citations.push({ url, domain: new URL(url).hostname, isAccessible: true });
    } catch {
      // Deliberately dropped: a match that does not parse as a URL is not a usable citation.
    }
  }
  return citations;
}
/**
 * Keeps the first citation for each distinct page and drops later repeats.
 * Two citations count as the same page when their domain and their URL up to
 * the first '?' are equal, so differing query strings do not make them distinct.
 *
 * @param citations - Citations in priority order.
 * @returns A new array with the surviving citations in their original order.
 */
function deduplicateCitations(citations: ValidatedCitation[]): ValidatedCitation[] {
  const seenKeys = new Set<string>();
  const unique: ValidatedCitation[] = [];

  for (const citation of citations) {
    const [withoutQuery] = citation.url.split('?');
    const key = citation.domain + withoutQuery;
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(citation);
    }
  }

  return unique;
}
import { LRUCache } from 'lru-cache';
import { createHash } from 'crypto';
/** One cached search answer, as stored in and returned from the result cache. */
interface CachedResult {
// Text content of the first choice in the search response ('' when it has none).
response: string;
// Citations extracted from `response`, already de-duplicated.
citations: ValidatedCitation[];
// Date.now() timestamp (milliseconds) taken when the entry was built.
cachedAt: number;
// SHA-256 hex digest of the lower-cased, trimmed query; also used as the cache key.
queryHash: string;
}
// Cache lifetimes in milliseconds, one per query type.
const CACHE_TTL = {
  factual: 24 * 60 * 60 * 1000, // stable facts: 24 hours
  news: 30 * 60 * 1000, // news queries: 30 minutes
  research: 4 * 60 * 60 * 1000, // research: 4 hours
  default: 60 * 60 * 1000, // anything else: 1 hour
};
// Search results keyed by query hash. `max: 500` is the cache's `max` option, not an HTTP status;
// each entry's lifetime is supplied per `set` call.
const resultCache = new LRUCache<string, CachedResult>({ max: 500 });
/**
 * Classifies a query by keyword so the matching cache lifetime can be chosen.
 * Rules are checked in order and the first hit wins: news, then research,
 * then factual. A query matching none of them is 'default'.
 *
 * @param query - Query text; matching is case-insensitive on whole words/phrases.
 * @returns The key of CACHE_TTL to use for this query.
 */
function detectQueryType(query: string): keyof typeof CACHE_TTL {
  const rules: Array<[RegExp, keyof typeof CACHE_TTL]> = [
    [/\b(latest|today|breaking|recent)\b/i, 'news'],
    [/\b(research|study|paper|analysis)\b/i, 'research'],
    [/\b(what is|define|how does)\b/i, 'factual'],
  ];

  for (const [pattern, queryType] of rules) {
    if (pattern.test(query)) {
      return queryType;
    }
  }
  return 'default';
}
/**
 * Answers a query from the result cache when possible, otherwise performs a
 * sanitized search and caches the outcome with a lifetime chosen by query type.
 *
 * The cache key is the SHA-256 of the lower-cased, trimmed query, so queries
 * differing only in case or surrounding whitespace share one entry.
 *
 * @param query - Query text as entered by the user.
 * @returns The cached or freshly built entry.
 */
async function cachedSearch(query: string): Promise<CachedResult> {
  const hash = createHash('sha256').update(query.toLowerCase().trim()).digest('hex');

  const cached = resultCache.get(hash);
  if (cached) return cached;

  const result = await safeSearch(query);
  // A response with no choices (or no message text) yields '' instead of throwing a TypeError.
  const content = result.choices?.[0]?.message?.content ?? '';
  // NOTE(review): only URLs embedded in the message text are collected here. If the API also
  // returns citations in a separate response field, they are not picked up — confirm against the API.
  const citations = deduplicateCitations(extractAndValidateCitations(content));

  const entry: CachedResult = {
    response: content,
    citations,
    cachedAt: Date.now(),
    queryHash: hash,
  };
  resultCache.set(hash, entry, { ttl: CACHE_TTL[detectQueryType(query)] });
  return entry;
}
/**
 * Rolling buffer of conversation messages for a research session.
 *
 * The buffer is bounded both by message count and by a rough token estimate.
 * When either bound is exceeded the oldest conversation turn is discarded.
 * A system prompt in the first slot is never discarded by trimming.
 */
class ResearchContext {
  // Left as `any[]` so the type returned by getMessages() is unchanged for existing callers.
  private messages: any[] = [];
  /** Maximum number of messages retained; a leading system prompt counts toward it. */
  private readonly maxMessages = 10;
  /** Budget for the whole buffer, in estimated tokens (see estimateTokens). */
  private readonly maxTokenEstimate = 8000;

  /**
   * Appends a message, then trims the buffer back within both limits.
   *
   * @param role - Message role, e.g. 'system', 'user' or 'assistant'.
   * @param content - Message text.
   */
  addMessage(role: string, content: string): void {
    this.messages.push({ role, content });

    // Count limit.
    while (this.messages.length > this.maxMessages) {
      this.dropOldestTurn();
    }

    // Token limit; always leaves at least two messages in place.
    while (this.estimateTokens() > this.maxTokenEstimate && this.messages.length > 2) {
      this.dropOldestTurn();
    }
  }

  /** Returns a shallow copy of the buffer; the message objects themselves are shared. */
  getMessages() {
    return [...this.messages];
  }

  /** Empties the buffer, including any system prompt. */
  clear(): void {
    this.messages = [];
  }

  /**
   * Removes the oldest message that is not a leading system prompt.
   * Both trimming loops use this so they agree on what is protected.
   */
  private dropOldestTurn(): void {
    const firstRemovable = this.messages[0]?.role === 'system' ? 1 : 0;
    this.messages.splice(firstRemovable, 1);
  }

  /** Estimates tokens as ceil(characters / 4) per message, summed over the buffer. */
  private estimateTokens(): number {
    return this.messages.reduce((sum, m) => sum + Math.ceil(m.content.length / 4), 0);
  }
}
| Issue | Cause | Solution |
|---|---|---|
| PII in search query | User entered personal data | Apply sanitizeQuery before API call |
| Broken citations | URL changed or removed | Validate URLs, remove inaccessible ones |
| Stale cached results | TTL too long for news queries | Use query-type-aware TTL |
| Context overflow | Too many conversation turns | Trim old messages automatically |
// Example usage: set up a conversation context and run one cached search.
const context = new ResearchContext();
context.addMessage('system', 'You are a research assistant.');

const result = await cachedSearch('Latest advances in quantum computing 2025');
// Print a preview: the first 200 characters of the response, then the citation count.
console.log(`Response: ${result.response.slice(0, 200)}...`);
console.log(`Citations: ${result.citations.length} sources`);