Perplexity SDK Patterns For AI Integration

v20260423

perplexity-sdk-patterns

This skill provides production-ready patterns for integrating with the Perplexity Sonar API in both TypeScript and Python. It covers best practices for creating robust wrappers around the standard OpenAI client, including dedicated handling for citations, search results, and related questions. It also includes essential utilities like implementing exponential backoff for reliable retries. Use this when setting up complex search-augmented generation workflows or standardizing team coding practices around Perplexity usage.

Perplexity API TypeScript Python LLM Integration RAG OpenAI

Get Skill

52 downloads

Overview

Perplexity SDK Patterns

Overview

Production-ready patterns for Perplexity Sonar API. Since Perplexity uses the OpenAI wire format, you build wrappers around the openai client library with Perplexity-specific response handling (citations, search results, related questions).

Prerequisites

openai package installed (npm install openai or pip install openai)
API key configured in PERPLEXITY_API_KEY
Understanding of OpenAI chat completions format

Instructions

Step 1: Typed Client Singleton (TypeScript)

// src/perplexity/client.ts
import OpenAI from "openai";

/**
 * Chat completion as returned by Perplexity: the OpenAI completion shape plus
 * optional Perplexity-specific top-level fields.
 */
export interface PerplexityChatCompletion extends OpenAI.ChatCompletion {
  /** Source URLs; a `[n]` marker in the answer text refers to entry `n - 1`. */
  citations?: string[];
  /** Search result metadata (title, URL, optional date, snippet), when returned. */
  search_results?: Array<{
    title: string;
    url: string;
    date?: string;
    snippet: string;
  }>;
  /** Related questions, when they were requested and returned. */
  related_questions?: string[];
  /**
   * Narrows the inherited `usage` field so the Perplexity counters declared on
   * `PerplexityUsage` can be read without casting to `any`.
   */
  usage?: PerplexityUsage;
}

/** OpenAI usage block extended with Perplexity's optional counters. */
export interface PerplexityUsage extends OpenAI.CompletionUsage {
  citation_tokens?: number;
  num_search_queries?: number;
  reasoning_tokens?: number;
}

// Lazily created client shared by every caller in this module.
let instance: OpenAI | null = null;

/**
 * Returns the shared OpenAI client configured for the Perplexity endpoint,
 * creating it on first use.
 *
 * @throws Error when `PERPLEXITY_API_KEY` is unset or empty and no client
 *   has been created yet.
 */
export function getClient(): OpenAI {
  if (instance) return instance;

  const apiKey = process.env.PERPLEXITY_API_KEY;
  if (!apiKey) throw new Error("PERPLEXITY_API_KEY not set");

  instance = new OpenAI({ apiKey, baseURL: "https://api.perplexity.ai" });
  return instance;
}

Step 2: Search with Full Response Parsing

// src/perplexity/search.ts
import { getClient, PerplexityChatCompletion } from "./client";

/** Model identifiers accepted by `search`. */
export type SearchModel = "sonar" | "sonar-pro" | "sonar-reasoning-pro" | "sonar-deep-research";
/** Values accepted for the `search_recency_filter` request parameter. */
export type RecencyFilter = "hour" | "day" | "week" | "month";

/** Optional settings for `search`; every field may be omitted. */
export interface SearchOptions {
  /** Model to query; `search` falls back to "sonar" when unset. */
  model?: SearchModel;
  /** When set, sent as a system message ahead of the user query. */
  systemPrompt?: string;
  /** Forwarded as `max_tokens`. */
  maxTokens?: number;
  /** Forwarded as `temperature`. */
  temperature?: number;
  /** Forwarded as `search_recency_filter` when set. */
  searchRecencyFilter?: RecencyFilter;
  /** Forwarded as `search_domain_filter` when set. */
  searchDomainFilter?: string[];   // max 20 domains
  /** When true, the request carries `return_related_questions: true`. */
  returnRelatedQuestions?: boolean;
  /** When true, the request carries `return_images: true`. */
  returnImages?: boolean;
}

/** Flattened, camelCased view of a Perplexity response as returned by `search`. */
export interface SearchResult {
  /** Message content of the first choice, or "" when it has no content. */
  answer: string;
  /** The response's `citations`, or an empty array when absent. */
  citations: string[];
  /** The response's `related_questions`, or an empty array when absent. */
  relatedQuestions: string[];
  /** Token counters copied from the response's `usage` block. */
  usage: {
    /** `usage.prompt_tokens`, 0 when missing. */
    promptTokens: number;
    /** `usage.completion_tokens`, 0 when missing. */
    completionTokens: number;
    /** `usage.total_tokens`, 0 when missing. */
    totalTokens: number;
    /** `usage.citation_tokens`; undefined when the response omits it. */
    citationTokens?: number;
    /** `usage.num_search_queries`; undefined when the response omits it. */
    searchQueries?: number;
  };
  /** Model name reported by the response. */
  model: string;
}

/**
 * Sends one chat-completion request to Perplexity and flattens the response
 * into a `SearchResult`.
 *
 * @param query - Content of the user message.
 * @param opts - Optional model, prompt, sampling and search-filter settings.
 * @returns Answer text, citations, related questions, token usage and model.
 *   A response with no choices or no content yields an empty `answer`
 *   instead of throwing.
 */
export async function search(
  query: string,
  opts: SearchOptions = {}
): Promise<SearchResult> {
  const client = getClient();

  // The OpenAI request type does not know Perplexity's search parameters, so
  // the body is cast once here. Optional flags are only sent when set.
  const response = (await client.chat.completions.create({
    model: opts.model || "sonar",
    messages: [
      ...(opts.systemPrompt
        ? [{ role: "system" as const, content: opts.systemPrompt }]
        : []),
      { role: "user" as const, content: query },
    ],
    max_tokens: opts.maxTokens,
    temperature: opts.temperature,
    ...(opts.searchRecencyFilter
      ? { search_recency_filter: opts.searchRecencyFilter }
      : {}),
    // An empty domain list carries no filter, so it is omitted like `undefined`.
    ...(opts.searchDomainFilter?.length
      ? { search_domain_filter: opts.searchDomainFilter }
      : {}),
    ...(opts.returnRelatedQuestions ? { return_related_questions: true } : {}),
    ...(opts.returnImages ? { return_images: true } : {}),
  } as any)) as unknown as PerplexityChatCompletion;

  // Perplexity adds counters to `usage` that the OpenAI type omits; read them
  // through a narrow local view rather than `any`.
  const usage = response.usage as
    | (NonNullable<PerplexityChatCompletion["usage"]> & {
        citation_tokens?: number;
        num_search_queries?: number;
      })
    | undefined;

  return {
    // `choices` may be empty and `content` may be null; both map to "".
    answer: response.choices?.[0]?.message?.content ?? "",
    citations: response.citations ?? [],
    relatedQuestions: response.related_questions ?? [],
    usage: {
      promptTokens: usage?.prompt_tokens ?? 0,
      completionTokens: usage?.completion_tokens ?? 0,
      totalTokens: usage?.total_tokens ?? 0,
      citationTokens: usage?.citation_tokens,
      searchQueries: usage?.num_search_queries,
    },
    model: response.model,
  };
}

Step 3: Retry with Exponential Backoff

// src/perplexity/retry.ts
/**
 * Decides whether a failure is worth retrying.
 *
 * Errors without a usable HTTP status (network failures, thrown primitives,
 * `null`) are retried. With a status, only 429, 408 and 5xx are retried.
 */
function isRetryableError(err: unknown): boolean {
  if (typeof err !== "object" || err === null) return true;

  const candidate = err as {
    status?: unknown;
    response?: { status?: unknown } | null;
  };
  const rawStatus = candidate.status || candidate.response?.status;
  if (!rawStatus) return true;

  const status = Number(rawStatus);
  if (Number.isNaN(status)) return true;
  return status === 429 || status === 408 || status >= 500;
}

/**
 * Runs `operation`, retrying transient failures with exponential backoff and
 * up to 500 ms of random jitter.
 *
 * @param operation - Async action to run; invoked once per attempt.
 * @param opts - Retry tuning. Any field may be omitted:
 *   `maxRetries` (default 3) is the number of retries after the first attempt;
 *   values that are negative or not a number count as 0 and fractions are
 *   rounded down. `baseDelayMs` (default 1000) is the delay before the first
 *   retry and doubles on each further retry. `maxDelayMs` (default 30000)
 *   caps a single delay, jitter included.
 * @returns The value of the first successful attempt.
 * @throws The last error once retries are exhausted, or immediately for a
 *   non-retryable status (any 4xx other than 408 and 429).
 */
export async function withRetry<T>(
  operation: () => Promise<T>,
  opts: { maxRetries?: number; baseDelayMs?: number; maxDelayMs?: number } = {}
): Promise<T> {
  const { maxRetries = 3, baseDelayMs = 1000, maxDelayMs = 30000 } = opts;
  // Guarantees at least one attempt even for negative or NaN input.
  const retryBudget = maxRetries > 0 ? Math.floor(maxRetries) : 0;

  for (let attempt = 0; ; attempt++) {
    try {
      return await operation();
    } catch (err: unknown) {
      if (attempt >= retryBudget || !isRetryableError(err)) throw err;

      const delay = Math.min(
        baseDelayMs * Math.pow(2, attempt) + Math.random() * 500,
        maxDelayMs
      );
      await new Promise<void>((resolve) => setTimeout(resolve, delay));
    }
  }
}

// Usage: run a search through the retry wrapper
const runSearch = () => search("latest AI developments", { model: "sonar-pro" });
const result = await withRetry(runSearch);

Step 4: Python Patterns

# perplexity_client.py
import os, hashlib, json
from openai import OpenAI
from functools import lru_cache

@lru_cache(maxsize=1)
def get_client() -> OpenAI:
    """Return the shared OpenAI client pointed at the Perplexity endpoint.

    The client is built on the first call and cached by ``lru_cache``, so
    later calls return the same object.

    Raises:
        KeyError: if ``PERPLEXITY_API_KEY`` is not in the environment.
    """
    settings = {
        "api_key": os.environ["PERPLEXITY_API_KEY"],
        "base_url": "https://api.perplexity.ai",
    }
    return OpenAI(**settings)

def search(
    query: str,
    model: str = "sonar",
    system_prompt: str | None = None,
    max_tokens: int | None = None,
    search_recency_filter: str | None = None,
    search_domain_filter: list[str] | None = None,
) -> dict:
    client = get_client()
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": query})

    kwargs = {"model": model, "messages": messages}
    if max_tokens:
        kwargs["max_tokens"] = max_tokens
    if search_recency_filter:
        kwargs["search_recency_filter"] = search_recency_filter
    if search_domain_filter:
        kwargs["search_domain_filter"] = search_domain_filter

    response = client.chat.completions.create(**kwargs)
    raw = response.model_dump()

    return {
        "answer": response.choices[0].message.content,
        "citations": raw.get("citations", []),
        "usage": {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens,
        },
        "model": response.model,
    }

Step 5: Citation Formatter

// src/perplexity/citations.ts
/**
 * Turns numeric citation markers such as `[1]` into markdown links that point
 * at the matching entry of `citations` (marker `n` maps to index `n - 1`).
 *
 * Markers without a matching citation, and markers already followed by a
 * `(...)` link target, are left untouched, so formatting the same text twice
 * gives the same result.
 *
 * @param answer - Text containing `[n]` markers.
 * @param citations - URLs in marker order.
 * @returns The text with resolvable markers rewritten as `[n](url)`.
 */
export function formatCitationsAsMarkdown(
  answer: string,
  citations: string[]
): string {
  // One pass with a replacer function: URLs are inserted literally (no `$&` /
  // `$$` expansion) and text inside an inserted URL is never scanned again.
  return answer.replace(
    /\[([1-9]\d*)\](?!\()/g,
    (marker: string, num: string) => {
      const url = citations[Number(num) - 1];
      return url === undefined ? marker : `${marker}(${url})`;
    }
  );
}

/**
 * Appends the citations to the answer as a numbered `[n]: url` list below a
 * `---` separator.
 *
 * @param answer - Answer text, returned unchanged when there are no citations
 *   so that no dangling separator is produced.
 * @param citations - URLs in marker order.
 * @returns The answer followed by the footnote block, or the answer alone.
 */
export function formatCitationsAsFootnotes(
  answer: string,
  citations: string[]
): string {
  if (citations.length === 0) return answer;

  const footnotes = citations
    .map((url, i) => `[${i + 1}]: ${url}`)
    .join("\n");
  return `${answer}\n\n---\n${footnotes}`;
}

Error Handling

Pattern	Use Case	Benefit
Typed response wrapper	All API calls	Access citations without `any` casts
Retry with backoff	Transient failures	Retries 429 rate limits, 408 timeouts, and 5xx server errors with exponential backoff and jitter
Citation formatter	User-facing output	Converts `[1]` markers to clickable links
Python `@lru_cache`	Client reuse	Single client instance across calls

Output

Type-safe Perplexity client with full response typing
Search function with all Perplexity-specific parameters
Automatic retry with exponential backoff and jitter
Citation formatting utilities

Resources

Next Steps

Apply patterns in perplexity-core-workflow-a for real-world usage.

Info

Category Artificial Intelligence

Name perplexity-sdk-patterns

Version v20260423

Size 8.15KB

Source jeremylongshore/claude-code-plugins-plus-skills

Updated At 2026-04-28