Vector DB
RAG Pattern
Three calls — embed, search, chat — and you have retrieval-augmented generation.
OpenAdapter gives you everything you need for RAG in a single account:
- Embed the question with `/v1/embeddings`
- Search the collection with `/v1/vectors/collections/<name>/search`
- Chat with the retrieved context as a system message via `/v1/chat/completions`
Python
import requests

# Account API key ("sk-cv-..." prefix) — the same key authorizes all three endpoints.
API_KEY = "sk-cv-..."
# Base URL of the OpenAdapter API.
BASE = "https://api.openadapter.in"
# Shared bearer-auth + JSON headers, reused by every request below.
headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
def rag_query(question, collection="docs", top_k=5):
    """Answer *question* with retrieval-augmented generation.

    Embeds the question, searches the named vector collection for the
    ``top_k`` closest chunks, then asks the chat model to answer using
    those chunks as system-message context.

    Args:
        question: Natural-language question to answer.
        collection: Name of the vector collection to search.
        top_k: Number of chunks to retrieve (3-8 is usually enough).

    Returns:
        The model's answer as a string.

    Raises:
        requests.HTTPError: If any of the three API calls returns a
            non-2xx status.
    """
    # 1. Embed — must use the same embedding model as at ingest time,
    # otherwise the vectors live in different spaces and won't compare.
    resp = requests.post(
        f"{BASE}/v1/embeddings",
        json={"model": "qwen3-embedding-small", "input": [question]},
        headers=headers,
        timeout=30,  # requests has no default timeout; never hang forever
    )
    resp.raise_for_status()  # fail fast instead of a confusing KeyError below
    vec = resp.json()["data"][0]["embedding"]

    # 2. Search the collection for the top_k nearest chunks.
    resp = requests.post(
        f"{BASE}/v1/vectors/collections/{collection}/search",
        json={"vector": vec, "limit": top_k},
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()
    results = resp.json()["results"]
    context = "\n\n".join(
        f"[{i+1}] {r['payload']['text']}" for i, r in enumerate(results)
    )

    # 3. Chat, with the retrieved chunks supplied as a system message.
    resp = requests.post(
        f"{BASE}/v1/chat/completions",
        json={
            "model": "GLM-4.7",
            "messages": [
                {"role": "system", "content": f"Answer using context:\n{context}"},
                {"role": "user", "content": question},
            ],
        },
        headers=headers,
        timeout=60,  # generation is slower than embed/search
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]
print(rag_query("How do I create a collection?"))

JavaScript
const API_KEY = "sk-cv-...";
const BASE = "https://api.openadapter.in";
const headers = {
Authorization: `Bearer ${API_KEY}`,
"Content-Type": "application/json",
};
/**
 * Answer `question` with retrieval-augmented generation: embed the
 * question, search `collection` for the `topK` closest chunks, then ask
 * the chat model to answer using those chunks as system-message context.
 *
 * @param {string} question  Natural-language question to answer.
 * @param {string} collection  Vector collection to search.
 * @param {number} topK  Number of chunks to retrieve (3-8 is typical).
 * @returns {Promise<string>}  The model's answer.
 * @throws {Error}  If any of the three API calls returns a non-2xx status.
 */
async function ragQuery(question, collection = "docs", topK = 5) {
  // fetch does NOT reject on HTTP 4xx/5xx — without this check an error
  // response would surface later as an opaque "cannot read property" crash.
  async function post(url, body) {
    const res = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
    });
    if (!res.ok) {
      throw new Error(`POST ${url} failed: ${res.status} ${await res.text()}`);
    }
    return res.json();
  }

  // 1. Embed — must use the same embedding model as at ingest time.
  const embed = await post(`${BASE}/v1/embeddings`, {
    model: "qwen3-embedding-small",
    input: [question],
  });
  const vec = embed.data[0].embedding;

  // 2. Search the collection for the topK nearest chunks.
  const { results } = await post(
    `${BASE}/v1/vectors/collections/${collection}/search`,
    { vector: vec, limit: topK },
  );
  const context = results.map((r, i) => `[${i + 1}] ${r.payload.text}`).join("\n\n");

  // 3. Chat, with the retrieved chunks supplied as a system message.
  const resp = await post(`${BASE}/v1/chat/completions`, {
    model: "GLM-4.7",
    messages: [
      { role: "system", content: `Answer using context:\n${context}` },
      { role: "user", content: question },
    ],
  });
  return resp.choices[0].message.content;
}

Tips
- Use the same embedding model at ingest and query time — different dimensions / spaces won't compare.
- Top-K of 3–8 is usually enough for chat-style RAG; more chunks dilute the signal.
- Include payload metadata (filenames, headings) so the model can cite sources.