Vector Search
Recipe: Build semantic search with pgvector and ASTQL.
ASTQL supports pgvector operators for similarity search on embedding vectors.
Schema Setup
Define a table with a vector column:
project := dbml.NewProject("myapp")
documents := dbml.NewTable("documents")
documents.AddColumn(dbml.NewColumn("id", "bigint"))
documents.AddColumn(dbml.NewColumn("title", "varchar"))
documents.AddColumn(dbml.NewColumn("content", "text"))
documents.AddColumn(dbml.NewColumn("embedding", "vector(1536)")) // OpenAI dimension
documents.AddColumn(dbml.NewColumn("created_at", "timestamp"))
project.AddTable(documents)
instance, _ := astql.NewFromDBML(project)
Distance Operators
| Operator | Constant | Distance Type |
|---|---|---|
<-> | VectorL2Distance | Euclidean (L2) |
<=> | VectorCosineDistance | Cosine |
<#> | VectorInnerProduct | Negative inner product |
<+> | VectorL1Distance | Manhattan (L1) |
Basic Similarity Search
Find documents similar to a query vector:
func SearchSimilar(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
return astql.Select(instance.T("documents")).
Fields(
instance.F("id"),
instance.F("title"),
instance.F("content"),
).
OrderByExpr(
instance.F("embedding"),
astql.VectorL2Distance,
instance.P("query_embedding"),
astql.ASC,
).
Limit(limit).
Render()
}
// SELECT "id", "title", "content"
// FROM "documents"
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10
Execution with sqlx
result, _ := SearchSimilar(instance, 10)
// Convert embedding to pgvector format
queryEmbedding := pgvector.NewVector(embeddingFloats)
params := map[string]any{
"query_embedding": queryEmbedding,
}
var docs []Document
err := db.Select(&docs, result.SQL, params)
Selecting Distance as a Column
Use SelectBinaryExpr to include the computed distance in your results:
func SearchWithDistance(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
return astql.Select(instance.T("documents")).
Fields(
instance.F("id"),
instance.F("title"),
instance.F("content"),
).
SelectBinaryExpr(
instance.F("embedding"),
astql.VectorL2Distance,
instance.P("query_embedding"),
"distance",
).
OrderByExpr(
instance.F("embedding"),
astql.VectorL2Distance,
instance.P("query_embedding"),
astql.ASC,
).
Limit(limit).
Render()
}
// SELECT "id", "title", "content", "embedding" <-> :query_embedding AS "distance"
// FROM "documents"
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10
This is useful when you need to display or filter by the actual distance value.
Cosine Similarity
For normalized embeddings, use cosine distance:
func SearchByCosine(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
return astql.Select(instance.T("documents")).
Fields(instance.F("id"), instance.F("title")).
OrderByExpr(
instance.F("embedding"),
astql.VectorCosineDistance,
instance.P("query_embedding"),
astql.ASC,
).
Limit(limit).
Render()
}
// ORDER BY "embedding" <=> :query_embedding ASC
Inner Product (for Maximum Similarity)
When embeddings are normalized, inner product gives similarity (not distance):
func SearchByInnerProduct(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
return astql.Select(instance.T("documents")).
Fields(instance.F("id"), instance.F("title")).
OrderByExpr(
instance.F("embedding"),
astql.VectorInnerProduct,
instance.P("query_embedding"),
astql.ASC, // Note: pgvector uses negative inner product
).
Limit(limit).
Render()
}
// ORDER BY "embedding" <#> :query_embedding ASC
Filtered Vector Search
Combine vector search with traditional filters:
func SearchWithFilters(
instance *astql.ASTQL,
category string,
minDate time.Time,
limit int,
) (*astql.QueryResult, error) {
return astql.Select(instance.T("documents")).
Fields(instance.F("id"), instance.F("title"), instance.F("content")).
Where(instance.And(
instance.C(instance.F("category"), astql.EQ, instance.P("category")),
instance.C(instance.F("created_at"), astql.GE, instance.P("min_date")),
)).
OrderByExpr(
instance.F("embedding"),
astql.VectorL2Distance,
instance.P("query_embedding"),
astql.ASC,
).
Limit(limit).
Render()
}
// SELECT "id", "title", "content"
// FROM "documents"
// WHERE ("category" = :category AND "created_at" >= :min_date)
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10
K-Nearest Neighbors with Distance Threshold
Filter results by maximum distance:
func SearchWithThreshold(instance *astql.ASTQL, maxDistance float64, limit int) (*astql.QueryResult, error) {
return astql.Select(instance.T("documents")).
Fields(instance.F("id"), instance.F("title")).
Where(instance.C(
instance.F("embedding"),
astql.VectorL2Distance,
instance.P("query_embedding"),
)).
// Note: distance threshold requires raw SQL or a computed column
OrderByExpr(
instance.F("embedding"),
astql.VectorL2Distance,
instance.P("query_embedding"),
astql.ASC,
).
Limit(limit).
Render()
}
Hybrid Search
Combine vector similarity with full-text search:
func HybridSearch(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
// Vector search component
return astql.Select(instance.T("documents")).
Fields(
instance.F("id"),
instance.F("title"),
instance.F("content"),
).
Where(instance.C(
instance.F("content"),
astql.ILIKE,
instance.P("text_query"),
)).
OrderByExpr(
instance.F("embedding"),
astql.VectorL2Distance,
instance.P("query_embedding"),
astql.ASC,
).
Limit(limit).
Render()
}
// SELECT "id", "title", "content"
// FROM "documents"
// WHERE "content" ILIKE :text_query
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10
Multi-Vector Search
Search across multiple embedding columns:
project := dbml.NewProject("myapp")
products := dbml.NewTable("products")
products.AddColumn(dbml.NewColumn("id", "bigint"))
products.AddColumn(dbml.NewColumn("name", "varchar"))
products.AddColumn(dbml.NewColumn("title_embedding", "vector(1536)"))
products.AddColumn(dbml.NewColumn("description_embedding", "vector(1536)"))
products.AddColumn(dbml.NewColumn("image_embedding", "vector(512)"))
project.AddTable(products)
// Search by title similarity
func SearchByTitle(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
return astql.Select(instance.T("products")).
Fields(instance.F("id"), instance.F("name")).
OrderByExpr(
instance.F("title_embedding"),
astql.VectorCosineDistance,
instance.P("query_embedding"),
astql.ASC,
).
Limit(limit).
Render()
}
// Search by image similarity
func SearchByImage(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
return astql.Select(instance.T("products")).
Fields(instance.F("id"), instance.F("name")).
OrderByExpr(
instance.F("image_embedding"),
astql.VectorL2Distance,
instance.P("image_query"),
astql.ASC,
).
Limit(limit).
Render()
}
Indexing Recommendations
For production vector search, create appropriate indexes:
-- HNSW index (recommended for most cases)
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);
-- IVFFlat index (for very large datasets)
CREATE INDEX ON documents USING ivfflat (embedding vector_l2_ops) WITH (lists = 100);
Choose the operator class matching your distance function:
vector_l2_opsfor L2 distance (<->)vector_cosine_opsfor cosine distance (<=>)vector_ip_opsfor inner product (<#>)
Best Practices
1. Normalize Embeddings for Cosine
Pre-normalize embeddings when using cosine distance:
func normalize(v []float32) []float32 {
var sum float32
for _, x := range v {
sum += x * x
}
norm := float32(math.Sqrt(float64(sum)))
result := make([]float32, len(v))
for i, x := range v {
result[i] = x / norm
}
return result
}
2. Use Appropriate Limits
Vector search is expensive. Always use reasonable limits:
// Good: bounded results
.Limit(100)
// Bad: unbounded
// (no limit)
3. Pre-filter When Possible
Apply WHERE clauses before vector operations:
// Good: filter first, then vector search
.Where(instance.C(instance.F("category"), astql.EQ, instance.P("cat"))).
.OrderByExpr(embedding, distance, query, astql.ASC)
// Less efficient: vector search on all rows
.OrderByExpr(embedding, distance, query, astql.ASC)