zoobzio January 20, 2026 Edit this page

Vector Search

Recipe: Build semantic search with pgvector and ASTQL.

ASTQL supports pgvector operators for similarity search on embedding vectors.

Schema Setup

Define a table with a vector column:

project := dbml.NewProject("myapp")

documents := dbml.NewTable("documents")
documents.AddColumn(dbml.NewColumn("id", "bigint"))
documents.AddColumn(dbml.NewColumn("title", "varchar"))
documents.AddColumn(dbml.NewColumn("content", "text"))
documents.AddColumn(dbml.NewColumn("embedding", "vector(1536)"))  // OpenAI dimension
documents.AddColumn(dbml.NewColumn("created_at", "timestamp"))
project.AddTable(documents)

instance, _ := astql.NewFromDBML(project)

Distance Operators

OperatorConstantDistance Type
<->VectorL2DistanceEuclidean (L2)
<=>VectorCosineDistanceCosine
<#>VectorInnerProductNegative inner product
<+>VectorL1DistanceManhattan (L1)

Find documents similar to a query vector:

func SearchSimilar(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
    return astql.Select(instance.T("documents")).
        Fields(
            instance.F("id"),
            instance.F("title"),
            instance.F("content"),
        ).
        OrderByExpr(
            instance.F("embedding"),
            astql.VectorL2Distance,
            instance.P("query_embedding"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

// SELECT "id", "title", "content"
// FROM "documents"
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10

Execution with sqlx

result, _ := SearchSimilar(instance, 10)

// Convert embedding to pgvector format
queryEmbedding := pgvector.NewVector(embeddingFloats)

params := map[string]any{
    "query_embedding": queryEmbedding,
}

var docs []Document
err := db.Select(&docs, result.SQL, params)

Selecting Distance as a Column

Use SelectBinaryExpr to include the computed distance in your results:

func SearchWithDistance(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
    return astql.Select(instance.T("documents")).
        Fields(
            instance.F("id"),
            instance.F("title"),
            instance.F("content"),
        ).
        SelectBinaryExpr(
            instance.F("embedding"),
            astql.VectorL2Distance,
            instance.P("query_embedding"),
            "distance",
        ).
        OrderByExpr(
            instance.F("embedding"),
            astql.VectorL2Distance,
            instance.P("query_embedding"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

// SELECT "id", "title", "content", "embedding" <-> :query_embedding AS "distance"
// FROM "documents"
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10

This is useful when you need to display or filter by the actual distance value.

Cosine Similarity

For normalized embeddings, use cosine distance:

func SearchByCosine(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
    return astql.Select(instance.T("documents")).
        Fields(instance.F("id"), instance.F("title")).
        OrderByExpr(
            instance.F("embedding"),
            astql.VectorCosineDistance,
            instance.P("query_embedding"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

// ORDER BY "embedding" <=> :query_embedding ASC

Inner Product (for Maximum Similarity)

When embeddings are normalized, inner product gives similarity (not distance):

func SearchByInnerProduct(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
    return astql.Select(instance.T("documents")).
        Fields(instance.F("id"), instance.F("title")).
        OrderByExpr(
            instance.F("embedding"),
            astql.VectorInnerProduct,
            instance.P("query_embedding"),
            astql.ASC,  // Note: pgvector uses negative inner product
        ).
        Limit(limit).
        Render()
}

// ORDER BY "embedding" <#> :query_embedding ASC

Combine vector search with traditional filters:

func SearchWithFilters(
    instance *astql.ASTQL,
    category string,
    minDate time.Time,
    limit int,
) (*astql.QueryResult, error) {
    return astql.Select(instance.T("documents")).
        Fields(instance.F("id"), instance.F("title"), instance.F("content")).
        Where(instance.And(
            instance.C(instance.F("category"), astql.EQ, instance.P("category")),
            instance.C(instance.F("created_at"), astql.GE, instance.P("min_date")),
        )).
        OrderByExpr(
            instance.F("embedding"),
            astql.VectorL2Distance,
            instance.P("query_embedding"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

// SELECT "id", "title", "content"
// FROM "documents"
// WHERE ("category" = :category AND "created_at" >= :min_date)
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10

K-Nearest Neighbors with Distance Threshold

Filter results by maximum distance:

func SearchWithThreshold(instance *astql.ASTQL, maxDistance float64, limit int) (*astql.QueryResult, error) {
    return astql.Select(instance.T("documents")).
        Fields(instance.F("id"), instance.F("title")).
        Where(instance.C(
            instance.F("embedding"),
            astql.VectorL2Distance,
            instance.P("query_embedding"),
        )).
        // Note: distance threshold requires raw SQL or a computed column
        OrderByExpr(
            instance.F("embedding"),
            astql.VectorL2Distance,
            instance.P("query_embedding"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

Combine vector similarity with full-text search:

func HybridSearch(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
    // Vector search component
    return astql.Select(instance.T("documents")).
        Fields(
            instance.F("id"),
            instance.F("title"),
            instance.F("content"),
        ).
        Where(instance.C(
            instance.F("content"),
            astql.ILIKE,
            instance.P("text_query"),
        )).
        OrderByExpr(
            instance.F("embedding"),
            astql.VectorL2Distance,
            instance.P("query_embedding"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

// SELECT "id", "title", "content"
// FROM "documents"
// WHERE "content" ILIKE :text_query
// ORDER BY "embedding" <-> :query_embedding ASC
// LIMIT 10

Search across multiple embedding columns:

project := dbml.NewProject("myapp")

products := dbml.NewTable("products")
products.AddColumn(dbml.NewColumn("id", "bigint"))
products.AddColumn(dbml.NewColumn("name", "varchar"))
products.AddColumn(dbml.NewColumn("title_embedding", "vector(1536)"))
products.AddColumn(dbml.NewColumn("description_embedding", "vector(1536)"))
products.AddColumn(dbml.NewColumn("image_embedding", "vector(512)"))
project.AddTable(products)

// Search by title similarity
func SearchByTitle(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
    return astql.Select(instance.T("products")).
        Fields(instance.F("id"), instance.F("name")).
        OrderByExpr(
            instance.F("title_embedding"),
            astql.VectorCosineDistance,
            instance.P("query_embedding"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

// Search by image similarity
func SearchByImage(instance *astql.ASTQL, limit int) (*astql.QueryResult, error) {
    return astql.Select(instance.T("products")).
        Fields(instance.F("id"), instance.F("name")).
        OrderByExpr(
            instance.F("image_embedding"),
            astql.VectorL2Distance,
            instance.P("image_query"),
            astql.ASC,
        ).
        Limit(limit).
        Render()
}

Indexing Recommendations

For production vector search, create appropriate indexes:

-- HNSW index (recommended for most cases)
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);

-- IVFFlat index (for very large datasets)
CREATE INDEX ON documents USING ivfflat (embedding vector_l2_ops) WITH (lists = 100);

Choose the operator class matching your distance function:

  • vector_l2_ops for L2 distance (<->)
  • vector_cosine_ops for cosine distance (<=>)
  • vector_ip_ops for inner product (<#>)

Best Practices

1. Normalize Embeddings for Cosine

Pre-normalize embeddings when using cosine distance:

func normalize(v []float32) []float32 {
    var sum float32
    for _, x := range v {
        sum += x * x
    }
    norm := float32(math.Sqrt(float64(sum)))
    result := make([]float32, len(v))
    for i, x := range v {
        result[i] = x / norm
    }
    return result
}

2. Use Appropriate Limits

Vector search is expensive. Always use reasonable limits:

// Good: bounded results
.Limit(100)

// Bad: unbounded
// (no limit)

3. Pre-filter When Possible

Apply WHERE clauses before vector operations:

// Good: filter first, then vector search
.Where(instance.C(instance.F("category"), astql.EQ, instance.P("cat"))).
.OrderByExpr(embedding, distance, query, astql.ASC)

// Less efficient: vector search on all rows
.OrderByExpr(embedding, distance, query, astql.ASC)