Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 37 additions & 4 deletions lib/mongoid/search_indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,17 @@ def ready?
# @param [ Integer ] limit The maximum number of results (default: 10).
# @param [ Integer | nil ] num_candidates The number of candidates to
# consider during the ANN search; defaults to limit * 10.
# @param [ true | false ] exact Use exact nearest-neighbor (ENN) search
# instead of ANN (default: false). When true, numCandidates is omitted.
# Required when using a flat vector search index.
# @param [ Hash | nil ] filter An optional MongoDB filter to pre-filter
# candidates before scoring.
# @param [ Array ] pipeline Additional aggregation stages to append after
# the vector search and score projection.
#
# @return [ Array<Mongoid::Document> ] matching documents, each with
# a populated +vector_search_score+ attribute.
def vector_search(index: nil, path: nil, limit: 10, num_candidates: nil, filter: nil, pipeline: [])
def vector_search(index: nil, path: nil, limit: 10, num_candidates: nil, exact: false, filter: nil, pipeline: []) # rubocop:disable Metrics/ParameterLists
Comment thread
jamis marked this conversation as resolved.
_index, resolved_path = self.class.send(:resolve_vector_index, index, path)
query_vector = public_send(resolved_path)

Expand All @@ -95,6 +98,7 @@ def vector_search(index: nil, path: nil, limit: 10, num_candidates: nil, filter:
path: path,
limit: limit + 1,
num_candidates: effective_candidates,
exact: exact,
filter: filter,
pipeline: post_pipeline
)
Expand Down Expand Up @@ -255,13 +259,24 @@ def search_index(name_or_defn, defn = nil)
# vector_search_index :my_vector_index, { fields: [...] }
# end
#
# @example Create a flat vector search index.
# class Person
# include Mongoid::Document
# vector_search_index fields: [
# { type: 'vector', path: 'embedding', numDimensions: 1536,
# similarity: 'cosine', indexingMethod: 'flat' }
# ]
# end
#
# @param [ Symbol | String | Hash ] name_or_defn Either the name of the index to
# define, or the index definition.
# @param [ Hash ] defn The vector search index definition.
def vector_search_index(name_or_defn, defn = nil)
name = name_or_defn
name, defn = nil, name if name.is_a?(Hash)

validate_vector_index_definition!(defn)

spec = { type: 'vectorSearch', definition: defn }.tap { |s| s[:name] = name.to_s if name }
search_index_specs.push(spec)

Expand Down Expand Up @@ -292,22 +307,25 @@ def vector_search_index(name_or_defn, defn = nil)
# consider during the ANN search; defaults to limit * 10.
# @param [ Hash | nil ] filter An optional MongoDB filter to pre-filter
# candidates before scoring.
# @param [ true | false ] exact Use exact nearest-neighbor (ENN) search
# instead of ANN (default: false). When true, numCandidates is omitted.
# Required when using a flat vector search index.
# @param [ Array ] pipeline Additional aggregation stages to append after
# the vector search and score projection.
#
# @return [ Array<Mongoid::Document> ] matching documents, each with
# a populated +vector_search_score+ attribute.
def vector_search(vector, index: nil, path: nil, limit: 10, num_candidates: nil, filter: nil, pipeline: []) # rubocop:disable Metrics/ParameterLists
def vector_search(vector, index: nil, path: nil, limit: 10, num_candidates: nil, exact: false, filter: nil, pipeline: []) # rubocop:disable Metrics/ParameterLists
resolved_index, resolved_path = resolve_vector_index(index, path)
num_candidates ||= limit * 10

vs_options = {
'index' => resolved_index,
'path' => resolved_path,
'queryVector' => vector,
'numCandidates' => num_candidates,
'limit' => limit
}
vs_options['numCandidates'] = num_candidates || (limit * 10) unless exact
Comment thread
jamis marked this conversation as resolved.
vs_options['exact'] = true if exact
vs_options['filter'] = filter if filter

agg_pipeline = [
Expand Down Expand Up @@ -376,6 +394,21 @@ def auto_embed_search(text, index: nil, path: nil, limit: 10, num_candidates: ni

private

# Validates the vector index definition, raising ArgumentError for
# combinations that MongoDB does not support.
#
# The only combination checked is a field whose +indexingMethod+ is
# "flat" that also supplies +hnswOptions+. Keys may be symbols or strings.
# Anything that cannot be inspected -- a definition that is not a Hash
# (e.g. nil when only an index name was given), a missing or
# non-enumerable +fields+ entry, or a field entry that is not a Hash --
# is skipped rather than raising an unrelated NoMethodError/TypeError.
#
# @param [ Hash | nil ] defn The vector search index definition.
#
# @raise [ ArgumentError ] if a flat-indexed field specifies hnswOptions.
#
# @return [ nil ]
def validate_vector_index_definition!(defn)
  return unless defn.is_a?(Hash)

  fields = defn[:fields] || defn['fields']
  return unless fields.respond_to?(:each)

  fields.each do |field|
    # Iterating a Hash (or other odd container) yields non-Hash entries;
    # there is nothing to validate on those.
    next unless field.is_a?(Hash)

    method = field[:indexingMethod] || field['indexingMethod']
    next unless method.to_s == 'flat'
    next unless field[:hnswOptions] || field['hnswOptions']

    raise ArgumentError, 'hnswOptions is only supported with indexingMethod: hnsw'
  end

  nil
end

# Retrieves the index records for the indexes with the given names.
#
# @param [ Array<String> ] names the index names to query
Expand Down
123 changes: 123 additions & 0 deletions spec/mongoid/search_indexable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,118 @@ def filter_results(result, names)
end
end

describe '.vector_search_index with flat indexingMethod' do
  # Attributes common to every vector field declared in this group.
  let(:base_field) do
    { type: 'vector', path: 'embedding', numDimensions: 3, similarity: 'cosine' }
  end

  # HNSW tuning options used by the examples that supply hnswOptions.
  let(:hnsw_options) { { m: 16, efConstruction: 150 } }

  # Declares an anonymous Mongoid model with a single vector search index
  # built from the given field definition. Declaring the index is what
  # triggers definition validation.
  def declare_model_with(field)
    Class.new do
      include Mongoid::Document

      store_in collection: BSON::ObjectId.new.to_s
      vector_search_index fields: [ field ]
    end
  end

  it 'raises ArgumentError when flat index includes hnswOptions' do
    field = base_field.merge(indexingMethod: 'flat', hnswOptions: hnsw_options)

    expect { declare_model_with(field) }
      .to raise_error(ArgumentError, /hnswOptions is only supported with indexingMethod: hnsw/)
  end

  it 'does not raise when flat index has no hnswOptions' do
    field = base_field.merge(indexingMethod: 'flat')

    expect { declare_model_with(field) }.not_to raise_error
  end

  it 'does not raise when hnsw index has hnswOptions' do
    field = base_field.merge(indexingMethod: 'hnsw', hnswOptions: hnsw_options)

    expect { declare_model_with(field) }.not_to raise_error
  end
end

describe '.vector_search pipeline construction' do
  let(:fake_cursor) { double(map: []) }
  let(:fake_collection) { instance_double(Mongo::Collection) }
  let(:query_vector) { [ 0.1, 0.2, 0.3 ] }

  let(:model) do
    Class.new do
      include Mongoid::Document

      store_in collection: BSON::ObjectId.new.to_s
      field :embedding, type: Array
      vector_search_index fields: [ { type: 'vector', path: 'embedding', numDimensions: 3, similarity: 'cosine' } ]
    end
  end

  before do
    allow(model).to receive(:collection).and_return(fake_collection)
    allow(fake_collection).to receive(:aggregate).and_return(fake_cursor)
  end

  # Sets a message expectation on the stubbed collection's #aggregate and
  # yields the options hash of the pipeline's $vectorSearch stage so the
  # example can assert on it. The stub still returns the fake cursor.
  def expect_vector_search_options
    expect(fake_collection).to receive(:aggregate) do |stages|
      stage = stages.find { |candidate| candidate['$vectorSearch'] }
      yield stage['$vectorSearch']
      fake_cursor
    end
  end

  it 'includes numCandidates by default' do
    expect_vector_search_options do |options|
      expect(options).to have_key('numCandidates')
    end

    model.vector_search(query_vector, limit: 5)
  end

  it 'omits numCandidates and sends exact: true when exact: true' do
    expect_vector_search_options do |options|
      expect(options).not_to have_key('numCandidates')
      expect(options['exact']).to be true
    end

    model.vector_search(query_vector, exact: true)
  end

  it 'uses limit * 10 as the default numCandidates' do
    expect_vector_search_options do |options|
      expect(options['numCandidates']).to eq(50)
    end

    model.vector_search(query_vector, limit: 5)
  end
end

describe '#vector_search pipeline construction' do
let(:model) do
Class.new do
Expand All @@ -337,6 +449,17 @@ def filter_results(result, names)
allow(fake_collection).to receive(:aggregate).and_return(fake_cursor)
end

it 'omits numCandidates and sends exact: true when exact: true' do
expect(fake_collection).to receive(:aggregate) do |pipeline|
vs = pipeline.find { |s| s['$vectorSearch'] }
expect(vs['$vectorSearch']).not_to have_key('numCandidates')
expect(vs['$vectorSearch']['exact']).to be true
fake_cursor
end

doc.vector_search(exact: true)
end

it 'passes limit + 1 to $vectorSearch so the post-filter never short-counts' do
expect(fake_collection).to receive(:aggregate) do |pipeline|
vs = pipeline.find { |s| s['$vectorSearch'] }
Expand Down
Loading