From e802b0eb41660b16225bb15b9f8a7282768a33c8 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 26 May 2026 16:32:30 -0600 Subject: [PATCH 1/2] MONGOID-5924 add support for flat indexing method in vector search index definitions - Add `exact: false` parameter to `vector_search` (class and instance) so callers can request exact nearest-neighbor search; when true, numCandidates is omitted from the $vectorSearch stage, matching the behavior already present in auto_embed_search. - Validate at index-definition time that hnswOptions is not combined with indexingMethod: flat, per the Atlas Vector Search spec. - Document the flat indexing method in the vector_search_index DSL. --- lib/mongoid/search_indexable.rb | 37 ++++++- spec/mongoid/search_indexable_spec.rb | 142 ++++++++++++++++++++++++++ 2 files changed, 175 insertions(+), 4 deletions(-) diff --git a/lib/mongoid/search_indexable.rb b/lib/mongoid/search_indexable.rb index 0782d7d32e..9491368d77 100644 --- a/lib/mongoid/search_indexable.rb +++ b/lib/mongoid/search_indexable.rb @@ -76,7 +76,7 @@ def ready? # # @return [ Array ] matching documents, each with # a populated +vector_search_score+ attribute. - def vector_search(index: nil, path: nil, limit: 10, num_candidates: nil, filter: nil, pipeline: []) + def vector_search(index: nil, path: nil, limit: 10, num_candidates: nil, exact: false, filter: nil, pipeline: []) # rubocop:disable Metrics/ParameterLists _index, resolved_path = self.class.send(:resolve_vector_index, index, path) query_vector = public_send(resolved_path) @@ -94,6 +94,7 @@ def vector_search(index: nil, path: nil, limit: 10, num_candidates: nil, filter: path: path, limit: limit, num_candidates: num_candidates, + exact: exact, filter: combined_filter, pipeline: pipeline ) @@ -253,6 +254,15 @@ def search_index(name_or_defn, defn = nil) # vector_search_index :my_vector_index, { fields: [...] } # end # + # @example Create a flat vector search index. + # class Person + # include Mongoid::Document + # vector_search_index fields: [ + # { type: 'vector', path: 'embedding', numDimensions: 1536, + # similarity: 'cosine', indexingMethod: 'flat' } + # ] + # end + # # @param [ Symbol | String | Hash ] name_or_defn Either the name of the index to # define, or the index definition. # @param [ Hash ] defn The vector search index definition. @@ -260,6 +270,8 @@ def vector_search_index(name_or_defn, defn = nil) name = name_or_defn name, defn = nil, name if name.is_a?(Hash) + validate_vector_index_definition!(defn) + spec = { type: 'vectorSearch', definition: defn }.tap { |s| s[:name] = name.to_s if name } search_index_specs.push(spec) @@ -290,22 +302,24 @@ def vector_search_index(name_or_defn, defn = nil) # consider during the ANN search; defaults to limit * 10. # @param [ Hash | nil ] filter An optional MongoDB filter to pre-filter # candidates before scoring. + # @param [ true | false ] exact Use exact nearest-neighbor (ENN) search + # instead of ANN (default: false). When true, numCandidates is omitted. + # Required when using a flat vector search index. # @param [ Array ] pipeline Additional aggregation stages to append after # the vector search and score projection. # # @return [ Array ] matching documents, each with # a populated +vector_search_score+ attribute. - def vector_search(vector, index: nil, path: nil, limit: 10, num_candidates: nil, filter: nil, pipeline: []) # rubocop:disable Metrics/ParameterLists + def vector_search(vector, index: nil, path: nil, limit: 10, num_candidates: nil, exact: false, filter: nil, pipeline: []) # rubocop:disable Metrics/ParameterLists resolved_index, resolved_path = resolve_vector_index(index, path) - num_candidates ||= limit * 10 vs_options = { 'index' => resolved_index, 'path' => resolved_path, 'queryVector' => vector, - 'numCandidates' => num_candidates, 'limit' => limit } + vs_options['numCandidates'] = num_candidates || (limit * 10) unless exact vs_options['filter'] = filter if filter agg_pipeline = [ @@ -374,6 +388,21 @@ def auto_embed_search(text, index: nil, path: nil, limit: 10, num_candidates: ni private + # Validates the vector index definition, raising ArgumentError for + # combinations that MongoDB does not support. + # + # @param [ Hash ] defn The vector search index definition. + def validate_vector_index_definition!(defn) + fields = defn[:fields] || defn['fields'] || [] + fields.each do |field| + method = field[:indexingMethod] || field['indexingMethod'] + next unless method.to_s == 'flat' + next unless field[:hnswOptions] || field['hnswOptions'] + + raise ArgumentError, 'hnswOptions is only supported with indexingMethod: hnsw' + end + end + # Retrieves the index records for the indexes with the given names. # # @param [ Array ] names the index names to query diff --git a/spec/mongoid/search_indexable_spec.rb b/spec/mongoid/search_indexable_spec.rb index 6a264dea99..1c6e94cc54 100644 --- a/spec/mongoid/search_indexable_spec.rb +++ b/spec/mongoid/search_indexable_spec.rb @@ -309,6 +309,148 @@ def filter_results(result, names) end end + describe '.vector_search_index with flat indexingMethod' do + it 'raises ArgumentError when flat index includes hnswOptions' do + expect do + Class.new do + include Mongoid::Document + + store_in collection: BSON::ObjectId.new.to_s + vector_search_index fields: [ + { + type: 'vector', + path: 'embedding', + numDimensions: 3, + similarity: 'cosine', + indexingMethod: 'flat', + hnswOptions: { m: 16, efConstruction: 150 } + } + ] + end + end.to raise_error(ArgumentError, /hnswOptions is only supported with indexingMethod: hnsw/) + end + + it 'does not raise when flat index has no hnswOptions' do + expect do + Class.new do + include Mongoid::Document + + store_in collection: BSON::ObjectId.new.to_s + vector_search_index fields: [ + { + type: 'vector', + path: 'embedding', + numDimensions: 3, + similarity: 'cosine', + indexingMethod: 'flat' + } + ] + end + end.not_to raise_error + end + + it 'does not raise when hnsw index has hnswOptions' do + expect do + Class.new do + include Mongoid::Document + + store_in collection: BSON::ObjectId.new.to_s + vector_search_index fields: [ + { + type: 'vector', + path: 'embedding', + numDimensions: 3, + similarity: 'cosine', + indexingMethod: 'hnsw', + hnswOptions: { m: 16, efConstruction: 150 } + } + ] + end + end.not_to raise_error + end + end + + describe '.vector_search pipeline construction' do + let(:model) do + Class.new do + include Mongoid::Document + + store_in collection: BSON::ObjectId.new.to_s + field :embedding, type: Array + vector_search_index fields: [ { type: 'vector', path: 'embedding', numDimensions: 3, similarity: 'cosine' } ] + end + end + + let(:fake_collection) { instance_double(Mongo::Collection) } + let(:fake_cursor) { double(map: []) } + + before do + allow(model).to receive(:collection).and_return(fake_collection) + allow(fake_collection).to receive(:aggregate).and_return(fake_cursor) + end + + it 'includes numCandidates by default' do + expect(fake_collection).to receive(:aggregate) do |pipeline| + vs = pipeline.find { |s| s['$vectorSearch'] } + expect(vs['$vectorSearch']).to have_key('numCandidates') + fake_cursor + end + + model.vector_search([ 0.1, 0.2, 0.3 ], limit: 5) + end + + it 'omits numCandidates when exact: true' do + expect(fake_collection).to receive(:aggregate) do |pipeline| + vs = pipeline.find { |s| s['$vectorSearch'] } + expect(vs['$vectorSearch']).not_to have_key('numCandidates') + fake_cursor + end + + model.vector_search([ 0.1, 0.2, 0.3 ], exact: true) + end + + it 'uses limit * 10 as the default numCandidates' do + expect(fake_collection).to receive(:aggregate) do |pipeline| + vs = pipeline.find { |s| s['$vectorSearch'] } + expect(vs['$vectorSearch']['numCandidates']).to eq(50) + fake_cursor + end + + model.vector_search([ 0.1, 0.2, 0.3 ], limit: 5) + end + end + + describe '#vector_search pipeline construction' do + let(:model) do + Class.new do + include Mongoid::Document + + store_in collection: BSON::ObjectId.new.to_s + field :embedding, type: Array + vector_search_index fields: [ { type: 'vector', path: 'embedding', numDimensions: 3, similarity: 'cosine' } ] + end + end + + let(:fake_collection) { instance_double(Mongo::Collection) } + let(:fake_cursor) { double(map: []) } + let(:doc) { model.new(embedding: [ 0.1, 0.2, 0.3 ]) } + + before do + allow(model).to receive(:collection).and_return(fake_collection) + allow(fake_collection).to receive(:aggregate).and_return(fake_cursor) + end + + it 'omits numCandidates when exact: true' do + expect(fake_collection).to receive(:aggregate) do |pipeline| + vs = pipeline.find { |s| s['$vectorSearch'] } + expect(vs['$vectorSearch']).not_to have_key('numCandidates') + fake_cursor + end + + doc.vector_search(exact: true) + end + end + # Atlas integration tests — skipped when ATLAS_URI is not set. context 'Atlas integration' do From ce119e46c8a85ff42cfcc930336fc52fb6b1655e Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 26 May 2026 16:42:23 -0600 Subject: [PATCH 2/2] MONGOID-5924 fix exact search and add missing YARD param - Send exact: true to the $vectorSearch stage when exact: true is passed, matching the auto_embed_search behavior; previously only numCandidates was being omitted but the exact flag was never forwarded to MongoDB. - Add missing @param entry for exact: on the instance-level vector_search YARD block. --- lib/mongoid/search_indexable.rb | 4 ++++ spec/mongoid/search_indexable_spec.rb | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/mongoid/search_indexable.rb b/lib/mongoid/search_indexable.rb index 9491368d77..a109348427 100644 --- a/lib/mongoid/search_indexable.rb +++ b/lib/mongoid/search_indexable.rb @@ -69,6 +69,9 @@ def ready? # @param [ Integer ] limit The maximum number of results (default: 10). # @param [ Integer | nil ] num_candidates The number of candidates to # consider during the ANN search; defaults to limit * 10. + # @param [ true | false ] exact Use exact nearest-neighbor (ENN) search + # instead of ANN (default: false). When true, numCandidates is omitted. + # Required when using a flat vector search index. # @param [ Hash | nil ] filter An optional MongoDB filter to pre-filter # candidates before scoring. # @param [ Array ] pipeline Additional aggregation stages to append after @@ -320,6 +323,7 @@ def vector_search(vector, index: nil, path: nil, limit: 10, num_candidates: nil, 'limit' => limit } vs_options['numCandidates'] = num_candidates || (limit * 10) unless exact + vs_options['exact'] = true if exact vs_options['filter'] = filter if filter agg_pipeline = [ diff --git a/spec/mongoid/search_indexable_spec.rb b/spec/mongoid/search_indexable_spec.rb index 1c6e94cc54..db4093cdbb 100644 --- a/spec/mongoid/search_indexable_spec.rb +++ b/spec/mongoid/search_indexable_spec.rb @@ -399,10 +399,11 @@ def filter_results(result, names) model.vector_search([ 0.1, 0.2, 0.3 ], limit: 5) end - it 'omits numCandidates when exact: true' do + it 'omits numCandidates and sends exact: true when exact: true' do expect(fake_collection).to receive(:aggregate) do |pipeline| vs = pipeline.find { |s| s['$vectorSearch'] } expect(vs['$vectorSearch']).not_to have_key('numCandidates') + expect(vs['$vectorSearch']['exact']).to be true fake_cursor end @@ -440,10 +441,11 @@ def filter_results(result, names) allow(fake_collection).to receive(:aggregate).and_return(fake_cursor) end - it 'omits numCandidates when exact: true' do + it 'omits numCandidates and sends exact: true when exact: true' do expect(fake_collection).to receive(:aggregate) do |pipeline| vs = pipeline.find { |s| s['$vectorSearch'] } expect(vs['$vectorSearch']).not_to have_key('numCandidates') + expect(vs['$vectorSearch']['exact']).to be true fake_cursor end