diff --git a/.gitmodules b/.gitmodules index e3b5c7b3ee..842cf707cc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,4 @@ [submodule "specifications"] path = testdata/specifications - url = https://github.com/mongodb/specifications + url = https://github.com/BorisDog/specifications + branch = benchmarks_data_update diff --git a/bson/benchmark_test.go b/bson/benchmark_test.go index 991714eb65..0f4f887839 100644 --- a/bson/benchmark_test.go +++ b/bson/benchmark_test.go @@ -7,16 +7,18 @@ package bson import ( + "archive/tar" "bytes" "compress/gzip" "encoding/json" + "errors" "fmt" "io" - "io/ioutil" "os" - "path" "sync" "testing" + + "go.mongodb.org/mongo-driver/v2/internal/require" ) var encodetestBsonD D @@ -144,56 +146,83 @@ var nestedInstance = nestedtest1{ }, } -const extendedBSONDir = "../testdata/extended_bson" - -var ( - extJSONFiles map[string]map[string]any - extJSONFilesMu sync.Mutex -) +// loadExtendedBSON returns the decoded Extended JSON document stored under the given filename in +// the extended_bson.tgz archive. The first call decompresses and decodes every regular file in the +// archive and caches the results; later calls only read that cache. Load failures are reported +// through require.FailNowf. Callers must not mutate the returned value. 
+var loadExtendedBSON = func() func(tb testing.TB, filename string) map[string]any { + const extendedBSONTGZ = "../testdata/specifications/source/benchmarking/data/extended_bson.tgz" + + var once sync.Once + var onceErr error + entryErr := make(map[string]error) + results := make(map[string]map[string]any) + + return func(tb testing.TB, filename string) map[string]any { + tb.Helper() + + once.Do(func() { + file, err := os.Open(extendedBSONTGZ) + if err != nil { + onceErr = fmt.Errorf("error opening %q: %v", extendedBSONTGZ, err) + return + } + defer func() { + _ = file.Close() + }() + + gz, err := gzip.NewReader(file) + if err != nil { + onceErr = fmt.Errorf("error creating gzip reader: %v", err) + return + } + defer func() { + _ = gz.Close() + }() + + tr := tar.NewReader(gz) + for { + hdr, err := tr.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + onceErr = fmt.Errorf("error reading tar header: %v", err) + return + } + if hdr.Typeflag != tar.TypeReg { + continue + } + data, err := io.ReadAll(tr) + if err != nil { + entryErr[hdr.Name] = fmt.Errorf("error reading tar entry: %v", err) + continue + } -// readExtJSONFile reads the GZIP-compressed extended JSON document from the given filename in the -// "extended BSON" test data directory (../testdata/extended_bson) and returns it as a -// map[string]any. It panics on any errors. 
-func readExtJSONFile(filename string) map[string]any { - extJSONFilesMu.Lock() - defer extJSONFilesMu.Unlock() - if v, ok := extJSONFiles[filename]; ok { + var v map[string]any + err = UnmarshalExtJSON(data, false, &v) + if err != nil { + entryErr[hdr.Name] = fmt.Errorf("error unmarshalling: %v", err) + continue + } + results[hdr.Name] = v + } + }) + entry := "extended_bson/" + filename + if err, ok := entryErr[entry]; ok { + require.FailNowf(tb, "failed to load benchmark data", "error loading file %q from %q: %v", filename, extendedBSONTGZ, err) + } + v, ok := results[entry] + if !ok { + if onceErr != nil { + require.FailNowf(tb, "failed to load benchmark data", "error loading file %q from %q: %v", filename, extendedBSONTGZ, onceErr) + } else { + require.FailNowf(tb, "failed to load benchmark data", "error loading file %q from %q: not found", filename, extendedBSONTGZ) + } + } return v } - filePath := path.Join(extendedBSONDir, filename) - file, err := os.Open(filePath) - if err != nil { - panic(fmt.Sprintf("error opening file %q: %s", filePath, err)) - } - defer func() { - _ = file.Close() - }() - - gz, err := gzip.NewReader(file) - if err != nil { - panic(fmt.Sprintf("error creating GZIP reader: %s", err)) - } - defer func() { - _ = gz.Close() - }() - - data, err := ioutil.ReadAll(gz) - if err != nil { - panic(fmt.Sprintf("error reading GZIP contents of file: %s", err)) - } - - var v map[string]any - err = UnmarshalExtJSON(data, false, &v) - if err != nil { - panic(fmt.Sprintf("error unmarshalling extended JSON: %s", err)) - } - - if extJSONFiles == nil { - extJSONFiles = make(map[string]map[string]any) - } - extJSONFiles[filename] = v - return v -} +}() func BenchmarkMarshal(b *testing.B) { cases := []struct { @@ -213,16 +242,16 @@ func BenchmarkMarshal(b *testing.B) { value: encodetestBsonD, }, { - desc: "deep_bson.json.gz", - value: readExtJSONFile("deep_bson.json.gz"), + desc: "deep_bson.json", + value: loadExtendedBSON(b, "deep_bson.json"), }, { - desc: 
"flat_bson.json.gz", - value: readExtJSONFile("flat_bson.json.gz"), + desc: "flat_bson.json", + value: loadExtendedBSON(b, "flat_bson.json"), }, { - desc: "full_bson.json.gz", - value: readExtJSONFile("full_bson.json.gz"), + desc: "full_bson.json", + value: loadExtendedBSON(b, "full_bson.json"), }, } @@ -314,16 +343,16 @@ func BenchmarkUnmarshal(b *testing.B) { value: nestedInstance, }, { - name: "deep_bson.json.gz", - value: readExtJSONFile("deep_bson.json.gz"), + name: "deep_bson.json", + value: loadExtendedBSON(b, "deep_bson.json"), }, { - name: "flat_bson.json.gz", - value: readExtJSONFile("flat_bson.json.gz"), + name: "flat_bson.json", + value: loadExtendedBSON(b, "flat_bson.json"), }, { - name: "full_bson.json.gz", - value: readExtJSONFile("full_bson.json.gz"), + name: "full_bson.json", + value: loadExtendedBSON(b, "full_bson.json"), }, } diff --git a/internal/cmd/benchmark/benchmark_test.go b/internal/cmd/benchmark/benchmark_test.go index 3e0ba80dce..cfd6d40b7d 100644 --- a/internal/cmd/benchmark/benchmark_test.go +++ b/internal/cmd/benchmark/benchmark_test.go @@ -209,8 +209,56 @@ func loadSourceDocument(b *testing.B, canonicalOnly bool, pathParts ...string) b return doc } +func loadBSONExtJSONFile(b *testing.B, canonicalOnly bool, source string) bson.D { + b.Helper() + + tgzPath := filepath.Join(testdataDir(b), "specifications", "source", "benchmarking", "data", "extended_bson.tgz") + + file, err := os.Open(tgzPath) + require.NoError(b, err, "failed to open %q", tgzPath) + defer file.Close() + + gz, err := gzip.NewReader(file) + require.NoError(b, err, "failed to create gzip reader") + defer gz.Close() + + tr := tar.NewReader(gz) + for { + hdr, err := tr.Next() + if errors.Is(err, io.EOF) { + break + } + + require.NoError(b, err, "failed to read tar") + + if hdr.Typeflag != tar.TypeReg { + continue + } + + if hdr.Name != bsonDataDir+"/"+source { + continue + } + + data, err := io.ReadAll(tr) + require.NoError(b, err, "failed to read tar entry %q", hdr.Name) 
+ + var doc bson.D + + err = bson.UnmarshalExtJSON(data, canonicalOnly, &doc) + require.NoError(b, err, "failed to unmarshal extended JSON from %q", hdr.Name) + + require.NotEmpty(b, doc) + + return doc + } + + b.Fatalf("file %q not found in %q", bsonDataDir+"/"+source, tgzPath) + + return nil +} + func benchmarkBSONEncoding(b *testing.B, canonicalOnly bool, source string) { - doc := loadSourceDocument(b, canonicalOnly, testdataPerfDir(b), bsonDataDir, source) + doc := loadBSONExtJSONFile(b, canonicalOnly, source) b.ResetTimer() @@ -229,7 +277,7 @@ func benchmarkBSONEncoding(b *testing.B, canonicalOnly bool, source string) { } func benchmarkBSONDecoding(b *testing.B, canonicalOnly bool, source string) { - doc := loadSourceDocument(b, canonicalOnly, testdataPerfDir(b), bsonDataDir, source) + doc := loadBSONExtJSONFile(b, canonicalOnly, source) raw, err := bson.Marshal(doc) require.NoError(b, err, "failed to encode bson data") diff --git a/testdata/extended_bson/deep_bson.json.gz b/testdata/extended_bson/deep_bson.json.gz deleted file mode 100644 index e2588ef533..0000000000 Binary files a/testdata/extended_bson/deep_bson.json.gz and /dev/null differ diff --git a/testdata/extended_bson/flat_bson.json.gz b/testdata/extended_bson/flat_bson.json.gz deleted file mode 100644 index 124148bf67..0000000000 Binary files a/testdata/extended_bson/flat_bson.json.gz and /dev/null differ diff --git a/testdata/extended_bson/full_bson.json.gz b/testdata/extended_bson/full_bson.json.gz deleted file mode 100644 index afd5b59f99..0000000000 Binary files a/testdata/extended_bson/full_bson.json.gz and /dev/null differ diff --git a/testdata/specifications b/testdata/specifications index 62028fdf28..ff30be5bf1 160000 --- a/testdata/specifications +++ b/testdata/specifications @@ -1 +1 @@ -Subproject commit 62028fdf28c7b56efe87fe2b44663eab7174c2f7 +Subproject commit ff30be5bf137a322caeb26b46c850824d185dc55