Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 90 additions & 61 deletions bson/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
package bson

import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"sync"
"testing"

"go.mongodb.org/mongo-driver/v2/internal/require"
)

var encodetestBsonD D
Expand Down Expand Up @@ -144,56 +146,83 @@ var nestedInstance = nestedtest1{
},
}

const extendedBSONDir = "../testdata/extended_bson"

var (
extJSONFiles map[string]map[string]any
extJSONFilesMu sync.Mutex
)
// loadExtendedBSON is a function that returns the extended BSON data for a given filename in the
// extended_bson.tgz archive. The first call to loadExtendedBSON will decompress all the JSON files
// in the archive and cache all entries; subsequent calls will only return the cache. Caller must
// not mutate the returned value.
var loadExtendedBSON = func() func(tb testing.TB, filename string) map[string]any {
const extendedBSONTGZ = "../testdata/specifications/source/benchmarking/data/extended_bson.tgz"

var once sync.Once
var onceErr error
entryErr := make(map[string]error)
results := make(map[string]map[string]any)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[blocking] These variables should global variables to avoid re-parsing the tarball.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put them in a closure to reduce global variables. They are only invoked once before initializing loadExtendedBSON with the returned function at L161. When loadExtendedBSON is called, e.g., at L246, L250, or L254, the decompressing and parsing is performed once, which is guarded by the sync.Once.Do().

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@qingyang-hu Oh I didn't see that loadExtendedBSON is acting as a package initializer. Is there a reason not to just make the variables global? The IIFE solution is hard to read. For example:

var (
    loadExtendedBSONOnce    sync.Once
    loadExtendedBSONErr     error
    loadExtendedBSONEntries map[string]map[string]any
    loadExtendedBSONEntryErrs map[string]error
)

func loadExtendedBSON(tb testing.TB, filename string) map[string]any {
    tb.Helper()
    loadExtendedBSONOnce.Do(func() {
        // ... same loading logic ...
    })
    // ... same lookup logic ...
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main reason I've picked up this pattern is to minimize the surface of global variables. I'm also open to global variables for readability.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, I think package-level variables are acceptable. We already have precedent for that pattern in the driver’s business logic, for example memoryPool. The current approach is correct, but it’s difficult to read.


return func(tb testing.TB, filename string) map[string]any {
tb.Helper()

once.Do(func() {
file, err := os.Open(extendedBSONTGZ)
if err != nil {
onceErr = fmt.Errorf("error opening %q: %v", extendedBSONTGZ, err)
return
}
defer func() {
_ = file.Close()
}()

gz, err := gzip.NewReader(file)
if err != nil {
onceErr = fmt.Errorf("error creating gzip reader: %v", err)
return
}
defer func() {
_ = gz.Close()
}()

tr := tar.NewReader(gz)
for {
hdr, err := tr.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
onceErr = fmt.Errorf("error reading tar header: %v", err)
return
}
if hdr.Typeflag != tar.TypeReg {
continue
}
data, err := io.ReadAll(tr)
if err != nil {
entryErr[hdr.Name] = fmt.Errorf("error reading tar entry: %v", err)
continue
}

// readExtJSONFile reads the GZIP-compressed extended JSON document from the given filename in the
// "extended BSON" test data directory (../testdata/extended_bson) and returns it as a
// map[string]any. It panics on any errors.
func readExtJSONFile(filename string) map[string]any {
extJSONFilesMu.Lock()
defer extJSONFilesMu.Unlock()
if v, ok := extJSONFiles[filename]; ok {
var v map[string]any
err = UnmarshalExtJSON(data, false, &v)
if err != nil {
entryErr[hdr.Name] = fmt.Errorf("error unmarshalling: %v", err)
continue
}
results[hdr.Name] = v
}
})
entry := "extended_bson/" + filename
if err, ok := entryErr[entry]; ok {
require.FailNowf(tb, "failed to load benchmark data", "error loading file %q from %q: %v", filename, extendedBSONTGZ, err)
}
v, ok := results[entry]
if !ok {
if onceErr != nil {
require.FailNowf(tb, "failed to load benchmark data", "error loading file %q from %q: %v", filename, extendedBSONTGZ, onceErr)
} else {
require.FailNowf(tb, "failed to load benchmark data", "error loading file %q from %q: not found", filename, extendedBSONTGZ)
}
}
return v
}
filePath := path.Join(extendedBSONDir, filename)
file, err := os.Open(filePath)
if err != nil {
panic(fmt.Sprintf("error opening file %q: %s", filePath, err))
}
defer func() {
_ = file.Close()
}()

gz, err := gzip.NewReader(file)
if err != nil {
panic(fmt.Sprintf("error creating GZIP reader: %s", err))
}
defer func() {
_ = gz.Close()
}()

data, err := ioutil.ReadAll(gz)
if err != nil {
panic(fmt.Sprintf("error reading GZIP contents of file: %s", err))
}

var v map[string]any
err = UnmarshalExtJSON(data, false, &v)
if err != nil {
panic(fmt.Sprintf("error unmarshalling extended JSON: %s", err))
}

if extJSONFiles == nil {
extJSONFiles = make(map[string]map[string]any)
}
extJSONFiles[filename] = v
return v
}
}()

func BenchmarkMarshal(b *testing.B) {
cases := []struct {
Expand All @@ -213,16 +242,16 @@ func BenchmarkMarshal(b *testing.B) {
value: encodetestBsonD,
},
{
desc: "deep_bson.json.gz",
value: readExtJSONFile("deep_bson.json.gz"),
desc: "deep_bson.json",
value: loadExtendedBSON(b, "deep_bson.json"),
},
{
desc: "flat_bson.json.gz",
value: readExtJSONFile("flat_bson.json.gz"),
desc: "flat_bson.json",
value: loadExtendedBSON(b, "flat_bson.json"),
},
{
desc: "full_bson.json.gz",
value: readExtJSONFile("full_bson.json.gz"),
desc: "full_bson.json",
value: loadExtendedBSON(b, "full_bson.json"),
},
}

Expand Down Expand Up @@ -314,16 +343,16 @@ func BenchmarkUnmarshal(b *testing.B) {
value: nestedInstance,
},
{
name: "deep_bson.json.gz",
value: readExtJSONFile("deep_bson.json.gz"),
name: "deep_bson.json",
value: loadExtendedBSON(b, "deep_bson.json"),
},
{
name: "flat_bson.json.gz",
value: readExtJSONFile("flat_bson.json.gz"),
name: "flat_bson.json",
value: loadExtendedBSON(b, "flat_bson.json"),
},
{
name: "full_bson.json.gz",
value: readExtJSONFile("full_bson.json.gz"),
name: "full_bson.json",
value: loadExtendedBSON(b, "full_bson.json"),
},
}

Expand Down
Binary file removed testdata/extended_bson/deep_bson.json.gz
Binary file not shown.
Binary file removed testdata/extended_bson/flat_bson.json.gz
Binary file not shown.
Binary file removed testdata/extended_bson/full_bson.json.gz
Binary file not shown.
Loading