diff --git a/cmd/dive/cli/internal/command/export/testdata/snapshots/export_test.snap b/cmd/dive/cli/internal/command/export/testdata/snapshots/export_test.snap index 454c34a9..25d6b10c 100755 --- a/cmd/dive/cli/internal/command/export/testdata/snapshots/export_test.snap +++ b/cmd/dive/cli/internal/command/export/testdata/snapshots/export_test.snap @@ -20,7 +20,7 @@ "sizeBytes": 6405 } ], - "inefficientBytes": 32025, + "inefficientBytes": 19215, "sizeBytes": 1220598 }, "layer": [ diff --git a/cmd/dive/cli/internal/ui/v1/view/image_details.go b/cmd/dive/cli/internal/ui/v1/view/image_details.go index a119ba77..ddcbaa53 100644 --- a/cmd/dive/cli/internal/ui/v1/view/image_details.go +++ b/cmd/dive/cli/internal/ui/v1/view/image_details.go @@ -86,7 +86,7 @@ func (v *ImageDetails) Render() error { var wastedSpace int64 for idx := 0; idx < len(v.inefficiencies); idx++ { data := v.inefficiencies[len(v.inefficiencies)-1-idx] - wastedSpace += data.CumulativeSize + wastedSpace += data.WastedSize() inefficiencyReport += fmt.Sprintf(analysisTemplate, strconv.Itoa(len(data.Nodes)), humanize.Bytes(uint64(data.CumulativeSize)), data.Path) } diff --git a/cmd/dive/cli/internal/ui/v1/view/image_details_regression_test.go b/cmd/dive/cli/internal/ui/v1/view/image_details_regression_test.go new file mode 100644 index 00000000..eb43bdd1 --- /dev/null +++ b/cmd/dive/cli/internal/ui/v1/view/image_details_regression_test.go @@ -0,0 +1,88 @@ +package view + +import ( + "go/ast" + "go/parser" + "go/token" + "path/filepath" + "runtime" + "testing" +) + +func imageDetailsSourcePath(t *testing.T) string { + t.Helper() + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("unable to determine test file path") + } + return filepath.Join(filepath.Dir(thisFile), "image_details.go") +} + +func imageDetailsAST(t *testing.T) *ast.File { + t.Helper() + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, imageDetailsSourcePath(t), nil, parser.ParseComments) + if err != nil { + t.Fatalf("unable to parse image_details.go: %+v", err) + } + return file +} + +func methodDeclByName(file *ast.File, recvType, methodName string) *ast.FuncDecl { + for _, decl := range file.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || fn.Recv == nil || fn.Name == nil || fn.Name.Name != methodName || len(fn.Recv.List) != 1 { + continue + } + star, ok := fn.Recv.List[0].Type.(*ast.StarExpr) + if !ok { + continue + } + ident, ok := star.X.(*ast.Ident) + if !ok { + continue + } + if ident.Name == recvType { + return fn + } + } + return nil +} + +func TestImageDetailsRenderPotentialWastedSpaceUsesRemovableBytes(t *testing.T) { + file := imageDetailsAST(t) + renderMethod := methodDeclByName(file, "ImageDetails", "Render") + if renderMethod == nil { + t.Fatal("missing ImageDetails.Render declaration") + } + + found := false + ast.Inspect(renderMethod.Body, func(node ast.Node) bool { + assign, ok := node.(*ast.AssignStmt) + if !ok || assign.Tok != token.ADD_ASSIGN || len(assign.Lhs) != 1 || len(assign.Rhs) != 1 { + return true + } + + lhs, ok := assign.Lhs[0].(*ast.Ident) + if !ok || lhs.Name != "wastedSpace" { + return true + } + + call, ok := assign.Rhs[0].(*ast.CallExpr) + if !ok { + return true + } + + sel, ok := call.Fun.(*ast.SelectorExpr) + if !ok || sel.Sel == nil || sel.Sel.Name != "WastedSize" { + return true + } + + found = true + return false + }) + + if !found { + t.Fatal("ImageDetails.Render should sum removable bytes via data.WastedSize()") + } +} diff --git a/dive/filetree/efficiency.go b/dive/filetree/efficiency.go index 02035a63..8e802529 100644 --- a/dive/filetree/efficiency.go +++ b/dive/filetree/efficiency.go @@ -14,6 +14,17 @@ type EfficiencyData struct { minDiscoveredSize int64 } +// WastedSize is the estimated reclaimable size for this path. +// Exactly one discovered copy must remain, so only bytes beyond the smallest +// discovered copy count as removable. +func (data *EfficiencyData) WastedSize() int64 { + wasted := data.CumulativeSize - data.minDiscoveredSize + if wasted < 0 { + return 0 + } + return wasted +} + // EfficiencySlice represents an ordered set of EfficiencyData data structures. type EfficiencySlice []*EfficiencyData diff --git a/dive/filetree/efficiency_test.go b/dive/filetree/efficiency_test.go index 1662efad..13b1829f 100644 --- a/dive/filetree/efficiency_test.go +++ b/dive/filetree/efficiency_test.go @@ -78,3 +78,74 @@ func TestEfficiency_ScratchImage(t *testing.T) { } } + +func TestEfficiency_ManyCopiesAndHashSizeVariation(t *testing.T) { + tests := map[string]struct { + path string + sizes []int64 + hashes []uint64 + expected int64 + expectedN int + }{ + "hundred_copies": { + path: "/dup", + sizes: func() []int64 { + vals := make([]int64, 100) + for i := range vals { + vals[i] = 10 + } + return vals + }(), + hashes: func() []uint64 { + vals := make([]uint64, 100) + for i := range vals { + vals[i] = uint64(i + 1) + } + return vals + }(), + expected: 1000, + expectedN: 100, + }, + "different_hashes_same_size": { + path: "/same-size", + sizes: []int64{100, 100, 100}, + hashes: []uint64{1, 2, 3}, + expected: 300, + expectedN: 3, + }, + "different_sizes_and_hashes": { + path: "/varying", + sizes: []int64{400, 250, 100, 100}, + hashes: []uint64{10, 20, 30, 40}, + expected: 850, + expectedN: 4, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + trees := make([]*FileTree, len(test.sizes)) + for idx := range test.sizes { + trees[idx] = NewFileTree() + _, _, err := trees[idx].AddPath(test.path, FileInfo{Size: test.sizes[idx], hash: test.hashes[idx]}) + if err != nil { + t.Fatalf("could not setup test at layer %d: %+v", idx, err) + } + } + + _, matches := Efficiency(trees) + + if len(matches) != 1 { + t.Fatalf("expected 1 inefficiency match, got %d", len(matches)) + } + + if matches[0].CumulativeSize != test.expected { + t.Fatalf("expected cumulative size %d, got %d", test.expected, matches[0].CumulativeSize) + } + + if len(matches[0].Nodes) != test.expectedN { + t.Fatalf("expected node count %d, got %d", test.expectedN, len(matches[0].Nodes)) + } + }) + } +} diff --git a/dive/image/analysis.go b/dive/image/analysis.go index d2b0f04a..eb368246 100644 --- a/dive/image/analysis.go +++ b/dive/image/analysis.go @@ -30,7 +30,7 @@ func Analyze(ctx context.Context, img *Image) (*Analysis, error) { var wastedBytes uint64 for _, file := range inefficiencies { - wastedBytes += uint64(file.CumulativeSize) + wastedBytes += uint64(file.WastedSize()) } return &Analysis{ diff --git a/dive/image/analysis_regression_test.go b/dive/image/analysis_regression_test.go new file mode 100644 index 00000000..eff9ebe2 --- /dev/null +++ b/dive/image/analysis_regression_test.go @@ -0,0 +1,134 @@ +package image + +import ( + "context" + "fmt" + "testing" + + "github.com/wagoodman/dive/dive/filetree" +) + +type regressionLayerFile struct { + path string + size int64 +} + +type regressionLayerSpec struct { + size uint64 + files []regressionLayerFile +} + +func buildRegressionImage(t *testing.T, specs []regressionLayerSpec) *Image { + t.Helper() + + trees := make([]*filetree.FileTree, len(specs)) + layers := make([]*Layer, len(specs)) + + for idx, spec := range specs { + tree := filetree.NewFileTree() + for _, file := range spec.files { + _, _, err := tree.AddPath(file.path, filetree.FileInfo{Size: file.size}) + if err != nil { + t.Fatalf("could not add path %q in layer %d: %+v", file.path, idx, err) + } + } + + trees[idx] = tree + layers[idx] = &Layer{ + Id: fmt.Sprintf("layer-%d", idx), + Index: idx, + Size: spec.size, + Tree: tree, + } + } + + return &Image{ + Request: "img:test", + Trees: trees, + Layers: layers, + } +} + +func TestAnalyzeWastedBytesCountsOnlyRemovableDuplicates(t *testing.T) { + tests := map[string]struct { + specs []regressionLayerSpec + wastedBytes uint64 + wastedPct float64 + userSizeByes uint64 + }{ + "single_copy_has_zero_waste": { + specs: []regressionLayerSpec{ + {size: 100, files: []regressionLayerFile{{path: "/dup", size: 100}}}, + {size: 50, files: []regressionLayerFile{{path: "/other", size: 50}}}, + }, + wastedBytes: 0, + wastedPct: 0, + userSizeByes: 50, + }, + "two_copies_remove_only_one": { + specs: []regressionLayerSpec{ + {size: 100, files: []regressionLayerFile{{path: "/dup", size: 100}}}, + {size: 100, files: []regressionLayerFile{{path: "/dup", size: 100}}}, + }, + wastedBytes: 100, + wastedPct: 1, + userSizeByes: 100, + }, + "ten_copies_remove_n_minus_one": { + specs: func() []regressionLayerSpec { + specs := make([]regressionLayerSpec, 10) + for i := range specs { + specs[i] = regressionLayerSpec{size: 100, files: []regressionLayerFile{{path: "/dup", size: 100}}} + } + return specs + }(), + wastedBytes: 900, + wastedPct: 1, + userSizeByes: 900, + }, + "hundred_copies_remove_n_minus_one": { + specs: func() []regressionLayerSpec { + specs := make([]regressionLayerSpec, 100) + for i := range specs { + specs[i] = regressionLayerSpec{size: 10, files: []regressionLayerFile{{path: "/dup", size: 10}}} + } + return specs + }(), + wastedBytes: 990, + wastedPct: 1, + userSizeByes: 990, + }, + "different_sizes_remove_all_but_smallest_copy": { + specs: []regressionLayerSpec{ + {size: 400, files: []regressionLayerFile{{path: "/dup", size: 400}}}, + {size: 250, files: []regressionLayerFile{{path: "/dup", size: 250}}}, + {size: 100, files: []regressionLayerFile{{path: "/dup", size: 100}}}, + {size: 100, files: []regressionLayerFile{{path: "/dup", size: 100}}}, + }, + wastedBytes: 750, + wastedPct: 750.0 / 450.0, + userSizeByes: 450, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + analysis, err := Analyze(context.Background(), buildRegressionImage(t, test.specs)) + if err != nil { + t.Fatalf("analyze failed: %+v", err) + } + + if analysis.WastedBytes != test.wastedBytes { + t.Fatalf("expected wasted bytes %d, got %d", test.wastedBytes, analysis.WastedBytes) + } + + if analysis.UserSizeByes != test.userSizeByes { + t.Fatalf("expected user size bytes %d, got %d", test.userSizeByes, analysis.UserSizeByes) + } + + if analysis.WastedUserPercent != test.wastedPct { + t.Fatalf("expected wasted percent %v, got %v", test.wastedPct, analysis.WastedUserPercent) + } + }) + } +} diff --git a/dive/image/docker/image_archive_analysis_test.go b/dive/image/docker/image_archive_analysis_test.go index afc38254..0652b8ee 100644 --- a/dive/image/docker/image_archive_analysis_test.go +++ b/dive/image/docker/image_archive_analysis_test.go @@ -14,7 +14,7 @@ func Test_Analysis(t *testing.T) { wastedPercent float64 path string }{ - "docker-image": {0.9844212134184309, 1220598, 66237, 32025, 0.4834911001404049, "../../../.data/test-docker-image.tar"}, + "docker-image": {0.9844212134184309, 1220598, 66237, 19215, 0.29009466008424295, "../../../.data/test-docker-image.tar"}, } for name, test := range table {