
Go Commentary #17: Leveraging benchstat Projections in Go Benchmark Analysis and Go Plan9 Memo on Speeding Up Calculations by 450%

Leveraging benchstat Projections in Go Benchmark Analysis!

Context: benchstat (golang.org/x/perf/cmd/benchstat) is the standard tool for summarizing and comparing Go benchmark results. Its projection flags (-row, -col, -filter) let a single result file be sliced across sub-benchmark dimensions, instead of maintaining one result file per comparison.

Example:

  export bench=allcases && go test \
  -run '^$' -bench '^BenchmarkFoo' \
  -benchtime 5s -count 6 -cpu 2 -benchmem -timeout 999m \
  | tee ${bench}.txt 

The benchmark itself (package across_cases) nests b.Run sub-benchmarks, one level per dimension:

package across_cases

// ...

/*
  export bench=allcases && go test \
    -run '^$' -bench '^BenchmarkEncode' \
    -benchtime 5s -count 6 -cpu 2 -benchmem -timeout 999m \
  | tee ${bench}.txt
*/
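// Each case dimension is encoded in the sub-benchmark name as key=value
// (sample=, compression=, proto=, encoder=), which is exactly what the
// benchstat -row/-col/-filter projections further down key on.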
func BenchmarkEncode(b *testing.B) {
  for _, sampleCase := range sampleCases {
    b.Run(fmt.Sprintf("sample=%v", sampleCase.samples), func(b *testing.B) {
      for _, compr := range compressionCases {
        b.Run(fmt.Sprintf("compression=%v", compr.name()), func(b *testing.B) {
          for _, protoCase := range protoCases {
            b.Run(fmt.Sprintf("proto=%v", protoCase.name), func(b *testing.B) {
              for _, marshaller := range marshallers {
                b.Run(fmt.Sprintf("encoder=%v", marshaller.name()), func(b *testing.B) {
                  msg := protoCase.msgFromConfigFn(sampleCase.config)

                  b.ReportAllocs()
                  b.ResetTimer()
                  for i := 0; i < b.N; i++ {
                    out, err := marshaller.marshal(msg)
                    testutil.Ok(b, err)

                    out = compr.compress(out)
                    b.ReportMetric(float64(len(out)), "bytes/message")
                  }
                })
              }
            })
          }
        })
      }
    })
  }
}

var (
  sampleCases = []struct {
    samples int
    config  utils.GenerateConfig
  }{
    {samples: 200, config: generateConfig200samples},
    {samples: 2000, config: generateConfig2000samples},
    {samples: 10000, config: generateConfig10000samples},
  }
  compressionCases = []*compressor{
    newCompressor(""),
    newCompressor(remote.SnappyBlockCompression),
    newCompressor("zstd"),
  }
  protoCases = []struct {
    name            string
    msgFromConfigFn func(config utils.GenerateConfig) vtprotobufEnhancedMessage
  }{
    {
      name: "prometheus.WriteRequest",
      msgFromConfigFn: func(config utils.GenerateConfig) vtprotobufEnhancedMessage {
        return utils.ToV1(utils.GeneratePrometheusMetricsBatch(config), true, true)
      },
    },
    {
      name: "io.prometheus.write.v2.Request",
      msgFromConfigFn: func(config utils.GenerateConfig) vtprotobufEnhancedMessage {
        return utils.ToV2(utils.ConvertClassicToCustom(utils.GeneratePrometheusMetricsBatch(config)))
      },
    },
  }
  marshallers = []*marshaller{
    newMarshaller("protobuf"), newMarshaller("vtprotobuf"),
  }
)

benchstat -row ".name /sample /compression /encoder" -filter "/compression:zstd /encoder:protobuf" -col /proto allcases.txt
goos: darwin
goarch: arm64
pkg: go-microbenchmarks-benchstat/across_cases
                          │ prometheus.WriteRequest │   io.prometheus.write.v2.Request   │
                          │         sec/op          │   sec/op     vs base               │
Encode 200 zstd protobuf                 268.8µ ± 2%   103.3µ ± 7%  -61.57% (p=0.002 n=6)
Encode 2000 zstd protobuf               2671.4µ ± 5%   877.4µ ± 4%  -67.16% (p=0.002 n=6)
Encode 10000 zstd protobuf              12.834m ± 2%   3.059m ± 8%  -76.16% (p=0.002 n=6)
geomean                                  2.097m        652.1µ       -68.90%

                          │ prometheus.WriteRequest │    io.prometheus.write.v2.Request    │
                          │      bytes/message      │ bytes/message  vs base               │
Encode 200 zstd protobuf                5.949Ki ± 0%   5.548Ki ±  0%   -6.73% (p=0.002 n=6)
Encode 2000 zstd protobuf               45.90Ki ± 0%   33.49Ki ±  0%  -27.03% (p=0.002 n=6)
Encode 10000 zstd protobuf              227.8Ki ± 1%   121.4Ki ± 25%  -46.70% (p=0.002 n=6)
geomean                                 39.62Ki        28.26Ki        -28.68%

                          │ prometheus.WriteRequest │   io.prometheus.write.v2.Request    │
                          │          B/op           │     B/op      vs base               │
Encode 200 zstd protobuf               336.00Ki ± 0%   64.00Ki ± 0%  -80.95% (p=0.002 n=6)
Encode 2000 zstd protobuf              1799.8Ki ± 1%   368.0Ki ± 0%  -79.55% (p=0.002 n=6)
Encode 10000 zstd protobuf              9.015Mi ± 2%   1.312Mi ± 0%  -85.44% (p=0.002 n=6)
geomean                                 1.732Mi        316.3Ki       -82.17%

                          │ prometheus.WriteRequest │   io.prometheus.write.v2.Request    │
                          │        allocs/op        │ allocs/op   vs base                 │
Encode 200 zstd protobuf                  2.000 ± 0%   2.000 ± 0%        ~ (p=1.000 n=6) ¹
Encode 2000 zstd protobuf                10.000 ± 0%   2.000 ± 0%  -80.00% (p=0.002 n=6)
Encode 10000 zstd protobuf               16.000 ± 0%   2.000 ± 0%  -87.50% (p=0.002 n=6)
geomean                                   6.840        2.000       -70.76%
¹ all samples are equal
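
The same allcases.txt can be re-projected along any other dimension without re-running the benchmarks. For example, a hypothetical variant that compares compression schemes instead (same flag syntax as the command above; the row/column choice here is only an illustration):

benchstat -row ".name /sample /proto /encoder" -filter "/encoder:protobuf" -col /compression allcases.txt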

Limitations:

Conclusion: with key=value sub-benchmark names, one benchmark run plus benchstat projections replaces a pile of ad-hoc comparison files. The projection above also shows io.prometheus.write.v2.Request encoding roughly 62-76% faster, with about 80-85% fewer bytes allocated per op, than prometheus.WriteRequest under zstd + protobuf.

Go Plan9 Memo: Speeding Up Calculations by 450%

Context: Go ships its own assembler, whose syntax descends from Plan 9. Hot numeric loops can be written as Plan 9-style assembly functions and called from ordinary Go code; the memo uses this (with SIMD instructions) to speed up bulk calculations by roughly 450%.
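
As a minimal sketch of the mechanism (not the memo's actual code, which vectorizes whole slices): a Go file declares the signature, and an .s file implements it in Plan 9 syntax. This example assumes amd64 and a hypothetical package name fastmath:

// add.go
package fastmath

// Add is declared in Go but implemented in add_amd64.s.
func Add(a, b int64) int64

// add_amd64.s
#include "textflag.h"

// func Add(a, b int64) int64
TEXT ·Add(SB), NOSPLIT, $0-24
  MOVQ a+0(FP), AX    // load first argument
  MOVQ b+8(FP), BX    // load second argument
  ADDQ BX, AX         // AX = a + b
  MOVQ AX, ret+16(FP) // store the return value
  RET

go build assembles the .s file automatically; the memo's reported gains come from vectorized loops that process several elements per instruction rather than a single scalar ADDQ.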

Conclusion:
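
Tying the two halves together: the benchstat workflow from the first article applies directly to quantifying such an assembly speedup. A hypothetical micro-benchmark (names assumed, building on the Add sketch above):

// add_bench_test.go (hypothetical)
package fastmath

import "testing"

// addGo is the pure-Go baseline for comparison.
func addGo(a, b int64) int64 { return a + b }

/*
  go test -run '^$' -bench '^BenchmarkAdd' -count 6 | tee add.txt
  benchstat -col /impl add.txt
*/
func BenchmarkAdd(b *testing.B) {
  impls := []struct {
    name string
    fn   func(a, b int64) int64
  }{
    {name: "go", fn: addGo},
    {name: "asm", fn: Add},
  }
  for _, impl := range impls {
    b.Run("impl="+impl.name, func(b *testing.B) {
      var sink int64
      for i := 0; i < b.N; i++ {
        sink += impl.fn(int64(i), 42)
      }
      _ = sink // keep the result live so the loop is not optimized away
    })
  }
}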


https://www.bwplotka.dev/2024/go-microbenchmarks-benchstat/

https://pehringer.info/go_plan9_memo.html