Hi,
I tested pogreb out with a very simple fuzzer that I initially wrote for bigCache, with very small adaptations (which explains why the test is a bit wonky, calling it "cache", for example). Here's the program:
package main
import (
"bytes"
"context"
"fmt"
"github.com/akrylysov/pogreb"
"math"
"math/rand"
"os"
"os/signal"
"sync"
"syscall"
)
// Constants used by the fuzzer. Of these, only indexPostfix and lockPostfix
// are referenced by the code in this program (see removeAndOpen). The
// remaining names are unused here; they look like they were copied from
// pogreb's own source — TODO confirm. MaxKeys must stay as long as the "math"
// import does, since it is that import's only use.
const (
// NOTE(review): presumably the number of slots in one pogreb index bucket — confirm; unused here.
slotsPerBucket = 28
// NOTE(review): presumably the index load factor that triggers growth — confirm; unused here.
loadFactor = 0.7
// Suffix appended to the database path to name the companion file that
// removeAndOpen deletes alongside the main file.
indexPostfix = ".index"
// Suffix appended to the database path to name the second companion file
// that removeAndOpen deletes alongside the main file.
lockPostfix = ".lock"
version = 1 // file format version
// MaxKeyLength is the maximum size of a key in bytes.
MaxKeyLength = 1 << 16
// MaxValueLength is the maximum size of a value in bytes.
MaxValueLength = 1 << 30
// MaxKeys is the maximum numbers of keys in the DB.
MaxKeys = math.MaxUint32
)
// removeAndOpen deletes whatever a previous run left at path — the file
// itself plus its indexPostfix and lockPostfix companions — and then opens a
// database at path with opts.
//
// A file that does not exist is not an error. Any other removal failure is
// returned without opening the database, so the fuzzer never silently runs
// against stale files it could not clean up.
func removeAndOpen(path string, opts *pogreb.Options) (*pogreb.DB, error) {
	for _, name := range []string{path, path + indexPostfix, path + lockPostfix} {
		// os.Remove reports a *PathError that already names the operation
		// and the file, so it is returned unwrapped.
		if err := os.Remove(name); err != nil && !os.IsNotExist(err) {
			return nil, err
		}
	}
	return pogreb.Open(path, opts)
}
// fuzzDeletePutGet exercises a freshly created database at "test.db" from
// three concurrent goroutines until ctx is cancelled:
//
//   - a deleter that deletes random keys,
//   - a setter that, for a random byte r, stores key "thekey<r>" with a
//     1024-byte value in which every byte equals r, and
//   - a getter that reads random keys and verifies that any value it finds
//     obeys that key/value relationship.
//
// It panics if the database cannot be opened or closed, if Delete, Put or Get
// returns an error, or if the getter reads a value that does not match its
// key. It returns once all three goroutines have stopped.
func fuzzDeletePutGet(ctx context.Context) {
	const valueLen = 1024

	cache, err := removeAndOpen("test.db", nil)
	if err != nil {
		panic(err)
	}
	// Runs after wg.Wait below, i.e. once no worker is using the database.
	defer func() {
		if err := cache.Close(); err != nil {
			panic(err)
		}
	}()

	// randomKey picks a random byte and returns it together with its key.
	// The package-level math/rand functions are safe for concurrent use.
	randomKey := func() (byte, []byte) {
		r := byte(rand.Int())
		return r, []byte(fmt.Sprintf("thekey%d", r))
	}
	// fill overwrites every byte of buf with r, producing the value that
	// belongs to the key derived from r.
	fill := func(buf []byte, r byte) {
		for i := range buf {
			buf[i] = r
		}
	}

	var wg sync.WaitGroup

	// Deleter
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}
			_, key := randomKey()
			if err := cache.Delete(key); err != nil {
				panic(err)
			}
		}
	}()

	// Setter
	wg.Add(1)
	go func() {
		defer wg.Done()
		val := make([]byte, valueLen)
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}
			r, key := randomKey()
			fill(val, r)
			if err := cache.Put(key, val); err != nil {
				panic(err)
			}
		}
	}()

	// Getter
	wg.Add(1)
	go func() {
		defer wg.Done()
		var (
			want   = make([]byte, valueLen)
			hits   uint64
			misses uint64
		)
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}
			r, key := randomKey()
			fill(want, r)

			got, err := cache.Get(key)
			switch {
			case err != nil:
				panic(err)
			case got == nil:
				// A nil value with a nil error is how an absent key shows
				// up; counting on err alone reported every lookup as a hit.
				misses++
			case !bytes.Equal(got, want):
				panic(fmt.Sprintf("got %s ->\n %x\n expected:\n %x\n ", key, got, want))
			default:
				hits++
			}

			if total := hits + misses; total%1000000 == 0 {
				percentage := float64(100) * float64(hits) / float64(total)
				fmt.Printf("Hits %d (%.2f%%) misses %d \n", hits, percentage, misses)
			}
		}
	}()

	wg.Wait()
}
// main runs the fuzzer until the process receives SIGINT or SIGTERM, then
// cancels it and waits for it to finish before exiting.
func main() {
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(sigs)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// done is closed when fuzzDeletePutGet returns, so main can wait for the
	// workers to wind down instead of exiting while they are mid-operation.
	done := make(chan struct{})
	go func() {
		defer close(done)
		fuzzDeletePutGet(ctx)
	}()

	fmt.Println("Press ctrl-c to exit")
	<-sigs
	fmt.Println("Exiting...")
	cancel()
	<-done
}
The program has three workers:
- One that randomly deletes a key
- One that randomly writes a key, where there's a well defined correlation between key and value.
- One that randomly checks if a key/value mapping is consistent.
When I ran it, it errored out after about 4M or 5M tests:
GOROOT=/rw/usrlocal/go #gosetup
GOPATH=/home/user/go #gosetup
/rw/usrlocal/go/bin/go build -o /tmp/___go_build_fuzzer_go /home/user/go/src/github.com/akrylysov/pogreb/fuzz/fuzzer.go #gosetup
/tmp/___go_build_fuzzer_go #gosetup
Press ctrl-c to exit
Hits 1000000 (100.00%) misses 0
Hits 2000000 (100.00%) misses 0
Hits 3000000 (100.00%) misses 0
Hits 4000000 (100.00%) misses 0
Hits 5000000 (100.00%) misses 0
panic: got thekey112 ->
b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6
b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6b6
expected:
70707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070707070
707070707070707070707070707070707070707070707070
goroutine 10 [running]:
main.fuzzDeletePutGet.func3(0xc00001a650, 0x6ee480, 0xc0000601c0, 0xc00008b110)
/home/user/go/src/github.com/akrylysov/pogreb/fuzz/fuzzer.go:108 +0x656
created by main.fuzzDeletePutGet
/home/user/go/src/github.com/akrylysov/pogreb/fuzz/fuzzer.go:88 +0x17a
Looking into it a bit, I found that although the Get method is properly mutex:ed, the value it returns is in fact a slice that still points at the database's internal data, and is not copied out into a new buffer.
I hacked on a little fix:
diff --git a/db.go b/db.go
index 967bbf0..961add9 100644
--- a/db.go
+++ b/db.go
@@ -288,7 +288,12 @@ func (db *DB) Get(key []byte) ([]byte, error) {
if err != nil {
return nil, err
}
- return retValue, nil
+ var safeRetValue []byte
+ if retValue != nil{
+ safeRetValue = make([]byte, len(retValue))
+ copy(safeRetValue, retValue)
+ }
+ return safeRetValue, nil
}
// Has returns true if the DB contains the given key.
And with the attached fix, I couldn't reproduce it any longer (at least not for 10M+ tests).
The benchmarks without and with the hacky fix are:
BenchmarkGet-6 10000000 166 ns/op
BenchmarkGet-6 10000000 182 ns/op
Now, I'm not totally sure if the testcase is fair, as I'm not 100% sure what concurrency guarantees pogreb has. My test has both a setter and a deleter, so basically two writers and one reader, which might not be a supported setup? (On the other hand, I'm guessing this flaw should be reproducible even with only one writer.)