diff --git a/bloom/bloom.go b/bloom/bloom.go
index 267995e..4a8a208 100644
--- a/bloom/bloom.go
+++ b/bloom/bloom.go
@@ -1,6 +1,9 @@
 package bloom
 
-import "github.com/bits-and-blooms/bitset"
+import (
+	"github.com/bits-and-blooms/bitset"
+	"github.com/charlienet/go-mixed/locker"
+)
 
 const DEFAULT_SIZE = 2 << 24
 
@@ -12,24 +15,70 @@ type simplehash struct {
 }
 
+// BloomFilter is a Bloom filter over string values backed by an
+// in-memory bit set.
 type BloomFilter struct {
-	set   *bitset.BitSet
-	funcs [6]simplehash
+	size  int             // number of bits in the filter
+	set   *bitset.BitSet  // bit set holding the filter state
+	funcs [6]simplehash   // hash functions, one per seed
+	lock  locker.RWLocker // guards set against concurrent access
 }
 
-func NewBloomFilter() *BloomFilter {
-	bf := new(BloomFilter)
-	for i := 0; i < len(bf.funcs); i++ {
-		bf.funcs[i] = simplehash{DEFAULT_SIZE, seeds[i]}
+// bloomOptions holds the settings NewBloomFilter applies.
+type bloomOptions struct {
+	Size int
+}
+
+// option adjusts a bloomOptions value before the filter is built.
+type option func(*bloomOptions)
+
+// WithSize sets the total number of bits in the filter; choose it
+// according to the number of values expected to be stored.
+func WithSize(size int) option {
+	return func(bo *bloomOptions) {
+		bo.Size = size
 	}
-	bf.set = bitset.New(DEFAULT_SIZE)
+}
+
+// NewBloomFilter builds a filter of DEFAULT_SIZE bits unless an option
+// such as WithSize overrides it. A non-positive size falls back to
+// DEFAULT_SIZE.
+func NewBloomFilter(opts ...option) *BloomFilter {
+	opt := &bloomOptions{
+		Size: DEFAULT_SIZE,
+	}
+
+	for _, f := range opts {
+		f(opt)
+	}
+
+	// uint(opt.Size) would turn a negative size into a huge bit count.
+	if opt.Size <= 0 {
+		opt.Size = DEFAULT_SIZE
+	}
+
+	bf := &BloomFilter{
+		size: opt.Size,
+		lock: locker.NewRWLocker(),
+	}
+
+	for i := 0; i < len(bf.funcs); i++ {
+		bf.funcs[i] = simplehash{uint(opt.Size), seeds[i]}
+	}
+	bf.set = bitset.New(uint(opt.Size))
 	return bf
 }
 
+// Add sets the bit selected by each hash function for value.
 func (bf *BloomFilter) Add(value string) {
 	funcs := bf.funcs[:]
+
+	// The bit set is shared, so writers take the exclusive lock.
+	bf.lock.Lock()
+	defer bf.lock.Unlock()
+
 	for _, f := range funcs {
 		bf.set.Set(f.hash(value))
 	}
 }
 
 func (bf *BloomFilter) Contains(value string) bool {
@@ -42,9 +91,23 @@ func (bf *BloomFilter) Contains(value string) bool {
+	// Readers share the lock so an Add or Clear cannot change the bit
+	// set while it is being probed.
+	bf.lock.RLock()
+	defer bf.lock.RUnlock()
+
 	for _, f := range funcs {
 		ret = ret && bf.set.Test(f.hash(value))
 	}
+
 	return ret
 }
 
+// Clear resets every bit, emptying the filter.
+func (bf *BloomFilter) Clear() {
+	bf.lock.Lock()
+	defer bf.lock.Unlock()
+
+	bf.set.ClearAll()
+}
+
 func (s simplehash) hash(value string) uint {
 	var result uint = 0
 	for i := 0; i < len(value); i++ {
diff --git a/bloom/boom_test.go b/bloom/boom_test.go
index 15e9184..6033be4 100644
--- a/bloom/boom_test.go
+++ b/bloom/boom_test.go
@@ -6,6 +6,9 @@ import (
 	"testing"
 
 	"github.com/charlienet/go-mixed/bloom"
+	"github.com/charlienet/go-mixed/rand"
+	"github.com/charlienet/go-mixed/sys"
+	"github.com/stretchr/testify/assert"
 )
 
 func TestBloom(t *testing.T) {
@@ -15,6 +18,65 @@ func TestBloom(t *testing.T) {
 		b.Add(strconv.Itoa(i))
 	}
 
-	fmt.Println(b.Contains(strconv.Itoa(9999)))
-	fmt.Println(b.Contains("ss"))
+	v := "6943553521463296-1635402930"
+
+	t.Log(b.Contains(v))
+	b.Add(v)
+	t.Log(b.Contains(v))
+
+	fmt.Println("过滤器中包含值:", b.Contains(strconv.Itoa(9999)))
+	fmt.Println("过滤器中未包含:", b.Contains("ss"))
+
+	t.Log(sys.ShowMemUsage())
+}
+
+// TestSize verifies that a filter created with a custom bit count
+// still reports a value it has just stored.
+func TestSize(t *testing.T) {
+	f := bloom.NewBloomFilter(bloom.WithSize(1 << 2))
+
+	v := "abc"
+	f.Add(v)
+	assert.True(t, f.Contains(v))
+}
+
+// TestClear verifies that Clear removes previously added values.
+func TestClear(t *testing.T) {
+	bf := bloom.NewBloomFilter()
+
+	v := "abc"
+	bf.Add(v)
+	assert.True(t, bf.Contains(v))
+
+	bf.Clear()
+	assert.False(t, bf.Contains(v))
+}
+
+// TestParallel adds random values and checks each one is reported
+// immediately afterwards. NOTE(review): despite the name, the loop runs
+// on a single goroutine.
+func TestParallel(t *testing.T) {
+	f := bloom.NewBloomFilter()
+
+	for i := 0; i < 10000; i++ {
+		v := rand.Hex.Generate(10)
+
+		f.Add(v)
+		assert.True(t, f.Contains(v))
+	}
+}
+
+// BenchmarkFilter measures concurrent Add and Contains calls on one
+// shared filter.
+func BenchmarkFilter(b *testing.B) {
+	f := bloom.NewBloomFilter()
+
+	b.RunParallel(func(p *testing.PB) {
+		for p.Next() {
+			v := rand.Hex.Generate(10)
+			f.Add(v)
+
+			f.Contains(v)
+		}
+	})
 }