const (
	// DEFAULT_SIZE is the number of bits in a filter created by
	// NewBloomFilter: 1<<25 (33,554,432) bits. It must remain a power of two
	// because simplehash.hash reduces its result with a bit mask.
	DEFAULT_SIZE = 1 << 25

	// wordBits is the number of bits held by one element of BloomFilter.set.
	wordBits = 64
)

// seeds holds one multiplier per hash function. The length of this array
// determines how many bits are set and tested for every value.
var seeds = [...]uint{7, 11, 13, 31, 37, 61}

// simplehash is a multiplicative string hash whose output is reduced to the
// range [0, cap).
type simplehash struct {
	// cap is the size of the output range; it must be a power of two.
	cap uint
	// seed is the multiplier applied to the running hash before each byte.
	seed uint
}

// BloomFilter is a fixed-size Bloom filter over strings. A non-empty value
// that has been added is always reported as present; a value that was never
// added may occasionally be reported as present too (a false positive).
//
// Create instances with NewBloomFilter; the zero value has no bit storage.
// The filter performs no locking of its own.
type BloomFilter struct {
	// set is the bit array, packed wordBits bits per element.
	set []uint64
	// funcs holds one hash function per entry in seeds.
	funcs [len(seeds)]simplehash
}

// NewBloomFilter returns an empty filter with DEFAULT_SIZE bits and one hash
// function for each entry in seeds.
func NewBloomFilter() *BloomFilter {
	bf := &BloomFilter{set: make([]uint64, DEFAULT_SIZE/wordBits)}
	for i := range seeds {
		bf.funcs[i] = simplehash{cap: DEFAULT_SIZE, seed: seeds[i]}
	}
	return bf
}

// Add records value in the filter. The empty string is ignored: Contains
// never reports it, and every hash maps it to bit 0, so storing it would only
// raise the false-positive rate for other values.
func (bf *BloomFilter) Add(value string) {
	if value == "" {
		return
	}
	for i := range bf.funcs {
		pos := bf.funcs[i].hash(value)
		bf.set[pos/wordBits] |= uint64(1) << (pos % wordBits)
	}
}

// Contains reports whether value may have been added. A false result is
// definitive for non-empty values; a true result can be a false positive.
// The empty string is never reported as present.
func (bf *BloomFilter) Contains(value string) bool {
	if value == "" {
		return false
	}
	for i := range bf.funcs {
		pos := bf.funcs[i].hash(value)
		if bf.set[pos/wordBits]&(uint64(1)<<(pos%wordBits)) == 0 {
			// One unset bit proves the value was never added.
			return false
		}
	}
	return true
}

// hash folds the bytes of value into a running product-sum using s.seed as
// the multiplier and returns the result masked to [0, s.cap). The unsigned
// arithmetic wraps on overflow, and the mask is only a correct reduction when
// s.cap is a power of two.
func (s simplehash) hash(value string) uint {
	var result uint
	for i := 0; i < len(value); i++ {
		result = result*s.seed + uint(value[i])
	}
	return (s.cap - 1) & result
}