diff --git a/bloom/bloom.go b/bloom/bloom.go index 4a8a208..4bf9697 100644 --- a/bloom/bloom.go +++ b/bloom/bloom.go @@ -1,92 +1,113 @@ package bloom import ( - "github.com/bits-and-blooms/bitset" - "github.com/charlienet/go-mixed/locker" + "math" + + "github.com/charlienet/go-mixed/bytesconv" + "github.com/charlienet/go-mixed/expr" + "github.com/charlienet/go-mixed/hash" + "github.com/go-redis/redis/v8" ) const DEFAULT_SIZE = 2 << 24 var seeds = []uint{7, 11, 13, 31, 37, 61} -type simplehash struct { - cap uint - seed uint +type bitStore interface { + Clear() + Set(pos ...uint) error + Test(pos ...uint) (bool, error) } type BloomFilter struct { - size int // 布隆过滤器大小 - set *bitset.BitSet // 位图 - funcs [6]simplehash // 哈希函数 - lock locker.RWLocker + bits uint // 布隆过滤器大小 + funcs uint // 哈希函数数量 + store bitStore // 位图存储 } type bloomOptions struct { - Size int + redisClient *redis.Client + redisKey string } type option func(*bloomOptions) -// 布隆过滤器中所有位长度,请根据存储数量进行评估 -func WithSize(size int) option { +func WithRedis(redis *redis.Client, key string) option { return func(bo *bloomOptions) { - bo.Size = size + bo.redisClient = redis + bo.redisKey = key } } -func NewBloomFilter(opts ...option) *BloomFilter { - opt := &bloomOptions{ - Size: DEFAULT_SIZE, - } +// 初始化布隆过滤器 +// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html +func NewBloomFilter(expectedInsertions uint, fpp float64, opts ...option) *BloomFilter { + opt := &bloomOptions{} for _, f := range opts { f(opt) } + bits := optimalNumOfBits(expectedInsertions, fpp) + k := optimalNumOfHashFunctions(bits, expectedInsertions) + bf := &BloomFilter{ - size: opt.Size, - lock: locker.NewRWLocker(), + bits: bits, + funcs: k, + store: expr.If[bitStore]( + opt.redisClient == nil, + newMemStore(bits), + newRedisStore(opt.redisClient, opt.redisKey, bits)), } - for i := 0; i < len(bf.funcs); i++ { - bf.funcs[i] = simplehash{uint(opt.Size), seeds[i]} - } - bf.set = bitset.New(uint(opt.Size)) return bf } -func (bf *BloomFilter) Add(value string) { - funcs := bf.funcs[:] - - for _, f := range funcs { - bf.set.Set(f.hash(value)) - } - +func (bf *BloomFilter) Add(data string) { + offsets := bf.geOffsets([]byte(data)) + bf.store.Set(offsets...) } -func (bf *BloomFilter) Contains(value string) bool { - if value == "" { - return false - } - ret := true +func (bf *BloomFilter) ExistString(data string) (bool, error) { + return bf.Exists(bytesconv.StringToBytes(data)) +} - funcs := bf.funcs[:] - for _, f := range funcs { - ret = ret && bf.set.Test(f.hash(value)) +func (bf *BloomFilter) Exists(data []byte) (bool, error) { + if data == nil || len(data) == 0 { + return false, nil } - return ret + offsets := bf.geOffsets(data) + isSet, err := bf.store.Test(offsets...) + if err != nil { + return false, err + } + + return isSet, nil +} + +func (bf *BloomFilter) geOffsets(data []byte) []uint { + offsets := make([]uint, bf.funcs) + for i := uint(0); i < bf.funcs; i++ { + offsets[i] = uint(hash.Murmur3(append(data, byte(i))) % uint64(bf.bits)) + } + + return offsets } // 清空布隆过滤器 func (bf *BloomFilter) Clear() { - bf.set.ClearAll() + bf.store.Clear() } -func (s simplehash) hash(value string) uint { - var result uint = 0 - for i := 0; i < len(value); i++ { - result = result*s.seed + uint(value[i]) - } - return (s.cap - 1) & result +// 计算优化的位图长度, +// n 期望放置元素数量, +// p 预期的误判概率 +func optimalNumOfBits(n uint, p float64) uint { + return (uint)(-float64(n) * math.Log(p) / (math.Log(2) * math.Log(2))) +} + +// 计算哈希函数数量 +func optimalNumOfHashFunctions(m, n uint) uint { + return uint(math.Round(float64(m) / float64(n) * math.Log(2))) } diff --git a/bloom/boom_test.go b/bloom/boom_test.go index 6033be4..9ca1dd4 100644 --- a/bloom/boom_test.go +++ b/bloom/boom_test.go @@ -2,17 +2,21 @@ package bloom_test import ( "fmt" + "math" "strconv" "testing" "github.com/charlienet/go-mixed/bloom" "github.com/charlienet/go-mixed/rand" "github.com/charlienet/go-mixed/sys" + "github.com/go-redis/redis/v8" "github.com/stretchr/testify/assert" ) +const () + func TestBloom(t *testing.T) { - b := bloom.NewBloomFilter() + b := bloom.NewBloomFilter(1000, 0.03) for i := 0; i < 1000000; i++ { b.Add(strconv.Itoa(i)) @@ -20,51 +24,94 @@ func TestBloom(t *testing.T) { v := "6943553521463296-1635402930" - t.Log(b.Contains(v)) + t.Log(b.ExistString(v)) b.Add(v) - t.Log(b.Contains(v)) + t.Log(b.ExistString(v)) - fmt.Println("过滤器中包含值:", b.Contains(strconv.Itoa(9999))) - fmt.Println("过滤器中未包含:", b.Contains("ss")) + isSet, err := b.ExistString(strconv.Itoa(9999)) + fmt.Println("过滤器中包含值:", isSet, err) + + isSet, err = b.ExistString("ss") + fmt.Println("过滤器中未包含:", isSet, err) t.Log(sys.ShowMemUsage()) } -func TestSize(t *testing.T) { - bloom.NewBloomFilter(bloom.WithSize(1 << 2)) +func TestOptimize(t *testing.T) { + + expectedInsertions := 1000000 // 期望存储数据量 + falseProbability := 0.00002 // 预期误差 + bits := uint(float64(-expectedInsertions) * math.Log(falseProbability) / (math.Log(2) * math.Log(2))) + hashSize := uint(math.Round(float64(bits) / float64(expectedInsertions) * math.Log(2))) + + t.Log(bits) + t.Log(hashSize) +} + +func TestRedis(t *testing.T) { + client := redis.NewClient(&redis.Options{ + Addr: "192.168.2.222:6379", + Password: "123456", + }) + + bf := bloom.NewBloomFilter(10000, 0.03, bloom.WithRedis(client, "bloom:test")) + + for i := 0; i < 100; i++ { + bf.Add(strconv.Itoa(i)) + } + + for i := 0; i < 100; i++ { + isSet, err := bf.ExistString(strconv.Itoa(i)) + if err != nil { + t.Fatal(err) + } + + if !isSet { + t.Log(i, isSet) + } + } + + for i := 101; i < 200; i++ { + isSet, err := bf.ExistString(strconv.Itoa(i)) + t.Log(isSet, err) + } } func TestClear(t *testing.T) { - bf := bloom.NewBloomFilter() + bf := bloom.NewBloomFilter(1000, 0.03) v := "abc" bf.Add(v) - assert.True(t, bf.Contains(v)) + isSet, _ := bf.ExistString(v) + assert.True(t, isSet) bf.Clear() - assert.False(t, bf.Contains(v)) + isSet, _ = bf.ExistString(v) + assert.False(t, isSet) } func TestParallel(t *testing.T) { - f := bloom.NewBloomFilter() + f := bloom.NewBloomFilter(1000, 0.03) for i := 0; i < 10000; i++ { v := rand.Hex.Generate(10) f.Add(v) - assert.True(t, f.Contains(v)) + isSet, _ := f.ExistString(v) + + assert.True(t, isSet) } } func BenchmarkFilter(b *testing.B) { - f := bloom.NewBloomFilter() + f := bloom.NewBloomFilter(1000, 0.03) b.RunParallel(func(p *testing.PB) { for p.Next() { v := rand.Hex.Generate(10) f.Add(v) - f.Contains(v) + f.ExistString(v) // assert.True(b, f.Contains(v)) diff --git a/bloom/mem_store.go b/bloom/mem_store.go new file mode 100644 index 0000000..30ac8b5 --- /dev/null +++ b/bloom/mem_store.go @@ -0,0 +1,37 @@ +package bloom + +import "github.com/bits-and-blooms/bitset" + +type memStore struct { + size uint + set *bitset.BitSet // 内存位图 +} + +func newMemStore(size uint) *memStore { + return &memStore{ + size: size, + set: bitset.New(size), + } +} + +func (s *memStore) Clear() { + s.set.ClearAll() +} + +func (s *memStore) Set(offsets ...uint) error { + for _, p := range offsets { + s.set.Set(p) + } + + return nil +} + +func (s *memStore) Test(offsets ...uint) (bool, error) { + for _, p := range offsets { + if !s.set.Test(p) { + return false, nil + } + } + + return true, nil +} diff --git a/bloom/redis_store.go b/bloom/redis_store.go new file mode 100644 index 0000000..c7019df --- /dev/null +++ b/bloom/redis_store.go @@ -0,0 +1,116 @@ +package bloom + +import ( + "context" + "errors" + "strconv" + "time" + + "github.com/go-redis/redis/v8" +) + +const ( + // ARGV:偏移量offset数组 + // KYES[1]: setbit操作的key + // 全部设置为1 + setScript = ` + for _, offset in ipairs(ARGV) do + redis.call("setbit", KEYS[1], offset, 1) + end + ` + + //ARGV:偏移量offset数组 + //KYES[1]: setbit操作的key + //检查是否全部为1 + testScript = ` + for _, offset in ipairs(ARGV) do + if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then + return false + end + end + return true + ` +) + +var ErrTooLargeOffset = errors.New("超出最大偏移量") + +var _ bitStore = &redisBitSet{} + +// 使用Redis存储位图 +type redisBitSet struct { + store *redis.Client + key string + bits uint +} + +func newRedisStore(store *redis.Client, key string, bits uint) *redisBitSet { + return &redisBitSet{ + store: store, + key: key, + bits: bits, + } +} + +func (s *redisBitSet) Set(offsets ...uint) error { + args, err := s.buildOffsetArgs(offsets) + if err != nil { + return err + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500) + defer cancel() + + _, err = s.store.Eval(ctx, setScript, []string{s.key}, args).Result() + + //底层使用的是go-redis,redis.Nil表示操作的key不存在 + //需要针对key不存在的情况特殊判断 + if err == redis.Nil { + return nil + } else if err != nil { + return err + } + + return nil +} + +func (s *redisBitSet) Test(offsets ...uint) (bool, error) { + args, err := s.buildOffsetArgs(offsets) + if err != nil { + return false, err + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500) + defer cancel() + + resp, err := s.store.Eval(ctx, testScript, []string{s.key}, args).Result() + + // key 不存在,表示还未存放任何数据 + if err == redis.Nil { + return false, nil + } else if err != nil { + return false, err + } + + exists, ok := resp.(int64) + if !ok { + return false, nil + } + + return exists == 1, nil +} + +func (s *redisBitSet) Clear() { + +} + +func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) { + args := make([]string, 0, len(offsets)) + for _, offset := range offsets { + if offset >= r.bits { + return nil, ErrTooLargeOffset + } + + args = append(args, strconv.FormatUint(uint64(offset), 10)) + } + return args, nil +} diff --git a/bloom/redis_store_test.go b/bloom/redis_store_test.go new file mode 100644 index 0000000..834f943 --- /dev/null +++ b/bloom/redis_store_test.go @@ -0,0 +1,24 @@ +package bloom + +import ( + "testing" + + "github.com/go-redis/redis/v8" +) + +func TestRedisStore(t *testing.T) { + client := redis.NewClient(&redis.Options{ + Addr: "192.168.2.222:6379", + Password: "123456", + }) + + store := newRedisStore(client, "abcdef", 10000) + err := store.Set(1, 2, 3, 9, 1223) + if err != nil { + t.Fatal(err) + } + + t.Log(store.Test(1)) + t.Log(store.Test(1, 2, 3)) + t.Log(store.Test(4, 5, 8)) +} diff --git a/dateconv/duration_test.go b/dateconv/duration_test.go index 770bbc1..2cad87a 100644 --- a/dateconv/duration_test.go +++ b/dateconv/duration_test.go @@ -2,9 +2,23 @@ package dateconv import ( "testing" + "time" ) func TestParseDuration(t *testing.T) { t.Log(ParseDuration("")) t.Log(ParseDuration("abc")) } + +func TestMonth(t *testing.T) { + month := time.Now() + + offset := (int(month.Month()) - 3) + t.Log(offset) + + month = month.AddDate(0, -3, 1) + t.Log(month) + + tt := time.Date(month.Year(), month.Month(), 1, 0, 0, 0, 0, month.Location()) + t.Log(tt) +} diff --git a/go.mod b/go.mod index 7eac09b..d2ee6a9 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/charlienet/go-mixed go 1.18 require ( - github.com/bits-and-blooms/bitset v1.3.0 + github.com/bits-and-blooms/bitset v1.3.3 github.com/cespare/xxhash/v2 v2.1.2 github.com/go-playground/universal-translator v0.18.0 github.com/json-iterator/go v1.1.12 @@ -31,7 +31,7 @@ require ( require ( github.com/allegro/bigcache/v3 v3.0.2 github.com/antonfisher/nested-logrus-formatter v1.3.1 - github.com/coocood/freecache v1.2.1 + github.com/coocood/freecache v1.2.2 github.com/dlclark/regexp2 v1.7.0 github.com/go-redis/redis/v8 v8.11.5 github.com/lestrrat/go-file-rotatelogs v0.0.0-20180223000712-d3151e2a480f diff --git a/go.sum b/go.sum index fd86190..543a289 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869 h1:IPJ3dvxmJ4uczJe5YQdrYB16oTJlGSC/OyZDqUk9xX4= github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869/go.mod h1:cJ6Cj7dQo+O6GJNiMx+Pa94qKj+TG8ONdKHgMNIyyag= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jonboulle/clockwork v0.3.0 h1:9BSCMi8C+0qdApAp4auwX0RkLGUjs956h0EkuQymUhg= github.com/jonboulle/clockwork v0.3.0/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=