1
0
mirror of https://github.com/charlienet/go-mixed.git synced 2025-07-18 00:22:41 +08:00

添加存储

This commit is contained in:
2022-10-10 11:07:59 +08:00
parent f043d2e5a7
commit 716a199c9b
8 changed files with 323 additions and 62 deletions

View File

@ -1,92 +1,113 @@
package bloom package bloom
import ( import (
"github.com/bits-and-blooms/bitset" "math"
"github.com/charlienet/go-mixed/locker"
"github.com/charlienet/go-mixed/bytesconv"
"github.com/charlienet/go-mixed/expr"
"github.com/charlienet/go-mixed/hash"
"github.com/go-redis/redis/v8"
) )
const DEFAULT_SIZE = 2 << 24 const DEFAULT_SIZE = 2 << 24
var seeds = []uint{7, 11, 13, 31, 37, 61} var seeds = []uint{7, 11, 13, 31, 37, 61}
type simplehash struct { type bitStore interface {
cap uint Clear()
seed uint Set(pos ...uint) error
Test(pos ...uint) (bool, error)
} }
type BloomFilter struct { type BloomFilter struct {
size int // 布隆过滤器大小 bits uint // 布隆过滤器大小
set *bitset.BitSet // 位图 funcs uint // 哈希函数数量
funcs [6]simplehash // 哈希函数 store bitStore // 位图存储
lock locker.RWLocker
} }
type bloomOptions struct { type bloomOptions struct {
Size int redisClient *redis.Client
redisKey string
} }
type option func(*bloomOptions) type option func(*bloomOptions)
// 布隆过滤器中所有位长度,请根据存储数量进行评估 func WithRedis(redis *redis.Client, key string) option {
func WithSize(size int) option {
return func(bo *bloomOptions) { return func(bo *bloomOptions) {
bo.Size = size bo.redisClient = redis
bo.redisKey = key
} }
} }
func NewBloomFilter(opts ...option) *BloomFilter { // 初始化布隆过滤器
opt := &bloomOptions{ // https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
Size: DEFAULT_SIZE, func NewBloomFilter(expectedInsertions uint, fpp float64, opts ...option) *BloomFilter {
} opt := &bloomOptions{}
for _, f := range opts { for _, f := range opts {
f(opt) f(opt)
} }
bits := optimalNumOfBits(expectedInsertions, fpp)
k := optimalNumOfHashFunctions(bits, expectedInsertions)
bf := &BloomFilter{ bf := &BloomFilter{
size: opt.Size, bits: bits,
lock: locker.NewRWLocker(), funcs: k,
store: expr.If[bitStore](
opt.redisClient == nil,
newMemStore(bits),
newRedisStore(opt.redisClient, opt.redisKey, bits)),
} }
for i := 0; i < len(bf.funcs); i++ {
bf.funcs[i] = simplehash{uint(opt.Size), seeds[i]}
}
bf.set = bitset.New(uint(opt.Size))
return bf return bf
} }
func (bf *BloomFilter) Add(value string) { func (bf *BloomFilter) Add(data string) {
funcs := bf.funcs[:] offsets := bf.geOffsets([]byte(data))
bf.store.Set(offsets...)
for _, f := range funcs {
bf.set.Set(f.hash(value))
}
} }
func (bf *BloomFilter) Contains(value string) bool { func (bf *BloomFilter) ExistString(data string) (bool, error) {
if value == "" { return bf.Exists(bytesconv.StringToBytes(data))
return false }
}
ret := true
funcs := bf.funcs[:] func (bf *BloomFilter) Exists(data []byte) (bool, error) {
for _, f := range funcs { if data == nil || len(data) == 0 {
ret = ret && bf.set.Test(f.hash(value)) return false, nil
} }
return ret offsets := bf.geOffsets(data)
isSet, err := bf.store.Test(offsets...)
if err != nil {
return false, err
}
return isSet, nil
}
func (bf *BloomFilter) geOffsets(data []byte) []uint {
offsets := make([]uint, bf.funcs)
for i := uint(0); i < bf.funcs; i++ {
offsets[i] = uint(hash.Murmur3(append(data, byte(i))) % uint64(bf.bits))
}
return offsets
} }
// 清空布隆过滤器 // 清空布隆过滤器
func (bf *BloomFilter) Clear() { func (bf *BloomFilter) Clear() {
bf.set.ClearAll() bf.store.Clear()
} }
func (s simplehash) hash(value string) uint { // 计算优化的位图长度,
var result uint = 0 // n 期望放置元素数量,
for i := 0; i < len(value); i++ { // p 预期的误判概率
result = result*s.seed + uint(value[i]) func optimalNumOfBits(n uint, p float64) uint {
} return (uint)(-float64(n) * math.Log(p) / (math.Log(2) * math.Log(2)))
return (s.cap - 1) & result }
// 计算哈希函数数量
func optimalNumOfHashFunctions(m, n uint) uint {
return uint(math.Round(float64(m) / float64(n) * math.Log(2)))
} }

View File

@ -2,17 +2,21 @@ package bloom_test
import ( import (
"fmt" "fmt"
"math"
"strconv" "strconv"
"testing" "testing"
"github.com/charlienet/go-mixed/bloom" "github.com/charlienet/go-mixed/bloom"
"github.com/charlienet/go-mixed/rand" "github.com/charlienet/go-mixed/rand"
"github.com/charlienet/go-mixed/sys" "github.com/charlienet/go-mixed/sys"
"github.com/go-redis/redis/v8"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
const ()
func TestBloom(t *testing.T) { func TestBloom(t *testing.T) {
b := bloom.NewBloomFilter() b := bloom.NewBloomFilter(1000, 0.03)
for i := 0; i < 1000000; i++ { for i := 0; i < 1000000; i++ {
b.Add(strconv.Itoa(i)) b.Add(strconv.Itoa(i))
@ -20,51 +24,94 @@ func TestBloom(t *testing.T) {
v := "6943553521463296-1635402930" v := "6943553521463296-1635402930"
t.Log(b.Contains(v)) t.Log(b.ExistString(v))
b.Add(v) b.Add(v)
t.Log(b.Contains(v)) t.Log(b.ExistString(v))
fmt.Println("过滤器中包含值:", b.Contains(strconv.Itoa(9999))) isSet, err := b.ExistString(strconv.Itoa(9999))
fmt.Println("过滤器中包含:", b.Contains("ss")) fmt.Println("过滤器中包含:", isSet, err)
isSet, err = b.ExistString("ss")
fmt.Println("过滤器中未包含:", isSet, err)
t.Log(sys.ShowMemUsage()) t.Log(sys.ShowMemUsage())
} }
func TestSize(t *testing.T) { func TestOptimize(t *testing.T) {
bloom.NewBloomFilter(bloom.WithSize(1 << 2))
expectedInsertions := 1000000 // 期望存储数据量
falseProbability := 0.00002 // 预期误差
bits := uint(float64(-expectedInsertions) * math.Log(falseProbability) / (math.Log(2) * math.Log(2)))
hashSize := uint(math.Round(float64(bits) / float64(expectedInsertions) * math.Log(2)))
t.Log(bits)
t.Log(hashSize)
}
func TestRedis(t *testing.T) {
client := redis.NewClient(&redis.Options{
Addr: "192.168.2.222:6379",
Password: "123456",
})
bf := bloom.NewBloomFilter(10000, 0.03, bloom.WithRedis(client, "bloom:test"))
for i := 0; i < 100; i++ {
bf.Add(strconv.Itoa(i))
}
for i := 0; i < 100; i++ {
isSet, err := bf.ExistString(strconv.Itoa(i))
if err != nil {
t.Fatal(err)
}
if !isSet {
t.Log(i, isSet)
}
}
for i := 101; i < 200; i++ {
isSet, err := bf.ExistString(strconv.Itoa(i))
t.Log(isSet, err)
}
} }
func TestClear(t *testing.T) { func TestClear(t *testing.T) {
bf := bloom.NewBloomFilter() bf := bloom.NewBloomFilter(1000, 0.03)
v := "abc" v := "abc"
bf.Add(v) bf.Add(v)
assert.True(t, bf.Contains(v)) isSet, _ := bf.ExistString(v)
assert.True(t, isSet)
bf.Clear() bf.Clear()
assert.False(t, bf.Contains(v)) isSet, _ = bf.ExistString(v)
assert.False(t, isSet)
} }
func TestParallel(t *testing.T) { func TestParallel(t *testing.T) {
f := bloom.NewBloomFilter() f := bloom.NewBloomFilter(1000, 0.03)
for i := 0; i < 10000; i++ { for i := 0; i < 10000; i++ {
v := rand.Hex.Generate(10) v := rand.Hex.Generate(10)
f.Add(v) f.Add(v)
assert.True(t, f.Contains(v)) isSet, _ := f.ExistString(v)
assert.True(t, isSet)
} }
} }
func BenchmarkFilter(b *testing.B) { func BenchmarkFilter(b *testing.B) {
f := bloom.NewBloomFilter() f := bloom.NewBloomFilter(1000, 0.03)
b.RunParallel(func(p *testing.PB) { b.RunParallel(func(p *testing.PB) {
for p.Next() { for p.Next() {
v := rand.Hex.Generate(10) v := rand.Hex.Generate(10)
f.Add(v) f.Add(v)
f.Contains(v) f.ExistString(v)
// assert.True(b, f.Contains(v)) // assert.True(b, f.Contains(v))

37
bloom/mem_store.go Normal file
View File

@ -0,0 +1,37 @@
package bloom
import "github.com/bits-and-blooms/bitset"
// memStore is the in-process bitStore implementation: it keeps the filter's
// bits in a bitset.BitSet.
type memStore struct {
	size uint           // number of bits the store was created with
	set  *bitset.BitSet // in-memory bitmap
}
// newMemStore returns a memStore whose bitmap is created with room for size
// bits.
func newMemStore(size uint) *memStore {
	store := new(memStore)
	store.size = size
	store.set = bitset.New(size)
	return store
}
// Clear resets every bit of the in-memory bitmap.
func (s *memStore) Clear() {
	s.set.ClearAll()
}
// Set turns on the bit at each of the given offsets. It always returns a nil
// error; the error result exists to satisfy the bitStore interface.
func (s *memStore) Set(offsets ...uint) error {
	for i := 0; i < len(offsets); i++ {
		s.set.Set(offsets[i])
	}
	return nil
}
// Test reports whether the bits at all given offsets are set. Checking stops
// at the first unset bit. The returned error is always nil; it exists to
// satisfy the bitStore interface.
func (s *memStore) Test(offsets ...uint) (bool, error) {
	allSet := true
	for i := 0; allSet && i < len(offsets); i++ {
		allSet = s.set.Test(offsets[i])
	}
	return allSet, nil
}

116
bloom/redis_store.go Normal file
View File

@ -0,0 +1,116 @@
package bloom
import (
"context"
"errors"
"strconv"
"time"
"github.com/go-redis/redis/v8"
)
const (
	// setScript is a Lua script that sets every requested bit to 1.
	// ARGV: the list of bit offsets.
	// KEYS[1]: the key the setbit calls operate on.
	// The script has no return statement.
	setScript = `
for _, offset in ipairs(ARGV) do
redis.call("setbit", KEYS[1], offset, 1)
end
`
	// testScript is a Lua script that checks whether every requested bit is 1.
	// ARGV: the list of bit offsets.
	// KEYS[1]: the key the getbit calls operate on.
	// It returns false at the first offset whose bit reads 0, and true when no
	// such offset is found.
	testScript = `
for _, offset in ipairs(ARGV) do
if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then
return false
end
end
return true
`
)
// ErrTooLargeOffset reports that a bit offset exceeds the largest offset
// allowed (the message text reads "exceeds the maximum offset").
var ErrTooLargeOffset = errors.New("超出最大偏移量")

// Compile-time check that *redisBitSet satisfies the bitStore interface.
var _ bitStore = &redisBitSet{}
// redisBitSet stores the filter's bitmap in Redis.
type redisBitSet struct {
	store *redis.Client // client used to run the bitmap scripts
	key   string        // Redis key passed to the scripts as KEYS[1]
	bits  uint          // bitmap size; offsets must be strictly below it
}
// newRedisStore returns a redisBitSet that uses the given client, keeps its
// bitmap under key, and is limited to bits bits.
func newRedisStore(store *redis.Client, key string, bits uint) *redisBitSet {
	s := new(redisBitSet)
	s.store, s.key, s.bits = store, key, bits
	return s
}
// Set turns on the bit at every given offset of the Redis-backed bitmap.
//
// The offsets are validated first: if any of them is not below the configured
// bitmap size, ErrTooLargeOffset is returned and nothing is sent to Redis.
// Otherwise all offsets are handed to a single evaluation of setScript, which
// is bounded by a 500ms timeout. Any error from that evaluation is returned
// as is.
func (s *redisBitSet) Set(offsets ...uint) error {
	args, err := s.buildOffsetArgs(offsets)
	if err != nil {
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	// setScript has no return statement, so a successful run yields a nil
	// reply, which go-redis surfaces as redis.Nil. That is not a failure.
	_, err = s.store.Eval(ctx, setScript, []string{s.key}, args).Result()
	if err != nil && !errors.Is(err, redis.Nil) {
		return err
	}
	return nil
}
// Test reports whether the bits at all given offsets are set in the
// Redis-backed bitmap.
//
// The offsets are validated first: if any of them is not below the configured
// bitmap size, ErrTooLargeOffset is returned and nothing is sent to Redis.
// Otherwise all offsets are checked by a single evaluation of testScript,
// which is bounded by a 500ms timeout. Any other evaluation error is returned
// with a false result.
func (s *redisBitSet) Test(offsets ...uint) (bool, error) {
	args, err := s.buildOffsetArgs(offsets)
	if err != nil {
		return false, err
	}

	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	resp, err := s.store.Eval(ctx, testScript, []string{s.key}, args).Result()
	switch {
	case errors.Is(err, redis.Nil):
		// testScript returns Lua false as soon as one bit reads 0 (which is
		// also what a key that does not exist yet produces); Redis sends that
		// back as a nil reply, surfaced by go-redis as redis.Nil.
		return false, nil
	case err != nil:
		return false, err
	}

	// Lua true arrives as the integer 1; any other reply counts as "not set".
	exists, ok := resp.(int64)
	return ok && exists == 1, nil
}
// Clear resets the filter by deleting the Redis key that holds the bitmap.
//
// The bitStore interface gives Clear no error result, so the deletion is best
// effort: it is bounded by a 500ms timeout, and a failure is not reported and
// leaves the stored bits as they were.
func (s *redisBitSet) Clear() {
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	// The error is dropped deliberately: Clear has no way to return it.
	_ = s.store.Del(ctx, s.key).Err()
}
// buildOffsetArgs converts offsets into their base-10 string form, in the
// same order, for use as script arguments.
//
// It returns ErrTooLargeOffset, and no arguments, as soon as it meets an
// offset that is not strictly below the store's bitmap size.
func (s *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
	args := make([]string, 0, len(offsets))
	for _, offset := range offsets {
		if offset >= s.bits {
			return nil, ErrTooLargeOffset
		}
		args = append(args, strconv.FormatUint(uint64(offset), 10))
	}
	return args, nil
}

24
bloom/redis_store_test.go Normal file
View File

@ -0,0 +1,24 @@
package bloom
import (
"testing"
"github.com/go-redis/redis/v8"
)
// TestRedisStore sets a handful of bits through a Redis-backed store and logs
// the result of testing three groups of offsets.
//
// NOTE(review): the server address and password are hard-coded, so the test
// fails at the Set call wherever that server cannot be reached.
func TestRedisStore(t *testing.T) {
	client := redis.NewClient(&redis.Options{
		Addr:     "192.168.2.222:6379",
		Password: "123456",
	})
	// Release the client's open resources when the test finishes; this also
	// runs when t.Fatal aborts the test.
	defer client.Close()

	store := newRedisStore(client, "abcdef", 10000)
	if err := store.Set(1, 2, 3, 9, 1223); err != nil {
		t.Fatal(err)
	}

	t.Log(store.Test(1))
	t.Log(store.Test(1, 2, 3))
	t.Log(store.Test(4, 5, 8))
}

View File

@ -2,9 +2,23 @@ package dateconv
import ( import (
"testing" "testing"
"time"
) )
func TestParseDuration(t *testing.T) { func TestParseDuration(t *testing.T) {
t.Log(ParseDuration("")) t.Log(ParseDuration(""))
t.Log(ParseDuration("abc")) t.Log(ParseDuration("abc"))
} }
// TestMonth logs three values derived from the current time: the current
// month number minus three, the time shifted by minus three months and plus
// one day, and midnight on the first day of that shifted month. It asserts
// nothing.
func TestMonth(t *testing.T) {
	now := time.Now()
	t.Log(int(now.Month()) - 3)

	shifted := now.AddDate(0, -3, 1)
	t.Log(shifted)

	firstOfMonth := time.Date(shifted.Year(), shifted.Month(), 1, 0, 0, 0, 0, shifted.Location())
	t.Log(firstOfMonth)
}

4
go.mod
View File

@ -3,7 +3,7 @@ module github.com/charlienet/go-mixed
go 1.18 go 1.18
require ( require (
github.com/bits-and-blooms/bitset v1.3.0 github.com/bits-and-blooms/bitset v1.3.3
github.com/cespare/xxhash/v2 v2.1.2 github.com/cespare/xxhash/v2 v2.1.2
github.com/go-playground/universal-translator v0.18.0 github.com/go-playground/universal-translator v0.18.0
github.com/json-iterator/go v1.1.12 github.com/json-iterator/go v1.1.12
@ -31,7 +31,7 @@ require (
require ( require (
github.com/allegro/bigcache/v3 v3.0.2 github.com/allegro/bigcache/v3 v3.0.2
github.com/antonfisher/nested-logrus-formatter v1.3.1 github.com/antonfisher/nested-logrus-formatter v1.3.1
github.com/coocood/freecache v1.2.1 github.com/coocood/freecache v1.2.2
github.com/dlclark/regexp2 v1.7.0 github.com/dlclark/regexp2 v1.7.0
github.com/go-redis/redis/v8 v8.11.5 github.com/go-redis/redis/v8 v8.11.5
github.com/lestrrat/go-file-rotatelogs v0.0.0-20180223000712-d3151e2a480f github.com/lestrrat/go-file-rotatelogs v0.0.0-20180223000712-d3151e2a480f

2
go.sum
View File

@ -55,6 +55,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869 h1:IPJ3dvxmJ4uczJe5YQdrYB16oTJlGSC/OyZDqUk9xX4= github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869 h1:IPJ3dvxmJ4uczJe5YQdrYB16oTJlGSC/OyZDqUk9xX4=
github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869/go.mod h1:cJ6Cj7dQo+O6GJNiMx+Pa94qKj+TG8ONdKHgMNIyyag= github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869/go.mod h1:cJ6Cj7dQo+O6GJNiMx+Pa94qKj+TG8ONdKHgMNIyyag=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/jonboulle/clockwork v0.3.0 h1:9BSCMi8C+0qdApAp4auwX0RkLGUjs956h0EkuQymUhg= github.com/jonboulle/clockwork v0.3.0 h1:9BSCMi8C+0qdApAp4auwX0RkLGUjs956h0EkuQymUhg=
github.com/jonboulle/clockwork v0.3.0/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8= github.com/jonboulle/clockwork v0.3.0/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=