1
0
mirror of https://github.com/charlienet/go-mixed.git synced 2025-07-18 00:22:41 +08:00

添加存储

This commit is contained in:
2022-10-10 11:07:59 +08:00
parent f043d2e5a7
commit 716a199c9b
8 changed files with 323 additions and 62 deletions

View File

@ -1,92 +1,113 @@
package bloom
import (
"github.com/bits-and-blooms/bitset"
"github.com/charlienet/go-mixed/locker"
"math"
"github.com/charlienet/go-mixed/bytesconv"
"github.com/charlienet/go-mixed/expr"
"github.com/charlienet/go-mixed/hash"
"github.com/go-redis/redis/v8"
)
const DEFAULT_SIZE = 2 << 24
var seeds = []uint{7, 11, 13, 31, 37, 61}
type simplehash struct {
cap uint
seed uint
type bitStore interface {
Clear()
Set(pos ...uint) error
Test(pos ...uint) (bool, error)
}
type BloomFilter struct {
size int // 布隆过滤器大小
set *bitset.BitSet // 位图
funcs [6]simplehash // 哈希函数
lock locker.RWLocker
bits uint // 布隆过滤器大小
funcs uint // 哈希函数数量
store bitStore // 位图存储
}
type bloomOptions struct {
Size int
redisClient *redis.Client
redisKey string
}
type option func(*bloomOptions)
// 布隆过滤器中所有位长度,请根据存储数量进行评估
func WithSize(size int) option {
func WithRedis(redis *redis.Client, key string) option {
return func(bo *bloomOptions) {
bo.Size = size
bo.redisClient = redis
bo.redisKey = key
}
}
func NewBloomFilter(opts ...option) *BloomFilter {
opt := &bloomOptions{
Size: DEFAULT_SIZE,
}
// 初始化布隆过滤器
// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
func NewBloomFilter(expectedInsertions uint, fpp float64, opts ...option) *BloomFilter {
opt := &bloomOptions{}
for _, f := range opts {
f(opt)
}
bits := optimalNumOfBits(expectedInsertions, fpp)
k := optimalNumOfHashFunctions(bits, expectedInsertions)
bf := &BloomFilter{
size: opt.Size,
lock: locker.NewRWLocker(),
bits: bits,
funcs: k,
store: expr.If[bitStore](
opt.redisClient == nil,
newMemStore(bits),
newRedisStore(opt.redisClient, opt.redisKey, bits)),
}
for i := 0; i < len(bf.funcs); i++ {
bf.funcs[i] = simplehash{uint(opt.Size), seeds[i]}
}
bf.set = bitset.New(uint(opt.Size))
return bf
}
func (bf *BloomFilter) Add(value string) {
funcs := bf.funcs[:]
for _, f := range funcs {
bf.set.Set(f.hash(value))
}
func (bf *BloomFilter) Add(data string) {
offsets := bf.geOffsets([]byte(data))
bf.store.Set(offsets...)
}
func (bf *BloomFilter) Contains(value string) bool {
if value == "" {
return false
}
ret := true
func (bf *BloomFilter) ExistString(data string) (bool, error) {
return bf.Exists(bytesconv.StringToBytes(data))
}
funcs := bf.funcs[:]
for _, f := range funcs {
ret = ret && bf.set.Test(f.hash(value))
func (bf *BloomFilter) Exists(data []byte) (bool, error) {
if data == nil || len(data) == 0 {
return false, nil
}
return ret
offsets := bf.geOffsets(data)
isSet, err := bf.store.Test(offsets...)
if err != nil {
return false, err
}
return isSet, nil
}
func (bf *BloomFilter) geOffsets(data []byte) []uint {
offsets := make([]uint, bf.funcs)
for i := uint(0); i < bf.funcs; i++ {
offsets[i] = uint(hash.Murmur3(append(data, byte(i))) % uint64(bf.bits))
}
return offsets
}
// 清空布隆过滤器
func (bf *BloomFilter) Clear() {
bf.set.ClearAll()
bf.store.Clear()
}
func (s simplehash) hash(value string) uint {
var result uint = 0
for i := 0; i < len(value); i++ {
result = result*s.seed + uint(value[i])
}
return (s.cap - 1) & result
// 计算优化的位图长度,
// n 期望放置元素数量,
// p 预期的误判概率
func optimalNumOfBits(n uint, p float64) uint {
return (uint)(-float64(n) * math.Log(p) / (math.Log(2) * math.Log(2)))
}
// 计算哈希函数数量
func optimalNumOfHashFunctions(m, n uint) uint {
return uint(math.Round(float64(m) / float64(n) * math.Log(2)))
}

View File

@ -2,17 +2,21 @@ package bloom_test
import (
"fmt"
"math"
"strconv"
"testing"
"github.com/charlienet/go-mixed/bloom"
"github.com/charlienet/go-mixed/rand"
"github.com/charlienet/go-mixed/sys"
"github.com/go-redis/redis/v8"
"github.com/stretchr/testify/assert"
)
const ()
func TestBloom(t *testing.T) {
b := bloom.NewBloomFilter()
b := bloom.NewBloomFilter(1000, 0.03)
for i := 0; i < 1000000; i++ {
b.Add(strconv.Itoa(i))
@ -20,51 +24,94 @@ func TestBloom(t *testing.T) {
v := "6943553521463296-1635402930"
t.Log(b.Contains(v))
t.Log(b.ExistString(v))
b.Add(v)
t.Log(b.Contains(v))
t.Log(b.ExistString(v))
fmt.Println("过滤器中包含值:", b.Contains(strconv.Itoa(9999)))
fmt.Println("过滤器中包含:", b.Contains("ss"))
isSet, err := b.ExistString(strconv.Itoa(9999))
fmt.Println("过滤器中包含:", isSet, err)
isSet, err = b.ExistString("ss")
fmt.Println("过滤器中未包含:", isSet, err)
t.Log(sys.ShowMemUsage())
}
func TestSize(t *testing.T) {
bloom.NewBloomFilter(bloom.WithSize(1 << 2))
func TestOptimize(t *testing.T) {
expectedInsertions := 1000000 // 期望存储数据量
falseProbability := 0.00002 // 预期误差
bits := uint(float64(-expectedInsertions) * math.Log(falseProbability) / (math.Log(2) * math.Log(2)))
hashSize := uint(math.Round(float64(bits) / float64(expectedInsertions) * math.Log(2)))
t.Log(bits)
t.Log(hashSize)
}
func TestRedis(t *testing.T) {
client := redis.NewClient(&redis.Options{
Addr: "192.168.2.222:6379",
Password: "123456",
})
bf := bloom.NewBloomFilter(10000, 0.03, bloom.WithRedis(client, "bloom:test"))
for i := 0; i < 100; i++ {
bf.Add(strconv.Itoa(i))
}
for i := 0; i < 100; i++ {
isSet, err := bf.ExistString(strconv.Itoa(i))
if err != nil {
t.Fatal(err)
}
if !isSet {
t.Log(i, isSet)
}
}
for i := 101; i < 200; i++ {
isSet, err := bf.ExistString(strconv.Itoa(i))
t.Log(isSet, err)
}
}
func TestClear(t *testing.T) {
bf := bloom.NewBloomFilter()
bf := bloom.NewBloomFilter(1000, 0.03)
v := "abc"
bf.Add(v)
assert.True(t, bf.Contains(v))
isSet, _ := bf.ExistString(v)
assert.True(t, isSet)
bf.Clear()
assert.False(t, bf.Contains(v))
isSet, _ = bf.ExistString(v)
assert.False(t, isSet)
}
func TestParallel(t *testing.T) {
f := bloom.NewBloomFilter()
f := bloom.NewBloomFilter(1000, 0.03)
for i := 0; i < 10000; i++ {
v := rand.Hex.Generate(10)
f.Add(v)
assert.True(t, f.Contains(v))
isSet, _ := f.ExistString(v)
assert.True(t, isSet)
}
}
func BenchmarkFilter(b *testing.B) {
f := bloom.NewBloomFilter()
f := bloom.NewBloomFilter(1000, 0.03)
b.RunParallel(func(p *testing.PB) {
for p.Next() {
v := rand.Hex.Generate(10)
f.Add(v)
f.Contains(v)
f.ExistString(v)
// assert.True(b, f.Contains(v))

37
bloom/mem_store.go Normal file
View File

@ -0,0 +1,37 @@
package bloom
import "github.com/bits-and-blooms/bitset"
type memStore struct {
size uint
set *bitset.BitSet // 内存位图
}
func newMemStore(size uint) *memStore {
return &memStore{
size: size,
set: bitset.New(size),
}
}
func (s *memStore) Clear() {
s.set.ClearAll()
}
func (s *memStore) Set(offsets ...uint) error {
for _, p := range offsets {
s.set.Set(p)
}
return nil
}
func (s *memStore) Test(offsets ...uint) (bool, error) {
for _, p := range offsets {
if !s.set.Test(p) {
return false, nil
}
}
return true, nil
}

116
bloom/redis_store.go Normal file
View File

@ -0,0 +1,116 @@
package bloom
import (
"context"
"errors"
"strconv"
"time"
"github.com/go-redis/redis/v8"
)
const (
// ARGV:偏移量offset数组
// KYES[1]: setbit操作的key
// 全部设置为1
setScript = `
for _, offset in ipairs(ARGV) do
redis.call("setbit", KEYS[1], offset, 1)
end
`
//ARGV:偏移量offset数组
//KYES[1]: setbit操作的key
//检查是否全部为1
testScript = `
for _, offset in ipairs(ARGV) do
if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then
return false
end
end
return true
`
)
var ErrTooLargeOffset = errors.New("超出最大偏移量")
var _ bitStore = &redisBitSet{}
// 使用Redis存储位图
type redisBitSet struct {
store *redis.Client
key string
bits uint
}
func newRedisStore(store *redis.Client, key string, bits uint) *redisBitSet {
return &redisBitSet{
store: store,
key: key,
bits: bits,
}
}
func (s *redisBitSet) Set(offsets ...uint) error {
args, err := s.buildOffsetArgs(offsets)
if err != nil {
return err
}
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500)
defer cancel()
_, err = s.store.Eval(ctx, setScript, []string{s.key}, args).Result()
//底层使用的是go-redis,redis.Nil表示操作的key不存在
//需要针对key不存在的情况特殊判断
if err == redis.Nil {
return nil
} else if err != nil {
return err
}
return nil
}
func (s *redisBitSet) Test(offsets ...uint) (bool, error) {
args, err := s.buildOffsetArgs(offsets)
if err != nil {
return false, err
}
ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500)
defer cancel()
resp, err := s.store.Eval(ctx, testScript, []string{s.key}, args).Result()
// key 不存在,表示还未存放任何数据
if err == redis.Nil {
return false, nil
} else if err != nil {
return false, err
}
exists, ok := resp.(int64)
if !ok {
return false, nil
}
return exists == 1, nil
}
func (s *redisBitSet) Clear() {
}
func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
args := make([]string, 0, len(offsets))
for _, offset := range offsets {
if offset >= r.bits {
return nil, ErrTooLargeOffset
}
args = append(args, strconv.FormatUint(uint64(offset), 10))
}
return args, nil
}

24
bloom/redis_store_test.go Normal file
View File

@ -0,0 +1,24 @@
package bloom
import (
"testing"
"github.com/go-redis/redis/v8"
)
func TestRedisStore(t *testing.T) {
client := redis.NewClient(&redis.Options{
Addr: "192.168.2.222:6379",
Password: "123456",
})
store := newRedisStore(client, "abcdef", 10000)
err := store.Set(1, 2, 3, 9, 1223)
if err != nil {
t.Fatal(err)
}
t.Log(store.Test(1))
t.Log(store.Test(1, 2, 3))
t.Log(store.Test(4, 5, 8))
}

View File

@ -2,9 +2,23 @@ package dateconv
import (
"testing"
"time"
)
func TestParseDuration(t *testing.T) {
t.Log(ParseDuration(""))
t.Log(ParseDuration("abc"))
}
func TestMonth(t *testing.T) {
month := time.Now()
offset := (int(month.Month()) - 3)
t.Log(offset)
month = month.AddDate(0, -3, 1)
t.Log(month)
tt := time.Date(month.Year(), month.Month(), 1, 0, 0, 0, 0, month.Location())
t.Log(tt)
}

4
go.mod
View File

@ -3,7 +3,7 @@ module github.com/charlienet/go-mixed
go 1.18
require (
github.com/bits-and-blooms/bitset v1.3.0
github.com/bits-and-blooms/bitset v1.3.3
github.com/cespare/xxhash/v2 v2.1.2
github.com/go-playground/universal-translator v0.18.0
github.com/json-iterator/go v1.1.12
@ -31,7 +31,7 @@ require (
require (
github.com/allegro/bigcache/v3 v3.0.2
github.com/antonfisher/nested-logrus-formatter v1.3.1
github.com/coocood/freecache v1.2.1
github.com/coocood/freecache v1.2.2
github.com/dlclark/regexp2 v1.7.0
github.com/go-redis/redis/v8 v8.11.5
github.com/lestrrat/go-file-rotatelogs v0.0.0-20180223000712-d3151e2a480f

2
go.sum
View File

@ -55,6 +55,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869 h1:IPJ3dvxmJ4uczJe5YQdrYB16oTJlGSC/OyZDqUk9xX4=
github.com/jehiah/go-strftime v0.0.0-20171201141054-1d33003b3869/go.mod h1:cJ6Cj7dQo+O6GJNiMx+Pa94qKj+TG8ONdKHgMNIyyag=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/jonboulle/clockwork v0.3.0 h1:9BSCMi8C+0qdApAp4auwX0RkLGUjs956h0EkuQymUhg=
github.com/jonboulle/clockwork v0.3.0/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=