1
0
mirror of https://github.com/charlienet/go-mixed.git synced 2025-07-18 00:22:41 +08:00

添加存储

This commit is contained in:
2022-10-10 11:07:59 +08:00
parent f043d2e5a7
commit 716a199c9b
8 changed files with 323 additions and 62 deletions

View File

@ -1,92 +1,113 @@
package bloom
import (
"github.com/bits-and-blooms/bitset"
"github.com/charlienet/go-mixed/locker"
"math"
"github.com/charlienet/go-mixed/bytesconv"
"github.com/charlienet/go-mixed/expr"
"github.com/charlienet/go-mixed/hash"
"github.com/go-redis/redis/v8"
)
const DEFAULT_SIZE = 2 << 24
var seeds = []uint{7, 11, 13, 31, 37, 61}
type simplehash struct {
cap uint
seed uint
// bitStore is the bit-array backend used by BloomFilter (see the store field).
// NewBloomFilter picks between newMemStore and newRedisStore to provide it.
type bitStore interface {
// Clear takes no arguments and reports no error.
// NOTE(review): presumably resets every bit — confirm against the implementations.
Clear()
// Set receives the bit positions computed for an added element.
Set(pos ...uint) error
// Test receives the bit positions computed for a queried element.
// NOTE(review): presumably true only when all positions are set — confirm against the implementations.
Test(pos ...uint) (bool, error)
}
// BloomFilter is a Bloom filter; its fields describe the filter size, the
// hash functions and the bit storage.
// NOTE(review): `funcs` is declared twice below (array and count), so this
// listing appears to mix two revisions — confirm which fields are current.
type BloomFilter struct {
size int // size of the bloom filter
set *bitset.BitSet // bitmap
funcs [6]simplehash // hash functions
lock locker.RWLocker
bits uint // size of the bloom filter
funcs uint // number of hash functions
store bitStore // bitmap storage
}
// bloomOptions holds the settings that option functions modify before the
// filter is built.
type bloomOptions struct {
Size int // NOTE(review): size setting; confirm it is still consumed by the constructor
redisClient *redis.Client // assigned by WithRedis; compared against nil when the store is chosen
redisKey string // assigned by WithRedis; passed to newRedisStore with the client
}
// option mutates a bloomOptions; NewBloomFilter calls each supplied option in order.
type option func(*bloomOptions)
// 布隆过滤器中所有位长度,请根据存储数量进行评估
func WithSize(size int) option {
func WithRedis(redis *redis.Client, key string) option {
return func(bo *bloomOptions) {
bo.Size = size
bo.redisClient = redis
bo.redisKey = key
}
}
func NewBloomFilter(opts ...option) *BloomFilter {
opt := &bloomOptions{
Size: DEFAULT_SIZE,
}
// 初始化布隆过滤器
// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
func NewBloomFilter(expectedInsertions uint, fpp float64, opts ...option) *BloomFilter {
opt := &bloomOptions{}
for _, f := range opts {
f(opt)
}
bits := optimalNumOfBits(expectedInsertions, fpp)
k := optimalNumOfHashFunctions(bits, expectedInsertions)
bf := &BloomFilter{
size: opt.Size,
lock: locker.NewRWLocker(),
bits: bits,
funcs: k,
store: expr.If[bitStore](
opt.redisClient == nil,
newMemStore(bits),
newRedisStore(opt.redisClient, opt.redisKey, bits)),
}
for i := 0; i < len(bf.funcs); i++ {
bf.funcs[i] = simplehash{uint(opt.Size), seeds[i]}
}
bf.set = bitset.New(uint(opt.Size))
return bf
}
func (bf *BloomFilter) Add(value string) {
funcs := bf.funcs[:]
for _, f := range funcs {
bf.set.Set(f.hash(value))
}
// Add records data in the filter: the string is copied to a byte slice,
// hashed to its bit positions by geOffsets, and those positions are handed
// to the store's Set. The error returned by Set is discarded because Add
// has no error result.
func (bf *BloomFilter) Add(data string) {
	bf.store.Set(bf.geOffsets([]byte(data))...)
}
func (bf *BloomFilter) Contains(value string) bool {
if value == "" {
return false
}
ret := true
// ExistString is the string counterpart of Exists: it converts data with
// bytesconv.StringToBytes and returns whatever Exists reports for the bytes.
func (bf *BloomFilter) ExistString(data string) (bool, error) {
	raw := bytesconv.StringToBytes(data)
	return bf.Exists(raw)
}
funcs := bf.funcs[:]
for _, f := range funcs {
ret = ret && bf.set.Test(f.hash(value))
// Exists tests whether the bit positions derived from data are set in the
// store.
//
// Nil or empty data is reported as (false, nil) without touching the store.
// If the store's Test fails, the result is (false, err); otherwise the
// store's answer is returned unchanged.
func (bf *BloomFilter) Exists(data []byte) (bool, error) {
	// len of a nil slice is 0, so one check covers both nil and empty input.
	if len(data) == 0 {
		return false, nil
	}

	isSet, err := bf.store.Test(bf.geOffsets(data)...)
	if err != nil {
		return false, err
	}
	return isSet, nil
}
// geOffsets returns the bf.funcs bit positions for data.
//
// Position i is hash.Murmur3(data followed by byte(i)) reduced modulo
// bf.bits, so every hash function is the same Murmur3 applied to a
// differently suffixed input. bf.bits must be non-zero, otherwise the
// modulo panics.
func (bf *BloomFilter) geOffsets(data []byte) []uint {
	// Hash from a private buffer. append(data, ...) would write the suffix
	// into the caller's backing array whenever data has spare capacity, and
	// could allocate on every iteration otherwise.
	buf := make([]byte, len(data)+1)
	copy(buf, data)

	offsets := make([]uint, bf.funcs)
	for i := range offsets {
		buf[len(data)] = byte(i)
		offsets[i] = uint(hash.Murmur3(buf) % uint64(bf.bits))
	}
	return offsets
}
// Clear empties the bloom filter by calling ClearAll on bf.set and Clear on bf.store.
func (bf *BloomFilter) Clear() {
bf.set.ClearAll()
bf.store.Clear()
}
func (s simplehash) hash(value string) uint {
var result uint = 0
for i := 0; i < len(value); i++ {
result = result*s.seed + uint(value[i])
}
return (s.cap - 1) & result
// optimalNumOfBits computes the bit-array length for a Bloom filter.
//
//	n: expected number of inserted elements
//	p: desired false-positive probability
//
// The value is -n*ln(p)/(ln 2)^2 truncated toward zero. Degenerate inputs
// are clamped instead of being fed to an out-of-range float-to-uint
// conversion: p <= 0 or NaN is treated as the smallest positive float64,
// any result below 1 (n == 0 or p >= 1) yields 1 so the filter never has
// zero bits, and a result too large for uint yields math.MaxUint.
func optimalNumOfBits(n uint, p float64) uint {
	// math.Log(p) is -Inf for 0 and NaN for negative or NaN p.
	if !(p > 0) {
		p = math.SmallestNonzeroFloat64
	}

	m := -float64(n) * math.Log(p) / (math.Log(2) * math.Log(2))
	switch {
	case !(m >= 1): // also catches NaN
		return 1
	case m >= float64(math.MaxUint):
		return math.MaxUint
	}
	return uint(m)
}
// optimalNumOfHashFunctions computes the number of hash functions for a
// Bloom filter with m bits and n expected elements: round(m/n * ln 2).
//
// The result is never below 1. n == 0 (which would divide by zero) and
// ratios that round down to 0 both yield 1, because a filter with zero
// hash functions would set and test no bits at all.
func optimalNumOfHashFunctions(m, n uint) uint {
	if n == 0 {
		return 1
	}

	k := math.Round(float64(m) / float64(n) * math.Log(2))
	if k < 1 {
		return 1
	}
	return uint(k)
}