mirror of
https://github.com/charlienet/go-mixed.git
synced 2025-07-18 00:22:41 +08:00
添加存储
This commit is contained in:
113
bloom/bloom.go
113
bloom/bloom.go
@ -1,92 +1,113 @@
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
"github.com/charlienet/go-mixed/locker"
|
||||
"math"
|
||||
|
||||
"github.com/charlienet/go-mixed/bytesconv"
|
||||
"github.com/charlienet/go-mixed/expr"
|
||||
"github.com/charlienet/go-mixed/hash"
|
||||
"github.com/go-redis/redis/v8"
|
||||
)
|
||||
|
||||
const DEFAULT_SIZE = 2 << 24
|
||||
|
||||
var seeds = []uint{7, 11, 13, 31, 37, 61}
|
||||
|
||||
type simplehash struct {
|
||||
cap uint
|
||||
seed uint
|
||||
// bitStore is the storage abstraction a BloomFilter keeps its bits in.
// Positions are unsigned bit offsets into the underlying bitmap.
type bitStore interface {
	// Clear is invoked by BloomFilter.Clear; presumably it resets every
	// bit in the store (confirm against the concrete implementations).
	Clear()

	// Set receives the bit offsets computed for an added element and
	// reports a storage error, if any.
	Set(offsets ...uint) error

	// Test receives the bit offsets computed for a queried element. The
	// boolean is presumably true only when all offsets are set (confirm
	// against the concrete implementations); the error reports a storage
	// failure.
	Test(offsets ...uint) (bool, error)
}
|
||||
|
||||
// BloomFilter holds the state of a bloom filter.
//
// NOTE(review): the field list looks like two revisions of the struct
// interleaved (size/set/funcs [6]simplehash/lock next to bits/funcs uint/store,
// with funcs declared twice) - confirm which set of fields is current.
type BloomFilter struct {
|
||||
size int // size of the bloom filter
|
||||
set *bitset.BitSet // bitmap
|
||||
funcs [6]simplehash // hash functions
|
||||
lock locker.RWLocker
|
||||
bits uint // size of the bloom filter (used as the modulus for bit offsets)
|
||||
funcs uint // number of hash functions
|
||||
store bitStore // bitmap storage
|
||||
}
|
||||
|
||||
// bloomOptions collects the settings that option functions write into
// before a BloomFilter is constructed.
type bloomOptions struct {
|
||||
// Size is assigned by the WithSize option.
Size int
|
||||
// redisClient is assigned by the WithRedis option; the constructor
// compares it against nil when choosing the bit store.
redisClient *redis.Client
|
||||
// redisKey is assigned by the WithRedis option and passed to newRedisStore.
redisKey string
|
||||
}
|
||||
|
||||
// option is a functional option: NewBloomFilter calls each one with the
// bloomOptions value it is building.
type option func(*bloomOptions)
|
||||
|
||||
// 布隆过滤器中所有位长度,请根据存储数量进行评估
|
||||
func WithSize(size int) option {
|
||||
func WithRedis(redis *redis.Client, key string) option {
|
||||
return func(bo *bloomOptions) {
|
||||
bo.Size = size
|
||||
bo.redisClient = redis
|
||||
bo.redisKey = key
|
||||
}
|
||||
}
|
||||
|
||||
func NewBloomFilter(opts ...option) *BloomFilter {
|
||||
opt := &bloomOptions{
|
||||
Size: DEFAULT_SIZE,
|
||||
}
|
||||
// 初始化布隆过滤器
|
||||
// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
|
||||
func NewBloomFilter(expectedInsertions uint, fpp float64, opts ...option) *BloomFilter {
|
||||
opt := &bloomOptions{}
|
||||
|
||||
for _, f := range opts {
|
||||
f(opt)
|
||||
}
|
||||
|
||||
bits := optimalNumOfBits(expectedInsertions, fpp)
|
||||
k := optimalNumOfHashFunctions(bits, expectedInsertions)
|
||||
|
||||
bf := &BloomFilter{
|
||||
size: opt.Size,
|
||||
lock: locker.NewRWLocker(),
|
||||
bits: bits,
|
||||
funcs: k,
|
||||
store: expr.If[bitStore](
|
||||
opt.redisClient == nil,
|
||||
newMemStore(bits),
|
||||
newRedisStore(opt.redisClient, opt.redisKey, bits)),
|
||||
}
|
||||
|
||||
for i := 0; i < len(bf.funcs); i++ {
|
||||
bf.funcs[i] = simplehash{uint(opt.Size), seeds[i]}
|
||||
}
|
||||
bf.set = bitset.New(uint(opt.Size))
|
||||
return bf
|
||||
}
|
||||
|
||||
func (bf *BloomFilter) Add(value string) {
|
||||
funcs := bf.funcs[:]
|
||||
|
||||
for _, f := range funcs {
|
||||
bf.set.Set(f.hash(value))
|
||||
}
|
||||
|
||||
func (bf *BloomFilter) Add(data string) {
|
||||
offsets := bf.geOffsets([]byte(data))
|
||||
bf.store.Set(offsets...)
|
||||
}
|
||||
|
||||
func (bf *BloomFilter) Contains(value string) bool {
|
||||
if value == "" {
|
||||
return false
|
||||
}
|
||||
ret := true
|
||||
func (bf *BloomFilter) ExistString(data string) (bool, error) {
|
||||
return bf.Exists(bytesconv.StringToBytes(data))
|
||||
}
|
||||
|
||||
funcs := bf.funcs[:]
|
||||
for _, f := range funcs {
|
||||
ret = ret && bf.set.Test(f.hash(value))
|
||||
func (bf *BloomFilter) Exists(data []byte) (bool, error) {
|
||||
if data == nil || len(data) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return ret
|
||||
offsets := bf.geOffsets(data)
|
||||
isSet, err := bf.store.Test(offsets...)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return isSet, nil
|
||||
}
|
||||
|
||||
func (bf *BloomFilter) geOffsets(data []byte) []uint {
|
||||
offsets := make([]uint, bf.funcs)
|
||||
for i := uint(0); i < bf.funcs; i++ {
|
||||
offsets[i] = uint(hash.Murmur3(append(data, byte(i))) % uint64(bf.bits))
|
||||
}
|
||||
|
||||
return offsets
|
||||
}
|
||||
|
||||
// Clear empties the bloom filter.
|
||||
func (bf *BloomFilter) Clear() {
|
||||
// NOTE(review): this call targets the bitset field, while Add/Exists go
// through bf.store - looks like a leftover from the previous in-memory
// implementation; confirm whether it should remain.
bf.set.ClearAll()
|
||||
// Clear the backing bit store.
bf.store.Clear()
|
||||
}
|
||||
|
||||
func (s simplehash) hash(value string) uint {
|
||||
var result uint = 0
|
||||
for i := 0; i < len(value); i++ {
|
||||
result = result*s.seed + uint(value[i])
|
||||
}
|
||||
return (s.cap - 1) & result
|
||||
// optimalNumOfBits computes the bitmap length, in bits, for a bloom filter:
//
//	m = -n * ln(p) / (ln 2)^2
//
// n is the expected number of inserted elements and p is the target
// false-positive probability.
//
// Degenerate inputs are clamped so the result is always a usable modulus
// (at least 1): a non-positive or NaN p is replaced by the smallest positive
// float64, and a computed length below 1 (n == 0, p >= 1) is raised to 1.
func optimalNumOfBits(n uint, p float64) uint {
	// ln(0) is -Inf and ln of a negative number is NaN; converting either
	// result to uint is implementation-defined. Written as !(p > 0) so that
	// NaN is caught as well.
	if !(p > 0) {
		p = math.SmallestNonzeroFloat64
	}

	m := -float64(n) * math.Log(p) / (math.Log(2) * math.Log(2))

	// A zero-length bitmap would make every offset computation divide by
	// zero, so never return less than one bit.
	if !(m >= 1) {
		return 1
	}

	return uint(m)
}
|
||||
|
||||
// optimalNumOfHashFunctions computes the number of hash functions for a
// bloom filter:
//
//	k = round(m / n * ln 2)
//
// m is the bitmap length in bits and n is the expected number of inserted
// elements. The result is never less than 1: a filter with zero hash
// functions cannot distinguish any element.
func optimalNumOfHashFunctions(m, n uint) uint {
	// m / 0 is Inf or NaN in floating point; converting that to uint is
	// implementation-defined.
	if n == 0 {
		return 1
	}

	k := math.Round(float64(m) / float64(n) * math.Log(2))
	if k < 1 {
		return 1
	}

	return uint(k)
}
|
||||
|
Reference in New Issue
Block a user