mirror of
https://github.com/charlienet/go-mixed.git
synced 2025-07-18 00:22:41 +08:00
122 lines
2.5 KiB
Go
122 lines
2.5 KiB
Go
package bloom
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
|
|
"github.com/charlienet/go-mixed/bytesconv"
|
|
"github.com/charlienet/go-mixed/hash"
|
|
"github.com/charlienet/go-mixed/redis"
|
|
)
|
|
|
|
const DEFAULT_SIZE = 2 << 24
|
|
|
|
var seeds = []uint{7, 11, 13, 31, 37, 61, 79, 97}
|
|
|
|
type bitStore interface {
|
|
Clear()
|
|
Set(ctx context.Context, pos ...uint) error
|
|
Test(pos ...uint) (bool, error)
|
|
}
|
|
|
|
type BloomFilter struct {
|
|
bits uint // 布隆过滤器大小
|
|
funcs uint // 哈希函数数量
|
|
store bitStore // 位图存储
|
|
}
|
|
|
|
type bloomOptions struct {
|
|
redisClient redis.Client
|
|
redisKey string
|
|
}
|
|
|
|
type option func(*bloomOptions)
|
|
|
|
func WithRedis(redis redis.Client, key string) option {
|
|
return func(bo *bloomOptions) {
|
|
bo.redisClient = redis
|
|
bo.redisKey = key
|
|
}
|
|
}
|
|
|
|
// New 初始化布隆过滤器
|
|
// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
|
|
func New(expectedInsertions uint, fpp float64, opts ...option) *BloomFilter {
|
|
opt := &bloomOptions{}
|
|
|
|
for _, f := range opts {
|
|
f(opt)
|
|
}
|
|
|
|
bits := optimalNumOfBits(expectedInsertions, fpp)
|
|
k := optimalNumOfHashFunctions(bits, expectedInsertions)
|
|
if k > uint(len(seeds)) {
|
|
k = uint(len(seeds))
|
|
}
|
|
|
|
bf := &BloomFilter{
|
|
bits: bits,
|
|
funcs: k,
|
|
store: createBitStore(opt, bits),
|
|
}
|
|
|
|
return bf
|
|
}
|
|
|
|
func (bf *BloomFilter) Add(ctx context.Context, data string) {
|
|
offsets := bf.geOffsets([]byte(data))
|
|
bf.store.Set(ctx, offsets...)
|
|
}
|
|
|
|
func (bf *BloomFilter) ExistString(data string) (bool, error) {
|
|
return bf.Exists(bytesconv.StringToBytes(data))
|
|
}
|
|
|
|
func (bf *BloomFilter) Exists(data []byte) (bool, error) {
|
|
if len(data) == 0 {
|
|
return false, nil
|
|
}
|
|
|
|
offsets := bf.geOffsets(data)
|
|
isSet, err := bf.store.Test(offsets...)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
return isSet, nil
|
|
}
|
|
|
|
func (bf *BloomFilter) geOffsets(data []byte) []uint {
|
|
offsets := make([]uint, bf.funcs)
|
|
for i := uint(0); i < bf.funcs; i++ {
|
|
offsets[i] = uint(hash.Murmur3(append(data, byte(seeds[i]))) % uint64(bf.bits))
|
|
}
|
|
|
|
return offsets
|
|
}
|
|
|
|
// 清空布隆过滤器
|
|
func (bf *BloomFilter) Clear() {
|
|
bf.store.Clear()
|
|
}
|
|
|
|
func createBitStore(opt *bloomOptions, bits uint) bitStore {
|
|
if opt.redisClient != nil {
|
|
return newRedisStore(opt.redisClient, opt.redisKey, bits)
|
|
}
|
|
|
|
return newMemStore(bits)
|
|
}
|
|
|
|
// 计算优化的位图长度,
|
|
// n 期望放置元素数量,
|
|
// p 预期的误判概率
|
|
func optimalNumOfBits(n uint, p float64) uint {
|
|
return (uint)(-float64(n) * math.Log(p) / (math.Log(2) * math.Log(2)))
|
|
}
|
|
|
|
// 计算哈希函数数量
|
|
func optimalNumOfHashFunctions(m, n uint) uint {
|
|
return uint(math.Round(float64(m) / float64(n) * math.Log(2)))
|
|
}
|