Chore: move "geodata" to package "component"
This commit is contained in:
241
component/geodata/strmatcher/ac_automaton_matcher.go
Normal file
241
component/geodata/strmatcher/ac_automaton_matcher.go
Normal file
@ -0,0 +1,241 @@
|
||||
package strmatcher
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
)
|
||||
|
||||
const validCharCount = 53
|
||||
|
||||
type MatchType struct {
|
||||
matchType Type
|
||||
exist bool
|
||||
}
|
||||
|
||||
const (
|
||||
TrieEdge bool = true
|
||||
FailEdge bool = false
|
||||
)
|
||||
|
||||
type Edge struct {
|
||||
edgeType bool
|
||||
nextNode int
|
||||
}
|
||||
|
||||
type ACAutomaton struct {
|
||||
trie [][validCharCount]Edge
|
||||
fail []int
|
||||
exists []MatchType
|
||||
count int
|
||||
}
|
||||
|
||||
func newNode() [validCharCount]Edge {
|
||||
var s [validCharCount]Edge
|
||||
for i := range s {
|
||||
s[i] = Edge{
|
||||
edgeType: FailEdge,
|
||||
nextNode: 0,
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
var char2Index = []int{
|
||||
'A': 0,
|
||||
'a': 0,
|
||||
'B': 1,
|
||||
'b': 1,
|
||||
'C': 2,
|
||||
'c': 2,
|
||||
'D': 3,
|
||||
'd': 3,
|
||||
'E': 4,
|
||||
'e': 4,
|
||||
'F': 5,
|
||||
'f': 5,
|
||||
'G': 6,
|
||||
'g': 6,
|
||||
'H': 7,
|
||||
'h': 7,
|
||||
'I': 8,
|
||||
'i': 8,
|
||||
'J': 9,
|
||||
'j': 9,
|
||||
'K': 10,
|
||||
'k': 10,
|
||||
'L': 11,
|
||||
'l': 11,
|
||||
'M': 12,
|
||||
'm': 12,
|
||||
'N': 13,
|
||||
'n': 13,
|
||||
'O': 14,
|
||||
'o': 14,
|
||||
'P': 15,
|
||||
'p': 15,
|
||||
'Q': 16,
|
||||
'q': 16,
|
||||
'R': 17,
|
||||
'r': 17,
|
||||
'S': 18,
|
||||
's': 18,
|
||||
'T': 19,
|
||||
't': 19,
|
||||
'U': 20,
|
||||
'u': 20,
|
||||
'V': 21,
|
||||
'v': 21,
|
||||
'W': 22,
|
||||
'w': 22,
|
||||
'X': 23,
|
||||
'x': 23,
|
||||
'Y': 24,
|
||||
'y': 24,
|
||||
'Z': 25,
|
||||
'z': 25,
|
||||
'!': 26,
|
||||
'$': 27,
|
||||
'&': 28,
|
||||
'\'': 29,
|
||||
'(': 30,
|
||||
')': 31,
|
||||
'*': 32,
|
||||
'+': 33,
|
||||
',': 34,
|
||||
';': 35,
|
||||
'=': 36,
|
||||
':': 37,
|
||||
'%': 38,
|
||||
'-': 39,
|
||||
'.': 40,
|
||||
'_': 41,
|
||||
'~': 42,
|
||||
'0': 43,
|
||||
'1': 44,
|
||||
'2': 45,
|
||||
'3': 46,
|
||||
'4': 47,
|
||||
'5': 48,
|
||||
'6': 49,
|
||||
'7': 50,
|
||||
'8': 51,
|
||||
'9': 52,
|
||||
}
|
||||
|
||||
func NewACAutomaton() *ACAutomaton {
|
||||
ac := new(ACAutomaton)
|
||||
ac.trie = append(ac.trie, newNode())
|
||||
ac.fail = append(ac.fail, 0)
|
||||
ac.exists = append(ac.exists, MatchType{
|
||||
matchType: Full,
|
||||
exist: false,
|
||||
})
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *ACAutomaton) Add(domain string, t Type) {
|
||||
node := 0
|
||||
for i := len(domain) - 1; i >= 0; i-- {
|
||||
idx := char2Index[domain[i]]
|
||||
if ac.trie[node][idx].nextNode == 0 {
|
||||
ac.count++
|
||||
if len(ac.trie) < ac.count+1 {
|
||||
ac.trie = append(ac.trie, newNode())
|
||||
ac.fail = append(ac.fail, 0)
|
||||
ac.exists = append(ac.exists, MatchType{
|
||||
matchType: Full,
|
||||
exist: false,
|
||||
})
|
||||
}
|
||||
ac.trie[node][idx] = Edge{
|
||||
edgeType: TrieEdge,
|
||||
nextNode: ac.count,
|
||||
}
|
||||
}
|
||||
node = ac.trie[node][idx].nextNode
|
||||
}
|
||||
ac.exists[node] = MatchType{
|
||||
matchType: t,
|
||||
exist: true,
|
||||
}
|
||||
switch t {
|
||||
case Domain:
|
||||
ac.exists[node] = MatchType{
|
||||
matchType: Full,
|
||||
exist: true,
|
||||
}
|
||||
idx := char2Index['.']
|
||||
if ac.trie[node][idx].nextNode == 0 {
|
||||
ac.count++
|
||||
if len(ac.trie) < ac.count+1 {
|
||||
ac.trie = append(ac.trie, newNode())
|
||||
ac.fail = append(ac.fail, 0)
|
||||
ac.exists = append(ac.exists, MatchType{
|
||||
matchType: Full,
|
||||
exist: false,
|
||||
})
|
||||
}
|
||||
ac.trie[node][idx] = Edge{
|
||||
edgeType: TrieEdge,
|
||||
nextNode: ac.count,
|
||||
}
|
||||
}
|
||||
node = ac.trie[node][idx].nextNode
|
||||
ac.exists[node] = MatchType{
|
||||
matchType: t,
|
||||
exist: true,
|
||||
}
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
func (ac *ACAutomaton) Build() {
|
||||
queue := list.New()
|
||||
for i := 0; i < validCharCount; i++ {
|
||||
if ac.trie[0][i].nextNode != 0 {
|
||||
queue.PushBack(ac.trie[0][i])
|
||||
}
|
||||
}
|
||||
for {
|
||||
front := queue.Front()
|
||||
if front == nil {
|
||||
break
|
||||
} else {
|
||||
node := front.Value.(Edge).nextNode
|
||||
queue.Remove(front)
|
||||
for i := 0; i < validCharCount; i++ {
|
||||
if ac.trie[node][i].nextNode != 0 {
|
||||
ac.fail[ac.trie[node][i].nextNode] = ac.trie[ac.fail[node]][i].nextNode
|
||||
queue.PushBack(ac.trie[node][i])
|
||||
} else {
|
||||
ac.trie[node][i] = Edge{
|
||||
edgeType: FailEdge,
|
||||
nextNode: ac.trie[ac.fail[node]][i].nextNode,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ac *ACAutomaton) Match(s string) bool {
|
||||
node := 0
|
||||
fullMatch := true
|
||||
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
||||
// 2. the match string is through a fail edge. NOT FULL MATCH
|
||||
// 2.1 Through a fail edge, but there exists a valid node. SUBSTR
|
||||
for i := len(s) - 1; i >= 0; i-- {
|
||||
idx := char2Index[s[i]]
|
||||
fullMatch = fullMatch && ac.trie[node][idx].edgeType
|
||||
node = ac.trie[node][idx].nextNode
|
||||
switch ac.exists[node].matchType {
|
||||
case Substr:
|
||||
return true
|
||||
case Domain:
|
||||
if fullMatch {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return fullMatch && ac.exists[node].exist
|
||||
}
|
98
component/geodata/strmatcher/domain_matcher.go
Normal file
98
component/geodata/strmatcher/domain_matcher.go
Normal file
@ -0,0 +1,98 @@
|
||||
package strmatcher
|
||||
|
||||
import "strings"
|
||||
|
||||
func breakDomain(domain string) []string {
|
||||
return strings.Split(domain, ".")
|
||||
}
|
||||
|
||||
type node struct {
|
||||
values []uint32
|
||||
sub map[string]*node
|
||||
}
|
||||
|
||||
// DomainMatcherGroup is a IndexMatcher for a large set of Domain matchers.
|
||||
// Visible for testing only.
|
||||
type DomainMatcherGroup struct {
|
||||
root *node
|
||||
}
|
||||
|
||||
func (g *DomainMatcherGroup) Add(domain string, value uint32) {
|
||||
if g.root == nil {
|
||||
g.root = new(node)
|
||||
}
|
||||
|
||||
current := g.root
|
||||
parts := breakDomain(domain)
|
||||
for i := len(parts) - 1; i >= 0; i-- {
|
||||
part := parts[i]
|
||||
if current.sub == nil {
|
||||
current.sub = make(map[string]*node)
|
||||
}
|
||||
next := current.sub[part]
|
||||
if next == nil {
|
||||
next = new(node)
|
||||
current.sub[part] = next
|
||||
}
|
||||
current = next
|
||||
}
|
||||
|
||||
current.values = append(current.values, value)
|
||||
}
|
||||
|
||||
func (g *DomainMatcherGroup) addMatcher(m domainMatcher, value uint32) {
|
||||
g.Add(string(m), value)
|
||||
}
|
||||
|
||||
func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
||||
if domain == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
current := g.root
|
||||
if current == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
nextPart := func(idx int) int {
|
||||
for i := idx - 1; i >= 0; i-- {
|
||||
if domain[i] == '.' {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
matches := [][]uint32{}
|
||||
idx := len(domain)
|
||||
for {
|
||||
if idx == -1 || current.sub == nil {
|
||||
break
|
||||
}
|
||||
|
||||
nidx := nextPart(idx)
|
||||
part := domain[nidx+1 : idx]
|
||||
next := current.sub[part]
|
||||
if next == nil {
|
||||
break
|
||||
}
|
||||
current = next
|
||||
idx = nidx
|
||||
if len(current.values) > 0 {
|
||||
matches = append(matches, current.values)
|
||||
}
|
||||
}
|
||||
switch len(matches) {
|
||||
case 0:
|
||||
return nil
|
||||
case 1:
|
||||
return matches[0]
|
||||
default:
|
||||
result := []uint32{}
|
||||
for idx := range matches {
|
||||
// Insert reversely, the subdomain that matches further ranks higher
|
||||
result = append(result, matches[len(matches)-1-idx]...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
25
component/geodata/strmatcher/full_matcher.go
Normal file
25
component/geodata/strmatcher/full_matcher.go
Normal file
@ -0,0 +1,25 @@
|
||||
package strmatcher
|
||||
|
||||
type FullMatcherGroup struct {
|
||||
matchers map[string][]uint32
|
||||
}
|
||||
|
||||
func (g *FullMatcherGroup) Add(domain string, value uint32) {
|
||||
if g.matchers == nil {
|
||||
g.matchers = make(map[string][]uint32)
|
||||
}
|
||||
|
||||
g.matchers[domain] = append(g.matchers[domain], value)
|
||||
}
|
||||
|
||||
func (g *FullMatcherGroup) addMatcher(m fullMatcher, value uint32) {
|
||||
g.Add(string(m), value)
|
||||
}
|
||||
|
||||
func (g *FullMatcherGroup) Match(str string) []uint32 {
|
||||
if g.matchers == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return g.matchers[str]
|
||||
}
|
52
component/geodata/strmatcher/matchers.go
Normal file
52
component/geodata/strmatcher/matchers.go
Normal file
@ -0,0 +1,52 @@
|
||||
package strmatcher
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type fullMatcher string
|
||||
|
||||
func (m fullMatcher) Match(s string) bool {
|
||||
return string(m) == s
|
||||
}
|
||||
|
||||
func (m fullMatcher) String() string {
|
||||
return "full:" + string(m)
|
||||
}
|
||||
|
||||
type substrMatcher string
|
||||
|
||||
func (m substrMatcher) Match(s string) bool {
|
||||
return strings.Contains(s, string(m))
|
||||
}
|
||||
|
||||
func (m substrMatcher) String() string {
|
||||
return "keyword:" + string(m)
|
||||
}
|
||||
|
||||
type domainMatcher string
|
||||
|
||||
func (m domainMatcher) Match(s string) bool {
|
||||
pattern := string(m)
|
||||
if !strings.HasSuffix(s, pattern) {
|
||||
return false
|
||||
}
|
||||
return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
|
||||
}
|
||||
|
||||
func (m domainMatcher) String() string {
|
||||
return "domain:" + string(m)
|
||||
}
|
||||
|
||||
type regexMatcher struct {
|
||||
pattern *regexp.Regexp
|
||||
}
|
||||
|
||||
func (m *regexMatcher) Match(s string) bool {
|
||||
return m.pattern.MatchString(s)
|
||||
}
|
||||
|
||||
func (m *regexMatcher) String() string {
|
||||
return "regexp:" + m.pattern.String()
|
||||
}
|
304
component/geodata/strmatcher/mph_matcher.go
Normal file
304
component/geodata/strmatcher/mph_matcher.go
Normal file
@ -0,0 +1,304 @@
|
||||
package strmatcher
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// PrimeRK is the prime base used in Rabin-Karp algorithm.
|
||||
const PrimeRK = 16777619
|
||||
|
||||
// calculate the rolling murmurHash of given string
|
||||
func RollingHash(s string) uint32 {
|
||||
h := uint32(0)
|
||||
for i := len(s) - 1; i >= 0; i-- {
|
||||
h = h*PrimeRK + uint32(s[i])
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// A MphMatcherGroup is divided into three parts:
|
||||
// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table;
|
||||
// 2. `substr` patterns are matched by ac automaton;
|
||||
// 3. `regex` patterns are matched with the regex library.
|
||||
type MphMatcherGroup struct {
|
||||
ac *ACAutomaton
|
||||
otherMatchers []matcherEntry
|
||||
rules []string
|
||||
level0 []uint32
|
||||
level0Mask int
|
||||
level1 []uint32
|
||||
level1Mask int
|
||||
count uint32
|
||||
ruleMap *map[string]uint32
|
||||
}
|
||||
|
||||
func (g *MphMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
|
||||
h := RollingHash(pattern)
|
||||
switch t {
|
||||
case Domain:
|
||||
(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
|
||||
fallthrough
|
||||
case Full:
|
||||
(*g.ruleMap)[pattern] = h
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func NewMphMatcherGroup() *MphMatcherGroup {
|
||||
return &MphMatcherGroup{
|
||||
ac: nil,
|
||||
otherMatchers: nil,
|
||||
rules: nil,
|
||||
level0: nil,
|
||||
level0Mask: 0,
|
||||
level1: nil,
|
||||
level1Mask: 0,
|
||||
count: 1,
|
||||
ruleMap: &map[string]uint32{},
|
||||
}
|
||||
}
|
||||
|
||||
// AddPattern adds a pattern to MphMatcherGroup
|
||||
func (g *MphMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
|
||||
switch t {
|
||||
case Substr:
|
||||
if g.ac == nil {
|
||||
g.ac = NewACAutomaton()
|
||||
}
|
||||
g.ac.Add(pattern, t)
|
||||
case Full, Domain:
|
||||
pattern = strings.ToLower(pattern)
|
||||
g.AddFullOrDomainPattern(pattern, t)
|
||||
case Regex:
|
||||
r, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
g.otherMatchers = append(g.otherMatchers, matcherEntry{
|
||||
m: ®exMatcher{pattern: r},
|
||||
id: g.count,
|
||||
})
|
||||
default:
|
||||
panic("Unknown type")
|
||||
}
|
||||
return g.count, nil
|
||||
}
|
||||
|
||||
// Build builds a minimal perfect hash table and ac automaton from insert rules
|
||||
func (g *MphMatcherGroup) Build() {
|
||||
if g.ac != nil {
|
||||
g.ac.Build()
|
||||
}
|
||||
keyLen := len(*g.ruleMap)
|
||||
if keyLen == 0 {
|
||||
keyLen = 1
|
||||
(*g.ruleMap)["empty___"] = RollingHash("empty___")
|
||||
}
|
||||
g.level0 = make([]uint32, nextPow2(keyLen/4))
|
||||
g.level0Mask = len(g.level0) - 1
|
||||
g.level1 = make([]uint32, nextPow2(keyLen))
|
||||
g.level1Mask = len(g.level1) - 1
|
||||
sparseBuckets := make([][]int, len(g.level0))
|
||||
var ruleIdx int
|
||||
for rule, hash := range *g.ruleMap {
|
||||
n := int(hash) & g.level0Mask
|
||||
g.rules = append(g.rules, rule)
|
||||
sparseBuckets[n] = append(sparseBuckets[n], ruleIdx)
|
||||
ruleIdx++
|
||||
}
|
||||
g.ruleMap = nil
|
||||
var buckets []indexBucket
|
||||
for n, vals := range sparseBuckets {
|
||||
if len(vals) > 0 {
|
||||
buckets = append(buckets, indexBucket{n, vals})
|
||||
}
|
||||
}
|
||||
sort.Sort(bySize(buckets))
|
||||
|
||||
occ := make([]bool, len(g.level1))
|
||||
var tmpOcc []int
|
||||
for _, bucket := range buckets {
|
||||
seed := uint32(0)
|
||||
for {
|
||||
findSeed := true
|
||||
tmpOcc = tmpOcc[:0]
|
||||
for _, i := range bucket.vals {
|
||||
n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask
|
||||
if occ[n] {
|
||||
for _, n := range tmpOcc {
|
||||
occ[n] = false
|
||||
}
|
||||
seed++
|
||||
findSeed = false
|
||||
break
|
||||
}
|
||||
occ[n] = true
|
||||
tmpOcc = append(tmpOcc, n)
|
||||
g.level1[n] = uint32(i)
|
||||
}
|
||||
if findSeed {
|
||||
g.level0[bucket.n] = seed
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func nextPow2(v int) int {
|
||||
if v <= 1 {
|
||||
return 1
|
||||
}
|
||||
const MaxUInt = ^uint(0)
|
||||
n := (MaxUInt >> bits.LeadingZeros(uint(v))) + 1
|
||||
return int(n)
|
||||
}
|
||||
|
||||
// Lookup searches for s in t and returns its index and whether it was found.
|
||||
func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
|
||||
i0 := int(h) & g.level0Mask
|
||||
seed := g.level0[i0]
|
||||
i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask
|
||||
n := g.level1[i1]
|
||||
return s == g.rules[int(n)]
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (g *MphMatcherGroup) Match(pattern string) []uint32 {
|
||||
result := []uint32{}
|
||||
hash := uint32(0)
|
||||
for i := len(pattern) - 1; i >= 0; i-- {
|
||||
hash = hash*PrimeRK + uint32(pattern[i])
|
||||
if pattern[i] == '.' {
|
||||
if g.Lookup(hash, pattern[i:]) {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
}
|
||||
}
|
||||
if g.Lookup(hash, pattern) {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
if g.ac != nil && g.ac.Match(pattern) {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
for _, e := range g.otherMatchers {
|
||||
if e.m.Match(pattern) {
|
||||
result = append(result, e.id)
|
||||
return result
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type indexBucket struct {
|
||||
n int
|
||||
vals []int
|
||||
}
|
||||
|
||||
type bySize []indexBucket
|
||||
|
||||
func (s bySize) Len() int { return len(s) }
|
||||
func (s bySize) Less(i, j int) bool { return len(s[i].vals) > len(s[j].vals) }
|
||||
func (s bySize) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
|
||||
type stringStruct struct {
|
||||
str unsafe.Pointer
|
||||
len int
|
||||
}
|
||||
|
||||
func strhashFallback(a unsafe.Pointer, h uintptr) uintptr {
|
||||
x := (*stringStruct)(a)
|
||||
return memhashFallback(x.str, h, uintptr(x.len))
|
||||
}
|
||||
|
||||
const (
|
||||
// Constants for multiplication: four random odd 64-bit numbers.
|
||||
m1 = 16877499708836156737
|
||||
m2 = 2820277070424839065
|
||||
m3 = 9497967016996688599
|
||||
m4 = 15839092249703872147
|
||||
)
|
||||
|
||||
var hashkey = [4]uintptr{1, 1, 1, 1}
|
||||
|
||||
func memhashFallback(p unsafe.Pointer, seed, s uintptr) uintptr {
|
||||
h := uint64(seed + s*hashkey[0])
|
||||
tail:
|
||||
switch {
|
||||
case s == 0:
|
||||
case s < 4:
|
||||
h ^= uint64(*(*byte)(p))
|
||||
h ^= uint64(*(*byte)(add(p, s>>1))) << 8
|
||||
h ^= uint64(*(*byte)(add(p, s-1))) << 16
|
||||
h = rotl31(h*m1) * m2
|
||||
case s <= 8:
|
||||
h ^= uint64(readUnaligned32(p))
|
||||
h ^= uint64(readUnaligned32(add(p, s-4))) << 32
|
||||
h = rotl31(h*m1) * m2
|
||||
case s <= 16:
|
||||
h ^= readUnaligned64(p)
|
||||
h = rotl31(h*m1) * m2
|
||||
h ^= readUnaligned64(add(p, s-8))
|
||||
h = rotl31(h*m1) * m2
|
||||
case s <= 32:
|
||||
h ^= readUnaligned64(p)
|
||||
h = rotl31(h*m1) * m2
|
||||
h ^= readUnaligned64(add(p, 8))
|
||||
h = rotl31(h*m1) * m2
|
||||
h ^= readUnaligned64(add(p, s-16))
|
||||
h = rotl31(h*m1) * m2
|
||||
h ^= readUnaligned64(add(p, s-8))
|
||||
h = rotl31(h*m1) * m2
|
||||
default:
|
||||
v1 := h
|
||||
v2 := uint64(seed * hashkey[1])
|
||||
v3 := uint64(seed * hashkey[2])
|
||||
v4 := uint64(seed * hashkey[3])
|
||||
for s >= 32 {
|
||||
v1 ^= readUnaligned64(p)
|
||||
v1 = rotl31(v1*m1) * m2
|
||||
p = add(p, 8)
|
||||
v2 ^= readUnaligned64(p)
|
||||
v2 = rotl31(v2*m2) * m3
|
||||
p = add(p, 8)
|
||||
v3 ^= readUnaligned64(p)
|
||||
v3 = rotl31(v3*m3) * m4
|
||||
p = add(p, 8)
|
||||
v4 ^= readUnaligned64(p)
|
||||
v4 = rotl31(v4*m4) * m1
|
||||
p = add(p, 8)
|
||||
s -= 32
|
||||
}
|
||||
h = v1 ^ v2 ^ v3 ^ v4
|
||||
goto tail
|
||||
}
|
||||
|
||||
h ^= h >> 29
|
||||
h *= m3
|
||||
h ^= h >> 32
|
||||
return uintptr(h)
|
||||
}
|
||||
|
||||
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
|
||||
return unsafe.Pointer(uintptr(p) + x)
|
||||
}
|
||||
|
||||
func readUnaligned32(p unsafe.Pointer) uint32 {
|
||||
q := (*[4]byte)(p)
|
||||
return uint32(q[0]) | uint32(q[1])<<8 | uint32(q[2])<<16 | uint32(q[3])<<24
|
||||
}
|
||||
|
||||
func rotl31(x uint64) uint64 {
|
||||
return (x << 31) | (x >> (64 - 31))
|
||||
}
|
||||
|
||||
func readUnaligned64(p unsafe.Pointer) uint64 {
|
||||
q := (*[8]byte)(p)
|
||||
return uint64(q[0]) | uint64(q[1])<<8 | uint64(q[2])<<16 | uint64(q[3])<<24 | uint64(q[4])<<32 | uint64(q[5])<<40 | uint64(q[6])<<48 | uint64(q[7])<<56
|
||||
}
|
4
component/geodata/strmatcher/package_info.go
Normal file
4
component/geodata/strmatcher/package_info.go
Normal file
@ -0,0 +1,4 @@
|
||||
// Modified from: https://github.com/v2fly/v2ray-core/tree/master/common/strmatcher
|
||||
// License: MIT
|
||||
|
||||
package strmatcher
|
107
component/geodata/strmatcher/strmatcher.go
Normal file
107
component/geodata/strmatcher/strmatcher.go
Normal file
@ -0,0 +1,107 @@
|
||||
package strmatcher
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// Matcher is the interface to determine a string matches a pattern.
|
||||
type Matcher interface {
|
||||
// Match returns true if the given string matches a predefined pattern.
|
||||
Match(string) bool
|
||||
String() string
|
||||
}
|
||||
|
||||
// Type is the type of the matcher.
|
||||
type Type byte
|
||||
|
||||
const (
|
||||
// Full is the type of matcher that the input string must exactly equal to the pattern.
|
||||
Full Type = iota
|
||||
// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
|
||||
Substr
|
||||
// Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern.
|
||||
Domain
|
||||
// Regex is the type of matcher that the input string must matches the regular-expression pattern.
|
||||
Regex
|
||||
)
|
||||
|
||||
// New creates a new Matcher based on the given pattern.
|
||||
func (t Type) New(pattern string) (Matcher, error) {
|
||||
// 1. regex matching is case-sensitive
|
||||
switch t {
|
||||
case Full:
|
||||
return fullMatcher(pattern), nil
|
||||
case Substr:
|
||||
return substrMatcher(pattern), nil
|
||||
case Domain:
|
||||
return domainMatcher(pattern), nil
|
||||
case Regex:
|
||||
r, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ®exMatcher{
|
||||
pattern: r,
|
||||
}, nil
|
||||
default:
|
||||
panic("Unknown type")
|
||||
}
|
||||
}
|
||||
|
||||
// IndexMatcher is the interface for matching with a group of matchers.
|
||||
type IndexMatcher interface {
|
||||
// Match returns the index of a matcher that matches the input. It returns empty array if no such matcher exists.
|
||||
Match(input string) []uint32
|
||||
}
|
||||
|
||||
type matcherEntry struct {
|
||||
m Matcher
|
||||
id uint32
|
||||
}
|
||||
|
||||
// MatcherGroup is an implementation of IndexMatcher.
|
||||
// Empty initialization works.
|
||||
type MatcherGroup struct {
|
||||
count uint32
|
||||
fullMatcher FullMatcherGroup
|
||||
domainMatcher DomainMatcherGroup
|
||||
otherMatchers []matcherEntry
|
||||
}
|
||||
|
||||
// Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
|
||||
func (g *MatcherGroup) Add(m Matcher) uint32 {
|
||||
g.count++
|
||||
c := g.count
|
||||
|
||||
switch tm := m.(type) {
|
||||
case fullMatcher:
|
||||
g.fullMatcher.addMatcher(tm, c)
|
||||
case domainMatcher:
|
||||
g.domainMatcher.addMatcher(tm, c)
|
||||
default:
|
||||
g.otherMatchers = append(g.otherMatchers, matcherEntry{
|
||||
m: m,
|
||||
id: c,
|
||||
})
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (g *MatcherGroup) Match(pattern string) []uint32 {
|
||||
result := []uint32{}
|
||||
result = append(result, g.fullMatcher.Match(pattern)...)
|
||||
result = append(result, g.domainMatcher.Match(pattern)...)
|
||||
for _, e := range g.otherMatchers {
|
||||
if e.m.Match(pattern) {
|
||||
result = append(result, e.id)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Size returns the number of matchers in the MatcherGroup.
|
||||
func (g *MatcherGroup) Size() uint32 {
|
||||
return g.count
|
||||
}
|
Reference in New Issue
Block a user