diff --git a/cmd/cmd.go b/cmd/cmd.go index 68753c2..c4d4109 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "github.com/chainreactors/logs" + "github.com/chainreactors/parsers" "github.com/chainreactors/parsers/iutils" "github.com/chainreactors/spray/internal" "github.com/chainreactors/spray/pkg" @@ -63,7 +64,12 @@ func Spray() { if reg, ok := pkg.ExtractRegexps[e]; ok { pkg.Extractors[e] = reg } else { - pkg.Extractors[e] = []*regexp.Regexp{regexp.MustCompile(e)} + pkg.Extractors[e] = []*parsers.Extractor{ + &parsers.Extractor{ + Name: e, + CompiledRegexps: []*regexp.Regexp{regexp.MustCompile(e)}, + }, + } } } } diff --git a/go.mod b/go.mod index 7b8e9bc..01c6a39 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/chainreactors/gogo/v2 v2.10.4 github.com/chainreactors/ipcs v0.0.13 github.com/chainreactors/logs v0.7.1-0.20221214153111-85f123ff6580 - github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599 + github.com/chainreactors/parsers v0.3.1-0.20230208070438-6903b0d366c9 github.com/chainreactors/words v0.4.1-0.20230203115443-ca934844e361 ) diff --git a/go.sum b/go.sum index 0e3ca44..ae1cbda 100644 --- a/go.sum +++ b/go.sum @@ -28,6 +28,8 @@ github.com/chainreactors/parsers v0.3.1-0.20230201103008-e20167926b49 h1:snsLbWc github.com/chainreactors/parsers v0.3.1-0.20230201103008-e20167926b49/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA= github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599 h1:9PwMZzN+RZDv2BUDvOG8e0N6W3XJQLVaP2AW6RD5mjM= github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA= +github.com/chainreactors/parsers v0.3.1-0.20230208070438-6903b0d366c9 h1:JCm8SmLb1jMFp5T6bBXKn3GmqPTjLxqWiz5yQKlo5Bs= +github.com/chainreactors/parsers v0.3.1-0.20230208070438-6903b0d366c9/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA= github.com/chainreactors/words v0.3.2-0.20230105161651-7c1fc4c9605a h1:vRAMDJ6UQV73uyiRBQnuE/+S7Q7JTpfubSpyRlooZ2U= github.com/chainreactors/words v0.3.2-0.20230105161651-7c1fc4c9605a/go.mod h1:QIWX1vMT5j/Mp9zx3/wgZh3FqskhjCbo/3Ffy/Hxj9w= github.com/chainreactors/words v0.4.1-0.20230203114605-f305deb098a2 h1:51GoU85MLp/s8IvXcKLeedSxypkvZBFJWIBUlGV+MiI= diff --git a/pkg/baseline.go b/pkg/baseline.go index 550e8df..c1924fa 100644 --- a/pkg/baseline.go +++ b/pkg/baseline.go @@ -147,7 +147,7 @@ func (bl *Baseline) CollectURL() { if len(bl.Body) == 0 { return } - for _, reg := range ExtractRegexps["js"] { + for _, reg := range ExtractRegexps["js"][0].CompiledRegexps { urls := reg.FindAllStringSubmatch(string(bl.Body), -1) for _, u := range urls { u[1] = formatURL(u[1]) @@ -157,7 +157,7 @@ func (bl *Baseline) CollectURL() { } } - for _, reg := range ExtractRegexps["url"] { + for _, reg := range ExtractRegexps["url"][0].CompiledRegexps { urls := reg.FindAllStringSubmatch(string(bl.Body), -1) for _, u := range urls { u[1] = formatURL(u[1]) diff --git a/pkg/utils.go b/pkg/utils.go index 03f60c6..f7b873f 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -11,7 +11,6 @@ import ( "net/url" "os" "path" - "regexp" "strconv" "strings" "time" @@ -19,27 +18,15 @@ import ( ) var ( - Md5Fingers map[string]string = make(map[string]string) - Mmh3Fingers map[string]string = make(map[string]string) - Rules map[string]string = make(map[string]string) - ActivePath []string - Fingers fingers.Fingers - //JSRegexps []*regexp.Regexp = []*regexp.Regexp{ - // regexp.MustCompile(`.(https{0,1}:[^\s'’"”><()|*\[]{2,250}?[^=*\s'’><:;|()[]{3}\[]\.js)`), - // regexp.MustCompile(`["']([^\s',’"”><;()|*:\[]{2,250}?[^=*\s'’|"”><^:;()\[]{3}\.js)`), - // regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}([^\s^'’,+><;()|*\[]{2,250}?[^=,\s'’"”>|<:;*()\[]{3}\.js)`), - //} - //URLRegexps []*regexp.Regexp = []*regexp.Regexp{ - // regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s'"><()|*\[]{2,250})`), - // regexp.MustCompile(`["']([^\s',’"”><.@$;:()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`), - // regexp.MustCompile(`["'](https?:[^\s'"><()@|*\[]{2,250}?\.[^\s',’"”><;()|*\[]{2,250}?)["']`), - // regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s'",><;@$()|*\[]{2,250}?)\s{0,6}["']`), - // regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',’"”><$@;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'’"“><)(]{2,250})`), - //} - ExtractRegexps map[string][]*regexp.Regexp = map[string][]*regexp.Regexp{} - Extractors = make(parsers.Extractors) + Md5Fingers map[string]string = make(map[string]string) + Mmh3Fingers map[string]string = make(map[string]string) + Rules map[string]string = make(map[string]string) + ActivePath []string + Fingers fingers.Fingers + ExtractRegexps = map[string][]*parsers.Extractor{} + Extractors = make(parsers.Extractors) - BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4"} + BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4", ".zip", ".rar"} BadURL = []string{";", "}", "\\n", "webpack://", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path"} ContentTypeMap = map[string]string{ @@ -210,12 +197,12 @@ func LoadTemplates() error { for _, extract := range extracts { extract.Compile() - ExtractRegexps[extract.Name] = extract.CompiledRegexps + ExtractRegexps[extract.Name] = []*parsers.Extractor{extract} for _, tag := range extract.Tags { if _, ok := ExtractRegexps[tag]; !ok { - ExtractRegexps[tag] = extract.CompiledRegexps + ExtractRegexps[tag] = []*parsers.Extractor{extract} } else { - ExtractRegexps[tag] = append(ExtractRegexps[tag], extract.CompiledRegexps...) + ExtractRegexps[tag] = append(ExtractRegexps[tag], extract) } } } @@ -354,6 +341,7 @@ func CRC16Hash(data []byte) uint16 { } func UniqueHash(bl *Baseline) uint16 { - // 由host+状态码+重定向url+content-type+title+length舍去个位与十位组成的hash, 没有body length, 因为可能存在随机值 + // 由host+状态码+重定向url+content-type+title+length舍去个位与十位组成的hash + // body length可能会导致一些误报, 目前没有更好的解决办法 return CRC16Hash([]byte(bl.Host + strconv.Itoa(bl.Status) + bl.RedirectURL + bl.ContentType + bl.Title + strconv.Itoa(bl.BodyLength/100*100))) }