mirror of
https://github.com/chainreactors/spray.git
synced 2025-09-15 11:40:13 +00:00
调整extract格式
This commit is contained in:
parent
b3589db853
commit
5ace37824a
@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/chainreactors/logs"
|
"github.com/chainreactors/logs"
|
||||||
|
"github.com/chainreactors/parsers"
|
||||||
"github.com/chainreactors/parsers/iutils"
|
"github.com/chainreactors/parsers/iutils"
|
||||||
"github.com/chainreactors/spray/internal"
|
"github.com/chainreactors/spray/internal"
|
||||||
"github.com/chainreactors/spray/pkg"
|
"github.com/chainreactors/spray/pkg"
|
||||||
@ -63,7 +64,12 @@ func Spray() {
|
|||||||
if reg, ok := pkg.ExtractRegexps[e]; ok {
|
if reg, ok := pkg.ExtractRegexps[e]; ok {
|
||||||
pkg.Extractors[e] = reg
|
pkg.Extractors[e] = reg
|
||||||
} else {
|
} else {
|
||||||
pkg.Extractors[e] = []*regexp.Regexp{regexp.MustCompile(e)}
|
pkg.Extractors[e] = []*parsers.Extractor{
|
||||||
|
&parsers.Extractor{
|
||||||
|
Name: e,
|
||||||
|
CompiledRegexps: []*regexp.Regexp{regexp.MustCompile(e)},
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
2
go.mod
2
go.mod
@ -8,7 +8,7 @@ require (
|
|||||||
github.com/chainreactors/gogo/v2 v2.10.4
|
github.com/chainreactors/gogo/v2 v2.10.4
|
||||||
github.com/chainreactors/ipcs v0.0.13
|
github.com/chainreactors/ipcs v0.0.13
|
||||||
github.com/chainreactors/logs v0.7.1-0.20221214153111-85f123ff6580
|
github.com/chainreactors/logs v0.7.1-0.20221214153111-85f123ff6580
|
||||||
github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599
|
github.com/chainreactors/parsers v0.3.1-0.20230208070438-6903b0d366c9
|
||||||
github.com/chainreactors/words v0.4.1-0.20230203115443-ca934844e361
|
github.com/chainreactors/words v0.4.1-0.20230203115443-ca934844e361
|
||||||
)
|
)
|
||||||
|
|
||||||
|
2
go.sum
2
go.sum
@ -28,6 +28,8 @@ github.com/chainreactors/parsers v0.3.1-0.20230201103008-e20167926b49 h1:snsLbWc
|
|||||||
github.com/chainreactors/parsers v0.3.1-0.20230201103008-e20167926b49/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA=
|
github.com/chainreactors/parsers v0.3.1-0.20230201103008-e20167926b49/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA=
|
||||||
github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599 h1:9PwMZzN+RZDv2BUDvOG8e0N6W3XJQLVaP2AW6RD5mjM=
|
github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599 h1:9PwMZzN+RZDv2BUDvOG8e0N6W3XJQLVaP2AW6RD5mjM=
|
||||||
github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA=
|
github.com/chainreactors/parsers v0.3.1-0.20230204104401-6e150669e599/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA=
|
||||||
|
github.com/chainreactors/parsers v0.3.1-0.20230208070438-6903b0d366c9 h1:JCm8SmLb1jMFp5T6bBXKn3GmqPTjLxqWiz5yQKlo5Bs=
|
||||||
|
github.com/chainreactors/parsers v0.3.1-0.20230208070438-6903b0d366c9/go.mod h1:tA33N6UbYFnIT3k5tufOMfETxmEP20RZFyTSEnVXNUA=
|
||||||
github.com/chainreactors/words v0.3.2-0.20230105161651-7c1fc4c9605a h1:vRAMDJ6UQV73uyiRBQnuE/+S7Q7JTpfubSpyRlooZ2U=
|
github.com/chainreactors/words v0.3.2-0.20230105161651-7c1fc4c9605a h1:vRAMDJ6UQV73uyiRBQnuE/+S7Q7JTpfubSpyRlooZ2U=
|
||||||
github.com/chainreactors/words v0.3.2-0.20230105161651-7c1fc4c9605a/go.mod h1:QIWX1vMT5j/Mp9zx3/wgZh3FqskhjCbo/3Ffy/Hxj9w=
|
github.com/chainreactors/words v0.3.2-0.20230105161651-7c1fc4c9605a/go.mod h1:QIWX1vMT5j/Mp9zx3/wgZh3FqskhjCbo/3Ffy/Hxj9w=
|
||||||
github.com/chainreactors/words v0.4.1-0.20230203114605-f305deb098a2 h1:51GoU85MLp/s8IvXcKLeedSxypkvZBFJWIBUlGV+MiI=
|
github.com/chainreactors/words v0.4.1-0.20230203114605-f305deb098a2 h1:51GoU85MLp/s8IvXcKLeedSxypkvZBFJWIBUlGV+MiI=
|
||||||
|
@ -147,7 +147,7 @@ func (bl *Baseline) CollectURL() {
|
|||||||
if len(bl.Body) == 0 {
|
if len(bl.Body) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, reg := range ExtractRegexps["js"] {
|
for _, reg := range ExtractRegexps["js"][0].CompiledRegexps {
|
||||||
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
|
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
|
||||||
for _, u := range urls {
|
for _, u := range urls {
|
||||||
u[1] = formatURL(u[1])
|
u[1] = formatURL(u[1])
|
||||||
@ -157,7 +157,7 @@ func (bl *Baseline) CollectURL() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, reg := range ExtractRegexps["url"] {
|
for _, reg := range ExtractRegexps["url"][0].CompiledRegexps {
|
||||||
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
|
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
|
||||||
for _, u := range urls {
|
for _, u := range urls {
|
||||||
u[1] = formatURL(u[1])
|
u[1] = formatURL(u[1])
|
||||||
|
38
pkg/utils.go
38
pkg/utils.go
@ -11,7 +11,6 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"regexp"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@ -19,27 +18,15 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
Md5Fingers map[string]string = make(map[string]string)
|
Md5Fingers map[string]string = make(map[string]string)
|
||||||
Mmh3Fingers map[string]string = make(map[string]string)
|
Mmh3Fingers map[string]string = make(map[string]string)
|
||||||
Rules map[string]string = make(map[string]string)
|
Rules map[string]string = make(map[string]string)
|
||||||
ActivePath []string
|
ActivePath []string
|
||||||
Fingers fingers.Fingers
|
Fingers fingers.Fingers
|
||||||
//JSRegexps []*regexp.Regexp = []*regexp.Regexp{
|
ExtractRegexps = map[string][]*parsers.Extractor{}
|
||||||
// regexp.MustCompile(`.(https{0,1}:[^\s'’"”><()|*\[]{2,250}?[^=*\s'’><:;|()[]{3}\[]\.js)`),
|
Extractors = make(parsers.Extractors)
|
||||||
// regexp.MustCompile(`["']([^\s',’"”><;()|*:\[]{2,250}?[^=*\s'’|"”><^:;()\[]{3}\.js)`),
|
|
||||||
// regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}([^\s^'’,+><;()|*\[]{2,250}?[^=,\s'’"”>|<:;*()\[]{3}\.js)`),
|
|
||||||
//}
|
|
||||||
//URLRegexps []*regexp.Regexp = []*regexp.Regexp{
|
|
||||||
// regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s'"><()|*\[]{2,250})`),
|
|
||||||
// regexp.MustCompile(`["']([^\s',’"”><.@$;:()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
|
|
||||||
// regexp.MustCompile(`["'](https?:[^\s'"><()@|*\[]{2,250}?\.[^\s',’"”><;()|*\[]{2,250}?)["']`),
|
|
||||||
// regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s'",><;@$()|*\[]{2,250}?)\s{0,6}["']`),
|
|
||||||
// regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',’"”><$@;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'’"“><)(]{2,250})`),
|
|
||||||
//}
|
|
||||||
ExtractRegexps map[string][]*regexp.Regexp = map[string][]*regexp.Regexp{}
|
|
||||||
Extractors = make(parsers.Extractors)
|
|
||||||
|
|
||||||
BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4"}
|
BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4", ".zip", ".rar"}
|
||||||
BadURL = []string{";", "}", "\\n", "webpack://", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path"}
|
BadURL = []string{";", "}", "\\n", "webpack://", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path"}
|
||||||
|
|
||||||
ContentTypeMap = map[string]string{
|
ContentTypeMap = map[string]string{
|
||||||
@ -210,12 +197,12 @@ func LoadTemplates() error {
|
|||||||
for _, extract := range extracts {
|
for _, extract := range extracts {
|
||||||
extract.Compile()
|
extract.Compile()
|
||||||
|
|
||||||
ExtractRegexps[extract.Name] = extract.CompiledRegexps
|
ExtractRegexps[extract.Name] = []*parsers.Extractor{extract}
|
||||||
for _, tag := range extract.Tags {
|
for _, tag := range extract.Tags {
|
||||||
if _, ok := ExtractRegexps[tag]; !ok {
|
if _, ok := ExtractRegexps[tag]; !ok {
|
||||||
ExtractRegexps[tag] = extract.CompiledRegexps
|
ExtractRegexps[tag] = []*parsers.Extractor{extract}
|
||||||
} else {
|
} else {
|
||||||
ExtractRegexps[tag] = append(ExtractRegexps[tag], extract.CompiledRegexps...)
|
ExtractRegexps[tag] = append(ExtractRegexps[tag], extract)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -354,6 +341,7 @@ func CRC16Hash(data []byte) uint16 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func UniqueHash(bl *Baseline) uint16 {
|
func UniqueHash(bl *Baseline) uint16 {
|
||||||
// 由host+状态码+重定向url+content-type+title+length舍去个位与十位组成的hash, 没有body length, 因为可能存在随机值
|
// 由host+状态码+重定向url+content-type+title+length舍去个位与十位组成的hash
|
||||||
|
// body length可能会导致一些误报, 目前没有更好的解决办法
|
||||||
return CRC16Hash([]byte(bl.Host + strconv.Itoa(bl.Status) + bl.RedirectURL + bl.ContentType + bl.Title + strconv.Itoa(bl.BodyLength/100*100)))
|
return CRC16Hash([]byte(bl.Host + strconv.Itoa(bl.Status) + bl.RedirectURL + bl.ContentType + bl.Title + strconv.Itoa(bl.BodyLength/100*100)))
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user