From 599118284c66bd38c3d1b790aec001b008440b03 Mon Sep 17 00:00:00 2001 From: M09Ic Date: Tue, 10 Jan 2023 00:58:16 +0800 Subject: [PATCH] =?UTF-8?q?=E8=BF=9B=E4=B8=80=E6=AD=A5=E4=BC=98=E5=8C=96cr?= =?UTF-8?q?awl=E7=9A=84=E6=AD=A3=E5=88=99=E4=B8=8E=E7=89=B9=E6=AE=8A?= =?UTF-8?q?=E6=83=85=E5=86=B5=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/pool.go | 9 +++++---- pkg/baseline.go | 35 ++++------------------------------- pkg/types.go | 29 +++++++++++++++++++++++++++++ pkg/utils.go | 35 +++++++++++++++++++++++------------ 4 files changed, 61 insertions(+), 47 deletions(-) diff --git a/internal/pool.go b/internal/pool.go index f709533..2953a96 100644 --- a/internal/pool.go +++ b/internal/pool.go @@ -506,7 +506,7 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) { if err != nil { continue } - if parsed.Host != bl.Url.Host { + if parsed.Host != bl.Url.Host || len(parsed.Path) <= 1 { continue } u = parsed.Path @@ -534,18 +534,18 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) { if err != nil { continue } - if parsed.Host != bl.Url.Host { + if parsed.Host != bl.Url.Host || len(parsed.Path) <= 1 { continue } + u = parsed.Path } + pool.locker.Lock() if _, ok := pool.urls[u]; ok { pool.urls[u]++ } else { // 通过map去重, 只有新的url才会进入到该逻辑 - pool.locker.Lock() pool.urls[u] = 1 - pool.locker.Unlock() if bl.ReqDepth < maxCrawl { pool.wg.Add(1) pool.addAddition(&Unit{ @@ -555,6 +555,7 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) { }) } } + pool.locker.Unlock() } }() diff --git a/pkg/baseline.go b/pkg/baseline.go index 272eb51..35083bd 100644 --- a/pkg/baseline.go +++ b/pkg/baseline.go @@ -13,35 +13,6 @@ import ( "strings" ) -func GetSourceName(s int) string { - switch s { - case 1: - return "check" - case 2: - return "random" - case 3: - return "index" - case 4: - return "redirect" - case 5: - return "crawl" - case 6: - return "active" - case 7: - return "word" - case 8: - return "waf" - case 9: - return "rule" - case 10: - return "bak" - case 11: - return "common" - default: - return "unknown" - } -} - func NewBaseline(u, host string, resp *ihttp.Response) *Baseline { bl := &Baseline{ UrlString: u, @@ -176,8 +147,9 @@ func (bl *Baseline) CollectURL() { for _, reg := range JSRegexps { urls := reg.FindAllStringSubmatch(string(bl.Body), -1) for _, u := range urls { + u[1] = formatURL(u[1]) if !filterJs(u[1]) { - bl.URLs = append(bl.URLs, formatURL(u[1])) + bl.URLs = append(bl.URLs, u[1]) } } } @@ -185,8 +157,9 @@ func (bl *Baseline) CollectURL() { for _, reg := range URLRegexps { urls := reg.FindAllStringSubmatch(string(bl.Body), -1) for _, u := range urls { + u[1] = formatURL(u[1]) if !filterUrl(u[1]) { - bl.URLs = append(bl.URLs, formatURL(u[1])) + bl.URLs = append(bl.URLs, u[1]) } } } diff --git a/pkg/types.go b/pkg/types.go index 396f26b..31d3994 100644 --- a/pkg/types.go +++ b/pkg/types.go @@ -27,3 +27,32 @@ func (es Extracteds) String() string { } var Extractors = make(fingers.Extractors) + +func GetSourceName(s int) string { + switch s { + case 1: + return "check" + case 2: + return "random" + case 3: + return "index" + case 4: + return "redirect" + case 5: + return "crawl" + case 6: + return "active" + case 7: + return "word" + case 8: + return "waf" + case 9: + return "rule" + case 10: + return "bak" + case 11: + return "common" + default: + return "unknown" + } +} diff --git a/pkg/utils.go b/pkg/utils.go index 361e9aa..b3a396c 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -23,16 +23,17 @@ var ( ActivePath []string Fingers fingers.Fingers JSRegexps []*regexp.Regexp = []*regexp.Regexp{ - regexp.MustCompile(`.(https{0,1}:[^\s^'^,^’^"^”^>^<^;^(^)^|^*^\[]{2,250}?[^=^*^\s^'^’^"^”^>^<^:^;^*^|^(^)^\[]{3}[.]js)`), - regexp.MustCompile(`["'‘“]\s{0,6}(/{0,1}[^\s^,^'^’^"^”^|^>^<^:^;^*^(^\)^\[]{2,250}?[^=^*^\s^'^’^|^"^”^>^<^:^;^*^(^)^\[]{3}[.]js)`), - regexp.MustCompile(`=\s{0,6}["'’”]{0,1}\s{0,6}(/{0,1}[^\s^'^,^’^"^”^>^<^;^(^)^|^*^\[]{2,250}?[^=^,^*^\s^'^’^"^”^>^|^<^:^;^*^(^)^\[]{3}[.]js)`), + regexp.MustCompile(`.(https{0,1}:[^\s',’"”><;()|*\[]{2,250}?[^=*\s'’><:;|()[]{3}\[]\.js)`), + regexp.MustCompile(`["']\s{0,6}(/{0,1}[^\s',’"”><;()|*:\[]{2,250}?[^=*\s'’|"”><^:;()\[]{3}\.\.js)`), + regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}(/{0,1}[^\s^',’><;()|*\[]{2,250}?[^=,\s'’"”>|<:;*()\[]{3}\.js)`), } URLRegexps []*regexp.Regexp = []*regexp.Regexp{ - regexp.MustCompile(`["'‘“]\s{0,6}(https{0,1}:[^\s^,^'^’^"^”^>^<^),^(]{2,250}?)\s{0,6}["'‘“]`), - regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s^'^,^’^"^”^>^<^;^(^)^|^*^\[]{2,250})`), - regexp.MustCompile(`["']([\w/]{2,250}?\.\w{2,4}?)["']`), - regexp.MustCompile(`["'‘“]\s{0,6}([#,.]{0,2}/[^\s^'^,^’^"^”^>^<^;^(^)^|^*^\[]{2,250}?)\s{0,6}["'‘“]`), - regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s^'^,^’^"^”^>^<^;^(^)^|^*^\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s^'^’^"^“^>^<^)^(]{2,250})`), + regexp.MustCompile(`["']\s{0,6}(https{0,1}:[^\s,'’"”><)^(]{2,250}?)\s{0,6}["']`), + regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s',’"”><;()|*\[]{2,250})`), + regexp.MustCompile(`["']([^\s',’"”><;()|*\[]{2,250}\.[a-zA-Z]\w{1,3})["']`), + regexp.MustCompile(`["'](https?:[^\s',’"”><;()|*\[]{2,250}?)["']`), + regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s',’"”><;()|*\[]{2,250}?)\s{0,6}["']`), + regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',’"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'’"“><)(]{2,250})`), } ContentTypeMap = map[string]string{ @@ -226,8 +227,8 @@ func FingerDetect(content string) Frameworks { } var ( - BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf"} - BadURL = []string{";", "}", "{", "www.w3.org", "example.com", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*"} + BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4"} + BadURL = []string{";", "}", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*"} ) func filterJs(u string) bool { @@ -259,6 +260,16 @@ func filterUrl(u string) bool { func formatURL(u string) string { // 去掉frag与params, 节约url.parse性能, 防止带参数造成意外的影响 + if strings.Contains(u, "2f") || strings.Contains(u, "2F") { + u = strings.ReplaceAll(u, "\\u002F", "/") + u = strings.ReplaceAll(u, "\\u002f", "/") + u = strings.ReplaceAll(u, "%252F", "/") + u = strings.ReplaceAll(u, "%252f", "/") + u = strings.ReplaceAll(u, "%2f", "/") + u = strings.ReplaceAll(u, "%2F", "/") + } + + u = strings.TrimRight(u, "\\") if i := strings.Index(u, "?"); i != -1 { return u[:i] } @@ -273,8 +284,8 @@ func commonFilter(u string) bool { return true } - for _, scoop := range BadURL { - if strings.Contains(u, scoop) { + for _, bad := range BadURL { + if strings.Contains(u, bad) { return true } }