mirror of
https://github.com/chainreactors/spray.git
synced 2025-09-15 11:40:13 +00:00
调整多条爬虫的正则
This commit is contained in:
parent
a4d912ed4d
commit
009ca464bd
17
pkg/utils.go
17
pkg/utils.go
@ -24,16 +24,18 @@ var (
|
|||||||
Fingers fingers.Fingers
|
Fingers fingers.Fingers
|
||||||
JSRegexps []*regexp.Regexp = []*regexp.Regexp{
|
JSRegexps []*regexp.Regexp = []*regexp.Regexp{
|
||||||
regexp.MustCompile(`.(https{0,1}:[^\s'’"”><;()|*\[]{2,250}?[^=*\s'’><:;|()[]{3}\[]\.js)`),
|
regexp.MustCompile(`.(https{0,1}:[^\s'’"”><;()|*\[]{2,250}?[^=*\s'’><:;|()[]{3}\[]\.js)`),
|
||||||
regexp.MustCompile(`["']\s{0,6}(/{0,1}[^\s',’"”><;()|*:\[]{2,250}?[^=*\s'’|"”><^:;()\[]{3}\.\.js)`),
|
regexp.MustCompile(`["']\s{0,6}([^\s',’"”><;()|*:\[]{2,250}?[^=*\s'’|"”><^:;()\[]{3}\.js)`),
|
||||||
regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}(/{0,1}[^\s^',’><;()|*\[]{2,250}?[^=,\s'’"”>|<:;*()\[]{3}\.js)`),
|
regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}([^\s^',’><;()|*\[]{2,250}?[^=,\s'’"”>|<:;*()\[]{3}\.js)`),
|
||||||
}
|
}
|
||||||
URLRegexps []*regexp.Regexp = []*regexp.Regexp{
|
URLRegexps []*regexp.Regexp = []*regexp.Regexp{
|
||||||
regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s'’"”><;()|*\[]{2,250})`),
|
regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s'"><;()|*\[]{2,250})`),
|
||||||
regexp.MustCompile(`["']([^\s',’"”><.@;:()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
|
regexp.MustCompile(`["']([^\s',’"”><.@;:()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
|
||||||
regexp.MustCompile(`["'](https?:[^\s'’"”><;()@|*\[]{2,250}?\.[^\s',’"”><;()|*\[]{2,250}?)["']`),
|
regexp.MustCompile(`["'](https?:[^\s'"><;()@|*\[]{2,250}?\.[^\s',’"”><;()|*\[]{2,250}?)["']`),
|
||||||
regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s',’"”><;()|*\[]{2,250}?)\s{0,6}["']`),
|
regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s'",><;()|*\[]{2,250}?)\s{0,6}["']`),
|
||||||
regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',’"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'’"“><)(]{2,250})`),
|
regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',’"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'’"“><)(]{2,250})`),
|
||||||
}
|
}
|
||||||
|
BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4"}
|
||||||
|
BadURL = []string{";", "}", "\\n", "webpack://", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path"}
|
||||||
|
|
||||||
ContentTypeMap = map[string]string{
|
ContentTypeMap = map[string]string{
|
||||||
"application/javascript": "js",
|
"application/javascript": "js",
|
||||||
@ -225,11 +227,6 @@ func FingerDetect(content string) Frameworks {
|
|||||||
return frames
|
return frames
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
|
||||||
BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4"}
|
|
||||||
BadURL = []string{";", "}", "webpack://", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path"}
|
|
||||||
)
|
|
||||||
|
|
||||||
func filterJs(u string) bool {
|
func filterJs(u string) bool {
|
||||||
if commonFilter(u) {
|
if commonFilter(u) {
|
||||||
return true
|
return true
|
||||||
|
Loading…
x
Reference in New Issue
Block a user