mirror of
https://github.com/chainreactors/spray.git
synced 2025-09-15 11:40:13 +00:00
调整爬虫的正则
This commit is contained in:
parent
4a774718c9
commit
8233dcefd3
@ -23,14 +23,14 @@ var (
|
||||
ActivePath []string
|
||||
Fingers fingers.Fingers
|
||||
JSRegexps []*regexp.Regexp = []*regexp.Regexp{
|
||||
regexp.MustCompile(`.(https{0,1}:[^\s',’"”><;()|*\[]{2,250}?[^=*\s'’><:;|()[]{3}\[]\.js)`),
|
||||
regexp.MustCompile(`.(https{0,1}:[^\s'’"”><;()|*\[]{2,250}?[^=*\s'’><:;|()[]{3}\[]\.js)`),
|
||||
regexp.MustCompile(`["']\s{0,6}(/{0,1}[^\s',’"”><;()|*:\[]{2,250}?[^=*\s'’|"”><^:;()\[]{3}\.\.js)`),
|
||||
regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}(/{0,1}[^\s^',’><;()|*\[]{2,250}?[^=,\s'’"”>|<:;*()\[]{3}\.js)`),
|
||||
}
|
||||
URLRegexps []*regexp.Regexp = []*regexp.Regexp{
|
||||
regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s',’"”><;()|*\[]{2,250})`),
|
||||
regexp.MustCompile(`["']([^\s',’"”><.@;()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
|
||||
regexp.MustCompile(`["'](https?:[^\s',’"”><;()@|*\[]{2,250}?\.[^\s',’"”><;()|*\[]{2,250}?)["']`),
|
||||
regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s'’"”><;()|*\[]{2,250})`),
|
||||
regexp.MustCompile(`["']([^\s',’"”><.@;:()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
|
||||
regexp.MustCompile(`["'](https?:[^\s'’"”><;()@|*\[]{2,250}?\.[^\s',’"”><;()|*\[]{2,250}?)["']`),
|
||||
regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s',’"”><;()|*\[]{2,250}?)\s{0,6}["']`),
|
||||
regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',’"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'’"“><)(]{2,250})`),
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user