调整爬虫的正则

This commit is contained in:
M09Ic 2023-01-11 11:12:40 +08:00
parent 4a774718c9
commit 8233dcefd3

View File

@@ -23,14 +23,14 @@ var (
ActivePath []string ActivePath []string
Fingers fingers.Fingers Fingers fingers.Fingers
JSRegexps []*regexp.Regexp = []*regexp.Regexp{ JSRegexps []*regexp.Regexp = []*regexp.Regexp{
regexp.MustCompile(`.(https{0,1}:[^\s',"”><;()|*\[]{2,250}?[^=*\s'><:;|()[]{3}\[]\.js)`), regexp.MustCompile(`.(https{0,1}:[^\s'"”><;()|*\[]{2,250}?[^=*\s'><:;|()[]{3}\[]\.js)`),
regexp.MustCompile(`["']\s{0,6}(/{0,1}[^\s',"”><;()|*:\[]{2,250}?[^=*\s'|"”><^:;()\[]{3}\.\.js)`), regexp.MustCompile(`["']\s{0,6}(/{0,1}[^\s',"”><;()|*:\[]{2,250}?[^=*\s'|"”><^:;()\[]{3}\.\.js)`),
regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}(/{0,1}[^\s^',><;()|*\[]{2,250}?[^=,\s'"”>|<:;*()\[]{3}\.js)`), regexp.MustCompile(`=\s{0,6}["']{0,1}\s{0,6}(/{0,1}[^\s^',><;()|*\[]{2,250}?[^=,\s'"”>|<:;*()\[]{3}\.js)`),
} }
URLRegexps []*regexp.Regexp = []*regexp.Regexp{ URLRegexps []*regexp.Regexp = []*regexp.Regexp{
regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s',"”><;()|*\[]{2,250})`), regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s'"”><;()|*\[]{2,250})`),
regexp.MustCompile(`["']([^\s',"”><.@;()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`), regexp.MustCompile(`["']([^\s',"”><.@;:()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
regexp.MustCompile(`["'](https?:[^\s',"”><;()@|*\[]{2,250}?\.[^\s',"”><;()|*\[]{2,250}?)["']`), regexp.MustCompile(`["'](https?:[^\s'"”><;()@|*\[]{2,250}?\.[^\s',"”><;()|*\[]{2,250}?)["']`),
regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s',"”><;()|*\[]{2,250}?)\s{0,6}["']`), regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s',"”><;()|*\[]{2,250}?)\s{0,6}["']`),
regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'"“><)(]{2,250})`), regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'"“><)(]{2,250})`),
} }