调整爬虫的逻辑, 优化输出颜色

This commit is contained in:
M09Ic 2023-01-03 18:22:13 +08:00
parent 900dca32cb
commit 835d4663dd
2 changed files with 35 additions and 41 deletions

View File

@ -8,7 +8,6 @@ import (
"github.com/chainreactors/parsers"
"github.com/chainreactors/spray/pkg/ihttp"
"net/url"
"path"
"strconv"
"strings"
)
@ -116,53 +115,18 @@ func (bl *Baseline) CollectURL() {
for _, reg := range JSRegexps {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
var filter bool
parsed, err := url.Parse(u[1])
if err != nil {
filter = true
} else {
for _, scoop := range BadScoop {
if scoop == parsed.Host {
filter = true
break
}
}
if !filterJs(u[1]) {
bl.URLs = append(bl.URLs, u[1])
}
if filter {
continue
}
bl.URLs = append(bl.URLs, u[1])
}
}
for _, reg := range URLRegexps {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
var filter bool
parsed, err := url.Parse(u[1])
if err != nil {
filter = true
} else {
ext := path.Ext(parsed.Path)
for _, e := range BadExt {
if e == ext {
filter = true
break
}
}
for _, scoop := range BadScoop {
if scoop == parsed.Host {
filter = true
break
}
}
if !filterUrl(u[1]) {
bl.URLs = append(bl.URLs, u[1])
}
if filter {
continue
}
bl.URLs = append(bl.URLs, u[1])
}
}
}
@ -334,7 +298,7 @@ func (bl *Baseline) ColorString() string {
line.WriteString("\n")
}
for _, u := range bl.URLs {
line.WriteString("\t" + u + "\n")
line.WriteString("\t" + logs.PurpleLine(u) + "\n")
}
return line.String()
}

View File

@ -5,7 +5,9 @@ import (
"github.com/chainreactors/gogo/v2/pkg/utils"
"github.com/chainreactors/ipcs"
"math/rand"
"net/url"
"os"
"path"
"regexp"
"strings"
"time"
@ -144,6 +146,34 @@ var (
BadScoop = []string{"www.w3.org", "example.com"}
)
// filterJs reports whether u should be discarded. It returns true as soon as
// any entry of BadScoop occurs anywhere inside u (plain substring match, no
// URL parsing), and false when none of them do.
func filterJs(u string) bool {
	for i := 0; i < len(BadScoop); i++ {
		blocked := BadScoop[i]
		if strings.Contains(u, blocked) {
			return true
		}
	}
	return false
}
// filterUrl reports whether u should be discarded. It returns true when:
//   - u cannot be parsed by url.Parse,
//   - the extension of the parsed path equals an entry of BadExt, or
//   - any entry of BadScoop occurs anywhere inside the raw string u.
//
// Otherwise it returns false.
func filterUrl(u string) bool {
	target, parseErr := url.Parse(u)
	if parseErr != nil {
		return true
	}

	// The extension is taken from the path component only, so a query
	// string or fragment does not hide it.
	suffix := path.Ext(target.Path)
	for _, banned := range BadExt {
		if banned == suffix {
			return true
		}
	}

	// Substring match against the raw input rather than the parsed host.
	for i := 0; i < len(BadScoop); i++ {
		if strings.Contains(u, BadScoop[i]) {
			return true
		}
	}
	return false
}
func URLJoin(base, uri string) string {
baseSlash := strings.HasSuffix(base, "/")
uriSlash := strings.HasPrefix(uri, "/")