spray/pkg/baseline.go

415 lines
9.8 KiB
Go
Raw Normal View History

package pkg
2022-09-08 15:57:17 +08:00
import (
"bytes"
2022-09-08 15:57:17 +08:00
"encoding/json"
2023-01-04 11:18:03 +08:00
"github.com/chainreactors/gogo/v2/pkg/fingers"
"github.com/chainreactors/gogo/v2/pkg/utils"
"github.com/chainreactors/logs"
2022-09-19 14:42:29 +08:00
"github.com/chainreactors/parsers"
"github.com/chainreactors/spray/pkg/ihttp"
2022-11-09 16:05:17 +08:00
"net/url"
"strconv"
2022-09-08 15:57:17 +08:00
"strings"
)
2023-01-04 13:52:03 +08:00
// GetSourceName maps a numeric source id to its display label.
//
// Ids 1 through 10 have fixed labels; every other value yields "unknown".
func GetSourceName(s int) string {
	switch s {
	case 1:
		return "check"
	case 2:
		return "index"
	case 3:
		return "random"
	case 4:
		return "redirect"
	case 5:
		return "crawl"
	case 6:
		return "active"
	case 7:
		return "word"
	case 8:
		return "waf"
	case 9:
		return "rule"
	case 10:
		return "bak"
	default:
		return "unknown"
	}
}
func NewBaseline(u, host string, resp *ihttp.Response) *Baseline {
bl := &Baseline{
2022-11-29 20:50:00 +08:00
UrlString: u,
Status: resp.StatusCode(),
IsValid: true,
2022-09-08 15:57:17 +08:00
}
2022-11-09 16:05:17 +08:00
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
2022-11-29 20:50:00 +08:00
bl.Url = uu
2022-11-09 16:05:17 +08:00
}
bl.Dir = bl.IsDir()
2022-10-27 23:40:15 +08:00
if resp.ClientType == ihttp.STANDARD {
bl.Host = host
}
body := resp.Body()
bl.Body = make([]byte, len(body))
copy(bl.Body, body)
bl.BodyLength = resp.ContentLength()
bl.Header = resp.Header()
bl.HeaderLength = len(bl.Header)
bl.RedirectURL = resp.GetHeader("Location")
bl.Raw = append(bl.Header, bl.Body...)
2022-09-23 01:47:24 +08:00
return bl
2022-09-08 15:57:17 +08:00
}
func NewInvalidBaseline(u, host string, resp *ihttp.Response, reason string) *Baseline {
bl := &Baseline{
2022-11-29 20:50:00 +08:00
UrlString: u,
Status: resp.StatusCode(),
IsValid: false,
Reason: reason,
2022-09-08 15:57:17 +08:00
}
2022-11-09 16:05:17 +08:00
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
2022-11-29 20:50:00 +08:00
bl.Url = uu
2022-11-09 16:05:17 +08:00
}
bl.Dir = bl.IsDir()
2022-11-09 16:05:17 +08:00
2022-10-27 23:40:15 +08:00
if resp.ClientType == ihttp.STANDARD {
bl.Host = host
}
bl.Body = resp.Body()
bl.BodyLength = resp.ContentLength()
2022-12-02 15:21:17 +08:00
bl.Header = resp.Header()
bl.HeaderLength = len(bl.Header)
bl.RedirectURL = string(resp.GetHeader("Location"))
2022-09-08 15:57:17 +08:00
return bl
}
type Baseline struct {
Number int `json:"number"`
Url *url.URL `json:"-"`
UrlString string `json:"url"`
Path string `json:"path"`
Dir bool `json:"isdir"`
Host string `json:"host"`
Body []byte `json:"-"`
BodyLength int `json:"body_length"`
ExceedLength bool `json:"-"`
Header []byte `json:"-"`
Raw []byte `json:"-"`
HeaderLength int `json:"header_length"`
RedirectURL string `json:"redirect_url,omitempty"`
FrontURL string `json:"front_url,omitempty"`
Status int `json:"status"`
Spended int64 `json:"spend"` // 耗时, 毫秒
Title string `json:"title"`
Frameworks Frameworks `json:"frameworks"`
Extracteds Extracteds `json:"extracts"`
ErrString string `json:"error"`
Reason string `json:"reason"`
IsValid bool `json:"valid"`
IsFuzzy bool `json:"fuzzy"`
Source int `json:"source"`
ReqDepth int `json:"depth"`
Recu bool `json:"-"`
RecuDepth int `json:"-"`
URLs []string `json:"-"`
*parsers.Hashes `json:"hashes"`
2022-09-08 15:57:17 +08:00
}
2022-12-11 00:24:28 +08:00
// IsDir reports whether the baseline's Path ends with a slash.
func (bl *Baseline) IsDir() bool {
	return strings.HasSuffix(bl.Path, "/")
}
// Collect gathers the in-depth details of the response: the title (only when
// the body is non-empty), the hashes of Raw, and the extractor and fingerprint
// results computed over Raw.
func (bl *Baseline) Collect() {
	if len(bl.Body) != 0 {
		title := parsers.MatchTitle(string(bl.Body))
		bl.Title = utils.AsciiEncode(title)
	}

	bl.Hashes = parsers.NewHashes(bl.Raw)

	raw := string(bl.Raw)
	bl.Extracteds = Extractors.Extract(raw)
	bl.Frameworks = FingerDetect(raw)
}
2023-01-03 17:09:32 +08:00
func (bl *Baseline) CollectURL() {
if len(bl.Body) == 0 {
return
}
for _, reg := range JSRegexps {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
if !filterJs(u[1]) {
bl.URLs = append(bl.URLs, u[1])
2023-01-03 17:09:32 +08:00
}
}
}
for _, reg := range URLRegexps {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
if !filterUrl(u[1]) {
bl.URLs = append(bl.URLs, u[1])
2023-01-03 17:09:32 +08:00
}
}
}
2023-01-06 03:31:28 +08:00
2023-01-04 11:18:03 +08:00
if bl.URLs != nil {
bl.Extracteds = append(bl.Extracteds, &fingers.Extracted{
Name: "crawl",
ExtractResult: bl.URLs,
})
}
2023-01-03 17:09:32 +08:00
}
// Compare
// if totally equal return 1
// if maybe equal return 0
// not equal return -1
func (bl *Baseline) Compare(other *Baseline) int {
if other.RedirectURL != "" && bl.RedirectURL == other.RedirectURL {
2022-11-09 16:05:17 +08:00
// 如果重定向url不为空, 且与base不相同, 则说明不是同一个页面
return 1
2022-09-08 15:57:17 +08:00
}
if bl.BodyLength == other.BodyLength {
// 如果body length相等且md5相等, 则说明是同一个页面
if bytes.Equal(bl.Body, other.Body) {
2022-11-09 16:05:17 +08:00
// 如果length相等, md5也相等, 则判断为全同
return 1
} else {
// 如果长度相等, 但是md5不相等, 可能是存在csrftoken之类的随机值
return 0
}
} else if i := bl.BodyLength - other.BodyLength; (i < 16 && i > 0) || (i > -16 && i < 0) {
// 如果body length绝对值小于16, 则可能是存在csrftoken之类的随机值, 需要模糊判断
return 0
2022-11-09 16:05:17 +08:00
} else {
// 如果body length绝对值大于16, 则认为大概率存在较大差异
2022-11-09 16:05:17 +08:00
if strings.Contains(string(other.Body), other.Path) {
// 如果包含路径本身, 可能是路径自身的随机值影响结果
2022-11-09 16:05:17 +08:00
return 0
} else {
// 如果不包含路径本身, 则认为是不同页面
2022-11-09 16:05:17 +08:00
return -1
}
2022-09-08 15:57:17 +08:00
}
2022-11-09 16:05:17 +08:00
return -1
2022-09-08 15:57:17 +08:00
}
// Distance is the threshold used by FuzzyCompare: two baselines count as
// similar when parsers.SimhashCompare of their body simhashes is strictly
// below this value.
var Distance uint8 = 5
func (bl *Baseline) FuzzyCompare(other *Baseline) bool {
if parsers.SimhashCompare(other.BodySimhash, bl.BodySimhash) < Distance {
return true
}
2022-09-08 15:57:17 +08:00
return false
}
func (bl *Baseline) Get(key string) string {
switch key {
case "url":
2022-11-29 20:50:00 +08:00
return bl.UrlString
case "host":
return bl.Host
case "title":
return bl.Title
case "redirect":
return bl.RedirectURL
case "md5":
if bl.Hashes != nil {
return bl.Hashes.BodyMd5
} else {
return ""
}
case "simhash":
if bl.Hashes != nil {
return bl.Hashes.BodySimhash
} else {
return ""
}
case "mmh3":
if bl.Hashes != nil {
return bl.Hashes.BodySimhash
} else {
return ""
}
case "stat", "status":
return strconv.Itoa(bl.Status)
case "spend":
2023-01-06 03:31:28 +08:00
return strconv.Itoa(int(bl.Spended)) + "ms"
case "length":
return strconv.Itoa(bl.BodyLength)
2023-01-04 13:52:03 +08:00
case "source":
return GetSourceName(bl.Source)
2022-12-02 15:21:17 +08:00
case "extract":
return bl.Extracteds.String()
case "frame", "framework":
2022-12-02 15:21:17 +08:00
return bl.Frameworks.String()
2023-01-03 17:09:32 +08:00
case "full":
return bl.String()
default:
return ""
}
}
func (bl *Baseline) Additional(key string) string {
2023-01-06 03:31:28 +08:00
if key == "frame" || key == "extract" {
return bl.Get(key)
} else if v := bl.Get(key); v != "" {
2022-12-02 18:05:33 +08:00
return " [" + v + "]"
} else {
2022-12-02 18:05:33 +08:00
return ""
}
}
2022-10-27 23:40:15 +08:00
func (bl *Baseline) Format(probes []string) string {
var line strings.Builder
2022-11-29 21:55:27 +08:00
if bl.FrontURL != "" {
line.WriteString("\t")
line.WriteString(bl.FrontURL)
line.WriteString(" -> ")
}
2022-11-29 20:50:00 +08:00
line.WriteString(bl.UrlString)
if bl.Host != "" {
line.WriteString(" (" + bl.Host + ")")
}
if bl.Reason != "" {
line.WriteString(" ,")
line.WriteString(bl.Reason)
}
if bl.ErrString != "" {
line.WriteString(" ,err: ")
line.WriteString(bl.ErrString)
return line.String()
}
for _, p := range probes {
line.WriteString(" ")
line.WriteString(bl.Additional(p))
}
return line.String()
}
func (bl *Baseline) ColorString() string {
var line strings.Builder
if bl.FrontURL != "" {
line.WriteString("\t")
line.WriteString(logs.CyanLine(bl.FrontURL))
line.WriteString(" --> ")
}
line.WriteString(logs.GreenLine(bl.UrlString))
if bl.Host != "" {
line.WriteString(" (" + bl.Host + ")")
}
if bl.Reason != "" {
line.WriteString(" [reason: ")
line.WriteString(logs.YellowBold(bl.Reason))
line.WriteString("]")
}
if bl.ErrString != "" {
line.WriteString(" [err: ")
line.WriteString(logs.RedBold(bl.ErrString))
line.WriteString("]")
return line.String()
}
line.WriteString(" - ")
line.WriteString(logs.GreenBold(strconv.Itoa(bl.Status)))
line.WriteString(" - ")
2023-01-03 17:09:32 +08:00
line.WriteString(logs.YellowBold(strconv.Itoa(bl.BodyLength)))
if bl.ExceedLength {
line.WriteString(logs.Red("(exceed)"))
}
line.WriteString(" - ")
2023-01-03 17:09:32 +08:00
line.WriteString(logs.YellowBold(strconv.Itoa(int(bl.Spended)) + "ms"))
line.WriteString(logs.GreenLine(bl.Additional("title")))
2023-01-06 03:31:28 +08:00
line.WriteString(logs.GreenLine(bl.Additional("source")))
line.WriteString(logs.Cyan(bl.Frameworks.String()))
line.WriteString(logs.Cyan(bl.Extracteds.String()))
if bl.RedirectURL != "" {
line.WriteString(" --> ")
line.WriteString(logs.CyanLine(bl.RedirectURL))
line.WriteString(" ")
}
2023-01-04 11:07:18 +08:00
if len(bl.Extracteds) > 0 {
for _, e := range bl.Extracteds {
2023-01-04 11:18:03 +08:00
line.WriteString("\n " + e.Name + ": \n\t")
2023-01-04 11:07:18 +08:00
line.WriteString(logs.GreenLine(strings.Join(e.ExtractResult, "\n\t")))
}
2023-01-03 17:09:32 +08:00
}
return line.String()
}
func (bl *Baseline) String() string {
2022-09-20 18:09:06 +08:00
var line strings.Builder
2022-11-29 21:55:27 +08:00
if bl.FrontURL != "" {
line.WriteString("\t")
line.WriteString(bl.FrontURL)
line.WriteString(" --> ")
}
2022-11-29 20:50:00 +08:00
line.WriteString(bl.UrlString)
2022-10-27 23:40:15 +08:00
if bl.Host != "" {
line.WriteString(" (" + bl.Host + ")")
}
if bl.Reason != "" {
line.WriteString(" [reason: ")
line.WriteString(bl.Reason)
line.WriteString("]")
}
if bl.ErrString != "" {
line.WriteString(" [err: ")
line.WriteString(bl.ErrString)
line.WriteString("]")
return line.String()
}
line.WriteString(" - ")
line.WriteString(strconv.Itoa(bl.Status))
line.WriteString(" - ")
line.WriteString(strconv.Itoa(bl.BodyLength))
if bl.ExceedLength {
line.WriteString("(exceed)")
}
line.WriteString(" - ")
line.WriteString(strconv.Itoa(int(bl.Spended)) + "ms")
2022-11-29 21:55:27 +08:00
line.WriteString(bl.Additional("title"))
2022-12-02 15:21:17 +08:00
line.WriteString(bl.Frameworks.String())
line.WriteString(bl.Extracteds.String())
2022-09-20 18:09:06 +08:00
if bl.RedirectURL != "" {
2022-11-29 21:55:27 +08:00
line.WriteString(" --> ")
2022-09-20 18:09:06 +08:00
line.WriteString(bl.RedirectURL)
line.WriteString(" ")
2022-09-20 18:09:06 +08:00
}
2023-01-04 11:07:18 +08:00
if len(bl.Extracteds) > 0 {
for _, e := range bl.Extracteds {
2023-01-04 11:18:03 +08:00
line.WriteString("\n " + e.Name + ": \n\t")
2023-01-04 11:07:18 +08:00
line.WriteString(strings.Join(e.ExtractResult, "\n\t"))
}
}
2022-09-20 18:09:06 +08:00
return line.String()
2022-09-08 15:57:17 +08:00
}
func (bl *Baseline) Jsonify() string {
2022-09-08 15:57:17 +08:00
bs, err := json.Marshal(bl)
if err != nil {
return ""
}
return string(bs)
}