mirror of
https://github.com/chainreactors/spray.git
synced 2025-09-15 19:50:18 +00:00
优化相似度判断, 并添加了distance/sim字段用来获取相似度距离.
优化fuzzybaseline的逻辑, 移动到处理线程中. 优化expr的性能. 修复--fuzzy没启用也会生效的bug.
This commit is contained in:
parent
797ac74af3
commit
a94f9e3dc7
@ -27,6 +27,7 @@ var (
|
|||||||
maxRedirect = 3
|
maxRedirect = 3
|
||||||
maxCrawl = 3
|
maxCrawl = 3
|
||||||
maxRecursion = 0
|
maxRecursion = 0
|
||||||
|
nilBaseline = &pkg.Baseline{}
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
||||||
@ -54,6 +55,10 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
|||||||
// 挂起一个异步的处理结果线程, 不干扰主线程的请求并发
|
// 挂起一个异步的处理结果线程, 不干扰主线程的请求并发
|
||||||
go func() {
|
go func() {
|
||||||
for bl := range pool.tempCh {
|
for bl := range pool.tempCh {
|
||||||
|
if bl.IsValid {
|
||||||
|
pool.addFuzzyBaseline(bl)
|
||||||
|
}
|
||||||
|
|
||||||
if _, ok := pool.Statistor.Counts[bl.Status]; ok {
|
if _, ok := pool.Statistor.Counts[bl.Status]; ok {
|
||||||
pool.Statistor.Counts[bl.Status]++
|
pool.Statistor.Counts[bl.Status]++
|
||||||
} else {
|
} else {
|
||||||
@ -71,7 +76,7 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
|||||||
if bl, ok := pool.baselines[status]; ok {
|
if bl, ok := pool.baselines[status]; ok {
|
||||||
params["bl"+strconv.Itoa(status)] = bl
|
params["bl"+strconv.Itoa(status)] = bl
|
||||||
} else {
|
} else {
|
||||||
params["bl"+strconv.Itoa(status)] = &pkg.Baseline{}
|
params["bl"+strconv.Itoa(status)] = nilBaseline
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -257,7 +262,7 @@ Loop:
|
|||||||
if pool.Mod == pkg.HostSpray {
|
if pool.Mod == pkg.HostSpray {
|
||||||
pool.reqPool.Invoke(newUnit(pkg.RandHost(), source))
|
pool.reqPool.Invoke(newUnit(pkg.RandHost(), source))
|
||||||
} else if pool.Mod == pkg.PathSpray {
|
} else if pool.Mod == pkg.PathSpray {
|
||||||
pool.reqPool.Invoke(newUnit(pkg.RandPath(), source))
|
pool.reqPool.Invoke(newUnit(safePath(pool.BaseURL, pkg.RandPath()), source))
|
||||||
}
|
}
|
||||||
case unit, ok := <-pool.additionCh:
|
case unit, ok := <-pool.additionCh:
|
||||||
if !ok {
|
if !ok {
|
||||||
@ -323,7 +328,6 @@ func (pool *Pool) Invoke(v interface{}) {
|
|||||||
pool.wg.Add(1)
|
pool.wg.Add(1)
|
||||||
pool.doRedirect(bl, unit.depth)
|
pool.doRedirect(bl, unit.depth)
|
||||||
}
|
}
|
||||||
pool.addFuzzyBaseline(bl)
|
|
||||||
} else {
|
} else {
|
||||||
bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error())
|
bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error())
|
||||||
}
|
}
|
||||||
@ -643,11 +647,9 @@ func (pool *Pool) addAddition(u *Unit) {
|
|||||||
func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
|
func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
|
||||||
if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) {
|
if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) {
|
||||||
bl.Collect()
|
bl.Collect()
|
||||||
pool.locker.Lock()
|
|
||||||
pool.wg.Add(1)
|
pool.wg.Add(1)
|
||||||
pool.doCrawl(bl)
|
pool.doCrawl(bl)
|
||||||
pool.baselines[bl.Status] = bl
|
pool.baselines[bl.Status] = bl
|
||||||
pool.locker.Unlock()
|
|
||||||
logs.Log.Infof("[baseline.%dinit] %s", bl.Status, bl.Format([]string{"status", "length", "spend", "title", "frame", "redirect"}))
|
logs.Log.Infof("[baseline.%dinit] %s", bl.Status, bl.Format([]string{"status", "length", "spend", "title", "frame", "redirect"}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -380,7 +380,6 @@ func (r *Runner) Outputting() {
|
|||||||
} else {
|
} else {
|
||||||
logs.Log.Debug(bl.String())
|
logs.Log.Debug(bl.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -410,8 +409,10 @@ func (r *Runner) Outputting() {
|
|||||||
if !ok {
|
if !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if r.Fuzzy {
|
||||||
fuzzySaveFunc(bl)
|
fuzzySaveFunc(bl)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -135,6 +135,7 @@ type Baseline struct {
|
|||||||
IsFuzzy bool `json:"fuzzy"`
|
IsFuzzy bool `json:"fuzzy"`
|
||||||
Source int `json:"source"`
|
Source int `json:"source"`
|
||||||
ReqDepth int `json:"depth"`
|
ReqDepth int `json:"depth"`
|
||||||
|
Distance uint8 `json:"distance"`
|
||||||
Recu bool `json:"-"`
|
Recu bool `json:"-"`
|
||||||
RecuDepth int `json:"-"`
|
RecuDepth int `json:"-"`
|
||||||
URLs []string `json:"-"`
|
URLs []string `json:"-"`
|
||||||
@ -233,10 +234,11 @@ func (bl *Baseline) Compare(other *Baseline) int {
|
|||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
var Distance uint8 = 5
|
var Distance uint8 = 5 // 数字越小越相似, 数字为0则为完全一致.
|
||||||
|
|
||||||
func (bl *Baseline) FuzzyCompare(other *Baseline) bool {
|
func (bl *Baseline) FuzzyCompare(other *Baseline) bool {
|
||||||
if parsers.SimhashCompare(other.BodySimhash, bl.BodySimhash) < Distance {
|
// 这里使用rawsimhash, 是为了保证一定数量的字符串, 否则超短的body会导致simhash偏差值较大
|
||||||
|
if other.Distance = parsers.SimhashCompare(other.RawSimhash, bl.RawSimhash); other.Distance < Distance {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@ -278,6 +280,8 @@ func (bl *Baseline) Get(key string) string {
|
|||||||
return strconv.Itoa(int(bl.Spended)) + "ms"
|
return strconv.Itoa(int(bl.Spended)) + "ms"
|
||||||
case "length":
|
case "length":
|
||||||
return strconv.Itoa(bl.BodyLength)
|
return strconv.Itoa(bl.BodyLength)
|
||||||
|
case "sim", "distance":
|
||||||
|
return "sim:" + strconv.Itoa(int(bl.Distance))
|
||||||
case "source":
|
case "source":
|
||||||
return GetSourceName(bl.Source)
|
return GetSourceName(bl.Source)
|
||||||
case "extract":
|
case "extract":
|
||||||
@ -366,6 +370,9 @@ func (bl *Baseline) ColorString() string {
|
|||||||
line.WriteString(logs.YellowBold(strconv.Itoa(int(bl.Spended)) + "ms"))
|
line.WriteString(logs.YellowBold(strconv.Itoa(int(bl.Spended)) + "ms"))
|
||||||
line.WriteString(logs.YellowBold(" - " + GetSourceName(bl.Source)))
|
line.WriteString(logs.YellowBold(" - " + GetSourceName(bl.Source)))
|
||||||
line.WriteString(logs.GreenLine(bl.Additional("title")))
|
line.WriteString(logs.GreenLine(bl.Additional("title")))
|
||||||
|
if bl.Distance != 0 {
|
||||||
|
line.WriteString(logs.GreenLine(bl.Additional("sim")))
|
||||||
|
}
|
||||||
line.WriteString(logs.Cyan(bl.Frameworks.String()))
|
line.WriteString(logs.Cyan(bl.Frameworks.String()))
|
||||||
line.WriteString(logs.Cyan(bl.Extracteds.String()))
|
line.WriteString(logs.Cyan(bl.Extracteds.String()))
|
||||||
if bl.RedirectURL != "" {
|
if bl.RedirectURL != "" {
|
||||||
@ -416,6 +423,9 @@ func (bl *Baseline) String() string {
|
|||||||
line.WriteString(" - ")
|
line.WriteString(" - ")
|
||||||
line.WriteString(strconv.Itoa(int(bl.Spended)) + "ms")
|
line.WriteString(strconv.Itoa(int(bl.Spended)) + "ms")
|
||||||
line.WriteString(bl.Additional("title"))
|
line.WriteString(bl.Additional("title"))
|
||||||
|
if bl.Distance != 0 {
|
||||||
|
line.WriteString(logs.GreenLine(bl.Additional("sim")))
|
||||||
|
}
|
||||||
line.WriteString(bl.Frameworks.String())
|
line.WriteString(bl.Frameworks.String())
|
||||||
line.WriteString(bl.Extracteds.String())
|
line.WriteString(bl.Extracteds.String())
|
||||||
if bl.RedirectURL != "" {
|
if bl.RedirectURL != "" {
|
||||||
|
@ -104,9 +104,8 @@ const (
|
|||||||
func RandPath() string {
|
func RandPath() string {
|
||||||
n := 16
|
n := 16
|
||||||
b := make([]byte, n)
|
b := make([]byte, n)
|
||||||
b[0] = byte(0x2f)
|
|
||||||
// A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
|
// A rand.Int63() generates 63 random bits, enough for letterIdMax letters!
|
||||||
for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 1; {
|
for i, cache, remain := n-1, src.Int63(), letterIdMax; i >= 0; {
|
||||||
if remain == 0 {
|
if remain == 0 {
|
||||||
cache, remain = src.Int63(), letterIdMax
|
cache, remain = src.Int63(), letterIdMax
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user