2022-09-08 15:57:17 +08:00
|
|
|
|
package internal
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
2022-11-10 21:03:07 +08:00
|
|
|
|
"fmt"
|
2022-11-21 20:44:02 +08:00
|
|
|
|
"github.com/antonmedv/expr"
|
|
|
|
|
"github.com/antonmedv/expr/vm"
|
2022-09-20 04:01:38 +08:00
|
|
|
|
"github.com/chainreactors/logs"
|
2023-02-01 18:31:50 +08:00
|
|
|
|
"github.com/chainreactors/parsers"
|
2023-01-28 13:15:49 +08:00
|
|
|
|
"github.com/chainreactors/parsers/iutils"
|
2022-09-08 17:04:41 +08:00
|
|
|
|
"github.com/chainreactors/spray/pkg"
|
2022-10-26 18:28:40 +08:00
|
|
|
|
"github.com/chainreactors/spray/pkg/ihttp"
|
2022-09-15 19:27:07 +08:00
|
|
|
|
"github.com/chainreactors/words"
|
2023-01-06 13:07:59 +08:00
|
|
|
|
"github.com/chainreactors/words/mask"
|
2023-01-05 22:42:07 +08:00
|
|
|
|
"github.com/chainreactors/words/rule"
|
2022-09-08 15:57:17 +08:00
|
|
|
|
"github.com/panjf2000/ants/v2"
|
2022-09-23 01:20:01 +08:00
|
|
|
|
"github.com/valyala/fasthttp"
|
2023-01-12 17:41:44 +08:00
|
|
|
|
"golang.org/x/time/rate"
|
2022-11-29 20:50:00 +08:00
|
|
|
|
"net/url"
|
2023-01-05 22:42:07 +08:00
|
|
|
|
"path"
|
2022-11-29 20:50:00 +08:00
|
|
|
|
"strings"
|
2022-09-08 15:57:17 +08:00
|
|
|
|
"sync"
|
2022-12-11 04:21:42 +08:00
|
|
|
|
"sync/atomic"
|
2022-09-15 19:27:07 +08:00
|
|
|
|
"time"
|
2022-09-08 15:57:17 +08:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
var (
|
2023-02-07 18:42:20 +08:00
|
|
|
|
max = 2147483647
|
|
|
|
|
MaxRedirect = 3
|
|
|
|
|
MaxCrawl = 3
|
|
|
|
|
MaxRecursion = 0
|
|
|
|
|
enableAllFuzzy = false
|
|
|
|
|
nilBaseline = &pkg.Baseline{}
|
2022-09-08 15:57:17 +08:00
|
|
|
|
)
|
2022-11-17 16:27:44 +08:00
|
|
|
|
|
2022-11-10 21:03:07 +08:00
|
|
|
|
func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
2023-01-10 02:04:12 +08:00
|
|
|
|
var u *url.URL
|
|
|
|
|
var err error
|
|
|
|
|
if u, err = url.Parse(config.BaseURL); err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2022-09-19 14:42:29 +08:00
|
|
|
|
pctx, cancel := context.WithCancel(ctx)
|
2022-09-08 15:57:17 +08:00
|
|
|
|
pool := &Pool{
|
2022-09-23 01:39:00 +08:00
|
|
|
|
Config: config,
|
2023-01-12 18:17:53 +08:00
|
|
|
|
base: u.Scheme + "://" + u.Host,
|
2023-01-11 11:12:00 +08:00
|
|
|
|
isDir: strings.HasSuffix(u.Path, "/"),
|
2023-01-10 02:04:12 +08:00
|
|
|
|
url: u,
|
2022-09-23 01:39:00 +08:00
|
|
|
|
ctx: pctx,
|
2022-09-23 01:47:24 +08:00
|
|
|
|
cancel: cancel,
|
2022-10-26 18:28:40 +08:00
|
|
|
|
client: ihttp.NewClient(config.Thread, 2, config.ClientType),
|
2022-11-10 21:03:07 +08:00
|
|
|
|
baselines: make(map[int]*pkg.Baseline),
|
2023-01-11 11:12:00 +08:00
|
|
|
|
urls: make(map[string]struct{}),
|
2023-01-29 18:23:55 +08:00
|
|
|
|
tempCh: make(chan *pkg.Baseline, 100),
|
|
|
|
|
checkCh: make(chan int, 100),
|
2023-01-03 17:09:32 +08:00
|
|
|
|
additionCh: make(chan *Unit, 100),
|
2023-01-16 17:30:54 +08:00
|
|
|
|
closeCh: make(chan struct{}),
|
2023-01-12 17:41:44 +08:00
|
|
|
|
waiter: sync.WaitGroup{},
|
2022-09-23 11:20:41 +08:00
|
|
|
|
initwg: sync.WaitGroup{},
|
2023-01-12 17:41:44 +08:00
|
|
|
|
limiter: rate.NewLimiter(rate.Limit(config.RateLimit), 1),
|
2022-11-10 17:19:05 +08:00
|
|
|
|
failedCount: 1,
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-10 23:44:03 +08:00
|
|
|
|
// 格式化dir, 保证至少有一个"/"
|
2023-01-11 11:12:00 +08:00
|
|
|
|
if strings.HasSuffix(config.BaseURL, "/") {
|
2023-01-10 23:44:03 +08:00
|
|
|
|
pool.dir = pool.url.Path
|
|
|
|
|
} else if pool.url.Path == "" {
|
|
|
|
|
pool.dir = "/"
|
|
|
|
|
} else {
|
|
|
|
|
pool.dir = Dir(pool.url.Path)
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 04:18:21 +08:00
|
|
|
|
p, _ := ants.NewPoolWithFunc(config.Thread, pool.Invoke)
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.reqPool = p
|
2023-01-06 15:06:40 +08:00
|
|
|
|
|
2022-12-11 00:24:28 +08:00
|
|
|
|
// 挂起一个异步的处理结果线程, 不干扰主线程的请求并发
|
2023-02-04 19:44:37 +08:00
|
|
|
|
go pool.Handler()
|
2022-09-08 15:57:17 +08:00
|
|
|
|
return pool, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type Pool struct {
|
|
|
|
|
*pkg.Config
|
2023-01-10 23:44:03 +08:00
|
|
|
|
base string // url的根目录, 在爬虫或者redirect时, 会需要用到根目录进行拼接
|
|
|
|
|
dir string
|
2023-01-11 11:12:00 +08:00
|
|
|
|
isDir bool
|
2023-01-10 02:04:12 +08:00
|
|
|
|
url *url.URL
|
2022-11-21 23:56:27 +08:00
|
|
|
|
Statistor *pkg.Statistor
|
2022-11-10 21:03:07 +08:00
|
|
|
|
client *ihttp.Client
|
2022-12-11 03:52:06 +08:00
|
|
|
|
reqPool *ants.PoolWithFunc
|
2022-11-10 21:03:07 +08:00
|
|
|
|
bar *pkg.Bar
|
|
|
|
|
ctx context.Context
|
|
|
|
|
cancel context.CancelFunc
|
|
|
|
|
tempCh chan *pkg.Baseline // 待处理的baseline
|
2023-01-05 22:42:07 +08:00
|
|
|
|
checkCh chan int // 独立的check管道, 防止与redirect/crawl冲突
|
2023-01-03 17:09:32 +08:00
|
|
|
|
additionCh chan *Unit
|
2023-01-16 17:30:54 +08:00
|
|
|
|
closeCh chan struct{}
|
|
|
|
|
closed bool
|
2023-01-12 19:21:35 +08:00
|
|
|
|
wordOffset int
|
2023-01-29 18:23:55 +08:00
|
|
|
|
failedCount int32
|
2022-12-16 11:56:27 +08:00
|
|
|
|
isFailed bool
|
2022-11-10 21:03:07 +08:00
|
|
|
|
failedBaselines []*pkg.Baseline
|
2022-12-11 00:24:28 +08:00
|
|
|
|
random *pkg.Baseline
|
2022-11-17 05:48:46 +08:00
|
|
|
|
index *pkg.Baseline
|
2022-11-10 21:03:07 +08:00
|
|
|
|
baselines map[int]*pkg.Baseline
|
2023-01-11 11:12:00 +08:00
|
|
|
|
urls map[string]struct{}
|
2022-11-10 15:43:25 +08:00
|
|
|
|
analyzeDone bool
|
|
|
|
|
worder *words.Worder
|
2023-01-12 17:41:44 +08:00
|
|
|
|
limiter *rate.Limiter
|
2022-12-11 00:24:28 +08:00
|
|
|
|
locker sync.Mutex
|
2023-01-12 17:41:44 +08:00
|
|
|
|
waiter sync.WaitGroup
|
2022-11-10 15:43:25 +08:00
|
|
|
|
initwg sync.WaitGroup // 初始化用, 之后改成锁
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) Init() error {
|
2022-12-09 19:30:12 +08:00
|
|
|
|
// 分成两步是为了避免闭包的线程安全问题
|
2023-01-06 11:30:17 +08:00
|
|
|
|
pool.initwg.Add(2)
|
2023-01-10 23:55:03 +08:00
|
|
|
|
pool.reqPool.Invoke(newUnit(pool.url.Path, InitIndexSource))
|
2023-01-10 23:44:03 +08:00
|
|
|
|
pool.reqPool.Invoke(newUnit(pool.safePath(pkg.RandPath()), InitRandomSource))
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.initwg.Wait()
|
|
|
|
|
if pool.index.ErrString != "" {
|
2023-01-12 18:17:53 +08:00
|
|
|
|
logs.Log.Error(pool.index.String())
|
|
|
|
|
return fmt.Errorf(pool.index.ErrString)
|
2022-12-02 18:05:33 +08:00
|
|
|
|
}
|
2023-01-12 16:35:34 +08:00
|
|
|
|
if pool.index.Chunked && pool.ClientType == ihttp.FAST {
|
|
|
|
|
logs.Log.Warn("chunk encoding! buf current client FASTHTTP not support chunk decode")
|
|
|
|
|
}
|
2023-01-06 03:31:28 +08:00
|
|
|
|
logs.Log.Info("[baseline.index] " + pool.index.Format([]string{"status", "length", "spend", "title", "frame", "redirect"}))
|
2022-09-08 15:57:17 +08:00
|
|
|
|
// 检测基本访问能力
|
2022-12-11 03:52:06 +08:00
|
|
|
|
if pool.random.ErrString != "" {
|
2023-01-12 18:17:53 +08:00
|
|
|
|
logs.Log.Error(pool.index.String())
|
|
|
|
|
return fmt.Errorf(pool.index.ErrString)
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
2023-01-06 03:31:28 +08:00
|
|
|
|
logs.Log.Info("[baseline.random] " + pool.random.Format([]string{"status", "length", "spend", "title", "frame", "redirect"}))
|
2022-11-17 05:48:46 +08:00
|
|
|
|
|
2023-01-10 02:04:12 +08:00
|
|
|
|
// 某些网站http会重定向到https, 如果发现随机目录出现这种情况, 则自定将baseurl升级为https
|
|
|
|
|
if pool.url.Scheme == "http" {
|
|
|
|
|
if pool.index.RedirectURL != "" {
|
|
|
|
|
if err := pool.Upgrade(pool.index); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
} else if pool.random.RedirectURL != "" {
|
|
|
|
|
if err := pool.Upgrade(pool.random); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2022-11-29 20:50:00 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-11-17 05:48:46 +08:00
|
|
|
|
|
2022-09-08 15:57:17 +08:00
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) Run(ctx context.Context, offset, limit int) {
|
2022-12-12 17:05:44 +08:00
|
|
|
|
pool.worder.RunWithRules()
|
2023-01-03 17:16:55 +08:00
|
|
|
|
if pool.Active {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-03 17:16:55 +08:00
|
|
|
|
go pool.doActive()
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 00:48:13 +08:00
|
|
|
|
if pool.Bak {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-06 00:48:13 +08:00
|
|
|
|
go pool.doBak()
|
|
|
|
|
}
|
2023-01-06 13:07:59 +08:00
|
|
|
|
|
|
|
|
|
if pool.Common {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-06 13:07:59 +08:00
|
|
|
|
go pool.doCommonFile()
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-16 17:30:54 +08:00
|
|
|
|
var done bool
|
|
|
|
|
go func() {
|
|
|
|
|
for {
|
|
|
|
|
if done {
|
|
|
|
|
pool.waiter.Wait()
|
|
|
|
|
close(pool.closeCh)
|
|
|
|
|
return
|
|
|
|
|
}
|
2023-01-29 18:23:55 +08:00
|
|
|
|
time.Sleep(100 * time.Millisecond)
|
2023-01-06 03:31:28 +08:00
|
|
|
|
}
|
2023-01-16 17:30:54 +08:00
|
|
|
|
}()
|
2023-01-06 13:07:59 +08:00
|
|
|
|
|
2022-09-15 19:27:07 +08:00
|
|
|
|
Loop:
|
|
|
|
|
for {
|
|
|
|
|
select {
|
2023-01-29 18:23:55 +08:00
|
|
|
|
case w, ok := <-pool.worder.C:
|
2022-09-15 19:27:07 +08:00
|
|
|
|
if !ok {
|
2023-01-16 17:30:54 +08:00
|
|
|
|
done = true
|
2023-01-06 03:31:28 +08:00
|
|
|
|
continue
|
2022-09-15 19:27:07 +08:00
|
|
|
|
}
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.Statistor.End++
|
2023-01-12 19:21:35 +08:00
|
|
|
|
pool.wordOffset++
|
2023-01-29 18:23:55 +08:00
|
|
|
|
if pool.wordOffset < offset {
|
2022-11-10 15:48:38 +08:00
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
if pool.Statistor.End > limit {
|
2023-01-16 17:30:54 +08:00
|
|
|
|
done = true
|
2023-01-06 03:31:28 +08:00
|
|
|
|
continue
|
2022-11-10 15:48:38 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-02-06 23:29:11 +08:00
|
|
|
|
//pool.urls[w] = struct{}{}
|
2023-01-29 18:23:55 +08:00
|
|
|
|
pool.reqPool.Invoke(newUnitWithNumber(pool.safePath(w), WordSource, pool.wordOffset)) // 原样的目录拼接, 输入了几个"/"就是几个, 适配java的目录解析
|
2022-12-16 11:56:27 +08:00
|
|
|
|
case source := <-pool.checkCh:
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.Statistor.CheckNumber++
|
2022-12-16 11:56:27 +08:00
|
|
|
|
if pool.Mod == pkg.HostSpray {
|
2023-01-12 19:21:35 +08:00
|
|
|
|
pool.reqPool.Invoke(newUnitWithNumber(pkg.RandHost(), source, pool.wordOffset))
|
2022-12-16 11:56:27 +08:00
|
|
|
|
} else if pool.Mod == pkg.PathSpray {
|
2023-01-12 19:21:35 +08:00
|
|
|
|
pool.reqPool.Invoke(newUnitWithNumber(pool.safePath(pkg.RandPath()), source, pool.wordOffset))
|
2023-01-06 03:31:28 +08:00
|
|
|
|
}
|
|
|
|
|
case unit, ok := <-pool.additionCh:
|
2023-01-16 17:30:54 +08:00
|
|
|
|
if !ok || pool.closed {
|
2023-01-06 03:31:28 +08:00
|
|
|
|
continue
|
2022-12-16 11:56:27 +08:00
|
|
|
|
}
|
2023-01-11 11:12:00 +08:00
|
|
|
|
if _, ok := pool.urls[unit.path]; ok {
|
2023-02-01 18:31:50 +08:00
|
|
|
|
logs.Log.Debugf("[%s] duplicate path: %s, skipped", parsers.GetSpraySourceName(unit.source), pool.base+unit.path)
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Done()
|
2023-01-11 11:12:00 +08:00
|
|
|
|
} else {
|
|
|
|
|
pool.urls[unit.path] = struct{}{}
|
2023-01-12 19:21:35 +08:00
|
|
|
|
unit.number = pool.wordOffset
|
2023-01-11 11:12:00 +08:00
|
|
|
|
pool.reqPool.Invoke(unit)
|
|
|
|
|
}
|
2023-01-16 17:30:54 +08:00
|
|
|
|
case <-pool.closeCh:
|
2023-01-06 03:31:28 +08:00
|
|
|
|
break Loop
|
2022-09-15 19:27:07 +08:00
|
|
|
|
case <-ctx.Done():
|
|
|
|
|
break Loop
|
2022-12-11 03:52:06 +08:00
|
|
|
|
case <-pool.ctx.Done():
|
2022-09-19 14:42:29 +08:00
|
|
|
|
break Loop
|
2022-09-15 19:27:07 +08:00
|
|
|
|
}
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
2023-01-16 17:30:54 +08:00
|
|
|
|
pool.closed = true
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.Close()
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 04:18:21 +08:00
|
|
|
|
func (pool *Pool) Invoke(v interface{}) {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
if pool.RateLimit != 0 {
|
|
|
|
|
pool.limiter.Wait(pool.ctx)
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 04:18:21 +08:00
|
|
|
|
atomic.AddInt32(&pool.Statistor.ReqTotal, 1)
|
|
|
|
|
unit := v.(*Unit)
|
|
|
|
|
req, err := pool.genReq(unit.path)
|
|
|
|
|
if err != nil {
|
|
|
|
|
logs.Log.Error(err.Error())
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
req.SetHeaders(pool.Headers)
|
|
|
|
|
start := time.Now()
|
|
|
|
|
resp, reqerr := pool.client.Do(pool.ctx, req)
|
|
|
|
|
if pool.ClientType == ihttp.FAST {
|
|
|
|
|
defer fasthttp.ReleaseResponse(resp.FastResponse)
|
|
|
|
|
defer fasthttp.ReleaseRequest(req.FastRequest)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// compare与各种错误处理
|
|
|
|
|
var bl *pkg.Baseline
|
|
|
|
|
if reqerr != nil && reqerr != fasthttp.ErrBodyTooLarge {
|
2023-01-29 18:23:55 +08:00
|
|
|
|
atomic.AddInt32(&pool.failedCount, 1)
|
2023-01-06 04:18:21 +08:00
|
|
|
|
atomic.AddInt32(&pool.Statistor.FailedNumber, 1)
|
2023-02-01 18:31:50 +08:00
|
|
|
|
bl = &pkg.Baseline{
|
|
|
|
|
SprayResult: &parsers.SprayResult{
|
|
|
|
|
UrlString: pool.base + unit.path,
|
|
|
|
|
IsValid: false,
|
|
|
|
|
ErrString: reqerr.Error(),
|
|
|
|
|
Reason: ErrRequestFailed.Error(),
|
|
|
|
|
},
|
|
|
|
|
}
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.failedBaselines = append(pool.failedBaselines, bl)
|
|
|
|
|
} else {
|
2023-01-06 13:07:59 +08:00
|
|
|
|
if unit.source <= 3 || unit.source == CrawlSource || unit.source == CommonFileSource {
|
2023-01-10 23:44:03 +08:00
|
|
|
|
// 一些高优先级的source, 将跳过PreCompare
|
|
|
|
|
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
|
|
|
|
|
} else if pool.MatchExpr != nil {
|
|
|
|
|
// 如果自定义了match函数, 则所有数据送入tempch中
|
|
|
|
|
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
|
|
|
|
|
} else if err = pool.PreCompare(resp); err == nil {
|
|
|
|
|
// 通过预对比跳过一些无用数据, 减少性能消耗
|
2023-01-06 04:18:21 +08:00
|
|
|
|
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
|
|
|
|
|
} else {
|
2023-01-10 23:44:03 +08:00
|
|
|
|
bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error())
|
2023-01-06 04:18:21 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-10 23:44:03 +08:00
|
|
|
|
// 手动处理重定向
|
2023-01-11 12:07:07 +08:00
|
|
|
|
if bl.IsValid && unit.source != CheckSource && bl.RedirectURL != "" {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-10 23:44:03 +08:00
|
|
|
|
pool.doRedirect(bl, unit.depth)
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-09 22:23:34 +08:00
|
|
|
|
if ihttp.DefaultMaxBodySize != 0 && bl.BodyLength > ihttp.DefaultMaxBodySize {
|
2023-01-06 04:18:21 +08:00
|
|
|
|
bl.ExceedLength = true
|
|
|
|
|
}
|
2023-01-09 22:23:34 +08:00
|
|
|
|
bl.Source = unit.source
|
2023-01-06 04:18:21 +08:00
|
|
|
|
bl.ReqDepth = unit.depth
|
2023-01-12 19:21:35 +08:00
|
|
|
|
bl.Number = unit.number
|
2023-01-06 04:18:21 +08:00
|
|
|
|
bl.Spended = time.Since(start).Milliseconds()
|
|
|
|
|
switch unit.source {
|
|
|
|
|
case InitRandomSource:
|
|
|
|
|
bl.Collect()
|
2023-01-06 11:30:17 +08:00
|
|
|
|
pool.locker.Lock()
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.random = bl
|
|
|
|
|
pool.addFuzzyBaseline(bl)
|
2023-01-09 21:33:05 +08:00
|
|
|
|
pool.locker.Unlock()
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.initwg.Done()
|
|
|
|
|
case InitIndexSource:
|
|
|
|
|
bl.Collect()
|
2023-01-06 11:30:17 +08:00
|
|
|
|
pool.locker.Lock()
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.index = bl
|
2023-01-06 11:30:17 +08:00
|
|
|
|
pool.locker.Unlock()
|
|
|
|
|
if bl.Status == 200 || (bl.Status/100) == 3 {
|
2023-02-04 19:44:37 +08:00
|
|
|
|
pool.waiter.Add(1)
|
|
|
|
|
pool.tempCh <- bl
|
2023-01-06 11:30:17 +08:00
|
|
|
|
}
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.initwg.Done()
|
|
|
|
|
case CheckSource:
|
|
|
|
|
if bl.ErrString != "" {
|
|
|
|
|
logs.Log.Warnf("[check.error] %s maybe ip had banned, break (%d/%d), error: %s", pool.BaseURL, pool.failedCount, pool.BreakThreshold, bl.ErrString)
|
|
|
|
|
} else if i := pool.random.Compare(bl); i < 1 {
|
|
|
|
|
if i == 0 {
|
|
|
|
|
if pool.Fuzzy {
|
|
|
|
|
logs.Log.Warn("[check.fuzzy] maybe trigger risk control, " + bl.String())
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2023-01-29 18:23:55 +08:00
|
|
|
|
atomic.AddInt32(&pool.failedCount, 1) //
|
2023-01-06 04:18:21 +08:00
|
|
|
|
logs.Log.Warn("[check.failed] maybe trigger risk control, " + bl.String())
|
|
|
|
|
pool.failedBaselines = append(pool.failedBaselines, bl)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
pool.resetFailed() // 如果后续访问正常, 重置错误次数
|
|
|
|
|
logs.Log.Debug("[check.pass] " + bl.String())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case WordSource:
|
|
|
|
|
// 异步进行性能消耗较大的深度对比
|
|
|
|
|
pool.tempCh <- bl
|
2023-01-29 18:23:55 +08:00
|
|
|
|
if int(pool.Statistor.ReqTotal)%pool.CheckPeriod == 0 {
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.doCheck()
|
|
|
|
|
} else if pool.failedCount%pool.ErrPeriod == 0 {
|
2023-01-29 18:23:55 +08:00
|
|
|
|
atomic.AddInt32(&pool.failedCount, 1)
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.doCheck()
|
|
|
|
|
}
|
|
|
|
|
pool.bar.Done()
|
|
|
|
|
case RedirectSource:
|
|
|
|
|
bl.FrontURL = unit.frontUrl
|
|
|
|
|
pool.tempCh <- bl
|
2023-01-06 13:07:59 +08:00
|
|
|
|
default:
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.tempCh <- bl
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-04 19:44:37 +08:00
|
|
|
|
func (pool *Pool) Handler() {
|
|
|
|
|
for bl := range pool.tempCh {
|
|
|
|
|
if bl.IsValid {
|
|
|
|
|
pool.addFuzzyBaseline(bl)
|
|
|
|
|
}
|
|
|
|
|
if _, ok := pool.Statistor.Counts[bl.Status]; ok {
|
|
|
|
|
pool.Statistor.Counts[bl.Status]++
|
|
|
|
|
} else {
|
|
|
|
|
pool.Statistor.Counts[bl.Status] = 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if _, ok := pool.Statistor.Sources[bl.Source]; ok {
|
|
|
|
|
pool.Statistor.Sources[bl.Source]++
|
|
|
|
|
} else {
|
|
|
|
|
pool.Statistor.Sources[bl.Source] = 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var params map[string]interface{}
|
|
|
|
|
if pool.MatchExpr != nil || pool.FilterExpr != nil || pool.RecuExpr != nil {
|
|
|
|
|
params = map[string]interface{}{
|
|
|
|
|
"index": pool.index,
|
|
|
|
|
"random": pool.random,
|
|
|
|
|
"current": bl,
|
|
|
|
|
}
|
2023-02-07 18:42:20 +08:00
|
|
|
|
//for _, status := range FuzzyStatus {
|
|
|
|
|
// if bl, ok := pool.baselines[status]; ok {
|
|
|
|
|
// params["bl"+strconv.Itoa(status)] = bl
|
|
|
|
|
// } else {
|
|
|
|
|
// params["bl"+strconv.Itoa(status)] = nilBaseline
|
|
|
|
|
// }
|
|
|
|
|
//}
|
2023-02-04 19:44:37 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var status bool
|
|
|
|
|
if pool.MatchExpr != nil {
|
|
|
|
|
if CompareWithExpr(pool.MatchExpr, params) {
|
|
|
|
|
status = true
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2023-02-07 18:37:47 +08:00
|
|
|
|
status = pool.BaseCompare(bl)
|
2023-02-04 19:44:37 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if status {
|
|
|
|
|
pool.Statistor.FoundNumber++
|
|
|
|
|
if pool.FilterExpr != nil && CompareWithExpr(pool.FilterExpr, params) {
|
|
|
|
|
pool.Statistor.FilteredNumber++
|
|
|
|
|
bl.Reason = ErrCustomFilter.Error()
|
|
|
|
|
bl.IsValid = false
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
bl.IsValid = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if bl.IsValid || bl.IsFuzzy {
|
|
|
|
|
pool.waiter.Add(2)
|
|
|
|
|
pool.doCrawl(bl)
|
|
|
|
|
pool.doRule(bl)
|
|
|
|
|
}
|
|
|
|
|
// 如果要进行递归判断, 要满足 bl有效, mod为path-spray, 当前深度小于最大递归深度
|
|
|
|
|
if bl.IsValid {
|
|
|
|
|
if bl.RecuDepth < MaxRecursion {
|
|
|
|
|
if CompareWithExpr(pool.RecuExpr, params) {
|
|
|
|
|
bl.Recu = true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !pool.closed {
|
|
|
|
|
// 如果任务被取消, 所有还没处理的请求结果都会被丢弃
|
|
|
|
|
pool.OutputCh <- bl
|
|
|
|
|
}
|
|
|
|
|
pool.waiter.Done()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pool.analyzeDone = true
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-12 18:17:53 +08:00
|
|
|
|
func (pool *Pool) checkRedirect(redirectURL string) bool {
|
|
|
|
|
if pool.random.RedirectURL == "" {
|
|
|
|
|
// 如果random的redirectURL为空, 此时该项
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if redirectURL == pool.random.RedirectURL {
|
|
|
|
|
// 相同的RedirectURL将被认为是无效数据
|
|
|
|
|
return false
|
|
|
|
|
} else {
|
|
|
|
|
// path为3xx, 且与baseline中的RedirectURL不同时, 为有效数据
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pool *Pool) genReq(s string) (*ihttp.Request, error) {
|
|
|
|
|
if pool.Mod == pkg.HostSpray {
|
|
|
|
|
return ihttp.BuildHostRequest(pool.ClientType, pool.BaseURL, s)
|
|
|
|
|
} else if pool.Mod == pkg.PathSpray {
|
|
|
|
|
return ihttp.BuildPathRequest(pool.ClientType, pool.base, s)
|
|
|
|
|
}
|
|
|
|
|
return nil, fmt.Errorf("unknown mod")
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) PreCompare(resp *ihttp.Response) error {
|
2022-11-17 16:27:44 +08:00
|
|
|
|
status := resp.StatusCode()
|
2023-01-28 13:15:49 +08:00
|
|
|
|
if iutils.IntsContains(WhiteStatus, status) {
|
2022-11-17 17:09:37 +08:00
|
|
|
|
// 如果为白名单状态码则直接返回
|
|
|
|
|
return nil
|
|
|
|
|
}
|
2022-12-11 03:52:06 +08:00
|
|
|
|
if pool.random != nil && pool.random.Status != 200 && pool.random.Status == status {
|
2022-11-10 21:03:07 +08:00
|
|
|
|
return ErrSameStatus
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-28 13:15:49 +08:00
|
|
|
|
if iutils.IntsContains(BlackStatus, status) {
|
2022-09-15 19:27:07 +08:00
|
|
|
|
return ErrBadStatus
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-28 13:15:49 +08:00
|
|
|
|
if iutils.IntsContains(WAFStatus, status) {
|
2022-11-17 16:27:44 +08:00
|
|
|
|
return ErrWaf
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-03 17:09:32 +08:00
|
|
|
|
if !pool.checkRedirect(resp.GetHeader("Location")) {
|
2022-11-17 16:27:44 +08:00
|
|
|
|
return ErrRedirect
|
2022-09-26 17:19:08 +08:00
|
|
|
|
}
|
2022-09-08 15:57:17 +08:00
|
|
|
|
|
2022-09-15 19:27:07 +08:00
|
|
|
|
return nil
|
2022-09-08 15:57:17 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) BaseCompare(bl *pkg.Baseline) bool {
|
2022-11-11 11:55:49 +08:00
|
|
|
|
var status = -1
|
2023-02-07 18:37:47 +08:00
|
|
|
|
|
|
|
|
|
// 30x状态码的特殊处理
|
|
|
|
|
if strings.HasSuffix(bl.RedirectURL, bl.Url.Path+"/") {
|
|
|
|
|
bl.Reason = ErrFuzzyRedirect.Error()
|
|
|
|
|
pool.putToFuzzy(bl)
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 使用与baseline相同状态码, 需要在fuzzystatus中提前配置
|
2022-12-11 03:52:06 +08:00
|
|
|
|
base, ok := pool.baselines[bl.Status] // 挑选对应状态码的baseline进行compare
|
2022-11-17 05:48:46 +08:00
|
|
|
|
if !ok {
|
2023-02-04 19:44:37 +08:00
|
|
|
|
if pool.index != nil {
|
|
|
|
|
|
|
|
|
|
} else if pool.random.Status == bl.Status {
|
2022-11-17 05:48:46 +08:00
|
|
|
|
// 当other的状态码与base相同时, 会使用base
|
|
|
|
|
ok = true
|
2022-12-11 03:52:06 +08:00
|
|
|
|
base = pool.random
|
|
|
|
|
} else if pool.index.Status == bl.Status {
|
2022-11-17 05:48:46 +08:00
|
|
|
|
// 当other的状态码与index相同时, 会使用index
|
|
|
|
|
ok = true
|
2022-12-11 03:52:06 +08:00
|
|
|
|
base = pool.index
|
2022-11-17 05:48:46 +08:00
|
|
|
|
}
|
2022-11-11 11:40:53 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-11-11 11:55:49 +08:00
|
|
|
|
if ok {
|
|
|
|
|
if status = base.Compare(bl); status == 1 {
|
2022-11-21 20:44:02 +08:00
|
|
|
|
bl.Reason = ErrCompareFailed.Error()
|
|
|
|
|
return false
|
2022-09-23 01:20:01 +08:00
|
|
|
|
}
|
2022-11-11 10:20:32 +08:00
|
|
|
|
}
|
2022-09-23 01:20:01 +08:00
|
|
|
|
|
2022-11-11 14:50:59 +08:00
|
|
|
|
bl.Collect()
|
2022-12-12 18:01:14 +08:00
|
|
|
|
//if !pool.IgnoreWaf {
|
|
|
|
|
// // 部分情况下waf的特征可能是全局, 指定了--ignore-waf则不会进行waf的指纹检测
|
|
|
|
|
// for _, f := range bl.Frameworks {
|
|
|
|
|
// if f.HasTag("waf") {
|
|
|
|
|
// pool.Statistor.WafedNumber++
|
|
|
|
|
// bl.Reason = ErrWaf.Error()
|
|
|
|
|
// return false
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
//}
|
2022-09-23 11:20:41 +08:00
|
|
|
|
|
2022-11-11 14:50:59 +08:00
|
|
|
|
if ok && status == 0 && base.FuzzyCompare(bl) {
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.Statistor.FuzzyNumber++
|
2022-11-21 20:44:02 +08:00
|
|
|
|
bl.Reason = ErrFuzzyCompareFailed.Error()
|
2023-01-03 17:09:32 +08:00
|
|
|
|
pool.putToFuzzy(bl)
|
2022-11-21 20:44:02 +08:00
|
|
|
|
return false
|
2022-11-11 14:50:59 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-11-21 20:44:02 +08:00
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 00:24:28 +08:00
|
|
|
|
func CompareWithExpr(exp *vm.Program, params map[string]interface{}) bool {
|
2022-11-21 20:44:02 +08:00
|
|
|
|
res, err := expr.Run(exp, params)
|
|
|
|
|
if err != nil {
|
|
|
|
|
logs.Log.Warn(err.Error())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if res == true {
|
|
|
|
|
return true
|
|
|
|
|
} else {
|
|
|
|
|
return false
|
|
|
|
|
}
|
2022-09-23 01:20:01 +08:00
|
|
|
|
}
|
2022-09-26 17:19:08 +08:00
|
|
|
|
|
2023-01-10 02:04:12 +08:00
|
|
|
|
func (pool *Pool) Upgrade(bl *pkg.Baseline) error {
|
|
|
|
|
rurl, err := url.Parse(bl.RedirectURL)
|
|
|
|
|
if err == nil && rurl.Hostname() == bl.Url.Hostname() && bl.Url.Scheme == "http" && rurl.Scheme == "https" {
|
|
|
|
|
logs.Log.Infof("baseurl %s upgrade http to https, reinit", pool.BaseURL)
|
2023-01-10 23:44:03 +08:00
|
|
|
|
pool.base = strings.Replace(pool.BaseURL, "http", "https", 1)
|
2023-01-10 02:04:12 +08:00
|
|
|
|
pool.url.Scheme = "https"
|
|
|
|
|
// 重新初始化
|
|
|
|
|
err = pool.Init()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-03 17:09:32 +08:00
|
|
|
|
func (pool *Pool) doRedirect(bl *pkg.Baseline, depth int) {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
defer pool.waiter.Done()
|
2023-01-10 01:09:00 +08:00
|
|
|
|
if depth >= MaxRedirect {
|
2023-01-03 17:09:32 +08:00
|
|
|
|
return
|
|
|
|
|
}
|
2023-01-10 23:44:03 +08:00
|
|
|
|
reURL := FormatURL(bl.Url.Path, bl.RedirectURL)
|
2023-01-03 17:09:32 +08:00
|
|
|
|
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-10 23:44:03 +08:00
|
|
|
|
go pool.addAddition(&Unit{
|
|
|
|
|
path: reURL,
|
|
|
|
|
source: RedirectSource,
|
|
|
|
|
frontUrl: bl.UrlString,
|
|
|
|
|
depth: depth + 1,
|
|
|
|
|
})
|
2023-01-03 17:09:32 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pool *Pool) doCrawl(bl *pkg.Baseline) {
|
2023-01-10 23:44:03 +08:00
|
|
|
|
if !pool.Crawl || bl.ReqDepth >= MaxCrawl {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Done()
|
2023-01-05 22:42:07 +08:00
|
|
|
|
return
|
|
|
|
|
}
|
2023-01-03 17:09:32 +08:00
|
|
|
|
bl.CollectURL()
|
2023-01-09 21:33:05 +08:00
|
|
|
|
if bl.URLs == nil {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Done()
|
2023-01-09 21:33:05 +08:00
|
|
|
|
return
|
|
|
|
|
}
|
2023-01-10 23:44:03 +08:00
|
|
|
|
|
2023-01-06 04:18:21 +08:00
|
|
|
|
go func() {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
defer pool.waiter.Done()
|
2023-01-06 04:18:21 +08:00
|
|
|
|
for _, u := range bl.URLs {
|
2023-01-11 11:12:00 +08:00
|
|
|
|
if u = FormatURL(bl.Url.Path, u); u == "" {
|
2023-01-10 23:44:03 +08:00
|
|
|
|
continue
|
2023-01-06 04:18:21 +08:00
|
|
|
|
}
|
2023-01-03 17:09:32 +08:00
|
|
|
|
|
2023-01-11 11:12:00 +08:00
|
|
|
|
// 通过map去重, 只有新的url才会进入到该逻辑
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-11 11:12:00 +08:00
|
|
|
|
pool.addAddition(&Unit{
|
|
|
|
|
path: u,
|
|
|
|
|
source: CrawlSource,
|
|
|
|
|
depth: bl.ReqDepth + 1,
|
|
|
|
|
})
|
2023-01-03 17:09:32 +08:00
|
|
|
|
}
|
2023-01-06 04:18:21 +08:00
|
|
|
|
}()
|
|
|
|
|
|
2023-01-03 17:09:32 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 00:48:13 +08:00
|
|
|
|
func (pool *Pool) doRule(bl *pkg.Baseline) {
|
|
|
|
|
if pool.AppendRule == nil {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Done()
|
2023-01-06 00:48:13 +08:00
|
|
|
|
return
|
|
|
|
|
}
|
2023-01-12 19:21:35 +08:00
|
|
|
|
if bl.Source == RuleSource {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Done()
|
2023-01-06 00:48:13 +08:00
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 04:18:21 +08:00
|
|
|
|
go func() {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
defer pool.waiter.Done()
|
2023-01-06 04:18:21 +08:00
|
|
|
|
for u := range rule.RunAsStream(pool.AppendRule.Expressions, path.Base(bl.Path)) {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-06 04:18:21 +08:00
|
|
|
|
pool.addAddition(&Unit{
|
2023-01-10 23:44:03 +08:00
|
|
|
|
path: Dir(bl.Url.Path) + u,
|
2023-01-06 04:18:21 +08:00
|
|
|
|
source: RuleSource,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}()
|
2023-01-06 00:48:13 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-03 17:16:55 +08:00
|
|
|
|
func (pool *Pool) doActive() {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
defer pool.waiter.Done()
|
2023-01-03 17:16:55 +08:00
|
|
|
|
for _, u := range pkg.ActivePath {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-06 03:31:28 +08:00
|
|
|
|
pool.addAddition(&Unit{
|
2023-01-10 23:44:03 +08:00
|
|
|
|
path: pool.dir + u[1:],
|
2023-01-03 17:16:55 +08:00
|
|
|
|
source: ActiveSource,
|
2023-01-06 03:31:28 +08:00
|
|
|
|
})
|
2023-01-03 17:16:55 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 00:48:13 +08:00
|
|
|
|
func (pool *Pool) doBak() {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
defer pool.waiter.Done()
|
2023-01-10 02:04:12 +08:00
|
|
|
|
worder, err := words.NewWorderWithDsl("{?0}.{@bak_ext}", [][]string{pkg.BakGenerator(pool.url.Host)}, nil)
|
2023-01-06 00:48:13 +08:00
|
|
|
|
if err != nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
worder.Run()
|
|
|
|
|
for w := range worder.C {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-06 03:31:28 +08:00
|
|
|
|
pool.addAddition(&Unit{
|
2023-01-10 23:44:03 +08:00
|
|
|
|
path: pool.dir + w,
|
2023-01-06 00:48:13 +08:00
|
|
|
|
source: BakSource,
|
2023-01-06 03:31:28 +08:00
|
|
|
|
})
|
2023-01-06 00:48:13 +08:00
|
|
|
|
}
|
2023-01-06 13:07:59 +08:00
|
|
|
|
|
|
|
|
|
worder, err = words.NewWorderWithDsl("{@bak_name}.{@bak_ext}", nil, nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
worder.Run()
|
|
|
|
|
for w := range worder.C {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-06 13:07:59 +08:00
|
|
|
|
pool.addAddition(&Unit{
|
2023-01-10 23:44:03 +08:00
|
|
|
|
path: pool.dir + w,
|
2023-01-06 13:07:59 +08:00
|
|
|
|
source: BakSource,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pool *Pool) doCommonFile() {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
defer pool.waiter.Done()
|
2023-01-06 13:07:59 +08:00
|
|
|
|
for _, u := range mask.SpecialWords["common_file"] {
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-06 13:07:59 +08:00
|
|
|
|
pool.addAddition(&Unit{
|
2023-01-10 23:44:03 +08:00
|
|
|
|
path: pool.dir + u,
|
2023-01-06 13:07:59 +08:00
|
|
|
|
source: CommonFileSource,
|
|
|
|
|
})
|
|
|
|
|
}
|
2023-01-06 00:48:13 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-03 17:09:32 +08:00
|
|
|
|
func (pool *Pool) doCheck() {
|
|
|
|
|
if pool.failedCount > pool.BreakThreshold {
|
|
|
|
|
// 当报错次数超过上限是, 结束任务
|
|
|
|
|
pool.recover()
|
|
|
|
|
pool.cancel()
|
|
|
|
|
pool.isFailed = true
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if pool.Mod == pkg.HostSpray {
|
|
|
|
|
pool.checkCh <- CheckSource
|
|
|
|
|
} else if pool.Mod == pkg.PathSpray {
|
|
|
|
|
pool.checkCh <- CheckSource
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-06 03:31:28 +08:00
|
|
|
|
func (pool *Pool) addAddition(u *Unit) {
|
2023-01-16 17:30:54 +08:00
|
|
|
|
// 强行屏蔽报错, 防止goroutine泄露
|
|
|
|
|
defer func() {
|
|
|
|
|
if err := recover(); err != nil {
|
|
|
|
|
}
|
|
|
|
|
}()
|
2023-01-06 03:31:28 +08:00
|
|
|
|
pool.additionCh <- u
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
|
2023-02-07 18:42:20 +08:00
|
|
|
|
if _, ok := pool.baselines[bl.Status]; !ok && (enableAllFuzzy || iutils.IntsContains(FuzzyStatus, bl.Status)) {
|
2022-11-10 21:03:07 +08:00
|
|
|
|
bl.Collect()
|
2023-01-12 17:41:44 +08:00
|
|
|
|
pool.waiter.Add(1)
|
2023-01-04 11:26:25 +08:00
|
|
|
|
pool.doCrawl(bl)
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.baselines[bl.Status] = bl
|
2023-01-06 03:31:28 +08:00
|
|
|
|
logs.Log.Infof("[baseline.%dinit] %s", bl.Status, bl.Format([]string{"status", "length", "spend", "title", "frame", "redirect"}))
|
2022-11-10 21:03:07 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-03 17:09:32 +08:00
|
|
|
|
func (pool *Pool) putToInvalid(bl *pkg.Baseline, reason string) {
|
2022-11-10 21:18:26 +08:00
|
|
|
|
bl.IsValid = false
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.OutputCh <- bl
|
2022-11-10 21:18:26 +08:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-03 17:09:32 +08:00
|
|
|
|
func (pool *Pool) putToFuzzy(bl *pkg.Baseline) {
|
2022-11-10 21:18:26 +08:00
|
|
|
|
bl.IsFuzzy = true
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.FuzzyCh <- bl
|
2022-11-10 21:18:26 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) resetFailed() {
|
|
|
|
|
pool.failedCount = 1
|
|
|
|
|
pool.failedBaselines = nil
|
2022-11-10 15:43:25 +08:00
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) recover() {
|
2023-01-12 19:21:35 +08:00
|
|
|
|
logs.Log.Errorf("%s ,failed request exceeds the threshold , task will exit. Breakpoint %d", pool.BaseURL, pool.wordOffset)
|
2022-12-11 03:52:06 +08:00
|
|
|
|
for i, bl := range pool.failedBaselines {
|
2022-11-10 17:19:05 +08:00
|
|
|
|
logs.Log.Errorf("[failed.%d] %s", i, bl.String())
|
2022-11-10 15:43:25 +08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-11 03:52:06 +08:00
|
|
|
|
func (pool *Pool) Close() {
|
|
|
|
|
for pool.analyzeDone {
|
2023-01-16 17:30:54 +08:00
|
|
|
|
// 等待缓存的待处理任务完成
|
2022-09-23 11:20:41 +08:00
|
|
|
|
time.Sleep(time.Duration(100) * time.Millisecond)
|
|
|
|
|
}
|
2023-01-16 17:30:54 +08:00
|
|
|
|
close(pool.additionCh) // 关闭addition管道
|
|
|
|
|
close(pool.checkCh) // 关闭check管道
|
2023-01-12 18:17:53 +08:00
|
|
|
|
pool.Statistor.EndTime = time.Now().Unix()
|
2022-12-11 03:52:06 +08:00
|
|
|
|
pool.bar.Close()
|
2022-09-23 11:20:41 +08:00
|
|
|
|
}
|
2023-01-10 23:44:03 +08:00
|
|
|
|
|
|
|
|
|
func (pool *Pool) safePath(u string) string {
|
|
|
|
|
// 自动生成的目录将采用safepath的方式拼接到相对目录中, 避免出现//的情况. 例如init, check, common
|
2023-01-12 19:21:35 +08:00
|
|
|
|
hasSlash := strings.HasPrefix(u, "/")
|
2023-02-04 19:44:37 +08:00
|
|
|
|
if hasSlash {
|
|
|
|
|
if pool.isDir {
|
|
|
|
|
return pool.dir + u[1:]
|
|
|
|
|
} else {
|
|
|
|
|
return pool.url.Path + u
|
|
|
|
|
}
|
2023-01-12 19:21:35 +08:00
|
|
|
|
} else {
|
2023-02-04 19:44:37 +08:00
|
|
|
|
if pool.isDir {
|
|
|
|
|
return pool.url.Path + u
|
|
|
|
|
} else {
|
|
|
|
|
return pool.url.Path + "/" + u
|
|
|
|
|
}
|
2023-01-10 23:44:03 +08:00
|
|
|
|
}
|
|
|
|
|
}
|