From 61167054eecda934fb9544f14c5cc39b5c478105 Mon Sep 17 00:00:00 2001 From: M09Ic Date: Sat, 10 Feb 2024 18:23:50 +0800 Subject: [PATCH] refactor pool --- cmd/cmd.go | 15 +- internal/format.go | 5 +- internal/option.go | 32 +- internal/{pool.go => pool/brutepool.go} | 444 +++++++++--------------- internal/{ => pool}/checkpool.go | 96 ++--- internal/{ => pool}/config.go | 9 +- internal/pool/pool.go | 160 +++++++++ internal/pool/unit.go | 20 ++ internal/runner.go | 111 +++--- internal/types.go | 73 ---- internal/utils.go | 151 -------- {internal => pkg}/baseline.go | 31 +- {internal => pkg}/errors.go | 2 +- pkg/utils.go | 156 +++++++++ templates | 2 +- 15 files changed, 632 insertions(+), 675 deletions(-) rename internal/{pool.go => pool/brutepool.go} (66%) rename internal/{ => pool}/checkpool.go (64%) rename internal/{ => pool}/config.go (85%) create mode 100644 internal/pool/pool.go create mode 100644 internal/pool/unit.go rename {internal => pkg}/baseline.go (87%) rename {internal => pkg}/errors.go (98%) diff --git a/cmd/cmd.go b/cmd/cmd.go index b73b9d1..6f8d07a 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -7,6 +7,7 @@ import ( "github.com/chainreactors/parsers" "github.com/chainreactors/spray/internal" "github.com/chainreactors/spray/internal/ihttp" + "github.com/chainreactors/spray/internal/pool" "github.com/chainreactors/spray/pkg" "github.com/chainreactors/utils/iutils" "github.com/jessevdk/go-flags" @@ -80,23 +81,23 @@ func Spray() { } } // logs - logs.AddLevel(internal.LogVerbose, "verbose", "[=] %s {{suffix}}") + logs.AddLevel(pkg.LogVerbose, "verbose", "[=] %s {{suffix}}") if option.Debug { logs.Log.SetLevel(logs.Debug) } else if len(option.Verbose) > 0 { - logs.Log.SetLevel(internal.LogVerbose) + logs.Log.SetLevel(pkg.LogVerbose) } logs.Log.SetColorMap(map[logs.Level]func(string) string{ - logs.Info: logs.PurpleBold, - logs.Important: logs.GreenBold, - internal.LogVerbose: logs.Green, + logs.Info: logs.PurpleBold, + logs.Important: logs.GreenBold, + pkg.LogVerbose: logs.Green, }) // 初始化全局变量 - internal.Distance = uint8(option.SimhashDistance) + pkg.Distance = uint8(option.SimhashDistance) ihttp.DefaultMaxBodySize = option.MaxBodyLength * 1024 - internal.MaxCrawl = option.CrawlDepth + pool.MaxCrawl = option.CrawlDepth var runner *internal.Runner if option.ResumeFrom != "" { diff --git a/internal/format.go b/internal/format.go index e968e64..40b1d3b 100644 --- a/internal/format.go +++ b/internal/format.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/json" "github.com/chainreactors/logs" + "github.com/chainreactors/spray/pkg" "io/ioutil" "os" ) @@ -20,9 +21,9 @@ func Format(filename string, color bool) { if err != nil { return } - var results []*Baseline + var results []*pkg.Baseline for _, line := range bytes.Split(bytes.TrimSpace(content), []byte("\n")) { - var result Baseline + var result pkg.Baseline err := json.Unmarshal(line, &result) if err != nil { logs.Log.Error(err.Error()) diff --git a/internal/option.go b/internal/option.go index 13f5861..54c8f6b 100644 --- a/internal/option.go +++ b/internal/option.go @@ -8,6 +8,7 @@ import ( "github.com/chainreactors/files" "github.com/chainreactors/logs" "github.com/chainreactors/spray/internal/ihttp" + "github.com/chainreactors/spray/internal/pool" "github.com/chainreactors/spray/pkg" "github.com/chainreactors/utils" "github.com/chainreactors/utils/iutils" @@ -19,11 +20,11 @@ import ( "os" "strconv" "strings" + "sync" ) var ( DefaultThreads = 20 - LogVerbose = logs.Warn - 1 ) type Option struct { @@ -156,8 +157,9 @@ func (opt *Option) 
PrepareRunner() (*Runner, error) { Offset: opt.Offset, Total: opt.Limit, taskCh: make(chan *Task), - OutputCh: make(chan *Baseline, 100), - FuzzyCh: make(chan *Baseline, 100), + outputCh: make(chan *pkg.Baseline, 100), + outwg: &sync.WaitGroup{}, + fuzzyCh: make(chan *pkg.Baseline, 100), Fuzzy: opt.Fuzzy, Force: opt.Force, CheckOnly: opt.CheckOnly, @@ -259,18 +261,18 @@ func (opt *Option) PrepareRunner() (*Runner, error) { r.Scope = []string{"*"} } - BlackStatus = parseStatus(BlackStatus, opt.BlackStatus) - WhiteStatus = parseStatus(WhiteStatus, opt.WhiteStatus) + pkg.BlackStatus = parseStatus(pkg.BlackStatus, opt.BlackStatus) + pkg.WhiteStatus = parseStatus(pkg.WhiteStatus, opt.WhiteStatus) if opt.FuzzyStatus == "all" { - enableAllFuzzy = true + pool.EnableAllFuzzy = true } else { - FuzzyStatus = parseStatus(FuzzyStatus, opt.FuzzyStatus) + pkg.FuzzyStatus = parseStatus(pkg.FuzzyStatus, opt.FuzzyStatus) } if opt.Unique { - enableAllUnique = true + pool.EnableAllUnique = true } else { - UniqueStatus = parseStatus(UniqueStatus, opt.UniqueStatus) + pkg.UniqueStatus = parseStatus(pkg.UniqueStatus, opt.UniqueStatus) } // prepare word @@ -288,7 +290,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) { return nil, err } - logs.Log.Logf(LogVerbose, "Loaded %d word from %s", len(dicts[i]), f) + logs.Log.Logf(pkg.LogVerbose, "Loaded %d words from %s", len(dicts[i]), f) } } @@ -325,7 +327,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) { return nil, fmt.Errorf("%s %w", opt.Word, err) } if len(r.Wordlist) > 0 { - logs.Log.Logf(LogVerbose, "Parsed %d words by %s", len(r.Wordlist), opt.Word) + logs.Log.Logf(pkg.LogVerbose, "Parsed %d words by %s", len(r.Wordlist), opt.Word) } if opt.Rules != nil { @@ -500,7 +502,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) { } r.Tasks = tasks - logs.Log.Logf(LogVerbose, "Loaded %d urls from %s", len(tasks), taskfrom) + logs.Log.Logf(pkg.LogVerbose, "Loaded %d urls from %s", len(tasks), taskfrom) // 类似dirsearch中的 if opt.Extensions != "" { @@ -577,7 +579,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) { }) } - logs.Log.Logf(LogVerbose, "Loaded %d dictionaries and %d decorators", len(opt.Dictionaries), len(r.Fns)) + logs.Log.Logf(pkg.LogVerbose, "Loaded %d dictionaries and %d decorators", len(opt.Dictionaries), len(r.Fns)) if opt.Match != "" { exp, err := expr.Compile(opt.Match, expr.Patch(&bytesPatcher{})) @@ -599,13 +601,13 @@ func (opt *Option) PrepareRunner() (*Runner, error) { var express string if opt.Recursive != "current.IsDir()" && opt.Depth != 0 { // 默认不打开递归, 除非指定了非默认的递归表达式 - MaxRecursion = 1 + pool.MaxRecursion = 1 express = opt.Recursive } if opt.Depth != 0 { // 手动设置的depth优先级高于默认 - MaxRecursion = opt.Depth + pool.MaxRecursion = opt.Depth express = opt.Recursive } diff --git a/internal/pool.go b/internal/pool/brutepool.go similarity index 66% rename from internal/pool.go rename to internal/pool/brutepool.go index 9f57b4e..dcde456 100644 --- a/internal/pool.go +++ b/internal/pool/brutepool.go @@ -1,4 +1,4 @@ -package internal +package pool import ( "context" @@ -9,14 +9,11 @@ import ( "github.com/chainreactors/spray/pkg" "github.com/chainreactors/utils/iutils" "github.com/chainreactors/words" - "github.com/chainreactors/words/mask" - "github.com/chainreactors/words/rule" "github.com/panjf2000/ants/v2" "github.com/valyala/fasthttp" "golang.org/x/time/rate" "math/rand" "net/url" - "path" "strings" "sync" "sync/atomic" @@ -24,42 +21,44 @@ import ( ) var ( - max = 2147483647 MaxRedirect = 3 MaxCrawl = 3 MaxRecursion = 0 - 
enableAllFuzzy = false - enableAllUnique = false + EnableAllFuzzy = false + EnableAllUnique = false ) -func NewPool(ctx context.Context, config *Config) (*Pool, error) { +func NewBrutePool(ctx context.Context, config *Config) (*BrutePool, error) { var u *url.URL var err error if u, err = url.Parse(config.BaseURL); err != nil { return nil, err } pctx, cancel := context.WithCancel(ctx) - pool := &Pool{ - Config: config, + pool := &BrutePool{ Baselines: NewBaselines(), - base: u.Scheme + "://" + u.Host, - isDir: strings.HasSuffix(u.Path, "/"), - url: u, - ctx: pctx, - cancel: cancel, - client: ihttp.NewClient(&ihttp.ClientConfig{ - Thread: config.Thread, - Type: config.ClientType, - Timeout: time.Duration(config.Timeout) * time.Second, - ProxyAddr: config.ProxyAddr, - }), + This: &This{ + Config: config, + ctx: pctx, + Cancel: cancel, + client: ihttp.NewClient(&ihttp.ClientConfig{ + Thread: config.Thread, + Type: config.ClientType, + Timeout: time.Duration(config.Timeout) * time.Second, + ProxyAddr: config.ProxyAddr, + }), + additionCh: make(chan *Unit, config.Thread), + closeCh: make(chan struct{}), + wg: sync.WaitGroup{}, + }, + base: u.Scheme + "://" + u.Host, + isDir: strings.HasSuffix(u.Path, "/"), + url: u, + scopeurls: make(map[string]struct{}), uniques: make(map[uint16]struct{}), - handlerCh: make(chan *Baseline, config.Thread), + handlerCh: make(chan *pkg.Baseline, config.Thread), checkCh: make(chan struct{}, config.Thread), - additionCh: make(chan *Unit, config.Thread), - closeCh: make(chan struct{}), - waiter: sync.WaitGroup{}, initwg: sync.WaitGroup{}, limiter: rate.NewLimiter(rate.Limit(config.RateLimit), 1), failedCount: 1, @@ -71,7 +70,7 @@ func NewPool(ctx context.Context, config *Config) (*Pool, error) { } else if pool.url.Path == "" { pool.dir = "/" } else { - pool.dir = dir(pool.url.Path) + pool.dir = pkg.Dir(pool.url.Path) } pool.reqPool, _ = ants.NewPoolWithFunc(config.Thread, pool.Invoke) @@ -82,41 +81,32 @@ func NewPool(ctx context.Context, config *Config) (*Pool, error) { return pool, nil } -type Pool struct { - *Config // read only +type BrutePool struct { *Baselines - base string // url的根目录, 在爬虫或者redirect时, 会需要用到根目录进行拼接 - dir string - isDir bool - url *url.URL - Statistor *pkg.Statistor - client *ihttp.Client + *This + base string // url的根目录, 在爬虫或者redirect时, 会需要用到根目录进行拼接 + isDir bool + url *url.URL + reqPool *ants.PoolWithFunc scopePool *ants.PoolWithFunc - bar *pkg.Bar - ctx context.Context - cancel context.CancelFunc - handlerCh chan *Baseline // 待处理的baseline - checkCh chan struct{} // 独立的check管道, 防止与redirect/crawl冲突 - additionCh chan *Unit // 插件添加的任务, 待处理管道 - closeCh chan struct{} + handlerCh chan *pkg.Baseline // 待处理的baseline + checkCh chan struct{} // 独立的check管道, 防止与redirect/crawl冲突 closed bool wordOffset int failedCount int32 - isFailed bool + IsFailed bool urls sync.Map scopeurls map[string]struct{} uniques map[uint16]struct{} analyzeDone bool - worder *words.Worder limiter *rate.Limiter locker sync.Mutex scopeLocker sync.Mutex - waiter sync.WaitGroup initwg sync.WaitGroup // 初始化用, 之后改成锁 } -func (pool *Pool) checkRedirect(redirectURL string) bool { +func (pool *BrutePool) checkRedirect(redirectURL string) bool { if pool.random.RedirectURL == "" { // 如果random的redirectURL为空, 此时该项 return true @@ -131,7 +121,7 @@ func (pool *Pool) checkRedirect(redirectURL string) bool { } } -func (pool *Pool) genReq(mod SprayMod, s string) (*ihttp.Request, error) { +func (pool *BrutePool) genReq(mod SprayMod, s string) (*ihttp.Request, error) { if mod == HostSpray { return 
ihttp.BuildHostRequest(pool.ClientType, pool.BaseURL, s) } else if mod == PathSpray { return ihttp.BuildPathRequest(pool.ClientType, pool.BaseURL, s) } return nil, fmt.Errorf("unknown mod") } -func (pool *Pool) Init() error { +func (pool *BrutePool) Init() error { pool.initwg.Add(2) if pool.Index != "/" { - logs.Log.Logf(LogVerbose, "custom index url: %s", BaseURL(pool.url)+FormatURL(BaseURL(pool.url), pool.Index)) + logs.Log.Logf(pkg.LogVerbose, "custom index url: %s", pkg.BaseURL(pool.url)+pkg.FormatURL(pkg.BaseURL(pool.url), pool.Index)) pool.reqPool.Invoke(newUnit(pool.Index, parsers.InitIndexSource)) //pool.urls[dir(pool.Index)] = struct{}{} } else { @@ -152,7 +142,7 @@ func (pool *Pool) Init() error { } if pool.Random != "" { - logs.Log.Logf(LogVerbose, "custom random url: %s", BaseURL(pool.url)+FormatURL(BaseURL(pool.url), pool.Random)) + logs.Log.Logf(pkg.LogVerbose, "custom random url: %s", pkg.BaseURL(pool.url)+pkg.FormatURL(pkg.BaseURL(pool.url), pool.Random)) pool.reqPool.Invoke(newUnit(pool.Random, parsers.InitRandomSource)) } else { pool.reqPool.Invoke(newUnit(pool.safePath(pkg.RandPath()), parsers.InitRandomSource)) @@ -166,13 +156,13 @@ func (pool *Pool) Init() error { if pool.index.Chunked && pool.ClientType == ihttp.FAST { logs.Log.Warn("chunk encoding! but current client FASTHTTP does not support chunk decode") } - logs.Log.Logf(LogVerbose, "[baseline.index] "+pool.index.Format([]string{"status", "length", "spend", "title", "frame", "redirect"})) + logs.Log.Logf(pkg.LogVerbose, "[baseline.index] "+pool.index.Format([]string{"status", "length", "spend", "title", "frame", "redirect"})) // 检测基本访问能力 if pool.random.ErrString != "" { logs.Log.Error(pool.index.String()) return fmt.Errorf(pool.index.ErrString) } - logs.Log.Logf(LogVerbose, "[baseline.random] "+pool.random.Format([]string{"status", "length", "spend", "title", "frame", "redirect"})) + logs.Log.Logf(pkg.LogVerbose, "[baseline.random] "+pool.random.Format([]string{"status", "length", "spend", "title", "frame", "redirect"})) // 某些网站http会重定向到https, 如果发现随机目录出现这种情况, 则自动将baseurl升级为https if pool.url.Scheme == "http" { @@ -190,20 +180,36 @@ func (pool *Pool) Init() error { return nil } -func (pool *Pool) Run(offset, limit int) { - pool.worder.Run() +func (pool *BrutePool) Upgrade(bl *pkg.Baseline) error { + rurl, err := url.Parse(bl.RedirectURL) + if err == nil && rurl.Hostname() == bl.Url.Hostname() && bl.Url.Scheme == "http" && rurl.Scheme == "https" { + logs.Log.Infof("baseurl %s upgrade http to https, reinit", pool.BaseURL) + pool.base = strings.Replace(pool.BaseURL, "http", "https", 1) + pool.url.Scheme = "https" + // 重新初始化 + err = pool.Init() + if err != nil { + return err + } + } + + return nil +} + +func (pool *BrutePool) Run(offset, limit int) { + pool.Worder.Run() if pool.Active { - pool.waiter.Add(1) + pool.wg.Add(1) go pool.doActive() } if pool.Bak { - pool.waiter.Add(1) + pool.wg.Add(1) go pool.doBak() } if pool.Common { - pool.waiter.Add(1) + pool.wg.Add(1) go pool.doCommonFile() } @@ -212,7 +218,7 @@ func (pool *Pool) Run(offset, limit int) { go func() { for { if done { - pool.waiter.Wait() + pool.wg.Wait() close(pool.closeCh) return } @@ -223,7 +229,7 @@ Loop: for { select { - case w, ok := <-pool.worder.C: + case w, ok := <-pool.Worder.C: if !ok { done = true continue } @@ -239,7 +245,7 @@ Loop: continue } - pool.waiter.Add(1) + pool.wg.Add(1) if pool.Mod == HostSpray { pool.reqPool.Invoke(newUnitWithNumber(w, parsers.WordSource, 
pool.wordOffset)) } else { @@ -260,7 +266,7 @@ Loop: } if _, ok := pool.urls.Load(unit.path); ok { logs.Log.Debugf("[%s] duplicate path: %s, skipped", unit.source.Name(), pool.base+unit.path) - pool.waiter.Done() + pool.wg.Done() } else { pool.urls.Store(unit.path, nil) unit.number = pool.wordOffset @@ -278,7 +284,7 @@ Loop: pool.Close() } -func (pool *Pool) Invoke(v interface{}) { +func (pool *BrutePool) Invoke(v interface{}) { if pool.RateLimit != 0 { pool.limiter.Wait(pool.ctx) } @@ -300,7 +306,7 @@ func (pool *Pool) Invoke(v interface{}) { } req.SetHeaders(pool.Headers) - req.SetHeader("User-Agent", RandomUA()) + req.SetHeader("User-Agent", pkg.RandomUA()) start := time.Now() resp, reqerr := pool.client.Do(pool.ctx, req) @@ -310,38 +316,38 @@ func (pool *Pool) Invoke(v interface{}) { } // compare与各种错误处理 - var bl *Baseline + var bl *pkg.Baseline if reqerr != nil && reqerr != fasthttp.ErrBodyTooLarge { atomic.AddInt32(&pool.failedCount, 1) atomic.AddInt32(&pool.Statistor.FailedNumber, 1) - bl = &Baseline{ + bl = &pkg.Baseline{ SprayResult: &parsers.SprayResult{ UrlString: pool.base + unit.path, ErrString: reqerr.Error(), - Reason: ErrRequestFailed.Error(), + Reason: pkg.ErrRequestFailed.Error(), }, } - pool.failedBaselines = append(pool.failedBaselines, bl) + pool.FailedBaselines = append(pool.FailedBaselines, bl) // 自动重放失败请求 pool.doRetry(bl) } else { // 特定场景优化 if unit.source <= 3 || unit.source == parsers.CrawlSource || unit.source == parsers.CommonFileSource { // 一些高优先级的source, 将跳过PreCompare - bl = NewBaseline(req.URI(), req.Host(), resp) + bl = pkg.NewBaseline(req.URI(), req.Host(), resp) } else if pool.MatchExpr != nil { // 如果自定义了match函数, 则所有数据送入tempch中 - bl = NewBaseline(req.URI(), req.Host(), resp) + bl = pkg.NewBaseline(req.URI(), req.Host(), resp) } else if err = pool.PreCompare(resp); err == nil { // 通过预对比跳过一些无用数据, 减少性能消耗 - bl = NewBaseline(req.URI(), req.Host(), resp) + bl = pkg.NewBaseline(req.URI(), req.Host(), resp) } else { - bl = NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error()) + bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error()) } } // 手动处理重定向 if bl.IsValid && unit.source != parsers.CheckSource && bl.RedirectURL != "" { - //pool.waiter.Add(1) + //pool.wg.Add(1) pool.doRedirect(bl, unit.depth) } @@ -367,9 +373,9 @@ func (pool *Pool) Invoke(v interface{}) { pool.locker.Unlock() if bl.Status == 200 || (bl.Status/100) == 3 { // 保留index输出结果 - pool.waiter.Add(1) + pool.wg.Add(1) pool.doCrawl(bl) - pool.OutputCh <- bl + pool.putToOutput(bl) } pool.initwg.Done() case parsers.CheckSource: @@ -383,7 +389,7 @@ func (pool *Pool) Invoke(v interface{}) { } else { atomic.AddInt32(&pool.failedCount, 1) // logs.Log.Debug("[check.failed] maybe trigger risk control, " + bl.String()) - pool.failedBaselines = append(pool.failedBaselines, bl) + pool.FailedBaselines = append(pool.FailedBaselines, bl) } } else { pool.resetFailed() // 如果后续访问正常, 重置错误次数 @@ -399,7 +405,7 @@ func (pool *Pool) Invoke(v interface{}) { atomic.AddInt32(&pool.failedCount, 1) pool.doCheck() } - pool.bar.Done() + pool.Bar.Done() case parsers.RedirectSource: bl.FrontURL = unit.frontUrl pool.handlerCh <- bl @@ -408,8 +414,8 @@ func (pool *Pool) Invoke(v interface{}) { } } -func (pool *Pool) NoScopeInvoke(v interface{}) { - defer pool.waiter.Done() +func (pool *BrutePool) NoScopeInvoke(v interface{}) { + defer pool.wg.Done() unit := v.(*Unit) req, err := ihttp.BuildPathRequest(pool.ClientType, unit.path, "") if err != nil { @@ -417,7 +423,7 @@ func (pool *Pool) NoScopeInvoke(v interface{}) { 
return } req.SetHeaders(pool.Headers) - req.SetHeader("User-Agent", RandomUA()) + req.SetHeader("User-Agent", pkg.RandomUA()) resp, reqerr := pool.client.Do(pool.ctx, req) if pool.ClientType == ihttp.FAST { defer fasthttp.ReleaseResponse(resp.FastResponse) @@ -428,18 +434,18 @@ func (pool *Pool) NoScopeInvoke(v interface{}) { return } if resp.StatusCode() == 200 { - bl := NewBaseline(req.URI(), req.Host(), resp) + bl := pkg.NewBaseline(req.URI(), req.Host(), resp) bl.Source = unit.source bl.ReqDepth = unit.depth bl.Collect() bl.CollectURL() - pool.waiter.Add(1) + pool.wg.Add(1) pool.doScopeCrawl(bl) - pool.OutputCh <- bl + pool.putToOutput(bl) } } -func (pool *Pool) Handler() { +func (pool *BrutePool) Handler() { for bl := range pool.handlerCh { if bl.IsValid { pool.addFuzzyBaseline(bl) @@ -474,7 +480,7 @@ func (pool *Pool) Handler() { var ok bool if pool.MatchExpr != nil { - if CompareWithExpr(pool.MatchExpr, params) { + if pkg.CompareWithExpr(pool.MatchExpr, params) { ok = true } } else { @@ -485,20 +491,20 @@ func (pool *Pool) Handler() { pool.Statistor.FoundNumber++ // unique判断 - if enableAllUnique || iutils.IntsContains(UniqueStatus, bl.Status) { + if EnableAllUnique || iutils.IntsContains(pkg.UniqueStatus, bl.Status) { if _, ok := pool.uniques[bl.Unique]; ok { bl.IsValid = false bl.IsFuzzy = true - bl.Reason = ErrFuzzyNotUnique.Error() + bl.Reason = pkg.ErrFuzzyNotUnique.Error() } else { pool.uniques[bl.Unique] = struct{}{} } } // 对通过所有对比的有效数据进行再次filter - if bl.IsValid && pool.FilterExpr != nil && CompareWithExpr(pool.FilterExpr, params) { + if bl.IsValid && pool.FilterExpr != nil && pkg.CompareWithExpr(pool.FilterExpr, params) { pool.Statistor.FilteredNumber++ - bl.Reason = ErrCustomFilter.Error() + bl.Reason = pkg.ErrCustomFilter.Error() bl.IsValid = false } } else { @@ -506,11 +512,11 @@ func (pool *Pool) Handler() { } if bl.IsValid || bl.IsFuzzy { - pool.waiter.Add(2) + pool.wg.Add(2) pool.doCrawl(bl) pool.doRule(bl) - if iutils.IntsContains(WhiteStatus, bl.Status) || iutils.IntsContains(UniqueStatus, bl.Status) { - pool.waiter.Add(1) + if iutils.IntsContains(pkg.WhiteStatus, bl.Status) || iutils.IntsContains(pkg.UniqueStatus, bl.Status) { + pool.wg.Add(1) pool.doAppendWords(bl) } } @@ -518,7 +524,7 @@ func (pool *Pool) Handler() { // 如果要进行递归判断, 要满足 bl有效, mod为path-spray, 当前深度小于最大递归深度 if bl.IsValid { if bl.RecuDepth < MaxRecursion { - if CompareWithExpr(pool.RecuExpr, params) { + if pkg.CompareWithExpr(pool.RecuExpr, params) { bl.Recu = true } } @@ -526,47 +532,47 @@ func (pool *Pool) Handler() { if !pool.closed { // 如果任务被取消, 所有还没处理的请求结果都会被丢弃 - pool.OutputCh <- bl + pool.putToOutput(bl) } - pool.waiter.Done() + pool.wg.Done() } pool.analyzeDone = true } -func (pool *Pool) PreCompare(resp *ihttp.Response) error { +func (pool *BrutePool) PreCompare(resp *ihttp.Response) error { status := resp.StatusCode() - if iutils.IntsContains(WhiteStatus, status) { + if iutils.IntsContains(pkg.WhiteStatus, status) { // 如果为白名单状态码则直接返回 return nil } if pool.random.Status != 200 && pool.random.Status == status { - return ErrSameStatus + return pkg.ErrSameStatus } - if iutils.IntsContains(BlackStatus, status) { - return ErrBadStatus + if iutils.IntsContains(pkg.BlackStatus, status) { + return pkg.ErrBadStatus } - if iutils.IntsContains(WAFStatus, status) { - return ErrWaf + if iutils.IntsContains(pkg.WAFStatus, status) { + return pkg.ErrWaf } if !pool.checkRedirect(resp.GetHeader("Location")) { - return ErrRedirect + return pkg.ErrRedirect } return nil } -func (pool *Pool) BaseCompare(bl *Baseline) 
bool { if !bl.IsValid { return false } var status = -1 // 30x状态码的特殊处理 if bl.RedirectURL != "" && strings.HasSuffix(bl.RedirectURL, bl.Url.Path+"/") { - bl.Reason = ErrFuzzyRedirect.Error() + bl.Reason = pkg.ErrFuzzyRedirect.Error() pool.putToFuzzy(bl) return false } @@ -586,7 +592,7 @@ func (pool *Pool) BaseCompare(bl *Baseline) bool { if ok { if status = base.Compare(bl); status == 1 { - bl.Reason = ErrCompareFailed.Error() + bl.Reason = pkg.ErrCompareFailed.Error() return false } } @@ -606,7 +612,7 @@ func (pool *Pool) BaseCompare(bl *Baseline) bool { if ok && status == 0 && base.FuzzyCompare(bl) { pool.Statistor.FuzzyNumber++ - bl.Reason = ErrFuzzyCompareFailed.Error() + bl.Reason = pkg.ErrFuzzyCompareFailed.Error() pool.putToFuzzy(bl) return false } @@ -614,57 +620,40 @@ func (pool *Pool) BaseCompare(bl *Baseline) bool { return true } -func (pool *Pool) Upgrade(bl *Baseline) error { - rurl, err := url.Parse(bl.RedirectURL) - if err == nil && rurl.Hostname() == bl.Url.Hostname() && bl.Url.Scheme == "http" && rurl.Scheme == "https" { - logs.Log.Infof("baseurl %s upgrade http to https, reinit", pool.BaseURL) - pool.base = strings.Replace(pool.BaseURL, "http", "https", 1) - pool.url.Scheme = "https" - // 重新初始化 - err = pool.Init() - if err != nil { - return err - } - } - - return nil -} - -func (pool *Pool) doRedirect(bl *Baseline, depth int) { - if depth >= MaxRedirect { +func (pool *BrutePool) doCheck() { + if pool.failedCount > pool.BreakThreshold { + // 当报错次数超过上限时, 结束任务 + pool.recover() + pool.Cancel() + pool.IsFailed = true return } - reURL := FormatURL(bl.Url.Path, bl.RedirectURL) - pool.waiter.Add(1) - go func() { - defer pool.waiter.Done() - pool.addAddition(&Unit{ - path: reURL, - source: parsers.RedirectSource, - frontUrl: bl.UrlString, - depth: depth + 1, - }) - }() + + if pool.Mod == HostSpray { + pool.checkCh <- struct{}{} + } else if pool.Mod == PathSpray { + pool.checkCh <- struct{}{} + } } -func (pool *Pool) doCrawl(bl *Baseline) { +func (pool *BrutePool) doCrawl(bl *pkg.Baseline) { if !pool.Crawl || bl.ReqDepth >= MaxCrawl { - pool.waiter.Done() + pool.wg.Done() return } bl.CollectURL() if bl.URLs == nil { - pool.waiter.Done() + pool.wg.Done() return } - pool.waiter.Add(1) + pool.wg.Add(1) pool.doScopeCrawl(bl) go func() { - defer pool.waiter.Done() + defer pool.wg.Done() for _, u := range bl.URLs { - if u = FormatURL(bl.Url.Path, u); u == "" { + if u = pkg.FormatURL(bl.Url.Path, u); u == "" { continue } pool.addAddition(&Unit{ @@ -677,23 +666,23 @@ func (pool *Pool) doCrawl(bl *Baseline) { } -func (pool *Pool) doScopeCrawl(bl *Baseline) { +func (pool *BrutePool) doScopeCrawl(bl *pkg.Baseline) { if bl.ReqDepth >= MaxCrawl { - pool.waiter.Done() + pool.wg.Done() return } go func() { - defer pool.waiter.Done() + defer pool.wg.Done() for _, u := range bl.URLs { if strings.HasPrefix(u, "http") { - if v, _ := url.Parse(u); v == nil || !MatchWithGlobs(v.Host, pool.Scope) { + if v, _ := url.Parse(u); v == nil || !pkg.MatchWithGlobs(v.Host, pool.Scope) { continue } pool.scopeLocker.Lock() if _, ok := pool.scopeurls[u]; !ok { pool.urls.Store(u, nil) - pool.waiter.Add(1) + pool.wg.Add(1) pool.scopePool.Invoke(&Unit{path: u, source: parsers.CrawlSource, depth: bl.ReqDepth + 1}) } pool.scopeLocker.Unlock() @@ -702,75 +691,18 @@ func (pool *Pool) doScopeCrawl(bl *Baseline) { }() } -func (pool *Pool) doRule(bl *Baseline) { - if pool.AppendRule == nil { - pool.waiter.Done() - return - } - if bl.Source == parsers.RuleSource { - 
pool.waiter.Done() - return - } - - go func() { - defer pool.waiter.Done() - for u := range rule.RunAsStream(pool.AppendRule.Expressions, path.Base(bl.Path)) { - pool.addAddition(&Unit{ - path: dir(bl.Url.Path) + u, - source: parsers.RuleSource, - }) - } - }() -} - -func (pool *Pool) doAppendWords(bl *Baseline) { - if pool.AppendWords == nil { - pool.waiter.Done() - return - } - if bl.Source == parsers.AppendSource { - pool.waiter.Done() - return - } - - go func() { - defer pool.waiter.Done() - for _, u := range pool.AppendWords { - pool.addAddition(&Unit{ - path: safePath(bl.Path, u), - source: parsers.AppendSource, - }) - } - }() -} - -func (pool *Pool) doRetry(bl *Baseline) { - if bl.Retry >= pool.Retry { - return - } - pool.waiter.Add(1) - go func() { - defer pool.waiter.Done() - pool.addAddition(&Unit{ - path: bl.Path, - source: parsers.RetrySource, - retry: bl.Retry + 1, - }) - }() -} - -func (pool *Pool) doActive() { - defer pool.waiter.Done() - for _, u := range pkg.ActivePath { - pool.addAddition(&Unit{ - path: pool.dir + u[1:], - source: parsers.FingerSource, - }) +func (pool *BrutePool) addFuzzyBaseline(bl *pkg.Baseline) { + if _, ok := pool.baselines[bl.Status]; !ok && (EnableAllFuzzy || iutils.IntsContains(pkg.FuzzyStatus, bl.Status)) { + bl.Collect() + pool.wg.Add(1) + pool.doCrawl(bl) // 非有效页面也可能存在一些特殊的url可以用来爬取 + pool.baselines[bl.Status] = bl + logs.Log.Logf(pkg.LogVerbose, "[baseline.%dinit] %s", bl.Status, bl.Format([]string{"status", "length", "spend", "title", "frame", "redirect"})) } } -func (pool *Pool) doBak() { - defer pool.waiter.Done() +func (pool *BrutePool) doBak() { + defer pool.wg.Done() worder, err := words.NewWorderWithDsl("{?0}.{@bak_ext}", [][]string{pkg.BakGenerator(pool.url.Host)}, nil) if err != nil { return @@ -796,75 +728,14 @@ func (pool *Pool) doBak() { } } -func (pool *Pool) doCommonFile() { - defer pool.waiter.Done() - for _, u := range mask.SpecialWords["common_file"] { - pool.addAddition(&Unit{ - path: pool.dir + u, - source: parsers.CommonFileSource, - }) - } -} - -func (pool *Pool) doCheck() { - if pool.failedCount > pool.BreakThreshold { - // 当报错次数超过上限是, 结束任务 - pool.recover() - pool.cancel() - pool.isFailed = true - return - } - - if pool.Mod == HostSpray { - pool.checkCh <- struct{}{} - } else if pool.Mod == PathSpray { - pool.checkCh <- struct{}{} - } -} - -func (pool *Pool) addAddition(u *Unit) { - // 强行屏蔽报错, 防止goroutine泄露 - pool.waiter.Add(1) - defer func() { - if err := recover(); err != nil { - } - }() - pool.additionCh <- u -} - -func (pool *Pool) addFuzzyBaseline(bl *Baseline) { - if _, ok := pool.baselines[bl.Status]; !ok && (enableAllFuzzy || iutils.IntsContains(FuzzyStatus, bl.Status)) { - bl.Collect() - pool.waiter.Add(1) - pool.doCrawl(bl) // 非有效页面也可能存在一些特殊的url可以用来爬取 - pool.baselines[bl.Status] = bl - logs.Log.Logf(LogVerbose, "[baseline.%dinit] %s", bl.Status, bl.Format([]string{"status", "length", "spend", "title", "frame", "redirect"})) - } -} - -func (pool *Pool) putToInvalid(bl *Baseline, reason string) { - bl.IsValid = false - pool.OutputCh <- bl -} - -func (pool *Pool) putToFuzzy(bl *Baseline) { - bl.IsFuzzy = true - pool.FuzzyCh <- bl -} - -func (pool *Pool) resetFailed() { - pool.failedCount = 1 - pool.failedBaselines = nil -} - -func (pool *Pool) recover() { +func (pool *BrutePool) recover() { logs.Log.Errorf("%s, failed requests exceed the threshold, task will exit. 
Breakpoint %d", pool.BaseURL, pool.wordOffset) - for i, bl := range pool.failedBaselines { + for i, bl := range pool.FailedBaselines { logs.Log.Errorf("[failed.%d] %s", i, bl.String()) } } -func (pool *Pool) Close() { +func (pool *BrutePool) Close() { for pool.analyzeDone { // 等待缓存的待处理任务完成 time.Sleep(time.Duration(100) * time.Millisecond) @@ -872,27 +743,32 @@ func (pool *Pool) Close() { close(pool.additionCh) // 关闭addition管道 close(pool.checkCh) // 关闭check管道 pool.Statistor.EndTime = time.Now().Unix() - pool.bar.Close() + pool.Bar.Close() } -func (pool *Pool) safePath(u string) string { +func (pool *BrutePool) safePath(u string) string { // 自动生成的目录将采用safepath的方式拼接到相对目录中, 避免出现//的情况. 例如init, check, common if pool.isDir { - return safePath(pool.dir, u) + return pkg.SafePath(pool.dir, u) } else { - return safePath(pool.url.Path+"/", u) + return pkg.SafePath(pool.url.Path+"/", u) } } +func (pool *BrutePool) resetFailed() { + pool.failedCount = 1 + pool.FailedBaselines = nil +} + func NewBaselines() *Baselines { return &Baselines{ - baselines: map[int]*Baseline{}, + baselines: map[int]*pkg.Baseline{}, } } type Baselines struct { - failedBaselines []*Baseline - random *Baseline - index *Baseline - baselines map[int]*Baseline + FailedBaselines []*pkg.Baseline + random *pkg.Baseline + index *pkg.Baseline + baselines map[int]*pkg.Baseline } diff --git a/internal/checkpool.go b/internal/pool/checkpool.go similarity index 64% rename from internal/checkpool.go rename to internal/pool/checkpool.go index 331f025..56aeb00 100644 --- a/internal/checkpool.go +++ b/internal/pool/checkpool.go @@ -1,13 +1,11 @@ -package internal +package pool import ( "context" - "fmt" "github.com/chainreactors/logs" "github.com/chainreactors/parsers" - ihttp2 "github.com/chainreactors/spray/internal/ihttp" + "github.com/chainreactors/spray/internal/ihttp" "github.com/chainreactors/spray/pkg" - "github.com/chainreactors/words" "github.com/panjf2000/ants/v2" "github.com/valyala/fasthttp" "net/url" @@ -20,58 +18,34 @@ import ( func NewCheckPool(ctx context.Context, config *Config) (*CheckPool, error) { pctx, cancel := context.WithCancel(ctx) pool := &CheckPool{ - Config: config, - ctx: pctx, - cancel: cancel, - client: ihttp2.NewClient(&ihttp2.ClientConfig{ - Thread: config.Thread, - Type: config.ClientType, - Timeout: time.Duration(config.Timeout) * time.Second, - ProxyAddr: config.ProxyAddr, - }), - wg: sync.WaitGroup{}, - additionCh: make(chan *Unit, 100), - closeCh: make(chan struct{}), - reqCount: 1, - failedCount: 1, + &This{ + Config: config, + ctx: pctx, + Cancel: cancel, + client: ihttp.NewClient(&ihttp.ClientConfig{ + Thread: config.Thread, + Type: config.ClientType, + Timeout: time.Duration(config.Timeout) * time.Second, + ProxyAddr: config.ProxyAddr, + }), + wg: sync.WaitGroup{}, + additionCh: make(chan *Unit, 100), + closeCh: make(chan struct{}), + }, } pool.Headers = map[string]string{"Connection": "close"} p, _ := ants.NewPoolWithFunc(config.Thread, pool.Invoke) - pool.pool = p + pool.This.Pool = p return pool, nil } type CheckPool struct { - *Config - client *ihttp2.Client - pool *ants.PoolWithFunc - bar *pkg.Bar - ctx context.Context - cancel context.CancelFunc - reqCount int - failedCount int - additionCh chan *Unit - closeCh chan struct{} - worder *words.Worder - wg sync.WaitGroup -} - -func (pool *CheckPool) Close() { - pool.bar.Close() -} - -func (pool *CheckPool) genReq(s string) (*ihttp2.Request, error) { - if pool.Mod == HostSpray { - return ihttp2.BuildHostRequest(pool.ClientType, pool.BaseURL, s) - } 
else if pool.Mod == PathSpray { - return ihttp2.BuildPathRequest(pool.ClientType, pool.BaseURL, s) - } - return nil, fmt.Errorf("unknown mod") + *This } func (pool *CheckPool) Run(ctx context.Context, offset, limit int) { - pool.worder.Run() + pool.Worder.Run() var done bool // 挂起一个监控goroutine, 每100ms判断一次done, 如果已经done, 则关闭closeCh, 然后通过Loop中的select case closeCh去break, 实现退出 @@ -89,7 +63,7 @@ func (pool *CheckPool) Run(ctx context.Context, offset, limit int) { Loop: for { select { - case u, ok := <-pool.worder.C: + case u, ok := <-pool.Worder.C: if !ok { done = true continue @@ -105,12 +79,12 @@ Loop: } pool.wg.Add(1) - _ = pool.pool.Invoke(newUnit(u, parsers.CheckSource)) + _ = pool.This.Pool.Invoke(newUnit(u, parsers.CheckSource)) case u, ok := <-pool.additionCh: if !ok { continue } - _ = pool.pool.Invoke(u) + _ = pool.This.Pool.Invoke(u) case <-pool.closeCh: break Loop case <-ctx.Done(): @@ -131,21 +105,21 @@ func (pool *CheckPool) Invoke(v interface{}) { } req.SetHeaders(pool.Headers) start := time.Now() - var bl *Baseline + var bl *pkg.Baseline resp, reqerr := pool.client.Do(pool.ctx, req) - if pool.ClientType == ihttp2.FAST { + if pool.ClientType == ihttp.FAST { defer fasthttp.ReleaseResponse(resp.FastResponse) defer fasthttp.ReleaseRequest(req.FastRequest) } if reqerr != nil && reqerr != fasthttp.ErrBodyTooLarge { pool.failedCount++ - bl = &Baseline{ + bl = &pkg.Baseline{ SprayResult: &parsers.SprayResult{ UrlString: unit.path, IsValid: false, ErrString: reqerr.Error(), - Reason: ErrRequestFailed.Error(), + Reason: pkg.ErrRequestFailed.Error(), ReqDepth: unit.depth, }, } @@ -157,7 +131,7 @@ func (pool *CheckPool) Invoke(v interface{}) { } } else { - bl = NewBaseline(req.URI(), req.Host(), resp) + bl = pkg.NewBaseline(req.URI(), req.Host(), resp) bl.Collect() } bl.ReqDepth = unit.depth @@ -168,26 +142,26 @@ func (pool *CheckPool) Invoke(v interface{}) { if bl.IsValid { if bl.RedirectURL != "" { pool.doRedirect(bl, unit.depth) - pool.FuzzyCh <- bl + pool.putToFuzzy(bl) } else if bl.Status == 400 { pool.doUpgrade(bl) - pool.FuzzyCh <- bl + pool.putToFuzzy(bl) } else { params := map[string]interface{}{ "current": bl, } - if pool.MatchExpr == nil || CompareWithExpr(pool.MatchExpr, params) { - pool.OutputCh <- bl + if pool.MatchExpr == nil || pkg.CompareWithExpr(pool.MatchExpr, params) { + pool.putToOutput(bl) } } } pool.reqCount++ pool.wg.Done() - pool.bar.Done() + pool.Bar.Done() } -func (pool *CheckPool) doRedirect(bl *Baseline, depth int) { +func (pool *CheckPool) doRedirect(bl *pkg.Baseline, depth int) { if depth >= MaxRedirect { return } @@ -199,7 +173,7 @@ func (pool *CheckPool) doRedirect(bl *Baseline, depth int) { } reURL = bl.RedirectURL } else { - reURL = BaseURL(bl.Url) + FormatURL(BaseURL(bl.Url), bl.RedirectURL) + reURL = pkg.BaseURL(bl.Url) + pkg.FormatURL(pkg.BaseURL(bl.Url), bl.RedirectURL) } pool.wg.Add(1) @@ -214,7 +188,7 @@ func (pool *CheckPool) doRedirect(bl *Baseline, depth int) { } // tcp与400进行协议转换 -func (pool *CheckPool) doUpgrade(bl *Baseline) { +func (pool *CheckPool) doUpgrade(bl *pkg.Baseline) { if bl.ReqDepth >= 1 { return } diff --git a/internal/config.go b/internal/pool/config.go similarity index 85% rename from internal/config.go rename to internal/pool/config.go index ba9f4b4..bbf2460 100644 --- a/internal/config.go +++ b/internal/pool/config.go @@ -1,8 +1,10 @@ -package internal +package pool import ( "github.com/antonmedv/expr/vm" + "github.com/chainreactors/spray/pkg" "github.com/chainreactors/words/rule" + "sync" ) type SprayMod int @@ -25,6 +27,9 @@ 
type Config struct { Thread int Wordlist []string Timeout int + OutputCh chan *pkg.Baseline + FuzzyCh chan *pkg.Baseline + OutLocker *sync.WaitGroup RateLimit int CheckPeriod int ErrPeriod int32 @@ -38,8 +43,6 @@ type Config struct { RecuExpr *vm.Program AppendRule *rule.Program AppendWords []string - OutputCh chan *Baseline - FuzzyCh chan *Baseline Fuzzy bool IgnoreWaf bool Crawl bool diff --git a/internal/pool/pool.go b/internal/pool/pool.go new file mode 100644 index 0000000..f4bb8ab --- /dev/null +++ b/internal/pool/pool.go @@ -0,0 +1,160 @@ +package pool + +import ( + "context" + "fmt" + "github.com/chainreactors/parsers" + "github.com/chainreactors/spray/internal/ihttp" + "github.com/chainreactors/spray/pkg" + "github.com/chainreactors/words" + "github.com/chainreactors/words/mask" + "github.com/chainreactors/words/rule" + "github.com/panjf2000/ants/v2" + "path" + "sync" +) + +type This struct { + *Config + Statistor *pkg.Statistor + Pool *ants.PoolWithFunc + Bar *pkg.Bar + Worder *words.Worder + client *ihttp.Client + ctx context.Context + Cancel context.CancelFunc + dir string + reqCount int + failedCount int + additionCh chan *Unit + closeCh chan struct{} + wg sync.WaitGroup +} + +func (pool *This) doRedirect(bl *pkg.Baseline, depth int) { + if depth >= MaxRedirect { + return + } + reURL := pkg.FormatURL(bl.Url.Path, bl.RedirectURL) + pool.wg.Add(1) + go func() { + defer pool.wg.Done() + pool.addAddition(&Unit{ + path: reURL, + source: parsers.RedirectSource, + frontUrl: bl.UrlString, + depth: depth + 1, + }) + }() +} + +func (pool *This) doRule(bl *pkg.Baseline) { + if pool.AppendRule == nil { + pool.wg.Done() + return + } + if bl.Source == parsers.RuleSource { + pool.wg.Done() + return + } + + go func() { + defer pool.wg.Done() + for u := range rule.RunAsStream(pool.AppendRule.Expressions, path.Base(bl.Path)) { + pool.addAddition(&Unit{ + path: pkg.Dir(bl.Url.Path) + u, + source: parsers.RuleSource, + }) + } + }() +} + +func (pool *This) doAppendWords(bl *pkg.Baseline) { + if pool.AppendWords == nil { + pool.wg.Done() + return + } + if bl.Source == parsers.AppendSource { + pool.wg.Done() + return + } + + go func() { + defer pool.wg.Done() + for _, u := range pool.AppendWords { + pool.addAddition(&Unit{ + path: pkg.SafePath(bl.Path, u), + source: parsers.AppendSource, + }) + } + }() +} + +func (pool *This) doRetry(bl *pkg.Baseline) { + if bl.Retry >= pool.Retry { + return + } + pool.wg.Add(1) + go func() { + defer pool.wg.Done() + pool.addAddition(&Unit{ + path: bl.Path, + source: parsers.RetrySource, + retry: bl.Retry + 1, + }) + }() +} + +func (pool *This) doActive() { + defer pool.wg.Done() + for _, u := range pkg.ActivePath { + pool.addAddition(&Unit{ + path: pool.dir + u[1:], + source: parsers.FingerSource, + }) + } +} + +func (pool *This) doCommonFile() { + defer pool.wg.Done() + for _, u := range mask.SpecialWords["common_file"] { + pool.addAddition(&Unit{ + path: pool.dir + u, + source: parsers.CommonFileSource, + }) + } +} + +func (pool *This) addAddition(u *Unit) { + // 强行屏蔽报错, 防止goroutine泄露 + pool.wg.Add(1) + defer func() { + if err := recover(); err != nil { + } + }() + pool.additionCh <- u +} + +func (pool *This) Close() { + pool.Bar.Close() +} + +func (pool *This) genReq(s string) (*ihttp.Request, error) { + if pool.Mod == HostSpray { + return ihttp.BuildHostRequest(pool.ClientType, pool.BaseURL, s) + } else if pool.Mod == PathSpray { + return ihttp.BuildPathRequest(pool.ClientType, pool.BaseURL, s) + } + return nil, fmt.Errorf("unknown mod") +} + +func (pool *This) 
putToOutput(bl *pkg.Baseline) { + pool.OutLocker.Add(1) + pool.OutputCh <- bl +} + +func (pool *This) putToFuzzy(bl *pkg.Baseline) { + pool.OutLocker.Add(1) + bl.IsFuzzy = true + pool.FuzzyCh <- bl +} diff --git a/internal/pool/unit.go b/internal/pool/unit.go new file mode 100644 index 0000000..06ff692 --- /dev/null +++ b/internal/pool/unit.go @@ -0,0 +1,20 @@ +package pool + +import "github.com/chainreactors/parsers" + +func newUnit(path string, source parsers.SpraySource) *Unit { + return &Unit{path: path, source: source} +} + +func newUnitWithNumber(path string, source parsers.SpraySource, number int) *Unit { + return &Unit{path: path, source: source, number: number} +} + +type Unit struct { + number int + path string + source parsers.SpraySource + retry int + frontUrl string + depth int // redirect depth +} diff --git a/internal/runner.go b/internal/runner.go index 42a85b6..50afd4f 100644 --- a/internal/runner.go +++ b/internal/runner.go @@ -7,6 +7,7 @@ import ( "github.com/chainreactors/files" "github.com/chainreactors/logs" "github.com/chainreactors/spray/internal/ihttp" + "github.com/chainreactors/spray/internal/pool" "github.com/chainreactors/spray/pkg" "github.com/chainreactors/words" "github.com/chainreactors/words/rule" @@ -17,11 +18,7 @@ import ( ) var ( - WhiteStatus = []int{} // cmd input, 200 - BlackStatus = []int{} // cmd input, 400,410 - FuzzyStatus = []int{} // cmd input, 500,501,502,503 - WAFStatus = []int{493, 418, 1020, 406} - UniqueStatus = []int{} // 相同unique的403表示命中了同一条acl, 相同unique的200表示default页面 + max = 2147483647 ) var ( @@ -33,6 +30,9 @@ var ( type Runner struct { taskCh chan *Task poolwg sync.WaitGroup + outwg *sync.WaitGroup + outputCh chan *pkg.Baseline + fuzzyCh chan *pkg.Baseline bar *uiprogress.Bar finished int @@ -56,8 +56,6 @@ type Runner struct { Timeout int Mod string Probes []string - OutputCh chan *Baseline - FuzzyCh chan *Baseline Fuzzy bool OutputFile *files.File FuzzyFile *files.File @@ -88,15 +86,16 @@ type Runner struct { Proxy string } -func (r *Runner) PrepareConfig() *Config { - config := &Config{ +func (r *Runner) PrepareConfig() *pool.Config { + config := &pool.Config{ Thread: r.Threads, Timeout: r.Timeout, RateLimit: r.RateLimit, Headers: r.Headers, - Mod: ModMap[r.Mod], - OutputCh: r.OutputCh, - FuzzyCh: r.FuzzyCh, + Mod: pool.ModMap[r.Mod], + OutputCh: r.outputCh, + FuzzyCh: r.fuzzyCh, + OutLocker: r.outwg, Fuzzy: r.Fuzzy, CheckPeriod: r.CheckPeriod, ErrPeriod: int32(r.ErrPeriod), @@ -121,9 +120,9 @@ func (r *Runner) PrepareConfig() *Config { } if config.ClientType == ihttp.Auto { - if config.Mod == PathSpray { + if config.Mod == pool.PathSpray { config.ClientType = ihttp.FAST - } else if config.Mod == HostSpray { + } else if config.Mod == pool.HostSpray { config.ClientType = ihttp.STANDARD } } @@ -141,10 +140,10 @@ func (r *Runner) Prepare(ctx context.Context) error { r.Pools, err = ants.NewPoolWithFunc(1, func(i interface{}) { config := r.PrepareConfig() - pool, err := NewCheckPool(ctx, config) + pool, err := pool.NewCheckPool(ctx, config) if err != nil { logs.Log.Error(err.Error()) - pool.cancel() + pool.Cancel() r.poolwg.Done() return } @@ -156,9 +155,9 @@ func (r *Runner) Prepare(ctx context.Context) error { } close(ch) }() - pool.worder = words.NewWorderWithChan(ch) - pool.worder.Fns = r.Fns - pool.bar = pkg.NewBar("check", r.Count-r.Offset, r.Progress) + pool.Worder = words.NewWorderWithChan(ch) + pool.Worder.Fns = r.Fns + pool.Bar = pkg.NewBar("check", r.Count-r.Offset, r.Progress) pool.Run(ctx, r.Offset, r.Count) r.poolwg.Done() 
}) @@ -190,17 +189,17 @@ func (r *Runner) Prepare(ctx context.Context) error { config := r.PrepareConfig() config.BaseURL = t.baseUrl - pool, err := NewPool(ctx, config) + pool, err := pool.NewBrutePool(ctx, config) if err != nil { logs.Log.Error(err.Error()) - pool.cancel() + pool.Cancel() r.Done() return } if t.origin != nil && len(r.Wordlist) == 0 { // 如果是从断点续传中恢复的任务, 则自动设置word,dict与rule, 不过优先级低于命令行参数 pool.Statistor = pkg.NewStatistorFromStat(t.origin.Statistor) - pool.worder, err = t.origin.InitWorder(r.Fns) + pool.Worder, err = t.origin.InitWorder(r.Fns) if err != nil { logs.Log.Error(err.Error()) r.Done() @@ -209,9 +208,9 @@ func (r *Runner) Prepare(ctx context.Context) error { pool.Statistor.Total = t.origin.sum } else { pool.Statistor = pkg.NewStatistor(t.baseUrl) - pool.worder = words.NewWorder(r.Wordlist) - pool.worder.Fns = r.Fns - pool.worder.Rules = r.Rules.Expressions + pool.Worder = words.NewWorder(r.Wordlist) + pool.Worder.Fns = r.Fns + pool.Worder.Rules = r.Rules.Expressions } var limit int @@ -220,7 +219,7 @@ func (r *Runner) Prepare(ctx context.Context) error { } else { limit = pool.Statistor.Total } - pool.bar = pkg.NewBar(config.BaseURL, limit-pool.Statistor.Offset, r.Progress) + pool.Bar = pkg.NewBar(config.BaseURL, limit-pool.Statistor.Offset, r.Progress) logs.Log.Importantf("[pool] task: %s, total %d words, %d threads, proxy: %s", pool.BaseURL, limit-pool.Statistor.Offset, pool.Thread, pool.ProxyAddr) err = pool.Init() if err != nil { @@ -236,9 +235,9 @@ func (r *Runner) Prepare(ctx context.Context) error { pool.Run(pool.Statistor.Offset, limit) - if pool.isFailed && len(pool.failedBaselines) > 0 { + if pool.IsFailed && len(pool.FailedBaselines) > 0 { // 如果因为错误积累退出, end将指向第一个错误发生时, 防止resume时跳过大量目标 - pool.Statistor.End = pool.failedBaselines[0].Number + pool.Statistor.End = pool.FailedBaselines[0].Number } r.PrintStat(pool) r.Done() @@ -248,11 +247,11 @@ func (r *Runner) Prepare(ctx context.Context) error { if err != nil { return err } - r.Output() + r.OutputHandler() return nil } -func (r *Runner) AddRecursive(bl *Baseline) { +func (r *Runner) AddRecursive(bl *pkg.Baseline) { // 递归新任务 task := &Task{ baseUrl: bl.UrlString, @@ -296,19 +295,7 @@ Loop: } r.poolwg.Wait() - time.Sleep(100 * time.Millisecond) // 延迟100ms, 等所有数据处理完毕 - for { - if len(r.OutputCh) == 0 { - break - } - } - - for { - if len(r.FuzzyCh) == 0 { - break - } - } - time.Sleep(100 * time.Millisecond) // 延迟100ms, 等所有数据处理完毕 + r.outwg.Wait() } func (r *Runner) RunWithCheck(ctx context.Context) { @@ -335,7 +322,7 @@ Loop: } for { - if len(r.OutputCh) == 0 { + if len(r.outputCh) == 0 { break } } @@ -349,7 +336,7 @@ func (r *Runner) Done() { r.poolwg.Done() } -func (r *Runner) PrintStat(pool *Pool) { +func (r *Runner) PrintStat(pool *pool.BrutePool) { if r.Color { logs.Log.Important(pool.Statistor.ColorString()) if pool.Statistor.Error == "" { @@ -370,8 +357,8 @@ func (r *Runner) PrintStat(pool *Pool) { } } -func (r *Runner) Output() { - debugPrint := func(bl *Baseline) { +func (r *Runner) OutputHandler() { + debugPrint := func(bl *pkg.Baseline) { if r.Color { logs.Log.Debug(bl.ColorString()) } else { @@ -379,31 +366,31 @@ func (r *Runner) Output() { } } go func() { - var saveFunc func(*Baseline) + var saveFunc func(*pkg.Baseline) if r.OutputFile != nil { - saveFunc = func(bl *Baseline) { + saveFunc = func(bl *pkg.Baseline) { r.OutputFile.SafeWrite(bl.Jsonify() + "\n") r.OutputFile.SafeSync() } } else { if len(r.Probes) > 0 { if r.Color { - saveFunc = func(bl *Baseline) { + saveFunc = func(bl *pkg.Baseline) { 
logs.Log.Console(logs.GreenBold("[+] " + bl.Format(r.Probes) + "\n")) } } else { - saveFunc = func(bl *Baseline) { + saveFunc = func(bl *pkg.Baseline) { logs.Log.Console("[+] " + bl.Format(r.Probes) + "\n") } } } else { if r.Color { - saveFunc = func(bl *Baseline) { + saveFunc = func(bl *pkg.Baseline) { logs.Log.Console(logs.GreenBold("[+] " + bl.ColorString() + "\n")) } } else { - saveFunc = func(bl *Baseline) { + saveFunc = func(bl *pkg.Baseline) { logs.Log.Console("[+] " + bl.String() + "\n") } } @@ -412,7 +399,7 @@ func (r *Runner) Output() { for { select { - case bl, ok := <-r.OutputCh: + case bl, ok := <-r.outputCh: if !ok { return } @@ -428,23 +415,24 @@ func (r *Runner) Output() { } else { debugPrint(bl) } + r.outwg.Done() } } }() go func() { - var fuzzySaveFunc func(*Baseline) + var fuzzySaveFunc func(*pkg.Baseline) if r.FuzzyFile != nil { - fuzzySaveFunc = func(bl *Baseline) { + fuzzySaveFunc = func(bl *pkg.Baseline) { r.FuzzyFile.SafeWrite(bl.Jsonify() + "\n") } } else { if r.Color { - fuzzySaveFunc = func(bl *Baseline) { + fuzzySaveFunc = func(bl *pkg.Baseline) { logs.Log.Console(logs.GreenBold("[fuzzy] " + bl.ColorString() + "\n")) } } else { - fuzzySaveFunc = func(bl *Baseline) { + fuzzySaveFunc = func(bl *pkg.Baseline) { logs.Log.Console("[fuzzy] " + bl.String() + "\n") } } @@ -452,15 +440,16 @@ func (r *Runner) Output() { for { select { - case bl, ok := <-r.FuzzyCh: + case bl, ok := <-r.fuzzyCh: if !ok { return } if r.Fuzzy { fuzzySaveFunc(bl) - } else { - debugPrint(bl) + //} else { + // debugPrint(bl) } + r.outwg.Done() } } }() diff --git a/internal/types.go b/internal/types.go index 3b74075..9f50a91 100644 --- a/internal/types.go +++ b/internal/types.go @@ -1,84 +1,11 @@ package internal import ( - "github.com/chainreactors/parsers" "github.com/chainreactors/spray/pkg" "github.com/chainreactors/words" "github.com/chainreactors/words/rule" ) -type Source int - -const ( - CheckSource Source = iota + 1 - InitRandomSource - InitIndexSource - RedirectSource - CrawlSource - FingerSource - WordSource - WafSource - RuleSource - BakSource - CommonFileSource - UpgradeSource - RetrySource - AppendSource -) - -// Name return the name of the source -func (s Source) Name() string { - switch s { - case CheckSource: - return "check" - case InitRandomSource: - return "random" - case InitIndexSource: - return "index" - case RedirectSource: - return "redirect" - case CrawlSource: - return "crawl" - case FingerSource: - return "finger" - case WordSource: - return "word" - case WafSource: - return "waf" - case RuleSource: - return "rule" - case BakSource: - return "bak" - case CommonFileSource: - return "common" - case UpgradeSource: - return "upgrade" - case RetrySource: - return "retry" - case AppendSource: - return "append" - default: - return "unknown" - } -} - -func newUnit(path string, source parsers.SpraySource) *Unit { - return &Unit{path: path, source: source} -} - -func newUnitWithNumber(path string, source parsers.SpraySource, number int) *Unit { - return &Unit{path: path, source: source, number: number} -} - -type Unit struct { - number int - path string - source parsers.SpraySource - retry int - frontUrl string - depth int // redirect depth -} - type Task struct { baseUrl string depth int diff --git a/internal/utils.go b/internal/utils.go index 9c4da0f..d745f02 100644 --- a/internal/utils.go +++ b/internal/utils.go @@ -2,41 +2,15 @@ package internal import ( "bytes" - "github.com/antonmedv/expr" "github.com/antonmedv/expr/ast" - "github.com/antonmedv/expr/vm" - 
"github.com/chainreactors/logs" "github.com/chainreactors/spray/pkg" "github.com/chainreactors/words/mask" "github.com/chainreactors/words/rule" "io/ioutil" - "math/rand" - "net/url" - "path" - "path/filepath" "strconv" "strings" ) -var ( - // from feroxbuster - randomUserAgent = []string{ - "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36", - "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1", - "Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254", - "Mozilla/5.0 (Linux; Android 7.0; Pixel C Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246", - "Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36", - "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1", - "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", - "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", - "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", - } - uacount = len(randomUserAgent) -) - func parseExtension(s string) string { if i := strings.Index(s, "."); i != -1 { return s[i+1:] @@ -171,125 +145,6 @@ func loadRuleWithFiles(ruleFiles []string, filter string) ([]rule.Expression, er return rule.Compile(rules.String(), filter).Expressions, nil } -func safePath(dir, u string) string { - hasSlash := strings.HasPrefix(u, "/") - if hasSlash { - return path.Join(dir, u[1:]) - } else { - return path.Join(dir, u) - } -} - -func relaPath(base, u string) string { - // 拼接相对目录, 不使用path.join的原因是, 如果存在"////"这样的情况, 可能真的是有意义的路由, 不能随意去掉. 
- // "" /a /a - // "" a /a - // / "" / - // /a/ b /a/b - // /a/ /b /a/b - // /a b /b - // /a /b /b - - if u == "" { - return base - } - - pathSlash := strings.HasPrefix(u, "/") - if base == "" { - if pathSlash { - return u[1:] - } else { - return "/" + u - } - } else if strings.HasSuffix(base, "/") { - if pathSlash { - return base + u[1:] - } else { - return base + u - } - } else { - if pathSlash { - return dir(base) + u[1:] - } else { - return dir(base) + u - } - } -} - -func dir(u string) string { - // 安全的获取目录, 不会额外处理多个"//", 并非用来获取上级目录 - // /a / - // /a/ /a/ - // a/ a/ - // aaa / - if strings.HasSuffix(u, "/") { - return u - } else if i := strings.LastIndex(u, "/"); i == -1 { - return "/" - } else { - return u[:i+1] - } -} - -func FormatURL(base, u string) string { - if strings.HasPrefix(u, "http") { - parsed, err := url.Parse(u) - if err != nil { - return "" - } - return parsed.Path - } else if strings.HasPrefix(u, "//") { - parsed, err := url.Parse(u) - if err != nil { - return "" - } - return parsed.Path - } else if strings.HasPrefix(u, "/") { - // 绝对目录拼接 - // 不需要进行处理, 用来跳过下面的判断 - return u - } else if strings.HasPrefix(u, "./") { - // "./"相对目录拼接 - return relaPath(base, u[2:]) - } else if strings.HasPrefix(u, "../") { - return path.Join(dir(base), u) - } else { - // 相对目录拼接 - return relaPath(base, u) - } -} - -func BaseURL(u *url.URL) string { - return u.Scheme + "://" + u.Host -} - -func RandomUA() string { - return randomUserAgent[rand.Intn(uacount)] -} - -func CompareWithExpr(exp *vm.Program, params map[string]interface{}) bool { - res, err := expr.Run(exp, params) - if err != nil { - logs.Log.Warn(err.Error()) - } - - if res == true { - return true - } else { - return false - } -} - -func MatchWithGlobs(u string, globs []string) bool { - for _, glob := range globs { - ok, err := filepath.Match(glob, u) - if err == nil && ok { - return true - } - } - return false -} - type bytesPatcher struct{} func (p *bytesPatcher) Visit(node *ast.Node) { @@ -310,9 +165,3 @@ func wrapWordsFunc(f func(string) string) func(string) []string { return []string{f(s)} } } - -func UniqueHash(bl *Baseline) uint16 { - // 由host+状态码+重定向url+content-type+title+length舍去个位组成的hash - // body length可能会导致一些误报, 目前没有更好的解决办法 - return pkg.CRC16Hash([]byte(bl.Host + strconv.Itoa(bl.Status) + bl.RedirectURL + bl.ContentType + bl.Title + strconv.Itoa(bl.BodyLength/10*10))) -} diff --git a/internal/baseline.go b/pkg/baseline.go similarity index 87% rename from internal/baseline.go rename to pkg/baseline.go index 3b10c72..d365d39 100644 --- a/internal/baseline.go +++ b/pkg/baseline.go @@ -1,10 +1,9 @@ -package internal +package pkg import ( "bytes" "github.com/chainreactors/parsers" "github.com/chainreactors/spray/internal/ihttp" - "github.com/chainreactors/spray/pkg" "github.com/chainreactors/utils/encode" "github.com/chainreactors/utils/iutils" "net/url" @@ -21,7 +20,7 @@ func NewBaseline(u, host string, resp *ihttp.Response) *Baseline { }, } - if t, ok := pkg.ContentTypeMap[resp.ContentType()]; ok { + if t, ok := ContentTypeMap[resp.ContentType()]; ok { bl.ContentType = t bl.Title = t + " data" } else { @@ -106,9 +105,9 @@ type Baseline struct { Url *url.URL `json:"-"` Dir bool `json:"-"` Chunked bool `json:"-"` - Body pkg.BS `json:"-"` - Header pkg.BS `json:"-"` - Raw pkg.BS `json:"-"` + Body BS `json:"-"` + Header BS `json:"-"` + Raw BS `json:"-"` Recu bool `json:"-"` RecuDepth int `json:"-"` URLs []string `json:"-"` @@ -127,23 +126,23 @@ func (bl *Baseline) IsDir() bool { func (bl *Baseline) Collect() { if bl.ContentType 
== "html" || bl.ContentType == "json" || bl.ContentType == "txt" { // 指纹库设计的时候没考虑js,css文件的指纹, 跳过非必要的指纹收集减少误报提高性能 - bl.Frameworks = pkg.FingerDetect(bl.Raw) + bl.Frameworks = FingerDetect(bl.Raw) } if len(bl.Body) > 0 { if bl.ContentType == "html" { bl.Title = iutils.AsciiEncode(parsers.MatchTitle(bl.Body)) } else if bl.ContentType == "ico" { - if name, ok := pkg.Md5Fingers[encode.Md5Hash(bl.Body)]; ok { + if name, ok := Md5Fingers[encode.Md5Hash(bl.Body)]; ok { bl.Frameworks[name] = &parsers.Framework{Name: name} - } else if name, ok := pkg.Mmh3Fingers[encode.Mmh3Hash32(bl.Body)]; ok { + } else if name, ok := Mmh3Fingers[encode.Mmh3Hash32(bl.Body)]; ok { bl.Frameworks[name] = &parsers.Framework{Name: name} } } } bl.Hashes = parsers.NewHashes(bl.Raw) - bl.Extracteds = pkg.Extractors.Extract(string(bl.Raw)) + bl.Extracteds = Extractors.Extract(string(bl.Raw)) bl.Unique = UniqueHash(bl) } @@ -158,21 +157,21 @@ func (bl *Baseline) CollectURL() { if len(bl.Body) == 0 { return } - for _, reg := range pkg.ExtractRegexps["js"][0].CompiledRegexps { + for _, reg := range ExtractRegexps["js"][0].CompiledRegexps { urls := reg.FindAllStringSubmatch(string(bl.Body), -1) for _, u := range urls { - u[1] = pkg.CleanURL(u[1]) - if u[1] != "" && !pkg.FilterJs(u[1]) { + u[1] = CleanURL(u[1]) + if u[1] != "" && !FilterJs(u[1]) { bl.URLs = append(bl.URLs, u[1]) } } } - for _, reg := range pkg.ExtractRegexps["url"][0].CompiledRegexps { + for _, reg := range ExtractRegexps["url"][0].CompiledRegexps { urls := reg.FindAllStringSubmatch(string(bl.Body), -1) for _, u := range urls { - u[1] = pkg.CleanURL(u[1]) - if u[1] != "" && !pkg.FilterUrl(u[1]) { + u[1] = CleanURL(u[1]) + if u[1] != "" && !FilterUrl(u[1]) { bl.URLs = append(bl.URLs, u[1]) } } diff --git a/internal/errors.go b/pkg/errors.go similarity index 98% rename from internal/errors.go rename to pkg/errors.go index eb459e6..1dd3575 100644 --- a/internal/errors.go +++ b/pkg/errors.go @@ -1,4 +1,4 @@ -package internal +package pkg type ErrorType uint diff --git a/pkg/utils.go b/pkg/utils.go index 306a2f9..dd87205 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -1,17 +1,31 @@ package pkg import ( + "github.com/antonmedv/expr" + "github.com/antonmedv/expr/vm" "github.com/chainreactors/gogo/v2/pkg/fingers" + "github.com/chainreactors/logs" "github.com/chainreactors/parsers" "github.com/chainreactors/utils/iutils" "math/rand" "net/url" "path" + "path/filepath" + "strconv" "strings" "time" "unsafe" ) +var ( + LogVerbose = logs.Warn - 2 + LogFuzz = logs.Warn - 1 + WhiteStatus = []int{} // cmd input, 200 + BlackStatus = []int{} // cmd input, 400,410 + FuzzyStatus = []int{} // cmd input, 500,501,502,503 + WAFStatus = []int{493, 418, 1020, 406} + UniqueStatus = []int{} // 相同unique的403表示命中了同一条acl, 相同unique的200表示default页面 +) var ( Md5Fingers map[string]string = make(map[string]string) Mmh3Fingers map[string]string = make(map[string]string) @@ -47,6 +61,23 @@ var ( "video/avi": "avi", "image/x-icon": "ico", } + + // from feroxbuster + randomUserAgent = []string{ + "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36", + "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254", + "Mozilla/5.0 (Linux; Android 7.0; Pixel C Build/NRD90M; wv) 
AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246", + "Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36", + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1", + "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", + "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", + } + uacount = len(randomUserAgent) ) type BS []byte @@ -237,3 +268,128 @@ func CRC16Hash(data []byte) uint16 { } return crc16 } + +func SafePath(dir, u string) string { + hasSlash := strings.HasPrefix(u, "/") + if hasSlash { + return path.Join(dir, u[1:]) + } else { + return path.Join(dir, u) + } +} + +func RelaPath(base, u string) string { + // 拼接相对目录, 不使用path.join的原因是, 如果存在"////"这样的情况, 可能真的是有意义的路由, 不能随意去掉. + // "" /a /a + // "" a /a + // / "" / + // /a/ b /a/b + // /a/ /b /a/b + // /a b /b + // /a /b /b + + if u == "" { + return base + } + + pathSlash := strings.HasPrefix(u, "/") + if base == "" { + if pathSlash { + return u[1:] + } else { + return "/" + u + } + } else if strings.HasSuffix(base, "/") { + if pathSlash { + return base + u[1:] + } else { + return base + u + } + } else { + if pathSlash { + return Dir(base) + u[1:] + } else { + return Dir(base) + u + } + } +} + +func Dir(u string) string { + // 安全的获取目录, 不会额外处理多个"//", 并非用来获取上级目录 + // /a / + // /a/ /a/ + // a/ a/ + // aaa / + if strings.HasSuffix(u, "/") { + return u + } else if i := strings.LastIndex(u, "/"); i == -1 { + return "/" + } else { + return u[:i+1] + } +} + +func UniqueHash(bl *Baseline) uint16 { + // 由host+状态码+重定向url+content-type+title+length舍去个位组成的hash + // body length可能会导致一些误报, 目前没有更好的解决办法 + return CRC16Hash([]byte(bl.Host + strconv.Itoa(bl.Status) + bl.RedirectURL + bl.ContentType + bl.Title + strconv.Itoa(bl.BodyLength/10*10))) +} + +func FormatURL(base, u string) string { + if strings.HasPrefix(u, "http") { + parsed, err := url.Parse(u) + if err != nil { + return "" + } + return parsed.Path + } else if strings.HasPrefix(u, "//") { + parsed, err := url.Parse(u) + if err != nil { + return "" + } + return parsed.Path + } else if strings.HasPrefix(u, "/") { + // 绝对目录拼接 + // 不需要进行处理, 用来跳过下面的判断 + return u + } else if strings.HasPrefix(u, "./") { + // "./"相对目录拼接 + return RelaPath(base, u[2:]) + } else if strings.HasPrefix(u, "../") { + return path.Join(Dir(base), u) + } else { + // 相对目录拼接 + return RelaPath(base, u) + } +} + +func BaseURL(u *url.URL) string { + return u.Scheme + "://" + u.Host +} + +func RandomUA() string { + return randomUserAgent[rand.Intn(uacount)] +} + +func CompareWithExpr(exp *vm.Program, params map[string]interface{}) bool { + res, err := expr.Run(exp, params) + if err != nil { + logs.Log.Warn(err.Error()) + } + + if res == true { + return true + } else { + return false + } +} + +func MatchWithGlobs(u string, globs []string) bool { + for _, glob := range globs { + ok, err := filepath.Match(glob, u) + if err == nil && ok { + return true + } + } + return false +} diff --git a/templates b/templates index 
e980f34..998cdc0 160000 --- a/templates +++ b/templates @@ -1 +1 @@ -Subproject commit e980f346de47e8162a9f466ab6c4218f0bfebd6e +Subproject commit 998cdc05018e9c221e91166d10c7b2e1b62396cf
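
Note: the heart of this refactor is that BrutePool (formerly Pool) and CheckPool now embed a shared base struct, This, defined in internal/pool/pool.go. The base struct owns the Config, the ihttp client, the addition/close channels, the in-flight WaitGroup, and the helpers both pools need (doRedirect, doRule, doAppendWords, doRetry, doActive, doCommonFile, addAddition, genReq, putToOutput, putToFuzzy), so each concrete pool keeps only its own state. A minimal sketch of the embedding pattern follows; base, brutePool, and checkPool are hypothetical stand-ins for illustration, not spray's actual types:

package main

import (
	"fmt"
	"sync"
)

// base plays the role of pool.This: plumbing shared by every pool kind.
type base struct {
	wg         sync.WaitGroup
	additionCh chan string
}

// addAddition lives once on the base struct instead of being duplicated
// in both pools, mirroring This.addAddition in this patch.
func (b *base) addAddition(path string) {
	b.wg.Add(1)
	b.additionCh <- path
}

// The concrete pools embed *base and only add their own fields; calls
// like p.addAddition resolve through the embedded struct.
type brutePool struct {
	*base
	isDir bool
}

type checkPool struct {
	*base
}

func main() {
	p := &brutePool{base: &base{additionCh: make(chan string, 1)}}
	p.addAddition("/index.php")
	fmt.Println(<-p.additionCh) // /index.php
	p.wg.Done()
	p.wg.Wait()
}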
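
Note: the second structural change is output draining. The old Runner.Output returned after sleeping 100ms and polling len(OutputCh)/len(FuzzyCh), which could drop buffered results; the new code counts every result into an outwg WaitGroup before it is sent (putToOutput/putToFuzzy call OutLocker.Add(1) ahead of the channel send) and marks it done only after OutputHandler has written it, so Run can finish with a plain r.outwg.Wait(). A runnable sketch of that handshake under simplified, hypothetical names:

package main

import (
	"fmt"
	"sync"
)

type result struct{ url string }

func main() {
	outCh := make(chan *result, 100)
	var outwg sync.WaitGroup

	// producer side, like BrutePool.putToOutput: Add(1) strictly before
	// the send, so Wait() can never pass while a result sits in the buffer.
	putToOutput := func(r *result) {
		outwg.Add(1)
		outCh <- r
	}

	// consumer side, like Runner.OutputHandler: Done() only after the
	// result has actually been handled.
	go func() {
		for r := range outCh {
			fmt.Println("[+]", r.url)
			outwg.Done()
		}
	}()

	var poolwg sync.WaitGroup
	for i := 0; i < 3; i++ {
		poolwg.Add(1)
		go func(i int) {
			defer poolwg.Done()
			putToOutput(&result{url: fmt.Sprintf("/path%d", i)})
		}(i)
	}

	poolwg.Wait() // pools finished producing, like r.poolwg.Wait()
	outwg.Wait()  // every produced result has been flushed
}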
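
Note: the path helpers that moved to pkg/utils.go (SafePath, RelaPath, Dir, FormatURL, UniqueHash, RandomUA, CompareWithExpr, MatchWithGlobs) keep their deliberately unusual semantics: Dir preserves a trailing slash and never collapses repeated slashes, since // routes can be meaningful targets. A self-contained demo of the cases documented in its comments, reusing the Dir body from this patch:

package main

import (
	"fmt"
	"strings"
)

// Dir is copied from pkg/utils.go above: the containing directory with a
// trailing slash, without normalizing duplicate slashes (unlike path.Dir).
func Dir(u string) string {
	if strings.HasSuffix(u, "/") {
		return u
	} else if i := strings.LastIndex(u, "/"); i == -1 {
		return "/"
	} else {
		return u[:i+1]
	}
}

func main() {
	for _, c := range []string{"/a", "/a/", "a/", "aaa"} {
		fmt.Printf("%-4s -> %s\n", c, Dir(c))
	}
	// /a   -> /
	// /a/  -> /a/
	// a/   -> a/
	// aaa  -> /
}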