diff --git a/internal/option.go b/internal/option.go index 140ea38..a4e6ce8 100644 --- a/internal/option.go +++ b/internal/option.go @@ -32,6 +32,7 @@ type InputOptions struct { ResumeFrom string `long:"resume-from"` URL string `short:"u" long:"url" description:"String, input baseurl (separated by commas), e.g.: http://google.com, http://baidu.com"` URLFile string `short:"l" long:"list" description:"File, input filename"` + Raw string `long:"raw" description:"File, input raw request filename"` Offset int `long:"offset" description:"Int, wordlist offset"` Limit int `long:"limit" description:"Int, wordlist limit, start with offset. e.g.: --offset 1000 --limit 100"` Dictionaries []string `short:"d" long:"dict" description:"Files, dict files, e.g.: -d 1.txt -d 2.txt"` @@ -77,6 +78,8 @@ type ModeOptions struct { CheckOnly bool `long:"check-only" description:"Bool, check only"` Recursive string `long:"recursive" default:"current.IsDir()" description:"String,custom recursive rule, e.g.: --recursive current.IsDir()"` Depth int `long:"depth" default:"0" description:"Int, recursive depth"` + Crawl bool `long:"crawl" description:"Bool, enable crawl"` + CrawlDepth int `long:"spider-depth" default:"3" description:"Int, crawl depth"` CheckPeriod int `long:"check-period" default:"200" description:"Int, check period when request"` ErrPeriod int `long:"error-period" default:"10" description:"Int, check period when error"` BreakThreshold int `long:"error-threshold" default:"20" description:"Int, break when the error exceeds the threshold "` @@ -123,6 +126,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) { CheckPeriod: opt.CheckPeriod, ErrPeriod: opt.ErrPeriod, BreakThreshold: opt.BreakThreshold, + Crawl: opt.Crawl, } err = pkg.LoadTemplates() diff --git a/internal/pool.go b/internal/pool.go index a78ebea..ff26ee3 100644 --- a/internal/pool.go +++ b/internal/pool.go @@ -20,13 +20,12 @@ import ( ) var ( - CheckRedirect func(string) bool + max = 2147483647 + maxRedirect = 3 + maxCrawl = 3 + maxRecursion = 0 ) -var max = 2147483647 -var maxRedirect = 3 -var maxRecuDepth = 0 - func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { pctx, cancel := context.WithCancel(ctx) pool := &Pool{ @@ -35,8 +34,10 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { cancel: cancel, client: ihttp.NewClient(config.Thread, 2, config.ClientType), baselines: make(map[int]*pkg.Baseline), + urls: make(map[string]int), tempCh: make(chan *pkg.Baseline, config.Thread), checkCh: make(chan sourceType), + additionCh: make(chan *Unit, 100), wg: sync.WaitGroup{}, initwg: sync.WaitGroup{}, reqCount: 1, @@ -80,7 +81,7 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { bl.RedirectURL = "/" + strings.TrimLeft(bl.RedirectURL, "/") bl.RedirectURL = pool.BaseURL + bl.RedirectURL } - pool.addRedirect(bl, unit.reCount) + pool.doRedirect(bl, unit.depth) } pool.addFuzzyBaseline(bl) } else { @@ -89,14 +90,17 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { } } + bl.ReqDepth = unit.depth bl.Spended = time.Since(start).Milliseconds() switch unit.source { case InitRandomSource: pool.random = bl pool.addFuzzyBaseline(bl) + pool.doCrawl(bl) pool.initwg.Done() case InitIndexSource: pool.index = bl + pool.doCrawl(bl) pool.initwg.Done() case CheckSource: if bl.ErrString != "" { @@ -122,15 +126,17 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { pool.reqCount++ if pool.reqCount%pool.CheckPeriod == 0 { pool.reqCount++ - pool.check() + pool.doCheck() } else if pool.failedCount%pool.ErrPeriod == 0 { pool.failedCount++ - pool.check() + pool.doCheck() } pool.bar.Done() case RedirectSource: bl.FrontURL = unit.frontUrl pool.tempCh <- bl + case CrawlSource: + pool.tempCh <- bl } }) @@ -184,9 +190,12 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { } // 如果要进行递归判断, 要满足 bl有效, mod为path-spray, 当前深度小于最大递归深度 - if bl.IsValid && pool.Mod == pkg.PathSpray && bl.RecuDepth < maxRecuDepth { - if CompareWithExpr(pool.RecuExpr, params) { - bl.Recu = true + if bl.IsValid { + pool.doCrawl(bl) + if bl.RecuDepth < maxRecursion { + if CompareWithExpr(pool.RecuExpr, params) { + bl.Recu = true + } } } pool.OutputCh <- bl @@ -207,7 +216,8 @@ type Pool struct { ctx context.Context cancel context.CancelFunc tempCh chan *pkg.Baseline // 待处理的baseline - checkCh chan sourceType + checkCh chan sourceType // 独立的check管道, 防止与redirect/crawl冲突 + additionCh chan *Unit reqCount int failedCount int isFailed bool @@ -215,6 +225,7 @@ type Pool struct { random *pkg.Baseline index *pkg.Baseline baselines map[int]*pkg.Baseline + urls map[string]int analyzeDone bool worder *words.Worder locker sync.Mutex @@ -253,51 +264,16 @@ func (pool *Pool) Init() error { } } - if pool.random.RedirectURL != "" { - CheckRedirect = func(redirectURL string) bool { - if redirectURL == pool.random.RedirectURL { - // 相同的RedirectURL将被认为是无效数据 - return false - } else { - // path为3xx, 且与baseline中的RedirectURL不同时, 为有效数据 - return true - } - } - } - return nil } -func (pool *Pool) addRedirect(bl *pkg.Baseline, reCount int) { - if reCount >= maxRedirect { - return - } - - if uu, err := url.Parse(bl.RedirectURL); err == nil && uu.Hostname() == pool.index.Url.Hostname() { - pool.wg.Add(1) - _ = pool.reqPool.Invoke(&Unit{ - number: bl.Number, - path: uu.Path, - source: RedirectSource, - frontUrl: bl.UrlString, - reCount: reCount + 1, - }) - } -} - -func (pool *Pool) check() { - if pool.failedCount > pool.BreakThreshold { - // 当报错次数超过上限是, 结束任务 - pool.recover() - pool.cancel() - pool.isFailed = true - return - } - - if pool.Mod == pkg.HostSpray { - pool.checkCh <- CheckSource - } else if pool.Mod == pkg.PathSpray { - pool.checkCh <- CheckSource +func (pool *Pool) checkRedirect(redirectURL string) bool { + if redirectURL == pool.random.RedirectURL { + // 相同的RedirectURL将被认为是无效数据 + return false + } else { + // path为3xx, 且与baseline中的RedirectURL不同时, 为有效数据 + return true } } @@ -311,6 +287,11 @@ func (pool *Pool) genReq(s string) (*ihttp.Request, error) { } func (pool *Pool) Run(ctx context.Context, offset, limit int) { pool.worder.RunWithRules() + go func() { + for unit := range pool.additionCh { + pool.reqPool.Invoke(unit) + } + }() Loop: for { select { @@ -340,13 +321,16 @@ Loop: } else if pool.Mod == pkg.PathSpray { pool.reqPool.Invoke(newUnitWithNumber(pkg.RandPath(), source, pool.Statistor.End)) } - case <-ctx.Done(): break Loop case <-pool.ctx.Done(): break Loop } } + + for len(pool.additionCh) > 0 { + time.Sleep(time.Second) + } pool.wg.Wait() pool.Statistor.EndTime = time.Now().Unix() pool.Close() @@ -370,7 +354,7 @@ func (pool *Pool) PreCompare(resp *ihttp.Response) error { return ErrWaf } - if CheckRedirect != nil && !CheckRedirect(resp.GetHeader("Location")) { + if !pool.checkRedirect(resp.GetHeader("Location")) { return ErrRedirect } @@ -417,7 +401,7 @@ func (pool *Pool) BaseCompare(bl *pkg.Baseline) bool { if ok && status == 0 && base.FuzzyCompare(bl) { pool.Statistor.FuzzyNumber++ bl.Reason = ErrFuzzyCompareFailed.Error() - pool.PutToFuzzy(bl) + pool.putToFuzzy(bl) return false } @@ -437,6 +421,77 @@ func CompareWithExpr(exp *vm.Program, params map[string]interface{}) bool { } } +func (pool *Pool) doRedirect(bl *pkg.Baseline, depth int) { + if depth >= maxRedirect { + return + } + + if uu, err := url.Parse(bl.RedirectURL); err == nil && uu.Hostname() == pool.index.Url.Hostname() { + pool.wg.Add(1) + pool.additionCh <- &Unit{ + path: uu.Path, + source: RedirectSource, + frontUrl: bl.UrlString, + depth: depth + 1, + } + } +} + +func (pool *Pool) doCrawl(bl *pkg.Baseline) { + bl.CollectURL() + for _, u := range bl.URLs { + if strings.HasPrefix(u, "//") { + u = bl.Url.Scheme + u + } else if strings.HasPrefix(u, "/") { + // 绝对目录拼接 + u = pkg.URLJoin(pool.BaseURL, u) + } else if !strings.HasPrefix(u, "http") { + // 相对目录拼接 + u = pkg.URLJoin(pool.BaseURL, u) + } + + if _, ok := pool.urls[u]; ok { + pool.urls[u]++ + } else { + // 通过map去重, 只有新的url才会进入到该逻辑 + pool.urls[u] = 1 + if bl.ReqDepth < maxCrawl { + parsed, err := url.Parse(u) + if err != nil { + continue + } + if parsed.Host != bl.Url.Host { + // 自动限定scoop, 防止爬到其他网站 + continue + } + pool.wg.Add(1) + pool.additionCh <- &Unit{ + path: parsed.Path, + source: CrawlSource, + frontUrl: bl.UrlString, + depth: bl.ReqDepth + 1, + } + } + } + } +} + +func (pool *Pool) doCheck() { + if pool.failedCount > pool.BreakThreshold { + // 当报错次数超过上限是, 结束任务 + pool.recover() + pool.cancel() + pool.isFailed = true + return + } + + if pool.Mod == pkg.HostSpray { + pool.checkCh <- CheckSource + } else if pool.Mod == pkg.PathSpray { + pool.checkCh <- CheckSource + } +} + func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) { if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) { bl.Collect() @@ -447,12 +502,12 @@ func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) { } } -func (pool *Pool) PutToInvalid(bl *pkg.Baseline, reason string) { +func (pool *Pool) putToInvalid(bl *pkg.Baseline, reason string) { bl.IsValid = false pool.OutputCh <- bl } -func (pool *Pool) PutToFuzzy(bl *pkg.Baseline) { +func (pool *Pool) putToFuzzy(bl *pkg.Baseline) { bl.IsFuzzy = true pool.FuzzyCh <- bl } @@ -474,5 +529,6 @@ func (pool *Pool) Close() { time.Sleep(time.Duration(100) * time.Millisecond) } close(pool.tempCh) + close(pool.additionCh) pool.bar.Close() } diff --git a/internal/runner.go b/internal/runner.go index c3e2786..491312f 100644 --- a/internal/runner.go +++ b/internal/runner.go @@ -72,6 +72,7 @@ type Runner struct { CheckOnly bool Force bool IgnoreWaf bool + Crawl bool } func (r *Runner) PrepareConfig() *pkg.Config { @@ -90,6 +91,7 @@ func (r *Runner) PrepareConfig() *pkg.Config { FilterExpr: r.FilterExpr, RecuExpr: r.RecursiveExpr, IgnoreWaf: r.IgnoreWaf, + Crawl: r.Crawl, } if config.Mod == pkg.PathSpray { config.ClientType = ihttp.FAST diff --git a/internal/types.go b/internal/types.go index d290c74..72937f6 100644 --- a/internal/types.go +++ b/internal/types.go @@ -51,6 +51,7 @@ const ( InitRandomSource InitIndexSource RedirectSource + CrawlSource WordSource WafSource ) @@ -60,15 +61,14 @@ func newUnit(path string, source sourceType) *Unit { } func newUnitWithNumber(path string, source sourceType, number int) *Unit { - return &Unit{number: number, path: path, source: source} + return &Unit{path: path, source: source} } type Unit struct { - number int path string source sourceType frontUrl string - reCount int // redirect number + depth int // redirect depth } type Task struct { diff --git a/pkg/baseline.go b/pkg/baseline.go index 1b1149f..fc51ce4 100644 --- a/pkg/baseline.go +++ b/pkg/baseline.go @@ -8,6 +8,7 @@ import ( "github.com/chainreactors/parsers" "github.com/chainreactors/spray/pkg/ihttp" "net/url" + "path" "strconv" "strings" ) @@ -84,7 +85,9 @@ type Baseline struct { Reason string `json:"reason"` IsValid bool `json:"valid"` IsFuzzy bool `json:"fuzzy"` + URLs []string `json:"urls"` RecuDepth int `json:"-"` + ReqDepth int `json:"depth"` Recu bool `json:"-"` *parsers.Hashes } @@ -106,6 +109,64 @@ func (bl *Baseline) Collect() { bl.Frameworks = FingerDetect(string(bl.Raw)) } +func (bl *Baseline) CollectURL() { + if len(bl.Body) == 0 { + return + } + for _, reg := range JSRegexps { + urls := reg.FindAllStringSubmatch(string(bl.Body), -1) + for _, u := range urls { + var filter bool + parsed, err := url.Parse(u[1]) + if err != nil { + filter = true + } else { + for _, scoop := range BadScoop { + if scoop == parsed.Host { + filter = true + break + } + } + } + + if filter { + continue + } + bl.URLs = append(bl.URLs, u[1]) + } + } + + for _, reg := range URLRegexps { + urls := reg.FindAllStringSubmatch(string(bl.Body), -1) + for _, u := range urls { + var filter bool + parsed, err := url.Parse(u[1]) + if err != nil { + filter = true + } else { + ext := path.Ext(parsed.Path) + for _, e := range BadExt { + if e == ext { + filter = true + break + } + } + for _, scoop := range BadScoop { + if scoop == parsed.Host { + filter = true + break + } + } + } + + if filter { + continue + } + bl.URLs = append(bl.URLs, u[1]) + } + } +} + // Compare // if totally equal return 1 // if maybe equal return 0 @@ -186,6 +247,8 @@ func (bl *Baseline) Get(key string) string { return bl.Extracteds.String() case "frame", "framework": return bl.Frameworks.String() + case "full": + return bl.String() default: return "" } @@ -256,9 +319,9 @@ func (bl *Baseline) ColorString() string { line.WriteString(" - ") line.WriteString(logs.GreenBold(strconv.Itoa(bl.Status))) line.WriteString(" - ") - line.WriteString(logs.Blue(strconv.Itoa(bl.BodyLength))) + line.WriteString(logs.YellowBold(strconv.Itoa(bl.BodyLength))) line.WriteString(" - ") - line.WriteString(logs.Blue(strconv.Itoa(int(bl.Spended)) + "ms")) + line.WriteString(logs.YellowBold(strconv.Itoa(int(bl.Spended)) + "ms")) line.WriteString(logs.GreenLine(bl.Additional("title"))) line.WriteString(logs.Blue(bl.Frameworks.String())) line.WriteString(logs.Blue(bl.Extracteds.String())) @@ -267,6 +330,12 @@ func (bl *Baseline) ColorString() string { line.WriteString(logs.CyanLine(bl.RedirectURL)) line.WriteString(" ") } + if len(bl.URLs) > 0 { + line.WriteString("\n") + } + for _, u := range bl.URLs { + line.WriteString("\t" + u + "\n") + } return line.String() } @@ -308,6 +377,12 @@ func (bl *Baseline) String() string { line.WriteString(bl.RedirectURL) line.WriteString(" ") } + if len(bl.URLs) > 0 { + line.WriteString("\n") + } + for _, u := range bl.URLs { + line.WriteString("\t" + u + "\n") + } return line.String() } diff --git a/pkg/config.go b/pkg/config.go index 33048f9..2b660c7 100644 --- a/pkg/config.go +++ b/pkg/config.go @@ -38,4 +38,5 @@ type Config struct { FuzzyCh chan *Baseline Fuzzy bool IgnoreWaf bool + Crawl bool } diff --git a/pkg/utils.go b/pkg/utils.go index 502e9d6..b503f45 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -1,17 +1,35 @@ package pkg import ( - "fmt" "github.com/chainreactors/gogo/v2/pkg/fingers" "github.com/chainreactors/gogo/v2/pkg/utils" "github.com/chainreactors/ipcs" - "github.com/go-dedup/simhash" "math/rand" "os" + "regexp" + "strings" "time" "unsafe" ) +var ( + Md5Fingers map[string]string = make(map[string]string) + Mmh3Fingers map[string]string = make(map[string]string) + ActivePath []string + Fingers fingers.Fingers + JSRegexps []*regexp.Regexp = []*regexp.Regexp{ + regexp.MustCompile(".(https{0,1}:[^\\s,^',^’,^\",^”,^>,^<,^;,^(,^),^|,^*,^\\[]{2,250}?[^=,^*,^\\s,^',^’,^\",^”,^>,^<,^:,^;,^*,^|,^(,^),^\\[]{3}[.]js)"), + regexp.MustCompile("[\",',‘,“]\\s{0,6}(/{0,1}[^\\s,^',^’,^\",^”,^|,^>,^<,^:,^;,^*,^(,^\\),^\\[]{2,250}?[^=,^*,^\\s,^',^’,^|,^\",^”,^>,^<,^:,^;,^*,^(,^),^\\[]{3}[.]js)"), + regexp.MustCompile("=\\s{0,6}[\",',’,”]{0,1}\\s{0,6}(/{0,1}[^\\s,^',^’,^\",^”,^|,^>,^<,^;,^*,^(,^),^\\[]{2,250}?[^=,^*,^\\s,^',^’,^\",^”,^>,^|,^<,^:,^;,^*,^(,^),^\\[]{3}[.]js)"), + } + URLRegexps []*regexp.Regexp = []*regexp.Regexp{ + regexp.MustCompile("[\",',‘,“]\\s{0,6}(https{0,1}:[^\\s,^',^’,^\",^”,^>,^<,^),^(]{2,250}?)\\s{0,6}[\",',‘,“]"), + regexp.MustCompile("=\\s{0,6}(https{0,1}:[^\\s,^',^’,^\",^”,^>,^<,^),^(]{2,250})"), + regexp.MustCompile("[\",',‘,“]\\s{0,6}([#,.]{0,2}/[^\\s,^',^’,^\",^”,^>,^<,^:,^),^(]{2,250}?)\\s{0,6}[\",',‘,“]"), + regexp.MustCompile("href\\s{0,6}=\\s{0,6}[\",',‘,“]{0,1}\\s{0,6}([^\\s,^',^’,^\",^“,^>,^<,^,^+),^(]{2,250})|action\\s{0,6}=\\s{0,6}[\",',‘,“]{0,1}\\s{0,6}([^\\s,^',^’,^\",^“,^>,^<,^,^+),^(]{2,250})"), + } +) + func HasStdin() bool { stat, err := os.Stdin.Stat() if err != nil { @@ -24,11 +42,6 @@ func HasStdin() bool { return isPipedFromChrDev || isPipedFromFIFO } -func Simhash(raw []byte) string { - sh := simhash.NewSimhash() - return fmt.Sprintf("%x", sh.GetSimhash(sh.NewWordFeatureSet(raw))) -} - const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" var src = rand.NewSource(time.Now().UnixNano()) @@ -80,12 +93,6 @@ func RandHost() string { return *(*string)(unsafe.Pointer(&b)) } -var ( - Md5Fingers map[string]string = make(map[string]string) - Mmh3Fingers map[string]string = make(map[string]string) - Fingers fingers.Fingers -) - func LoadTemplates() error { var err error Fingers, err = fingers.LoadFingers(LoadConfig("http")) @@ -102,6 +109,9 @@ func LoadTemplates() error { for _, f := range Fingers { for _, rule := range f.Rules { + if rule.SendDataStr != "" { + ActivePath = append(ActivePath, rule.SendDataStr) + } if rule.Favicon != nil { for _, mmh3 := range rule.Favicon.Mmh3 { Mmh3Fingers[mmh3] = f.Name @@ -127,3 +137,21 @@ func FingerDetect(content string) Frameworks { } return frames } + +var ( + BadExt = []string{".js", ".css", ".scss", ",", ".jpeg", ".jpg", ".png", ".gif", ".ico", ".svg", ".vue", ".ts"} + //BadURL = []string{".js?", ".css?", ".jpeg?", ".jpg?", ".png?", ".gif?", "github.com", "www.w3.org", "example.com", "<", ">", "{", "}", "[", "]", "|", "^", ";", "/js/", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*", "\\n"} + BadScoop = []string{"www.w3.org", "example.com"} +) + +func URLJoin(base, uri string) string { + baseSlash := strings.HasSuffix(base, "/") + uriSlash := strings.HasPrefix(uri, "/") + if (baseSlash && !uriSlash) || (!baseSlash && uriSlash) { + return base + uri + } else if baseSlash && uriSlash { + return base + uri[1:] + } else { + return base + "/" + uri + } +}