mirror of
https://github.com/chainreactors/spray.git
synced 2025-09-15 11:40:13 +00:00
初步实现简易爬虫
This commit is contained in:
parent
494ce9414a
commit
9e9b0de039
@ -32,6 +32,7 @@ type InputOptions struct {
|
|||||||
ResumeFrom string `long:"resume-from"`
|
ResumeFrom string `long:"resume-from"`
|
||||||
URL string `short:"u" long:"url" description:"String, input baseurl (separated by commas), e.g.: http://google.com, http://baidu.com"`
|
URL string `short:"u" long:"url" description:"String, input baseurl (separated by commas), e.g.: http://google.com, http://baidu.com"`
|
||||||
URLFile string `short:"l" long:"list" description:"File, input filename"`
|
URLFile string `short:"l" long:"list" description:"File, input filename"`
|
||||||
|
Raw string `long:"raw" description:"File, input raw request filename"`
|
||||||
Offset int `long:"offset" description:"Int, wordlist offset"`
|
Offset int `long:"offset" description:"Int, wordlist offset"`
|
||||||
Limit int `long:"limit" description:"Int, wordlist limit, start with offset. e.g.: --offset 1000 --limit 100"`
|
Limit int `long:"limit" description:"Int, wordlist limit, start with offset. e.g.: --offset 1000 --limit 100"`
|
||||||
Dictionaries []string `short:"d" long:"dict" description:"Files, dict files, e.g.: -d 1.txt -d 2.txt"`
|
Dictionaries []string `short:"d" long:"dict" description:"Files, dict files, e.g.: -d 1.txt -d 2.txt"`
|
||||||
@ -77,6 +78,8 @@ type ModeOptions struct {
|
|||||||
CheckOnly bool `long:"check-only" description:"Bool, check only"`
|
CheckOnly bool `long:"check-only" description:"Bool, check only"`
|
||||||
Recursive string `long:"recursive" default:"current.IsDir()" description:"String,custom recursive rule, e.g.: --recursive current.IsDir()"`
|
Recursive string `long:"recursive" default:"current.IsDir()" description:"String,custom recursive rule, e.g.: --recursive current.IsDir()"`
|
||||||
Depth int `long:"depth" default:"0" description:"Int, recursive depth"`
|
Depth int `long:"depth" default:"0" description:"Int, recursive depth"`
|
||||||
|
Crawl bool `long:"crawl" description:"Bool, enable crawl"`
|
||||||
|
CrawlDepth int `long:"spider-depth" default:"3" description:"Int, crawl depth"`
|
||||||
CheckPeriod int `long:"check-period" default:"200" description:"Int, check period when request"`
|
CheckPeriod int `long:"check-period" default:"200" description:"Int, check period when request"`
|
||||||
ErrPeriod int `long:"error-period" default:"10" description:"Int, check period when error"`
|
ErrPeriod int `long:"error-period" default:"10" description:"Int, check period when error"`
|
||||||
BreakThreshold int `long:"error-threshold" default:"20" description:"Int, break when the error exceeds the threshold "`
|
BreakThreshold int `long:"error-threshold" default:"20" description:"Int, break when the error exceeds the threshold "`
|
||||||
@ -123,6 +126,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) {
|
|||||||
CheckPeriod: opt.CheckPeriod,
|
CheckPeriod: opt.CheckPeriod,
|
||||||
ErrPeriod: opt.ErrPeriod,
|
ErrPeriod: opt.ErrPeriod,
|
||||||
BreakThreshold: opt.BreakThreshold,
|
BreakThreshold: opt.BreakThreshold,
|
||||||
|
Crawl: opt.Crawl,
|
||||||
}
|
}
|
||||||
|
|
||||||
err = pkg.LoadTemplates()
|
err = pkg.LoadTemplates()
|
||||||
|
174
internal/pool.go
174
internal/pool.go
@ -20,13 +20,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
CheckRedirect func(string) bool
|
max = 2147483647
|
||||||
|
maxRedirect = 3
|
||||||
|
maxCrawl = 3
|
||||||
|
maxRecursion = 0
|
||||||
)
|
)
|
||||||
|
|
||||||
var max = 2147483647
|
|
||||||
var maxRedirect = 3
|
|
||||||
var maxRecuDepth = 0
|
|
||||||
|
|
||||||
func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
||||||
pctx, cancel := context.WithCancel(ctx)
|
pctx, cancel := context.WithCancel(ctx)
|
||||||
pool := &Pool{
|
pool := &Pool{
|
||||||
@ -35,8 +34,10 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
|||||||
cancel: cancel,
|
cancel: cancel,
|
||||||
client: ihttp.NewClient(config.Thread, 2, config.ClientType),
|
client: ihttp.NewClient(config.Thread, 2, config.ClientType),
|
||||||
baselines: make(map[int]*pkg.Baseline),
|
baselines: make(map[int]*pkg.Baseline),
|
||||||
|
urls: make(map[string]int),
|
||||||
tempCh: make(chan *pkg.Baseline, config.Thread),
|
tempCh: make(chan *pkg.Baseline, config.Thread),
|
||||||
checkCh: make(chan sourceType),
|
checkCh: make(chan sourceType),
|
||||||
|
additionCh: make(chan *Unit, 100),
|
||||||
wg: sync.WaitGroup{},
|
wg: sync.WaitGroup{},
|
||||||
initwg: sync.WaitGroup{},
|
initwg: sync.WaitGroup{},
|
||||||
reqCount: 1,
|
reqCount: 1,
|
||||||
@ -80,7 +81,7 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
|||||||
bl.RedirectURL = "/" + strings.TrimLeft(bl.RedirectURL, "/")
|
bl.RedirectURL = "/" + strings.TrimLeft(bl.RedirectURL, "/")
|
||||||
bl.RedirectURL = pool.BaseURL + bl.RedirectURL
|
bl.RedirectURL = pool.BaseURL + bl.RedirectURL
|
||||||
}
|
}
|
||||||
pool.addRedirect(bl, unit.reCount)
|
pool.doRedirect(bl, unit.depth)
|
||||||
}
|
}
|
||||||
pool.addFuzzyBaseline(bl)
|
pool.addFuzzyBaseline(bl)
|
||||||
} else {
|
} else {
|
||||||
@ -89,14 +90,17 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bl.ReqDepth = unit.depth
|
||||||
bl.Spended = time.Since(start).Milliseconds()
|
bl.Spended = time.Since(start).Milliseconds()
|
||||||
switch unit.source {
|
switch unit.source {
|
||||||
case InitRandomSource:
|
case InitRandomSource:
|
||||||
pool.random = bl
|
pool.random = bl
|
||||||
pool.addFuzzyBaseline(bl)
|
pool.addFuzzyBaseline(bl)
|
||||||
|
pool.doCrawl(bl)
|
||||||
pool.initwg.Done()
|
pool.initwg.Done()
|
||||||
case InitIndexSource:
|
case InitIndexSource:
|
||||||
pool.index = bl
|
pool.index = bl
|
||||||
|
pool.doCrawl(bl)
|
||||||
pool.initwg.Done()
|
pool.initwg.Done()
|
||||||
case CheckSource:
|
case CheckSource:
|
||||||
if bl.ErrString != "" {
|
if bl.ErrString != "" {
|
||||||
@ -122,15 +126,17 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
|||||||
pool.reqCount++
|
pool.reqCount++
|
||||||
if pool.reqCount%pool.CheckPeriod == 0 {
|
if pool.reqCount%pool.CheckPeriod == 0 {
|
||||||
pool.reqCount++
|
pool.reqCount++
|
||||||
pool.check()
|
pool.doCheck()
|
||||||
} else if pool.failedCount%pool.ErrPeriod == 0 {
|
} else if pool.failedCount%pool.ErrPeriod == 0 {
|
||||||
pool.failedCount++
|
pool.failedCount++
|
||||||
pool.check()
|
pool.doCheck()
|
||||||
}
|
}
|
||||||
pool.bar.Done()
|
pool.bar.Done()
|
||||||
case RedirectSource:
|
case RedirectSource:
|
||||||
bl.FrontURL = unit.frontUrl
|
bl.FrontURL = unit.frontUrl
|
||||||
pool.tempCh <- bl
|
pool.tempCh <- bl
|
||||||
|
case CrawlSource:
|
||||||
|
pool.tempCh <- bl
|
||||||
}
|
}
|
||||||
|
|
||||||
})
|
})
|
||||||
@ -184,9 +190,12 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 如果要进行递归判断, 要满足 bl有效, mod为path-spray, 当前深度小于最大递归深度
|
// 如果要进行递归判断, 要满足 bl有效, mod为path-spray, 当前深度小于最大递归深度
|
||||||
if bl.IsValid && pool.Mod == pkg.PathSpray && bl.RecuDepth < maxRecuDepth {
|
if bl.IsValid {
|
||||||
if CompareWithExpr(pool.RecuExpr, params) {
|
pool.doCrawl(bl)
|
||||||
bl.Recu = true
|
if bl.RecuDepth < maxRecursion {
|
||||||
|
if CompareWithExpr(pool.RecuExpr, params) {
|
||||||
|
bl.Recu = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pool.OutputCh <- bl
|
pool.OutputCh <- bl
|
||||||
@ -207,7 +216,8 @@ type Pool struct {
|
|||||||
ctx context.Context
|
ctx context.Context
|
||||||
cancel context.CancelFunc
|
cancel context.CancelFunc
|
||||||
tempCh chan *pkg.Baseline // 待处理的baseline
|
tempCh chan *pkg.Baseline // 待处理的baseline
|
||||||
checkCh chan sourceType
|
checkCh chan sourceType // 独立的check管道, 防止与redirect/crawl冲突
|
||||||
|
additionCh chan *Unit
|
||||||
reqCount int
|
reqCount int
|
||||||
failedCount int
|
failedCount int
|
||||||
isFailed bool
|
isFailed bool
|
||||||
@ -215,6 +225,7 @@ type Pool struct {
|
|||||||
random *pkg.Baseline
|
random *pkg.Baseline
|
||||||
index *pkg.Baseline
|
index *pkg.Baseline
|
||||||
baselines map[int]*pkg.Baseline
|
baselines map[int]*pkg.Baseline
|
||||||
|
urls map[string]int
|
||||||
analyzeDone bool
|
analyzeDone bool
|
||||||
worder *words.Worder
|
worder *words.Worder
|
||||||
locker sync.Mutex
|
locker sync.Mutex
|
||||||
@ -253,51 +264,16 @@ func (pool *Pool) Init() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if pool.random.RedirectURL != "" {
|
|
||||||
CheckRedirect = func(redirectURL string) bool {
|
|
||||||
if redirectURL == pool.random.RedirectURL {
|
|
||||||
// 相同的RedirectURL将被认为是无效数据
|
|
||||||
return false
|
|
||||||
} else {
|
|
||||||
// path为3xx, 且与baseline中的RedirectURL不同时, 为有效数据
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pool *Pool) addRedirect(bl *pkg.Baseline, reCount int) {
|
func (pool *Pool) checkRedirect(redirectURL string) bool {
|
||||||
if reCount >= maxRedirect {
|
if redirectURL == pool.random.RedirectURL {
|
||||||
return
|
// 相同的RedirectURL将被认为是无效数据
|
||||||
}
|
return false
|
||||||
|
} else {
|
||||||
if uu, err := url.Parse(bl.RedirectURL); err == nil && uu.Hostname() == pool.index.Url.Hostname() {
|
// path为3xx, 且与baseline中的RedirectURL不同时, 为有效数据
|
||||||
pool.wg.Add(1)
|
return true
|
||||||
_ = pool.reqPool.Invoke(&Unit{
|
|
||||||
number: bl.Number,
|
|
||||||
path: uu.Path,
|
|
||||||
source: RedirectSource,
|
|
||||||
frontUrl: bl.UrlString,
|
|
||||||
reCount: reCount + 1,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (pool *Pool) check() {
|
|
||||||
if pool.failedCount > pool.BreakThreshold {
|
|
||||||
// 当报错次数超过上限是, 结束任务
|
|
||||||
pool.recover()
|
|
||||||
pool.cancel()
|
|
||||||
pool.isFailed = true
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if pool.Mod == pkg.HostSpray {
|
|
||||||
pool.checkCh <- CheckSource
|
|
||||||
} else if pool.Mod == pkg.PathSpray {
|
|
||||||
pool.checkCh <- CheckSource
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -311,6 +287,11 @@ func (pool *Pool) genReq(s string) (*ihttp.Request, error) {
|
|||||||
}
|
}
|
||||||
func (pool *Pool) Run(ctx context.Context, offset, limit int) {
|
func (pool *Pool) Run(ctx context.Context, offset, limit int) {
|
||||||
pool.worder.RunWithRules()
|
pool.worder.RunWithRules()
|
||||||
|
go func() {
|
||||||
|
for unit := range pool.additionCh {
|
||||||
|
pool.reqPool.Invoke(unit)
|
||||||
|
}
|
||||||
|
}()
|
||||||
Loop:
|
Loop:
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -340,13 +321,16 @@ Loop:
|
|||||||
} else if pool.Mod == pkg.PathSpray {
|
} else if pool.Mod == pkg.PathSpray {
|
||||||
pool.reqPool.Invoke(newUnitWithNumber(pkg.RandPath(), source, pool.Statistor.End))
|
pool.reqPool.Invoke(newUnitWithNumber(pkg.RandPath(), source, pool.Statistor.End))
|
||||||
}
|
}
|
||||||
|
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
break Loop
|
break Loop
|
||||||
case <-pool.ctx.Done():
|
case <-pool.ctx.Done():
|
||||||
break Loop
|
break Loop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for len(pool.additionCh) > 0 {
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
}
|
||||||
pool.wg.Wait()
|
pool.wg.Wait()
|
||||||
pool.Statistor.EndTime = time.Now().Unix()
|
pool.Statistor.EndTime = time.Now().Unix()
|
||||||
pool.Close()
|
pool.Close()
|
||||||
@ -370,7 +354,7 @@ func (pool *Pool) PreCompare(resp *ihttp.Response) error {
|
|||||||
return ErrWaf
|
return ErrWaf
|
||||||
}
|
}
|
||||||
|
|
||||||
if CheckRedirect != nil && !CheckRedirect(resp.GetHeader("Location")) {
|
if !pool.checkRedirect(resp.GetHeader("Location")) {
|
||||||
return ErrRedirect
|
return ErrRedirect
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -417,7 +401,7 @@ func (pool *Pool) BaseCompare(bl *pkg.Baseline) bool {
|
|||||||
if ok && status == 0 && base.FuzzyCompare(bl) {
|
if ok && status == 0 && base.FuzzyCompare(bl) {
|
||||||
pool.Statistor.FuzzyNumber++
|
pool.Statistor.FuzzyNumber++
|
||||||
bl.Reason = ErrFuzzyCompareFailed.Error()
|
bl.Reason = ErrFuzzyCompareFailed.Error()
|
||||||
pool.PutToFuzzy(bl)
|
pool.putToFuzzy(bl)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -437,6 +421,77 @@ func CompareWithExpr(exp *vm.Program, params map[string]interface{}) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pool *Pool) doRedirect(bl *pkg.Baseline, depth int) {
|
||||||
|
if depth >= maxRedirect {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if uu, err := url.Parse(bl.RedirectURL); err == nil && uu.Hostname() == pool.index.Url.Hostname() {
|
||||||
|
pool.wg.Add(1)
|
||||||
|
pool.additionCh <- &Unit{
|
||||||
|
path: uu.Path,
|
||||||
|
source: RedirectSource,
|
||||||
|
frontUrl: bl.UrlString,
|
||||||
|
depth: depth + 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pool *Pool) doCrawl(bl *pkg.Baseline) {
|
||||||
|
bl.CollectURL()
|
||||||
|
for _, u := range bl.URLs {
|
||||||
|
if strings.HasPrefix(u, "//") {
|
||||||
|
u = bl.Url.Scheme + u
|
||||||
|
} else if strings.HasPrefix(u, "/") {
|
||||||
|
// 绝对目录拼接
|
||||||
|
u = pkg.URLJoin(pool.BaseURL, u)
|
||||||
|
} else if !strings.HasPrefix(u, "http") {
|
||||||
|
// 相对目录拼接
|
||||||
|
u = pkg.URLJoin(pool.BaseURL, u)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := pool.urls[u]; ok {
|
||||||
|
pool.urls[u]++
|
||||||
|
} else {
|
||||||
|
// 通过map去重, 只有新的url才会进入到该逻辑
|
||||||
|
pool.urls[u] = 1
|
||||||
|
if bl.ReqDepth < maxCrawl {
|
||||||
|
parsed, err := url.Parse(u)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if parsed.Host != bl.Url.Host {
|
||||||
|
// 自动限定scoop, 防止爬到其他网站
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pool.wg.Add(1)
|
||||||
|
pool.additionCh <- &Unit{
|
||||||
|
path: parsed.Path,
|
||||||
|
source: CrawlSource,
|
||||||
|
frontUrl: bl.UrlString,
|
||||||
|
depth: bl.ReqDepth + 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (pool *Pool) doCheck() {
|
||||||
|
if pool.failedCount > pool.BreakThreshold {
|
||||||
|
// 当报错次数超过上限是, 结束任务
|
||||||
|
pool.recover()
|
||||||
|
pool.cancel()
|
||||||
|
pool.isFailed = true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if pool.Mod == pkg.HostSpray {
|
||||||
|
pool.checkCh <- CheckSource
|
||||||
|
} else if pool.Mod == pkg.PathSpray {
|
||||||
|
pool.checkCh <- CheckSource
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
|
func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
|
||||||
if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) {
|
if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) {
|
||||||
bl.Collect()
|
bl.Collect()
|
||||||
@ -447,12 +502,12 @@ func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pool *Pool) PutToInvalid(bl *pkg.Baseline, reason string) {
|
func (pool *Pool) putToInvalid(bl *pkg.Baseline, reason string) {
|
||||||
bl.IsValid = false
|
bl.IsValid = false
|
||||||
pool.OutputCh <- bl
|
pool.OutputCh <- bl
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pool *Pool) PutToFuzzy(bl *pkg.Baseline) {
|
func (pool *Pool) putToFuzzy(bl *pkg.Baseline) {
|
||||||
bl.IsFuzzy = true
|
bl.IsFuzzy = true
|
||||||
pool.FuzzyCh <- bl
|
pool.FuzzyCh <- bl
|
||||||
}
|
}
|
||||||
@ -474,5 +529,6 @@ func (pool *Pool) Close() {
|
|||||||
time.Sleep(time.Duration(100) * time.Millisecond)
|
time.Sleep(time.Duration(100) * time.Millisecond)
|
||||||
}
|
}
|
||||||
close(pool.tempCh)
|
close(pool.tempCh)
|
||||||
|
close(pool.additionCh)
|
||||||
pool.bar.Close()
|
pool.bar.Close()
|
||||||
}
|
}
|
||||||
|
@ -72,6 +72,7 @@ type Runner struct {
|
|||||||
CheckOnly bool
|
CheckOnly bool
|
||||||
Force bool
|
Force bool
|
||||||
IgnoreWaf bool
|
IgnoreWaf bool
|
||||||
|
Crawl bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Runner) PrepareConfig() *pkg.Config {
|
func (r *Runner) PrepareConfig() *pkg.Config {
|
||||||
@ -90,6 +91,7 @@ func (r *Runner) PrepareConfig() *pkg.Config {
|
|||||||
FilterExpr: r.FilterExpr,
|
FilterExpr: r.FilterExpr,
|
||||||
RecuExpr: r.RecursiveExpr,
|
RecuExpr: r.RecursiveExpr,
|
||||||
IgnoreWaf: r.IgnoreWaf,
|
IgnoreWaf: r.IgnoreWaf,
|
||||||
|
Crawl: r.Crawl,
|
||||||
}
|
}
|
||||||
if config.Mod == pkg.PathSpray {
|
if config.Mod == pkg.PathSpray {
|
||||||
config.ClientType = ihttp.FAST
|
config.ClientType = ihttp.FAST
|
||||||
|
@ -51,6 +51,7 @@ const (
|
|||||||
InitRandomSource
|
InitRandomSource
|
||||||
InitIndexSource
|
InitIndexSource
|
||||||
RedirectSource
|
RedirectSource
|
||||||
|
CrawlSource
|
||||||
WordSource
|
WordSource
|
||||||
WafSource
|
WafSource
|
||||||
)
|
)
|
||||||
@ -60,15 +61,14 @@ func newUnit(path string, source sourceType) *Unit {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func newUnitWithNumber(path string, source sourceType, number int) *Unit {
|
func newUnitWithNumber(path string, source sourceType, number int) *Unit {
|
||||||
return &Unit{number: number, path: path, source: source}
|
return &Unit{path: path, source: source}
|
||||||
}
|
}
|
||||||
|
|
||||||
type Unit struct {
|
type Unit struct {
|
||||||
number int
|
|
||||||
path string
|
path string
|
||||||
source sourceType
|
source sourceType
|
||||||
frontUrl string
|
frontUrl string
|
||||||
reCount int // redirect number
|
depth int // redirect depth
|
||||||
}
|
}
|
||||||
|
|
||||||
type Task struct {
|
type Task struct {
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
"github.com/chainreactors/parsers"
|
"github.com/chainreactors/parsers"
|
||||||
"github.com/chainreactors/spray/pkg/ihttp"
|
"github.com/chainreactors/spray/pkg/ihttp"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@ -84,7 +85,9 @@ type Baseline struct {
|
|||||||
Reason string `json:"reason"`
|
Reason string `json:"reason"`
|
||||||
IsValid bool `json:"valid"`
|
IsValid bool `json:"valid"`
|
||||||
IsFuzzy bool `json:"fuzzy"`
|
IsFuzzy bool `json:"fuzzy"`
|
||||||
|
URLs []string `json:"urls"`
|
||||||
RecuDepth int `json:"-"`
|
RecuDepth int `json:"-"`
|
||||||
|
ReqDepth int `json:"depth"`
|
||||||
Recu bool `json:"-"`
|
Recu bool `json:"-"`
|
||||||
*parsers.Hashes
|
*parsers.Hashes
|
||||||
}
|
}
|
||||||
@ -106,6 +109,64 @@ func (bl *Baseline) Collect() {
|
|||||||
bl.Frameworks = FingerDetect(string(bl.Raw))
|
bl.Frameworks = FingerDetect(string(bl.Raw))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (bl *Baseline) CollectURL() {
|
||||||
|
if len(bl.Body) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, reg := range JSRegexps {
|
||||||
|
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
|
||||||
|
for _, u := range urls {
|
||||||
|
var filter bool
|
||||||
|
parsed, err := url.Parse(u[1])
|
||||||
|
if err != nil {
|
||||||
|
filter = true
|
||||||
|
} else {
|
||||||
|
for _, scoop := range BadScoop {
|
||||||
|
if scoop == parsed.Host {
|
||||||
|
filter = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if filter {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
bl.URLs = append(bl.URLs, u[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, reg := range URLRegexps {
|
||||||
|
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
|
||||||
|
for _, u := range urls {
|
||||||
|
var filter bool
|
||||||
|
parsed, err := url.Parse(u[1])
|
||||||
|
if err != nil {
|
||||||
|
filter = true
|
||||||
|
} else {
|
||||||
|
ext := path.Ext(parsed.Path)
|
||||||
|
for _, e := range BadExt {
|
||||||
|
if e == ext {
|
||||||
|
filter = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, scoop := range BadScoop {
|
||||||
|
if scoop == parsed.Host {
|
||||||
|
filter = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if filter {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
bl.URLs = append(bl.URLs, u[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Compare
|
// Compare
|
||||||
// if totally equal return 1
|
// if totally equal return 1
|
||||||
// if maybe equal return 0
|
// if maybe equal return 0
|
||||||
@ -186,6 +247,8 @@ func (bl *Baseline) Get(key string) string {
|
|||||||
return bl.Extracteds.String()
|
return bl.Extracteds.String()
|
||||||
case "frame", "framework":
|
case "frame", "framework":
|
||||||
return bl.Frameworks.String()
|
return bl.Frameworks.String()
|
||||||
|
case "full":
|
||||||
|
return bl.String()
|
||||||
default:
|
default:
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
@ -256,9 +319,9 @@ func (bl *Baseline) ColorString() string {
|
|||||||
line.WriteString(" - ")
|
line.WriteString(" - ")
|
||||||
line.WriteString(logs.GreenBold(strconv.Itoa(bl.Status)))
|
line.WriteString(logs.GreenBold(strconv.Itoa(bl.Status)))
|
||||||
line.WriteString(" - ")
|
line.WriteString(" - ")
|
||||||
line.WriteString(logs.Blue(strconv.Itoa(bl.BodyLength)))
|
line.WriteString(logs.YellowBold(strconv.Itoa(bl.BodyLength)))
|
||||||
line.WriteString(" - ")
|
line.WriteString(" - ")
|
||||||
line.WriteString(logs.Blue(strconv.Itoa(int(bl.Spended)) + "ms"))
|
line.WriteString(logs.YellowBold(strconv.Itoa(int(bl.Spended)) + "ms"))
|
||||||
line.WriteString(logs.GreenLine(bl.Additional("title")))
|
line.WriteString(logs.GreenLine(bl.Additional("title")))
|
||||||
line.WriteString(logs.Blue(bl.Frameworks.String()))
|
line.WriteString(logs.Blue(bl.Frameworks.String()))
|
||||||
line.WriteString(logs.Blue(bl.Extracteds.String()))
|
line.WriteString(logs.Blue(bl.Extracteds.String()))
|
||||||
@ -267,6 +330,12 @@ func (bl *Baseline) ColorString() string {
|
|||||||
line.WriteString(logs.CyanLine(bl.RedirectURL))
|
line.WriteString(logs.CyanLine(bl.RedirectURL))
|
||||||
line.WriteString(" ")
|
line.WriteString(" ")
|
||||||
}
|
}
|
||||||
|
if len(bl.URLs) > 0 {
|
||||||
|
line.WriteString("\n")
|
||||||
|
}
|
||||||
|
for _, u := range bl.URLs {
|
||||||
|
line.WriteString("\t" + u + "\n")
|
||||||
|
}
|
||||||
return line.String()
|
return line.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,6 +377,12 @@ func (bl *Baseline) String() string {
|
|||||||
line.WriteString(bl.RedirectURL)
|
line.WriteString(bl.RedirectURL)
|
||||||
line.WriteString(" ")
|
line.WriteString(" ")
|
||||||
}
|
}
|
||||||
|
if len(bl.URLs) > 0 {
|
||||||
|
line.WriteString("\n")
|
||||||
|
}
|
||||||
|
for _, u := range bl.URLs {
|
||||||
|
line.WriteString("\t" + u + "\n")
|
||||||
|
}
|
||||||
return line.String()
|
return line.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,4 +38,5 @@ type Config struct {
|
|||||||
FuzzyCh chan *Baseline
|
FuzzyCh chan *Baseline
|
||||||
Fuzzy bool
|
Fuzzy bool
|
||||||
IgnoreWaf bool
|
IgnoreWaf bool
|
||||||
|
Crawl bool
|
||||||
}
|
}
|
||||||
|
54
pkg/utils.go
54
pkg/utils.go
@ -1,17 +1,35 @@
|
|||||||
package pkg
|
package pkg
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"github.com/chainreactors/gogo/v2/pkg/fingers"
|
"github.com/chainreactors/gogo/v2/pkg/fingers"
|
||||||
"github.com/chainreactors/gogo/v2/pkg/utils"
|
"github.com/chainreactors/gogo/v2/pkg/utils"
|
||||||
"github.com/chainreactors/ipcs"
|
"github.com/chainreactors/ipcs"
|
||||||
"github.com/go-dedup/simhash"
|
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
	// Md5Fingers is keyed by hash string.
	// NOTE(review): presumably favicon MD5 -> fingerprint name, mirroring
	// Mmh3Fingers; confirm against LoadTemplates.
	Md5Fingers map[string]string = make(map[string]string)
	// Mmh3Fingers maps a favicon mmh3 hash to the name of the fingerprint that
	// declares it; it is filled by LoadTemplates.
	Mmh3Fingers map[string]string = make(map[string]string)
	// ActivePath collects the non-empty SendDataStr of every fingerprint rule;
	// it is filled by LoadTemplates.
	ActivePath []string
	// Fingers holds the fingerprints loaded by LoadTemplates.
	Fingers fingers.Fingers
	// JSRegexps are the patterns Baseline.CollectURL uses to pull script
	// references out of a response body. Capture group 1 of every pattern ends
	// in ".js".
	JSRegexps []*regexp.Regexp = []*regexp.Regexp{
		regexp.MustCompile(".(https{0,1}:[^\\s,^',^’,^\",^”,^>,^<,^;,^(,^),^|,^*,^\\[]{2,250}?[^=,^*,^\\s,^',^’,^\",^”,^>,^<,^:,^;,^*,^|,^(,^),^\\[]{3}[.]js)"),
		regexp.MustCompile("[\",',‘,“]\\s{0,6}(/{0,1}[^\\s,^',^’,^\",^”,^|,^>,^<,^:,^;,^*,^(,^\\),^\\[]{2,250}?[^=,^*,^\\s,^',^’,^|,^\",^”,^>,^<,^:,^;,^*,^(,^),^\\[]{3}[.]js)"),
		regexp.MustCompile("=\\s{0,6}[\",',’,”]{0,1}\\s{0,6}(/{0,1}[^\\s,^',^’,^\",^”,^|,^>,^<,^;,^*,^(,^),^\\[]{2,250}?[^=,^*,^\\s,^',^’,^\",^”,^>,^|,^<,^:,^;,^*,^(,^),^\\[]{3}[.]js)"),
	}
	// URLRegexps are the patterns Baseline.CollectURL uses to pull links out
	// of a response body: quoted http(s) URLs, http(s) URLs after "=", quoted
	// paths, and href/action attribute values. The last pattern is an
	// alternation with two capture groups (one for href, one for action), so
	// only one of its groups is non-empty for a given match.
	URLRegexps []*regexp.Regexp = []*regexp.Regexp{
		regexp.MustCompile("[\",',‘,“]\\s{0,6}(https{0,1}:[^\\s,^',^’,^\",^”,^>,^<,^),^(]{2,250}?)\\s{0,6}[\",',‘,“]"),
		regexp.MustCompile("=\\s{0,6}(https{0,1}:[^\\s,^',^’,^\",^”,^>,^<,^),^(]{2,250})"),
		regexp.MustCompile("[\",',‘,“]\\s{0,6}([#,.]{0,2}/[^\\s,^',^’,^\",^”,^>,^<,^:,^),^(]{2,250}?)\\s{0,6}[\",',‘,“]"),
		regexp.MustCompile("href\\s{0,6}=\\s{0,6}[\",',‘,“]{0,1}\\s{0,6}([^\\s,^',^’,^\",^“,^>,^<,^,^+),^(]{2,250})|action\\s{0,6}=\\s{0,6}[\",',‘,“]{0,1}\\s{0,6}([^\\s,^',^’,^\",^“,^>,^<,^,^+),^(]{2,250})"),
	}
)
|
||||||
|
|
||||||
func HasStdin() bool {
|
func HasStdin() bool {
|
||||||
stat, err := os.Stdin.Stat()
|
stat, err := os.Stdin.Stat()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -24,11 +42,6 @@ func HasStdin() bool {
|
|||||||
return isPipedFromChrDev || isPipedFromFIFO
|
return isPipedFromChrDev || isPipedFromFIFO
|
||||||
}
|
}
|
||||||
|
|
||||||
func Simhash(raw []byte) string {
|
|
||||||
sh := simhash.NewSimhash()
|
|
||||||
return fmt.Sprintf("%x", sh.GetSimhash(sh.NewWordFeatureSet(raw)))
|
|
||||||
}
|
|
||||||
|
|
||||||
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
|
||||||
var src = rand.NewSource(time.Now().UnixNano())
|
var src = rand.NewSource(time.Now().UnixNano())
|
||||||
@ -80,12 +93,6 @@ func RandHost() string {
|
|||||||
return *(*string)(unsafe.Pointer(&b))
|
return *(*string)(unsafe.Pointer(&b))
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
|
||||||
Md5Fingers map[string]string = make(map[string]string)
|
|
||||||
Mmh3Fingers map[string]string = make(map[string]string)
|
|
||||||
Fingers fingers.Fingers
|
|
||||||
)
|
|
||||||
|
|
||||||
func LoadTemplates() error {
|
func LoadTemplates() error {
|
||||||
var err error
|
var err error
|
||||||
Fingers, err = fingers.LoadFingers(LoadConfig("http"))
|
Fingers, err = fingers.LoadFingers(LoadConfig("http"))
|
||||||
@ -102,6 +109,9 @@ func LoadTemplates() error {
|
|||||||
|
|
||||||
for _, f := range Fingers {
|
for _, f := range Fingers {
|
||||||
for _, rule := range f.Rules {
|
for _, rule := range f.Rules {
|
||||||
|
if rule.SendDataStr != "" {
|
||||||
|
ActivePath = append(ActivePath, rule.SendDataStr)
|
||||||
|
}
|
||||||
if rule.Favicon != nil {
|
if rule.Favicon != nil {
|
||||||
for _, mmh3 := range rule.Favicon.Mmh3 {
|
for _, mmh3 := range rule.Favicon.Mmh3 {
|
||||||
Mmh3Fingers[mmh3] = f.Name
|
Mmh3Fingers[mmh3] = f.Name
|
||||||
@ -127,3 +137,21 @@ func FingerDetect(content string) Frameworks {
|
|||||||
}
|
}
|
||||||
return frames
|
return frames
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
	// BadExt lists file extensions that Baseline.CollectURL compares, by exact
	// equality, with path.Ext of a candidate URL's path; a match drops the URL.
	// NOTE(review): path.Ext returns either "" or a string starting with ".",
	// so the "," entry can never match; verify whether it is a typo.
	BadExt = []string{".js", ".css", ".scss", ",", ".jpeg", ".jpg", ".png", ".gif", ".ico", ".svg", ".vue", ".ts"}
	//BadURL = []string{".js?", ".css?", ".jpeg?", ".jpg?", ".png?", ".gif?", "github.com", "www.w3.org", "example.com", "<", ">", "{", "}", "[", "]", "|", "^", ";", "/js/", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*", "\\n"}
	// BadScoop lists hosts that Baseline.CollectURL compares, by exact
	// equality, with the host of a candidate URL; a match drops the URL.
	BadScoop = []string{"www.w3.org", "example.com"}
)
|
||||||
|
|
||||||
|
// URLJoin concatenates base and uri with exactly one "/" at the seam.
//
// At most one trailing "/" is removed from base and at most one leading "/"
// from uri before they are joined, so any further slashes on either side are
// kept as they are. An empty uri yields base followed by a single "/".
func URLJoin(base, uri string) string {
	return strings.TrimSuffix(base, "/") + "/" + strings.TrimPrefix(uri, "/")
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user