diff --git a/internal/option.go b/internal/option.go
index b28515c..64b5839 100644
--- a/internal/option.go
+++ b/internal/option.go
@@ -232,14 +232,15 @@ func (opt *Option) PrepareRunner() (*Runner, error) {
 		logs.Log.Importantf("Loaded %d word from %s", len(dicts[i]), f)
 	}
 
-	if len(opt.Dictionaries) > 0 && opt.Word == "" {
+	if len(opt.Dictionaries) == 0 && opt.Word == "" {
+		// fall back to "/" when only the advanced features are used, so a missing dictionary does not cause an error
+		opt.Word = "/"
+	} else {
 		opt.Word = "{?"
 		for i, _ := range dicts {
 			opt.Word += strconv.Itoa(i)
 		}
 		opt.Word += "}"
-	} else {
-		opt.Word = "/"
 	}
 
 	if opt.Suffixes != nil {
@@ -376,7 +377,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) {
 	if opt.RemoveExtensions != "" {
 		rexts := strings.Split(opt.ExcludeExtensions, ",")
 		r.Fns = append(r.Fns, func(s string) string {
-			if ext := parseExtension(s); StringsContains(rexts, ext) {
+			if ext := parseExtension(s); pkg.StringsContains(rexts, ext) {
 				return strings.TrimSuffix(s, "."+ext)
 			}
 			return s
@@ -386,7 +387,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) {
 	if opt.ExcludeExtensions != "" {
 		exexts := strings.Split(opt.ExcludeExtensions, ",")
 		r.Fns = append(r.Fns, func(s string) string {
-			if ext := parseExtension(s); StringsContains(exexts, ext) {
+			if ext := parseExtension(s); pkg.StringsContains(exexts, ext) {
 				return ""
 			}
 			return s
diff --git a/internal/pool.go b/internal/pool.go
index 903cbf9..51ecd18 100644
--- a/internal/pool.go
+++ b/internal/pool.go
@@ -221,12 +221,6 @@ func (pool *Pool) Run(ctx context.Context, offset, limit int) {
 	}
 
 	closeCh := make(chan struct{})
-	//go func() {
-	//	select {
-	//	case <-worderDone:
-	//		closeCh <- struct{}{}
-	//	}
-	//}()
 	var worderDone bool
 	wait := func() {
 		if !worderDone {
@@ -279,9 +273,6 @@ Loop:
 	}
 	pool.wg.Wait()
-	for pool.analyzeDone {
-		time.Sleep(time.Duration(100) * time.Millisecond)
-	}
 	pool.Statistor.EndTime = time.Now().Unix()
 	pool.Close()
 }
@@ -345,8 +336,8 @@ func (pool *Pool) Invoke(v interface{}) {
 		bl.Collect()
 		pool.locker.Lock()
 		pool.random = bl
-		pool.locker.Unlock()
 		pool.addFuzzyBaseline(bl)
+		pool.locker.Unlock()
 		pool.initwg.Done()
 	case InitIndexSource:
 		bl.Collect()
@@ -354,10 +345,10 @@ func (pool *Pool) Invoke(v interface{}) {
 		pool.index = bl
 		pool.locker.Unlock()
 		pool.wg.Add(1)
+		pool.doCrawl(bl)
 		if bl.Status == 200 || (bl.Status/100) == 3 {
 			pool.OutputCh <- bl
 		}
-		pool.doCrawl(bl)
 		pool.initwg.Done()
 	case CheckSource:
 		if bl.ErrString != "" {
@@ -399,7 +390,7 @@ func (pool *Pool) Invoke(v interface{}) {
 
 func (pool *Pool) PreCompare(resp *ihttp.Response) error {
 	status := resp.StatusCode()
-	if IntsContains(WhiteStatus, status) {
+	if pkg.IntsContains(WhiteStatus, status) {
 		// return immediately if the status code is whitelisted
 		return nil
 	}
@@ -407,11 +398,11 @@ func (pool *Pool) PreCompare(resp *ihttp.Response) error {
 		return ErrSameStatus
 	}
 
-	if IntsContains(BlackStatus, status) {
+	if pkg.IntsContains(BlackStatus, status) {
 		return ErrBadStatus
 	}
 
-	if IntsContains(WAFStatus, status) {
+	if pkg.IntsContains(WAFStatus, status) {
 		return ErrWaf
 	}
@@ -505,17 +496,47 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
 		return
 	}
 	bl.CollectURL()
+	if bl.URLs == nil {
+		pool.wg.Done()
+		return
+	}
 	go func() {
 		defer pool.wg.Done()
 		for _, u := range bl.URLs {
 			if strings.HasPrefix(u, "//") {
-				u = bl.Url.Scheme + u
+				parsed, err := url.Parse(u)
+				if err != nil {
+					continue
+				}
+				if parsed.Host != bl.Url.Host {
+					continue
+				}
+				u = parsed.Path
 			} else if strings.HasPrefix(u, "/") { // absolute path join
+				// nothing to do here; this branch exists only to skip the checks below
-				u = pkg.URLJoin(pool.BaseURL, u)
+			} else if strings.HasPrefix(u, "./") {
+				// "./" relative path join
+				if bl.Dir {
+					u = pkg.URLJoin(bl.Url.Path, u[2:])
+				} else {
+					u = pkg.URLJoin(path.Dir(bl.Url.Path), u[2:])
+				}
 			} else if !strings.HasPrefix(u, "http") { // relative path join
-				u = pkg.URLJoin(pool.BaseURL, u)
+				if bl.Dir {
+					u = pkg.URLJoin(bl.Url.Path, u)
+				} else {
+					u = pkg.URLJoin(path.Dir(bl.Url.Path), u)
+				}
+			} else {
+				parsed, err := url.Parse(u)
+				if err != nil {
+					continue
+				}
+				if parsed.Host != bl.Url.Host {
+					continue
+				}
 			}
 
 			if _, ok := pool.urls[u]; ok {
@@ -526,17 +547,9 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
 				pool.urls[u] = 1
 				pool.locker.Unlock()
 				if bl.ReqDepth < maxCrawl {
-					parsed, err := url.Parse(u)
-					if err != nil {
-						continue
-					}
-					if parsed.Host != bl.Url.Host {
-						// automatically limit the scope, to avoid crawling other sites
-						continue
-					}
 					pool.wg.Add(1)
 					pool.addAddition(&Unit{
-						path:   parsed.Path,
+						path:   u[1:],
 						source: CrawlSource,
 						depth:  bl.ReqDepth + 1,
 					})
@@ -645,7 +658,7 @@ func (pool *Pool) addAddition(u *Unit) {
 }
 
 func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
-	if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) {
+	if _, ok := pool.baselines[bl.Status]; !ok && pkg.IntsContains(FuzzyStatus, bl.Status) {
 		bl.Collect()
 		pool.wg.Add(1)
 		pool.doCrawl(bl)
diff --git a/internal/runner.go b/internal/runner.go
index 6583d9c..e7b8406 100644
--- a/internal/runner.go
+++ b/internal/runner.go
@@ -326,6 +326,13 @@ func (r *Runner) Done() {
 }
 
 func (r *Runner) Outputting() {
+	debugPrint := func(bl *pkg.Baseline) {
+		if r.Color {
+			logs.Log.Debug(bl.ColorString())
+		} else {
+			logs.Log.Debug(bl.String())
+		}
+	}
 	go func() {
 		var saveFunc func(*pkg.Baseline)
 
@@ -355,7 +362,6 @@ func (r *Runner) Outputting() {
 					logs.Log.Console("[+] " + bl.String() + "\n")
 				}
 			}
-
 		}
 	}
 
@@ -375,11 +381,7 @@ func (r *Runner) Outputting() {
 						r.AddPool(&Task{baseUrl: bl.UrlString, depth: bl.RecuDepth + 1})
 					}
 				} else {
-					if r.Color {
-						logs.Log.Debug(bl.ColorString())
-					} else {
-						logs.Log.Debug(bl.String())
-					}
+					debugPrint(bl)
 				}
 			}
 		}
@@ -411,6 +413,8 @@ func (r *Runner) Outputting() {
 				}
 				if r.Fuzzy {
 					fuzzySaveFunc(bl)
+				} else {
+					debugPrint(bl)
 				}
 			}
 		}
diff --git a/internal/utils.go b/internal/utils.go
index 8f7cb68..201ca9b 100644
--- a/internal/utils.go
+++ b/internal/utils.go
@@ -16,24 +16,6 @@ func parseExtension(s string) string {
 	return ""
 }
 
-func StringsContains(s []string, e string) bool {
-	for _, v := range s {
-		if v == e {
-			return true
-		}
-	}
-	return false
-}
-
-func IntsContains(s []int, e int) bool {
-	for _, v := range s {
-		if v == e {
-			return true
-		}
-	}
-	return false
-}
-
 func loadFileToSlice(filename string) ([]string, error) {
 	var ss []string
 	content, err := ioutil.ReadFile(filename)
diff --git a/pkg/baseline.go b/pkg/baseline.go
index a1ec5d6..272eb51 100644
--- a/pkg/baseline.go
+++ b/pkg/baseline.go
@@ -18,9 +18,9 @@ func GetSourceName(s int) string {
 	case 1:
 		return "check"
 	case 2:
-		return "index"
-	case 3:
 		return "random"
+	case 3:
+		return "index"
 	case 4:
 		return "redirect"
 	case 5:
@@ -177,7 +177,7 @@ func (bl *Baseline) CollectURL() {
 		urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
 		for _, u := range urls {
 			if !filterJs(u[1]) {
-				bl.URLs = append(bl.URLs, u[1])
+				bl.URLs = append(bl.URLs, formatURL(u[1]))
 			}
 		}
 	}
@@ -186,7 +186,7 @@ func (bl *Baseline) CollectURL() {
 		urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
 		for _, u := range urls {
 			if !filterUrl(u[1]) {
-				bl.URLs = append(bl.URLs, u[1])
+				bl.URLs = append(bl.URLs, formatURL(u[1]))
 			}
 		}
 	}
 
 	if bl.URLs != nil {
 		bl.Extracteds = append(bl.Extracteds, &fingers.Extracted{
 			Name:          "crawl",
"crawl", - ExtractResult: bl.URLs, + ExtractResult: RemoveDuplication(bl.URLs), }) } } diff --git a/pkg/utils.go b/pkg/utils.go index 1549181..6e2941f 100644 --- a/pkg/utils.go +++ b/pkg/utils.go @@ -77,6 +77,22 @@ func IntsContains(s []int, e int) bool { return false } +func RemoveDuplication(arr []string) []string { + set := make(map[string]struct{}, len(arr)) + j := 0 + for _, v := range arr { + _, ok := set[v] + if ok { + continue + } + set[v] = struct{}{} + arr[j] = v + j++ + } + + return arr[:j] +} + func HasStdin() bool { stat, err := os.Stdin.Stat() if err != nil { @@ -210,20 +226,22 @@ func FingerDetect(content string) Frameworks { var ( BadExt = []string{".js", ".css", ".scss", ",", ".jpeg", ".jpg", ".png", ".gif", ".ico", ".svg", ".vue", ".ts"} - //BadURL = []string{".js?", ".css?", ".jpeg?", ".jpg?", ".png?", ".gif?", "github.com", "www.w3.org", "example.com", "<", ">", "{", "}", "[", "]", "|", "^", ";", "/js/", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*", "\\n"} - BadScoop = []string{"www.w3.org", "example.com"} + BadURL = []string{"www.w3.org", "example.com", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*"} ) func filterJs(u string) bool { - for _, scoop := range BadScoop { - if strings.Contains(u, scoop) { - return true - } + if commonFilter(u) { + return true } + return false } func filterUrl(u string) bool { + if commonFilter(u) { + return true + } + parsed, err := url.Parse(u) if err != nil { return true @@ -235,7 +253,26 @@ func filterUrl(u string) bool { } } } - for _, scoop := range BadScoop { + return false +} + +func formatURL(u string) string { + // 去掉frag与params, 节约url.parse性能, 防止带参数造成意外的影响 + if i := strings.Index(u, "?"); i != -1 { + return u[:i] + } + if i := strings.Index(u, "#"); i != -1 { + return u[:i] + } + return u +} + +func commonFilter(u string) bool { + if strings.HasPrefix(u, "http") && len(u) < 9 { + return true + } + + for _, scoop := range BadURL { if strings.Contains(u, scoop) { return true }