初步实现简易爬虫

This commit is contained in:
M09Ic 2023-01-03 17:09:32 +08:00
parent 494ce9414a
commit 9e9b0de039
7 changed files with 243 additions and 77 deletions

View File

@ -32,6 +32,7 @@ type InputOptions struct {
ResumeFrom string `long:"resume-from"` ResumeFrom string `long:"resume-from"`
URL string `short:"u" long:"url" description:"String, input baseurl (separated by commas), e.g.: http://google.com, http://baidu.com"` URL string `short:"u" long:"url" description:"String, input baseurl (separated by commas), e.g.: http://google.com, http://baidu.com"`
URLFile string `short:"l" long:"list" description:"File, input filename"` URLFile string `short:"l" long:"list" description:"File, input filename"`
Raw string `long:"raw" description:"File, input raw request filename"`
Offset int `long:"offset" description:"Int, wordlist offset"` Offset int `long:"offset" description:"Int, wordlist offset"`
Limit int `long:"limit" description:"Int, wordlist limit, start with offset. e.g.: --offset 1000 --limit 100"` Limit int `long:"limit" description:"Int, wordlist limit, start with offset. e.g.: --offset 1000 --limit 100"`
Dictionaries []string `short:"d" long:"dict" description:"Files, dict files, e.g.: -d 1.txt -d 2.txt"` Dictionaries []string `short:"d" long:"dict" description:"Files, dict files, e.g.: -d 1.txt -d 2.txt"`
@ -77,6 +78,8 @@ type ModeOptions struct {
CheckOnly bool `long:"check-only" description:"Bool, check only"` CheckOnly bool `long:"check-only" description:"Bool, check only"`
Recursive string `long:"recursive" default:"current.IsDir()" description:"String,custom recursive rule, e.g.: --recursive current.IsDir()"` Recursive string `long:"recursive" default:"current.IsDir()" description:"String,custom recursive rule, e.g.: --recursive current.IsDir()"`
Depth int `long:"depth" default:"0" description:"Int, recursive depth"` Depth int `long:"depth" default:"0" description:"Int, recursive depth"`
Crawl bool `long:"crawl" description:"Bool, enable crawl"`
CrawlDepth int `long:"spider-depth" default:"3" description:"Int, crawl depth"`
CheckPeriod int `long:"check-period" default:"200" description:"Int, check period when request"` CheckPeriod int `long:"check-period" default:"200" description:"Int, check period when request"`
ErrPeriod int `long:"error-period" default:"10" description:"Int, check period when error"` ErrPeriod int `long:"error-period" default:"10" description:"Int, check period when error"`
BreakThreshold int `long:"error-threshold" default:"20" description:"Int, break when the error exceeds the threshold "` BreakThreshold int `long:"error-threshold" default:"20" description:"Int, break when the error exceeds the threshold "`
@ -123,6 +126,7 @@ func (opt *Option) PrepareRunner() (*Runner, error) {
CheckPeriod: opt.CheckPeriod, CheckPeriod: opt.CheckPeriod,
ErrPeriod: opt.ErrPeriod, ErrPeriod: opt.ErrPeriod,
BreakThreshold: opt.BreakThreshold, BreakThreshold: opt.BreakThreshold,
Crawl: opt.Crawl,
} }
err = pkg.LoadTemplates() err = pkg.LoadTemplates()

View File

@ -20,13 +20,12 @@ import (
) )
var ( var (
CheckRedirect func(string) bool max = 2147483647
maxRedirect = 3
maxCrawl = 3
maxRecursion = 0
) )
var max = 2147483647
var maxRedirect = 3
var maxRecuDepth = 0
func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) { func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
pctx, cancel := context.WithCancel(ctx) pctx, cancel := context.WithCancel(ctx)
pool := &Pool{ pool := &Pool{
@ -35,8 +34,10 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
cancel: cancel, cancel: cancel,
client: ihttp.NewClient(config.Thread, 2, config.ClientType), client: ihttp.NewClient(config.Thread, 2, config.ClientType),
baselines: make(map[int]*pkg.Baseline), baselines: make(map[int]*pkg.Baseline),
urls: make(map[string]int),
tempCh: make(chan *pkg.Baseline, config.Thread), tempCh: make(chan *pkg.Baseline, config.Thread),
checkCh: make(chan sourceType), checkCh: make(chan sourceType),
additionCh: make(chan *Unit, 100),
wg: sync.WaitGroup{}, wg: sync.WaitGroup{},
initwg: sync.WaitGroup{}, initwg: sync.WaitGroup{},
reqCount: 1, reqCount: 1,
@ -80,7 +81,7 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
bl.RedirectURL = "/" + strings.TrimLeft(bl.RedirectURL, "/") bl.RedirectURL = "/" + strings.TrimLeft(bl.RedirectURL, "/")
bl.RedirectURL = pool.BaseURL + bl.RedirectURL bl.RedirectURL = pool.BaseURL + bl.RedirectURL
} }
pool.addRedirect(bl, unit.reCount) pool.doRedirect(bl, unit.depth)
} }
pool.addFuzzyBaseline(bl) pool.addFuzzyBaseline(bl)
} else { } else {
@ -89,14 +90,17 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
} }
} }
bl.ReqDepth = unit.depth
bl.Spended = time.Since(start).Milliseconds() bl.Spended = time.Since(start).Milliseconds()
switch unit.source { switch unit.source {
case InitRandomSource: case InitRandomSource:
pool.random = bl pool.random = bl
pool.addFuzzyBaseline(bl) pool.addFuzzyBaseline(bl)
pool.doCrawl(bl)
pool.initwg.Done() pool.initwg.Done()
case InitIndexSource: case InitIndexSource:
pool.index = bl pool.index = bl
pool.doCrawl(bl)
pool.initwg.Done() pool.initwg.Done()
case CheckSource: case CheckSource:
if bl.ErrString != "" { if bl.ErrString != "" {
@ -122,15 +126,17 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
pool.reqCount++ pool.reqCount++
if pool.reqCount%pool.CheckPeriod == 0 { if pool.reqCount%pool.CheckPeriod == 0 {
pool.reqCount++ pool.reqCount++
pool.check() pool.doCheck()
} else if pool.failedCount%pool.ErrPeriod == 0 { } else if pool.failedCount%pool.ErrPeriod == 0 {
pool.failedCount++ pool.failedCount++
pool.check() pool.doCheck()
} }
pool.bar.Done() pool.bar.Done()
case RedirectSource: case RedirectSource:
bl.FrontURL = unit.frontUrl bl.FrontURL = unit.frontUrl
pool.tempCh <- bl pool.tempCh <- bl
case CrawlSource:
pool.tempCh <- bl
} }
}) })
@ -184,11 +190,14 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
} }
// 如果要进行递归判断, 要满足 bl有效, mod为path-spray, 当前深度小于最大递归深度 // 如果要进行递归判断, 要满足 bl有效, mod为path-spray, 当前深度小于最大递归深度
if bl.IsValid && pool.Mod == pkg.PathSpray && bl.RecuDepth < maxRecuDepth { if bl.IsValid {
pool.doCrawl(bl)
if bl.RecuDepth < maxRecursion {
if CompareWithExpr(pool.RecuExpr, params) { if CompareWithExpr(pool.RecuExpr, params) {
bl.Recu = true bl.Recu = true
} }
} }
}
pool.OutputCh <- bl pool.OutputCh <- bl
pool.wg.Done() pool.wg.Done()
} }
@ -207,7 +216,8 @@ type Pool struct {
ctx context.Context ctx context.Context
cancel context.CancelFunc cancel context.CancelFunc
tempCh chan *pkg.Baseline // 待处理的baseline tempCh chan *pkg.Baseline // 待处理的baseline
checkCh chan sourceType checkCh chan sourceType // 独立的check管道 防止与redirect/crawl冲突
additionCh chan *Unit
reqCount int reqCount int
failedCount int failedCount int
isFailed bool isFailed bool
@ -215,6 +225,7 @@ type Pool struct {
random *pkg.Baseline random *pkg.Baseline
index *pkg.Baseline index *pkg.Baseline
baselines map[int]*pkg.Baseline baselines map[int]*pkg.Baseline
urls map[string]int
analyzeDone bool analyzeDone bool
worder *words.Worder worder *words.Worder
locker sync.Mutex locker sync.Mutex
@ -253,8 +264,10 @@ func (pool *Pool) Init() error {
} }
} }
if pool.random.RedirectURL != "" { return nil
CheckRedirect = func(redirectURL string) bool { }
func (pool *Pool) checkRedirect(redirectURL string) bool {
if redirectURL == pool.random.RedirectURL { if redirectURL == pool.random.RedirectURL {
// 相同的RedirectURL将被认为是无效数据 // 相同的RedirectURL将被认为是无效数据
return false return false
@ -262,43 +275,6 @@ func (pool *Pool) Init() error {
// path为3xx, 且与baseline中的RedirectURL不同时, 为有效数据 // path为3xx, 且与baseline中的RedirectURL不同时, 为有效数据
return true return true
} }
}
}
return nil
}
func (pool *Pool) addRedirect(bl *pkg.Baseline, reCount int) {
if reCount >= maxRedirect {
return
}
if uu, err := url.Parse(bl.RedirectURL); err == nil && uu.Hostname() == pool.index.Url.Hostname() {
pool.wg.Add(1)
_ = pool.reqPool.Invoke(&Unit{
number: bl.Number,
path: uu.Path,
source: RedirectSource,
frontUrl: bl.UrlString,
reCount: reCount + 1,
})
}
}
func (pool *Pool) check() {
if pool.failedCount > pool.BreakThreshold {
// 当报错次数超过上限是, 结束任务
pool.recover()
pool.cancel()
pool.isFailed = true
return
}
if pool.Mod == pkg.HostSpray {
pool.checkCh <- CheckSource
} else if pool.Mod == pkg.PathSpray {
pool.checkCh <- CheckSource
}
} }
func (pool *Pool) genReq(s string) (*ihttp.Request, error) { func (pool *Pool) genReq(s string) (*ihttp.Request, error) {
@ -311,6 +287,11 @@ func (pool *Pool) genReq(s string) (*ihttp.Request, error) {
} }
func (pool *Pool) Run(ctx context.Context, offset, limit int) { func (pool *Pool) Run(ctx context.Context, offset, limit int) {
pool.worder.RunWithRules() pool.worder.RunWithRules()
go func() {
for unit := range pool.additionCh {
pool.reqPool.Invoke(unit)
}
}()
Loop: Loop:
for { for {
select { select {
@ -340,13 +321,16 @@ Loop:
} else if pool.Mod == pkg.PathSpray { } else if pool.Mod == pkg.PathSpray {
pool.reqPool.Invoke(newUnitWithNumber(pkg.RandPath(), source, pool.Statistor.End)) pool.reqPool.Invoke(newUnitWithNumber(pkg.RandPath(), source, pool.Statistor.End))
} }
case <-ctx.Done(): case <-ctx.Done():
break Loop break Loop
case <-pool.ctx.Done(): case <-pool.ctx.Done():
break Loop break Loop
} }
} }
for len(pool.additionCh) > 0 {
time.Sleep(time.Second)
}
pool.wg.Wait() pool.wg.Wait()
pool.Statistor.EndTime = time.Now().Unix() pool.Statistor.EndTime = time.Now().Unix()
pool.Close() pool.Close()
@ -370,7 +354,7 @@ func (pool *Pool) PreCompare(resp *ihttp.Response) error {
return ErrWaf return ErrWaf
} }
if CheckRedirect != nil && !CheckRedirect(resp.GetHeader("Location")) { if !pool.checkRedirect(resp.GetHeader("Location")) {
return ErrRedirect return ErrRedirect
} }
@ -417,7 +401,7 @@ func (pool *Pool) BaseCompare(bl *pkg.Baseline) bool {
if ok && status == 0 && base.FuzzyCompare(bl) { if ok && status == 0 && base.FuzzyCompare(bl) {
pool.Statistor.FuzzyNumber++ pool.Statistor.FuzzyNumber++
bl.Reason = ErrFuzzyCompareFailed.Error() bl.Reason = ErrFuzzyCompareFailed.Error()
pool.PutToFuzzy(bl) pool.putToFuzzy(bl)
return false return false
} }
@ -437,6 +421,77 @@ func CompareWithExpr(exp *vm.Program, params map[string]interface{}) bool {
} }
} }
// doRedirect queues a follow-up request for bl's redirect target, provided the
// redirect chain is still shallower than maxRedirect and the target stays on
// the same host as the index baseline.
func (pool *Pool) doRedirect(bl *pkg.Baseline, depth int) {
	if depth >= maxRedirect {
		return
	}
	parsed, err := url.Parse(bl.RedirectURL)
	if err != nil {
		return
	}
	// only follow same-host redirects, so the spray never leaves the target
	if parsed.Hostname() != pool.index.Url.Hostname() {
		return
	}
	pool.wg.Add(1)
	pool.additionCh <- &Unit{
		path:     parsed.Path,
		source:   RedirectSource,
		frontUrl: bl.UrlString,
		depth:    depth + 1,
	}
}
// doCrawl collects URLs from the baseline's response body and queues every
// previously-unseen, same-host URL for crawling, until the baseline's request
// depth reaches maxCrawl.
//
// NOTE(review): pool.urls is read and written here without holding
// pool.locker — confirm the request pool invokes these handlers serially.
func (pool *Pool) doCrawl(bl *pkg.Baseline) {
	bl.CollectURL()
	for _, u := range bl.URLs {
		if strings.HasPrefix(u, "//") {
			// scheme-relative URL: prepend scheme plus ":". The previous code
			// concatenated Scheme+u directly, producing "http//host" (the ":"
			// was missing, so these URLs could never parse to the right host).
			u = bl.Url.Scheme + ":" + u
		} else if !strings.HasPrefix(u, "http") {
			// absolute ("/x") and relative ("x") paths are joined with the
			// base URL identically, so the two original branches are merged
			u = pkg.URLJoin(pool.BaseURL, u)
		}
		if _, ok := pool.urls[u]; ok {
			// already seen: just bump the hit counter
			pool.urls[u]++
			continue
		}
		// dedup via map: only brand-new URLs are considered for queueing
		pool.urls[u] = 1
		if bl.ReqDepth >= maxCrawl {
			continue
		}
		parsed, err := url.Parse(u)
		if err != nil {
			continue
		}
		if parsed.Host != bl.Url.Host {
			// restrict scope to the current site, so we never crawl elsewhere
			continue
		}
		pool.wg.Add(1)
		pool.additionCh <- &Unit{
			path:     parsed.Path,
			source:   CrawlSource,
			frontUrl: bl.UrlString,
			depth:    bl.ReqDepth + 1,
		}
	}
}
// doCheck aborts the pool once accumulated errors exceed BreakThreshold;
// otherwise it schedules a check probe on the dedicated check channel.
func (pool *Pool) doCheck() {
	if pool.failedCount > pool.BreakThreshold {
		// too many errors: persist progress, cancel the pool and mark it failed
		pool.recover()
		pool.cancel()
		pool.isFailed = true
		return
	}
	// both spray modes send on the same independent check channel (it exists
	// to keep check traffic from competing with redirect/crawl units); the two
	// original branches had identical bodies and are merged here
	if pool.Mod == pkg.HostSpray || pool.Mod == pkg.PathSpray {
		pool.checkCh <- CheckSource
	}
}
func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) { func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) { if _, ok := pool.baselines[bl.Status]; !ok && IntsContains(FuzzyStatus, bl.Status) {
bl.Collect() bl.Collect()
@ -447,12 +502,12 @@ func (pool *Pool) addFuzzyBaseline(bl *pkg.Baseline) {
} }
} }
func (pool *Pool) PutToInvalid(bl *pkg.Baseline, reason string) { func (pool *Pool) putToInvalid(bl *pkg.Baseline, reason string) {
bl.IsValid = false bl.IsValid = false
pool.OutputCh <- bl pool.OutputCh <- bl
} }
func (pool *Pool) PutToFuzzy(bl *pkg.Baseline) { func (pool *Pool) putToFuzzy(bl *pkg.Baseline) {
bl.IsFuzzy = true bl.IsFuzzy = true
pool.FuzzyCh <- bl pool.FuzzyCh <- bl
} }
@ -474,5 +529,6 @@ func (pool *Pool) Close() {
time.Sleep(time.Duration(100) * time.Millisecond) time.Sleep(time.Duration(100) * time.Millisecond)
} }
close(pool.tempCh) close(pool.tempCh)
close(pool.additionCh)
pool.bar.Close() pool.bar.Close()
} }

View File

@ -72,6 +72,7 @@ type Runner struct {
CheckOnly bool CheckOnly bool
Force bool Force bool
IgnoreWaf bool IgnoreWaf bool
Crawl bool
} }
func (r *Runner) PrepareConfig() *pkg.Config { func (r *Runner) PrepareConfig() *pkg.Config {
@ -90,6 +91,7 @@ func (r *Runner) PrepareConfig() *pkg.Config {
FilterExpr: r.FilterExpr, FilterExpr: r.FilterExpr,
RecuExpr: r.RecursiveExpr, RecuExpr: r.RecursiveExpr,
IgnoreWaf: r.IgnoreWaf, IgnoreWaf: r.IgnoreWaf,
Crawl: r.Crawl,
} }
if config.Mod == pkg.PathSpray { if config.Mod == pkg.PathSpray {
config.ClientType = ihttp.FAST config.ClientType = ihttp.FAST

View File

@ -51,6 +51,7 @@ const (
InitRandomSource InitRandomSource
InitIndexSource InitIndexSource
RedirectSource RedirectSource
CrawlSource
WordSource WordSource
WafSource WafSource
) )
@ -60,15 +61,14 @@ func newUnit(path string, source sourceType) *Unit {
} }
func newUnitWithNumber(path string, source sourceType, number int) *Unit { func newUnitWithNumber(path string, source sourceType, number int) *Unit {
return &Unit{number: number, path: path, source: source} return &Unit{path: path, source: source}
} }
type Unit struct { type Unit struct {
number int
path string path string
source sourceType source sourceType
frontUrl string frontUrl string
reCount int // redirect number depth int // redirect depth
} }
type Task struct { type Task struct {

View File

@ -8,6 +8,7 @@ import (
"github.com/chainreactors/parsers" "github.com/chainreactors/parsers"
"github.com/chainreactors/spray/pkg/ihttp" "github.com/chainreactors/spray/pkg/ihttp"
"net/url" "net/url"
"path"
"strconv" "strconv"
"strings" "strings"
) )
@ -84,7 +85,9 @@ type Baseline struct {
Reason string `json:"reason"` Reason string `json:"reason"`
IsValid bool `json:"valid"` IsValid bool `json:"valid"`
IsFuzzy bool `json:"fuzzy"` IsFuzzy bool `json:"fuzzy"`
URLs []string `json:"urls"`
RecuDepth int `json:"-"` RecuDepth int `json:"-"`
ReqDepth int `json:"depth"`
Recu bool `json:"-"` Recu bool `json:"-"`
*parsers.Hashes *parsers.Hashes
} }
@ -106,6 +109,64 @@ func (bl *Baseline) Collect() {
bl.Frameworks = FingerDetect(string(bl.Raw)) bl.Frameworks = FingerDetect(string(bl.Raw))
} }
// CollectURL extracts candidate URLs from the response body using the
// package-level JS and generic URL regexps, filters unwanted matches
// (unparsable URLs, excluded hosts, and — for the generic set — static-asset
// extensions) and appends the survivors to bl.URLs.
func (bl *Baseline) CollectURL() {
	if len(bl.Body) == 0 {
		return
	}
	// convert once; the original re-converted bl.Body for every regexp
	body := string(bl.Body)

	for _, reg := range JSRegexps {
		for _, match := range reg.FindAllStringSubmatch(body, -1) {
			u := firstSubmatch(match)
			if u == "" {
				continue
			}
			parsed, err := url.Parse(u)
			if err != nil || matchBadScoop(parsed.Host) {
				continue
			}
			bl.URLs = append(bl.URLs, u)
		}
	}

	for _, reg := range URLRegexps {
		for _, match := range reg.FindAllStringSubmatch(body, -1) {
			u := firstSubmatch(match)
			if u == "" {
				continue
			}
			parsed, err := url.Parse(u)
			if err != nil || matchBadExt(path.Ext(parsed.Path)) || matchBadScoop(parsed.Host) {
				continue
			}
			bl.URLs = append(bl.URLs, u)
		}
	}
}

// firstSubmatch returns the first non-empty capture group of a regexp match.
// The href|action regexp carries two groups and only one is populated per
// match; the original code always read group 1 and so appended "" for every
// action= match.
func firstSubmatch(match []string) string {
	for _, g := range match[1:] {
		if g != "" {
			return g
		}
	}
	return ""
}

// matchBadExt reports whether ext is one of the filtered static-asset extensions.
func matchBadExt(ext string) bool {
	for _, e := range BadExt {
		if e == ext {
			return true
		}
	}
	return false
}

// matchBadScoop reports whether host is an explicitly excluded host.
func matchBadScoop(host string) bool {
	for _, scoop := range BadScoop {
		if scoop == host {
			return true
		}
	}
	return false
}
// Compare // Compare
// if totally equal return 1 // if totally equal return 1
// if maybe equal return 0 // if maybe equal return 0
@ -186,6 +247,8 @@ func (bl *Baseline) Get(key string) string {
return bl.Extracteds.String() return bl.Extracteds.String()
case "frame", "framework": case "frame", "framework":
return bl.Frameworks.String() return bl.Frameworks.String()
case "full":
return bl.String()
default: default:
return "" return ""
} }
@ -256,9 +319,9 @@ func (bl *Baseline) ColorString() string {
line.WriteString(" - ") line.WriteString(" - ")
line.WriteString(logs.GreenBold(strconv.Itoa(bl.Status))) line.WriteString(logs.GreenBold(strconv.Itoa(bl.Status)))
line.WriteString(" - ") line.WriteString(" - ")
line.WriteString(logs.Blue(strconv.Itoa(bl.BodyLength))) line.WriteString(logs.YellowBold(strconv.Itoa(bl.BodyLength)))
line.WriteString(" - ") line.WriteString(" - ")
line.WriteString(logs.Blue(strconv.Itoa(int(bl.Spended)) + "ms")) line.WriteString(logs.YellowBold(strconv.Itoa(int(bl.Spended)) + "ms"))
line.WriteString(logs.GreenLine(bl.Additional("title"))) line.WriteString(logs.GreenLine(bl.Additional("title")))
line.WriteString(logs.Blue(bl.Frameworks.String())) line.WriteString(logs.Blue(bl.Frameworks.String()))
line.WriteString(logs.Blue(bl.Extracteds.String())) line.WriteString(logs.Blue(bl.Extracteds.String()))
@ -267,6 +330,12 @@ func (bl *Baseline) ColorString() string {
line.WriteString(logs.CyanLine(bl.RedirectURL)) line.WriteString(logs.CyanLine(bl.RedirectURL))
line.WriteString(" ") line.WriteString(" ")
} }
if len(bl.URLs) > 0 {
line.WriteString("\n")
}
for _, u := range bl.URLs {
line.WriteString("\t" + u + "\n")
}
return line.String() return line.String()
} }
@ -308,6 +377,12 @@ func (bl *Baseline) String() string {
line.WriteString(bl.RedirectURL) line.WriteString(bl.RedirectURL)
line.WriteString(" ") line.WriteString(" ")
} }
if len(bl.URLs) > 0 {
line.WriteString("\n")
}
for _, u := range bl.URLs {
line.WriteString("\t" + u + "\n")
}
return line.String() return line.String()
} }

View File

@ -38,4 +38,5 @@ type Config struct {
FuzzyCh chan *Baseline FuzzyCh chan *Baseline
Fuzzy bool Fuzzy bool
IgnoreWaf bool IgnoreWaf bool
Crawl bool
} }

View File

@ -1,17 +1,35 @@
package pkg package pkg
import ( import (
"fmt"
"github.com/chainreactors/gogo/v2/pkg/fingers" "github.com/chainreactors/gogo/v2/pkg/fingers"
"github.com/chainreactors/gogo/v2/pkg/utils" "github.com/chainreactors/gogo/v2/pkg/utils"
"github.com/chainreactors/ipcs" "github.com/chainreactors/ipcs"
"github.com/go-dedup/simhash"
"math/rand" "math/rand"
"os" "os"
"regexp"
"strings"
"time" "time"
"unsafe" "unsafe"
) )
var (
	// Md5Fingers and Mmh3Fingers map favicon hashes (md5 / mmh3) to framework
	// names; both are populated by LoadTemplates.
	Md5Fingers  map[string]string = make(map[string]string)
	Mmh3Fingers map[string]string = make(map[string]string)
	// ActivePath collects fingerprint probe paths (rule.SendDataStr) gathered
	// while loading fingers in LoadTemplates.
	ActivePath []string
	Fingers    fingers.Fingers
	// JSRegexps match references to .js files inside response bodies; matches
	// feed Baseline.CollectURL.
	// NOTE(review): the character classes use `,^` between entries — inside
	// [...] both `,` and `^` are literal characters, so these separators are
	// matched literally; confirm they were intended rather than meant as
	// delimiters between excluded characters.
	JSRegexps []*regexp.Regexp = []*regexp.Regexp{
		regexp.MustCompile(".(https{0,1}:[^\\s,^',^,^\",^”,^>,^<,^;,^(,^),^|,^*,^\\[]{2,250}?[^=,^*,^\\s,^',^,^\",^”,^>,^<,^:,^;,^*,^|,^(,^),^\\[]{3}[.]js)"),
		regexp.MustCompile("[\",',,“]\\s{0,6}(/{0,1}[^\\s,^',^,^\",^”,^|,^>,^<,^:,^;,^*,^(,^\\),^\\[]{2,250}?[^=,^*,^\\s,^',^,^|,^\",^”,^>,^<,^:,^;,^*,^(,^),^\\[]{3}[.]js)"),
		regexp.MustCompile("=\\s{0,6}[\",',,”]{0,1}\\s{0,6}(/{0,1}[^\\s,^',^,^\",^”,^|,^>,^<,^;,^*,^(,^),^\\[]{2,250}?[^=,^*,^\\s,^',^,^\",^”,^>,^|,^<,^:,^;,^*,^(,^),^\\[]{3}[.]js)"),
	}
	// URLRegexps match generic URL references (quoted/assigned absolute URLs,
	// rooted paths, and href/action attribute values). The href|action pattern
	// has two capture groups — only one is populated per match.
	URLRegexps []*regexp.Regexp = []*regexp.Regexp{
		regexp.MustCompile("[\",',,“]\\s{0,6}(https{0,1}:[^\\s,^',^,^\",^”,^>,^<,^),^(]{2,250}?)\\s{0,6}[\",',,“]"),
		regexp.MustCompile("=\\s{0,6}(https{0,1}:[^\\s,^',^,^\",^”,^>,^<,^),^(]{2,250})"),
		regexp.MustCompile("[\",',,“]\\s{0,6}([#,.]{0,2}/[^\\s,^',^,^\",^”,^>,^<,^:,^),^(]{2,250}?)\\s{0,6}[\",',,“]"),
		regexp.MustCompile("href\\s{0,6}=\\s{0,6}[\",',,“]{0,1}\\s{0,6}([^\\s,^',^,^\",^“,^>,^<,^,^+),^(]{2,250})|action\\s{0,6}=\\s{0,6}[\",',,“]{0,1}\\s{0,6}([^\\s,^',^,^\",^“,^>,^<,^,^+),^(]{2,250})"),
	}
)
func HasStdin() bool { func HasStdin() bool {
stat, err := os.Stdin.Stat() stat, err := os.Stdin.Stat()
if err != nil { if err != nil {
@ -24,11 +42,6 @@ func HasStdin() bool {
return isPipedFromChrDev || isPipedFromFIFO return isPipedFromChrDev || isPipedFromFIFO
} }
func Simhash(raw []byte) string {
sh := simhash.NewSimhash()
return fmt.Sprintf("%x", sh.GetSimhash(sh.NewWordFeatureSet(raw)))
}
const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
var src = rand.NewSource(time.Now().UnixNano()) var src = rand.NewSource(time.Now().UnixNano())
@ -80,12 +93,6 @@ func RandHost() string {
return *(*string)(unsafe.Pointer(&b)) return *(*string)(unsafe.Pointer(&b))
} }
var (
Md5Fingers map[string]string = make(map[string]string)
Mmh3Fingers map[string]string = make(map[string]string)
Fingers fingers.Fingers
)
func LoadTemplates() error { func LoadTemplates() error {
var err error var err error
Fingers, err = fingers.LoadFingers(LoadConfig("http")) Fingers, err = fingers.LoadFingers(LoadConfig("http"))
@ -102,6 +109,9 @@ func LoadTemplates() error {
for _, f := range Fingers { for _, f := range Fingers {
for _, rule := range f.Rules { for _, rule := range f.Rules {
if rule.SendDataStr != "" {
ActivePath = append(ActivePath, rule.SendDataStr)
}
if rule.Favicon != nil { if rule.Favicon != nil {
for _, mmh3 := range rule.Favicon.Mmh3 { for _, mmh3 := range rule.Favicon.Mmh3 {
Mmh3Fingers[mmh3] = f.Name Mmh3Fingers[mmh3] = f.Name
@ -127,3 +137,21 @@ func FingerDetect(content string) Frameworks {
} }
return frames return frames
} }
var (
	// BadExt lists file extensions excluded from crawl results (static assets
	// and frontend source files).
	// NOTE(review): the bare "," entry looks like a typo — as written it only
	// filters URLs whose path extension is literally "," — confirm the
	// intended value.
	BadExt = []string{".js", ".css", ".scss", ",", ".jpeg", ".jpg", ".png", ".gif", ".ico", ".svg", ".vue", ".ts"}
	//BadURL = []string{".js?", ".css?", ".jpeg?", ".jpg?", ".png?", ".gif?", "github.com", "www.w3.org", "example.com", "<", ">", "{", "}", "[", "]", "|", "^", ";", "/js/", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*", "\\n"}
	// BadScoop hosts are never collected (spec/documentation placeholder domains).
	BadScoop = []string{"www.w3.org", "example.com"}
)
// URLJoin concatenates base and uri with exactly one "/" between them,
// regardless of whether base ends with or uri starts with a slash. No other
// characters of either part are touched.
func URLJoin(base, uri string) string {
	// strip at most one boundary slash from each side, then re-insert one
	left := strings.TrimSuffix(base, "/")
	right := strings.TrimPrefix(uri, "/")
	return left + "/" + right
}