重写了目录拼接的所有逻辑, 发现做到安全的目录拼接有些困难, 只能做到尽可能安全

This commit is contained in:
M09Ic 2023-01-10 23:44:03 +08:00
parent 0b8fed7e80
commit b120d703b8
6 changed files with 232 additions and 148 deletions

View File

@ -86,6 +86,7 @@ type ModeOptions struct {
Recursive string `long:"recursive" default:"current.IsDir()" description:"String,custom recursive rule, e.g.: --recursive current.IsDir()"`
Depth int `long:"depth" default:"0" description:"Int, recursive depth"`
CrawlDepth int `long:"crawl-depth" default:"3" description:"Int, crawl depth"`
CrawlScope string `long:"crawl-scope" description:"Int, crawl scope (todo)"`
CheckPeriod int `long:"check-period" default:"200" description:"Int, check period when request"`
ErrPeriod int `long:"error-period" default:"10" description:"Int, check period when error"`
BreakThreshold int `long:"error-threshold" default:"20" description:"Int, break when the error exceeds the threshold "`
@ -225,15 +226,16 @@ func (opt *Option) PrepareRunner() (*Runner, error) {
logs.Log.Importantf("Loaded %d word from %s", len(dicts[i]), f)
}
if len(opt.Dictionaries) == 0 && opt.Word == "" {
// 用来仅使用高级功能下, 防止无字典报错.
opt.Word = "/"
} else {
opt.Word = "{?"
for i, _ := range dicts {
opt.Word += strconv.Itoa(i)
if opt.Word == "" {
if len(opt.Dictionaries) == 0 {
opt.Word = "/"
} else {
opt.Word = "{?"
for i, _ := range dicts {
opt.Word += strconv.Itoa(i)
}
opt.Word += "}"
}
opt.Word += "}"
}
if opt.Suffixes != nil {

View File

@ -39,6 +39,8 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
pctx, cancel := context.WithCancel(ctx)
pool := &Pool{
Config: config,
base: u.Scheme + "://" + u.Hostname(),
isDir: strings.HasSuffix(config.BaseURL, "/"),
url: u,
ctx: pctx,
cancel: cancel,
@ -54,6 +56,15 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
failedCount: 1,
}
// 格式化dir, 保证至少有一个"/"
if pool.isDir {
pool.dir = pool.url.Path
} else if pool.url.Path == "" {
pool.dir = "/"
} else {
pool.dir = Dir(pool.url.Path)
}
p, _ := ants.NewPoolWithFunc(config.Thread, pool.Invoke)
pool.reqPool = p
@ -130,6 +141,9 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
type Pool struct {
*pkg.Config
base string // url的根目录, 在爬虫或者redirect时, 会需要用到根目录进行拼接
isDir bool // url是否以/结尾
dir string
url *url.URL
Statistor *pkg.Statistor
client *ihttp.Client
@ -159,7 +173,7 @@ func (pool *Pool) Init() error {
// 分成两步是为了避免闭包的线程安全问题
pool.initwg.Add(2)
pool.reqPool.Invoke(newUnit("", InitIndexSource))
pool.reqPool.Invoke(newUnit(pkg.RandPath(), InitRandomSource))
pool.reqPool.Invoke(newUnit(pool.safePath(pkg.RandPath()), InitRandomSource))
pool.initwg.Wait()
if pool.index.ErrString != "" {
return fmt.Errorf(pool.index.String())
@ -206,7 +220,7 @@ func (pool *Pool) genReq(s string) (*ihttp.Request, error) {
if pool.Mod == pkg.HostSpray {
return ihttp.BuildHostRequest(pool.ClientType, pool.BaseURL, s)
} else if pool.Mod == pkg.PathSpray {
return ihttp.BuildPathRequest(pool.ClientType, pool.BaseURL, s)
return ihttp.BuildPathRequest(pool.ClientType, pool.base, s)
}
return nil, fmt.Errorf("unknown mod")
}
@ -258,13 +272,13 @@ Loop:
}
pool.wg.Add(1)
pool.reqPool.Invoke(newUnit(u, WordSource))
pool.reqPool.Invoke(newUnit(pool.safePath(u), WordSource)) // 原样的目录拼接, 输入了几个"/"就是几个, 适配java的目录解析
case source := <-pool.checkCh:
pool.Statistor.CheckNumber++
if pool.Mod == pkg.HostSpray {
pool.reqPool.Invoke(newUnit(pkg.RandHost(), source))
} else if pool.Mod == pkg.PathSpray {
pool.reqPool.Invoke(newUnit(safePath(pool.BaseURL, pkg.RandPath()), source))
pool.reqPool.Invoke(newUnit(pool.safePath(pkg.RandPath()), source))
}
case unit, ok := <-pool.additionCh:
if !ok {
@ -306,32 +320,29 @@ func (pool *Pool) Invoke(v interface{}) {
if reqerr != nil && reqerr != fasthttp.ErrBodyTooLarge {
pool.failedCount++
atomic.AddInt32(&pool.Statistor.FailedNumber, 1)
bl = &pkg.Baseline{UrlString: pool.BaseURL + unit.path, IsValid: false, ErrString: reqerr.Error(), Reason: ErrRequestFailed.Error()}
bl = &pkg.Baseline{UrlString: pool.base + unit.path, IsValid: false, ErrString: reqerr.Error(), Reason: ErrRequestFailed.Error()}
pool.failedBaselines = append(pool.failedBaselines, bl)
} else {
if unit.source <= 3 || unit.source == CrawlSource || unit.source == CommonFileSource {
// 一些高优先级的source, 将跳过PreCompare
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else if pool.MatchExpr != nil {
// 如果自定义了match函数, 则所有数据送入tempch中
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else if err = pool.PreCompare(resp); err == nil {
// 通过预对比跳过一些无用数据, 减少性能消耗
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else {
if pool.MatchExpr != nil {
// 如果非wordsource, 或自定义了match函数, 则所有数据送入tempch中
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else if err = pool.PreCompare(resp); err == nil {
// 通过预对比跳过一些无用数据, 减少性能消耗
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
if err != ErrRedirect && bl.RedirectURL != "" {
if bl.RedirectURL != "" && !strings.HasPrefix(bl.RedirectURL, "http") {
bl.RedirectURL = "/" + strings.TrimLeft(bl.RedirectURL, "/")
bl.RedirectURL = pool.BaseURL + bl.RedirectURL
}
pool.wg.Add(1)
pool.doRedirect(bl, unit.depth)
}
} else {
bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error())
}
bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error())
}
}
// 手动处理重定向
if bl.IsValid && unit.source != CheckSource && bl.RedirectURL != "" {
pool.wg.Add(1)
pool.doRedirect(bl, unit.depth)
}
if ihttp.DefaultMaxBodySize != 0 && bl.BodyLength > ihttp.DefaultMaxBodySize {
bl.ExceedLength = true
}
@ -484,7 +495,7 @@ func (pool *Pool) Upgrade(bl *pkg.Baseline) error {
rurl, err := url.Parse(bl.RedirectURL)
if err == nil && rurl.Hostname() == bl.Url.Hostname() && bl.Url.Scheme == "http" && rurl.Scheme == "https" {
logs.Log.Infof("baseurl %s upgrade http to https, reinit", pool.BaseURL)
pool.BaseURL = strings.Replace(pool.BaseURL, "http", "https", 1)
pool.base = strings.Replace(pool.BaseURL, "http", "https", 1)
pool.url.Scheme = "https"
// 重新初始化
err = pool.Init()
@ -501,20 +512,19 @@ func (pool *Pool) doRedirect(bl *pkg.Baseline, depth int) {
if depth >= MaxRedirect {
return
}
reURL := FormatURL(bl.Url.Path, bl.RedirectURL)
if uu, err := url.Parse(bl.RedirectURL); err == nil && uu.Hostname() == pool.index.Url.Hostname() {
pool.wg.Add(1)
go pool.addAddition(&Unit{
path: uu.Path,
source: RedirectSource,
frontUrl: bl.UrlString,
depth: depth + 1,
})
}
pool.wg.Add(1)
go pool.addAddition(&Unit{
path: reURL,
source: RedirectSource,
frontUrl: bl.UrlString,
depth: depth + 1,
})
}
func (pool *Pool) doCrawl(bl *pkg.Baseline) {
if !pool.Crawl {
if !pool.Crawl || bl.ReqDepth >= MaxCrawl {
pool.wg.Done()
return
}
@ -523,46 +533,12 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
pool.wg.Done()
return
}
go func() {
defer pool.wg.Done()
for _, u := range bl.URLs {
if strings.HasPrefix(u, "//") {
parsed, err := url.Parse(u)
if err != nil {
continue
}
if parsed.Host != bl.Url.Host || len(parsed.Path) <= 1 {
continue
}
u = parsed.Path
} else if strings.HasPrefix(u, "/") {
// 绝对目录拼接
// 不需要进行处理, 用来跳过下面的判断
} else if strings.HasPrefix(u, "./") {
// "./"相对目录拼接
if bl.Dir {
u = pkg.URLJoin(bl.Url.Path, u[2:])
} else {
u = pkg.URLJoin(path.Dir(bl.Url.Path), u[2:])
}
} else if strings.HasPrefix(u, "../") {
u = path.Join(path.Dir(bl.Url.Path), u)
} else if !strings.HasPrefix(u, "http") {
// 相对目录拼接
if bl.Dir {
u = pkg.URLJoin(bl.Url.Path, u)
} else {
u = pkg.URLJoin(path.Dir(bl.Url.Path), u)
}
} else {
parsed, err := url.Parse(u)
if err != nil {
continue
}
if parsed.Host != bl.Url.Host || len(parsed.Path) <= 1 {
continue
}
u = parsed.Path
if u = FormatURL(bl.Url.Path, u); u == "" || u == pool.url.Path {
continue
}
pool.locker.Lock()
@ -571,14 +547,12 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
} else {
// 通过map去重, 只有新的url才会进入到该逻辑
pool.urls[u] = 1
if bl.ReqDepth < MaxCrawl {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: u[1:],
source: CrawlSource,
depth: bl.ReqDepth + 1,
})
}
pool.wg.Add(1)
pool.addAddition(&Unit{
path: u,
source: CrawlSource,
depth: bl.ReqDepth + 1,
})
}
pool.locker.Unlock()
}
@ -601,7 +575,7 @@ func (pool *Pool) doRule(bl *pkg.Baseline) {
for u := range rule.RunAsStream(pool.AppendRule.Expressions, path.Base(bl.Path)) {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: path.Join(path.Dir(bl.Path), u),
path: Dir(bl.Url.Path) + u,
source: RuleSource,
})
}
@ -613,7 +587,7 @@ func (pool *Pool) doActive() {
for _, u := range pkg.ActivePath {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, u),
path: pool.dir + u[1:],
source: ActiveSource,
})
}
@ -629,7 +603,7 @@ func (pool *Pool) doBak() {
for w := range worder.C {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, w),
path: pool.dir + w,
source: BakSource,
})
}
@ -642,7 +616,7 @@ func (pool *Pool) doBak() {
for w := range worder.C {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, w),
path: pool.dir + w,
source: BakSource,
})
}
@ -653,7 +627,7 @@ func (pool *Pool) doCommonFile() {
for _, u := range mask.SpecialWords["common_file"] {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, u),
path: pool.dir + u,
source: CommonFileSource,
})
}
@ -719,3 +693,17 @@ func (pool *Pool) Close() {
close(pool.additionCh)
pool.bar.Close()
}
// safePath joins an auto-generated probe path (init, check, common-file
// and similar sources) onto the pool's directory, guaranteeing that no
// "//" appears at the seam.
func (pool *Pool) safePath(u string) string {
	if u == "" {
		// Nothing to append: fall back to the pool URL's own path.
		return pool.url.Path
	}
	// pool.dir is normalized to end with "/" (see NewPool), so strip at
	// most one leading "/" from u before concatenating.
	return pool.dir + strings.TrimPrefix(u, "/")
}

View File

@ -6,6 +6,8 @@ import (
"github.com/chainreactors/words/mask"
"github.com/chainreactors/words/rule"
"io/ioutil"
"net/url"
"path"
"strings"
)
@ -107,14 +109,107 @@ func loadRuleWithFiles(ruleFiles []string, filter string) ([]rule.Expression, er
return rule.Compile(rules.String(), filter).Expressions, nil
}
func safePath(url, path string) string {
urlSlash := strings.HasSuffix(url, "/")
pathSlash := strings.HasPrefix(path, "/")
if !urlSlash && !pathSlash {
return "/" + path
} else if urlSlash && pathSlash {
return path[1:]
// relaPath joins the relative reference u onto base. path.Join is
// deliberately avoided because it collapses repeated slashes, and routes
// containing "////" may be meaningful and must survive the join.
//
// Expected results (base, u -> result):
//
//	""   /a  ->  /a
//	""   a   ->  /a
//	/    ""  ->  /
//	/a/  b   ->  /a/b
//	/a/  /b  ->  /a/b
//	/a   b   ->  /b
//	/a   /b  ->  /b
func relaPath(base, u string) string {
	if u == "" {
		return base
	}
	pathSlash := strings.HasPrefix(u, "/")
	if base == "" {
		// BUG FIX: this branch previously returned u[1:], stripping the
		// leading "/" and yielding "a" for ("", "/a"); per the table
		// above the result must stay rooted at "/".
		if pathSlash {
			return u
		}
		return "/" + u
	}
	if strings.HasSuffix(base, "/") {
		// base is already a directory: append u directly, dropping at
		// most one leading "/" so no "//" forms at the seam.
		if pathSlash {
			return base + u[1:]
		}
		return base + u
	}
	// base names a file-like segment: resolve relative to its directory.
	if pathSlash {
		return Dir(base) + u[1:]
	}
	return Dir(base) + u
}

// Dir safely extracts the directory portion of u, keeping every "/"
// intact (it never collapses "//"); it is NOT a parent-directory
// operation.
//
// Expected results:
//
//	/a   ->  /
//	/a/  ->  /a/
//	a/   ->  a/
//	aaa  ->  /
func Dir(u string) string {
	if strings.HasSuffix(u, "/") {
		// Already a directory.
		return u
	}
	i := strings.LastIndex(u, "/")
	if i == -1 {
		// No slash at all: treat as relative to the root.
		return "/"
	}
	return u[:i+1]
}
// FormatURL normalizes a collected link u into a site-relative path,
// using base (the current page's path) to resolve relative references.
// It returns "" for links that should be discarded: unparsable URLs and
// absolute URLs whose path is empty or just "/".
//
// NOTE(review): for absolute/protocol-relative URLs only the path is
// kept — the host is not compared against the current target here;
// presumably the caller filters foreign hosts. Confirm at call sites.
func FormatURL(base, u string) string {
	switch {
	case strings.HasPrefix(u, "http"), strings.HasPrefix(u, "//"):
		// Absolute or protocol-relative URL: keep only its path,
		// skipping "/" and empty paths.
		parsed, err := url.Parse(u)
		if err != nil || len(parsed.Path) <= 1 {
			return ""
		}
		return parsed.Path
	case strings.HasPrefix(u, "/"):
		// Already an absolute path: no processing needed.
		return u
	case strings.HasPrefix(u, "./"):
		// Explicit same-directory reference.
		return relaPath(base, u[2:])
	case strings.HasPrefix(u, "../"):
		// Parent-directory reference; path.Join resolves the "..".
		return path.Join(Dir(base), u)
	default:
		// Plain relative reference.
		return relaPath(base, u)
	}
}
//func Join(base, u string) string {
// // //././ ../../../a
// base = Dir(base)
// for strings.HasPrefix(u, "../") {
// u = u[3:]
// for strings.HasSuffix(base, "/") {
// // 去掉多余的"/"
// base = base[:len(base)-2]
// }
// if i := strings.LastIndex(base, "/"); i == -1 {
// return "/"
// } else {
// return base[:i+1]
// }
// }
// return base + u
//}

View File

@ -19,15 +19,6 @@ func NewBaseline(u, host string, resp *ihttp.Response) *Baseline {
Status: resp.StatusCode(),
IsValid: true,
}
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
bl.Url = uu
}
bl.Dir = bl.IsDir()
if bl.Url.Host != host {
bl.Host = host
}
header := resp.Header()
bl.Header = make([]byte, len(header))
copy(bl.Header, header)
@ -53,6 +44,16 @@ func NewBaseline(u, host string, resp *ihttp.Response) *Baseline {
}
bl.Raw = append(bl.Header, bl.Body...)
bl.RedirectURL = resp.GetHeader("Location")
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
bl.Url = uu
}
bl.Dir = bl.IsDir()
if bl.Url.Host != host {
bl.Host = host
}
return bl
}
@ -64,10 +65,16 @@ func NewInvalidBaseline(u, host string, resp *ihttp.Response, reason string) *Ba
Reason: reason,
}
// 无效数据也要读取body, 否则keep-alive不生效
resp.Body()
bl.BodyLength = resp.ContentLength()
bl.RedirectURL = string(resp.GetHeader("Location"))
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
bl.Url = uu
return bl
}
bl.Dir = bl.IsDir()
@ -75,11 +82,6 @@ func NewInvalidBaseline(u, host string, resp *ihttp.Response, reason string) *Ba
bl.Host = host
}
// 无效数据也要读取body, 否则keep-alive不生效
resp.Body()
bl.BodyLength = resp.ContentLength()
bl.RedirectURL = string(resp.GetHeader("Location"))
return bl
}
@ -152,7 +154,7 @@ func (bl *Baseline) CollectURL() {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
u[1] = formatURL(u[1])
if !filterJs(u[1]) {
if u[1] != "" && !filterJs(u[1]) {
bl.URLs = append(bl.URLs, u[1])
}
}
@ -162,7 +164,7 @@ func (bl *Baseline) CollectURL() {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
u[1] = formatURL(u[1])
if !filterUrl(u[1]) {
if u[1] != "" && !filterUrl(u[1]) {
bl.URLs = append(bl.URLs, u[1])
}
}
@ -314,12 +316,11 @@ func (bl *Baseline) Format(probes []string) string {
func (bl *Baseline) ColorString() string {
var line strings.Builder
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "]"))
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "] "))
if bl.FrontURL != "" {
line.WriteString(logs.CyanLine(bl.FrontURL))
line.WriteString(" --> ")
}
line.WriteString(" ")
line.WriteString(logs.GreenLine(bl.UrlString))
if bl.Host != "" {
line.WriteString(" (" + bl.Host + ")")
@ -368,12 +369,11 @@ func (bl *Baseline) ColorString() string {
func (bl *Baseline) String() string {
var line strings.Builder
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "]"))
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "] "))
if bl.FrontURL != "" {
line.WriteString(bl.FrontURL)
line.WriteString(" --> ")
}
line.WriteString(" ")
line.WriteString(bl.UrlString)
if bl.Host != "" {
line.WriteString(" (" + bl.Host + ")")

View File

@ -3,16 +3,15 @@ package ihttp
import (
"github.com/valyala/fasthttp"
"net/http"
"strings"
)
// BuildPathRequest constructs a GET request for base+path using the
// selected client implementation (fasthttp or net/http). The two parts
// are concatenated verbatim; the caller is responsible for slash
// handling between them.
func BuildPathRequest(clientType int, base, path string) (*Request, error) {
	target := base + path
	if clientType == FAST {
		freq := fasthttp.AcquireRequest()
		freq.SetRequestURI(target)
		return &Request{FastRequest: freq, ClientType: FAST}, nil
	}
	sreq, err := http.NewRequest("GET", target, nil)
	return &Request{StandardRequest: sreq, ClientType: STANDARD}, err
}
@ -75,15 +74,3 @@ func (r *Request) Host() string {
return ""
}
}
// safeUrlJoin concatenates base and uri, inserting a "/" only when
// neither side supplies one. It does not collapse a doubled "/" when
// both sides have one.
func safeUrlJoin(base, uri string) string {
	if uri == "" {
		// Empty uri: request the base URL unchanged.
		return base
	}
	if strings.HasSuffix(base, "/") || strings.HasPrefix(uri, "/") {
		return base + uri
	}
	return base + "/" + uri
}

View File

@ -29,8 +29,8 @@ var (
}
URLRegexps []*regexp.Regexp = []*regexp.Regexp{
regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s',"”><;()|*\[]{2,250})`),
regexp.MustCompile(`["']([^\s',"”><;()|*\[]{2,250}\.[a-zA-Z]\w{1,3})["']`),
regexp.MustCompile(`["'](https?:[^\s',"”><;()|*\[]{2,250}?\.[^\s',"”><;()|*\[]{2,250}?)["']`),
regexp.MustCompile(`["']([^\s',"”><.@;()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
regexp.MustCompile(`["'](https?:[^\s',"”><;()@|*\[]{2,250}?\.[^\s',"”><;()|*\[]{2,250}?)["']`),
regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s',"”><;()|*\[]{2,250}?)\s{0,6}["']`),
regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'"“><)(]{2,250})`),
}
@ -227,7 +227,7 @@ func FingerDetect(content string) Frameworks {
var (
BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4"}
BadURL = []string{";", "}", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*"}
BadURL = []string{";", "}", "webpack://", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path"}
)
func filterJs(u string) bool {
@ -291,17 +291,29 @@ func commonFilter(u string) bool {
return false
}
// URLJoin concatenates base and uri with exactly one "/" between them:
// a doubled slash is collapsed and a missing one is inserted.
func URLJoin(base, uri string) string {
	switch {
	case strings.HasSuffix(base, "/") && strings.HasPrefix(uri, "/"):
		// Both sides provide a slash: drop one.
		return base + uri[1:]
	case strings.HasSuffix(base, "/") || strings.HasPrefix(uri, "/"):
		// Exactly one side provides the slash.
		return base + uri
	default:
		// Neither side has a slash: insert one.
		return base + "/" + uri
	}
}
//func SafeJoin(base, uri string) string {
// baseSlash := strings.HasSuffix(base, "/")
// uriSlash := strings.HasPrefix(uri, "/")
// if (baseSlash && !uriSlash) || (!baseSlash && uriSlash) {
// return base + uri
// } else if baseSlash && uriSlash {
// return base + uri[1:]
// } else {
// return base + "/" + uri
// }
//}
//func SafePath(url, path string) string {
// urlSlash := strings.HasSuffix(url, "/")
// pathSlash := strings.HasPrefix(path, "/")
// if !urlSlash && !pathSlash {
// return "/" + path
// } else if urlSlash && pathSlash {
// return path[1:]
// } else {
// return path
// }
//}
func BakGenerator(domain string) []string {
var possibilities []string