重写了目录拼接的所有逻辑, 发现做到安全的目录拼接有些困难, 只能做到尽可能安全

This commit is contained in:
M09Ic 2023-01-10 23:44:03 +08:00
parent 0b8fed7e80
commit b120d703b8
6 changed files with 232 additions and 148 deletions

View File

@ -86,6 +86,7 @@ type ModeOptions struct {
Recursive string `long:"recursive" default:"current.IsDir()" description:"String,custom recursive rule, e.g.: --recursive current.IsDir()"`
Depth int `long:"depth" default:"0" description:"Int, recursive depth"`
CrawlDepth int `long:"crawl-depth" default:"3" description:"Int, crawl depth"`
CrawlScope string `long:"crawl-scope" description:"Int, crawl scope (todo)"`
CheckPeriod int `long:"check-period" default:"200" description:"Int, check period when request"`
ErrPeriod int `long:"error-period" default:"10" description:"Int, check period when error"`
BreakThreshold int `long:"error-threshold" default:"20" description:"Int, break when the error exceeds the threshold "`
@ -225,15 +226,16 @@ func (opt *Option) PrepareRunner() (*Runner, error) {
logs.Log.Importantf("Loaded %d word from %s", len(dicts[i]), f)
}
if len(opt.Dictionaries) == 0 && opt.Word == "" {
// 用来仅使用高级功能下, 防止无字典报错.
opt.Word = "/"
} else {
opt.Word = "{?"
for i, _ := range dicts {
opt.Word += strconv.Itoa(i)
if opt.Word == "" {
if len(opt.Dictionaries) == 0 {
opt.Word = "/"
} else {
opt.Word = "{?"
for i, _ := range dicts {
opt.Word += strconv.Itoa(i)
}
opt.Word += "}"
}
opt.Word += "}"
}
if opt.Suffixes != nil {

View File

@ -39,6 +39,8 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
pctx, cancel := context.WithCancel(ctx)
pool := &Pool{
Config: config,
base: u.Scheme + "://" + u.Hostname(),
isDir: strings.HasSuffix(config.BaseURL, "/"),
url: u,
ctx: pctx,
cancel: cancel,
@ -54,6 +56,15 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
failedCount: 1,
}
// 格式化dir, 保证至少有一个"/"
if pool.isDir {
pool.dir = pool.url.Path
} else if pool.url.Path == "" {
pool.dir = "/"
} else {
pool.dir = Dir(pool.url.Path)
}
p, _ := ants.NewPoolWithFunc(config.Thread, pool.Invoke)
pool.reqPool = p
@ -130,6 +141,9 @@ func NewPool(ctx context.Context, config *pkg.Config) (*Pool, error) {
type Pool struct {
*pkg.Config
base string // url的根目录, 在爬虫或者redirect时, 会需要用到根目录进行拼接
isDir bool // url是否以/结尾
dir string
url *url.URL
Statistor *pkg.Statistor
client *ihttp.Client
@ -159,7 +173,7 @@ func (pool *Pool) Init() error {
// 分成两步是为了避免闭包的线程安全问题
pool.initwg.Add(2)
pool.reqPool.Invoke(newUnit("", InitIndexSource))
pool.reqPool.Invoke(newUnit(pkg.RandPath(), InitRandomSource))
pool.reqPool.Invoke(newUnit(pool.safePath(pkg.RandPath()), InitRandomSource))
pool.initwg.Wait()
if pool.index.ErrString != "" {
return fmt.Errorf(pool.index.String())
@ -206,7 +220,7 @@ func (pool *Pool) genReq(s string) (*ihttp.Request, error) {
if pool.Mod == pkg.HostSpray {
return ihttp.BuildHostRequest(pool.ClientType, pool.BaseURL, s)
} else if pool.Mod == pkg.PathSpray {
return ihttp.BuildPathRequest(pool.ClientType, pool.BaseURL, s)
return ihttp.BuildPathRequest(pool.ClientType, pool.base, s)
}
return nil, fmt.Errorf("unknown mod")
}
@ -258,13 +272,13 @@ Loop:
}
pool.wg.Add(1)
pool.reqPool.Invoke(newUnit(u, WordSource))
pool.reqPool.Invoke(newUnit(pool.safePath(u), WordSource)) // 原样的目录拼接, 输入了几个"/"就是几个, 适配java的目录解析
case source := <-pool.checkCh:
pool.Statistor.CheckNumber++
if pool.Mod == pkg.HostSpray {
pool.reqPool.Invoke(newUnit(pkg.RandHost(), source))
} else if pool.Mod == pkg.PathSpray {
pool.reqPool.Invoke(newUnit(safePath(pool.BaseURL, pkg.RandPath()), source))
pool.reqPool.Invoke(newUnit(pool.safePath(pkg.RandPath()), source))
}
case unit, ok := <-pool.additionCh:
if !ok {
@ -306,32 +320,29 @@ func (pool *Pool) Invoke(v interface{}) {
if reqerr != nil && reqerr != fasthttp.ErrBodyTooLarge {
pool.failedCount++
atomic.AddInt32(&pool.Statistor.FailedNumber, 1)
bl = &pkg.Baseline{UrlString: pool.BaseURL + unit.path, IsValid: false, ErrString: reqerr.Error(), Reason: ErrRequestFailed.Error()}
bl = &pkg.Baseline{UrlString: pool.base + unit.path, IsValid: false, ErrString: reqerr.Error(), Reason: ErrRequestFailed.Error()}
pool.failedBaselines = append(pool.failedBaselines, bl)
} else {
if unit.source <= 3 || unit.source == CrawlSource || unit.source == CommonFileSource {
// 一些高优先级的source, 将跳过PreCompare
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else if pool.MatchExpr != nil {
// 如果自定义了match函数, 则所有数据送入tempch中
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else if err = pool.PreCompare(resp); err == nil {
// 通过预对比跳过一些无用数据, 减少性能消耗
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else {
if pool.MatchExpr != nil {
// 如果非wordsource, 或自定义了match函数, 则所有数据送入tempch中
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
} else if err = pool.PreCompare(resp); err == nil {
// 通过预对比跳过一些无用数据, 减少性能消耗
bl = pkg.NewBaseline(req.URI(), req.Host(), resp)
if err != ErrRedirect && bl.RedirectURL != "" {
if bl.RedirectURL != "" && !strings.HasPrefix(bl.RedirectURL, "http") {
bl.RedirectURL = "/" + strings.TrimLeft(bl.RedirectURL, "/")
bl.RedirectURL = pool.BaseURL + bl.RedirectURL
}
pool.wg.Add(1)
pool.doRedirect(bl, unit.depth)
}
} else {
bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error())
}
bl = pkg.NewInvalidBaseline(req.URI(), req.Host(), resp, err.Error())
}
}
// 手动处理重定向
if bl.IsValid && unit.source != CheckSource && bl.RedirectURL != "" {
pool.wg.Add(1)
pool.doRedirect(bl, unit.depth)
}
if ihttp.DefaultMaxBodySize != 0 && bl.BodyLength > ihttp.DefaultMaxBodySize {
bl.ExceedLength = true
}
@ -484,7 +495,7 @@ func (pool *Pool) Upgrade(bl *pkg.Baseline) error {
rurl, err := url.Parse(bl.RedirectURL)
if err == nil && rurl.Hostname() == bl.Url.Hostname() && bl.Url.Scheme == "http" && rurl.Scheme == "https" {
logs.Log.Infof("baseurl %s upgrade http to https, reinit", pool.BaseURL)
pool.BaseURL = strings.Replace(pool.BaseURL, "http", "https", 1)
pool.base = strings.Replace(pool.BaseURL, "http", "https", 1)
pool.url.Scheme = "https"
// 重新初始化
err = pool.Init()
@ -501,20 +512,19 @@ func (pool *Pool) doRedirect(bl *pkg.Baseline, depth int) {
if depth >= MaxRedirect {
return
}
reURL := FormatURL(bl.Url.Path, bl.RedirectURL)
if uu, err := url.Parse(bl.RedirectURL); err == nil && uu.Hostname() == pool.index.Url.Hostname() {
pool.wg.Add(1)
go pool.addAddition(&Unit{
path: uu.Path,
source: RedirectSource,
frontUrl: bl.UrlString,
depth: depth + 1,
})
}
pool.wg.Add(1)
go pool.addAddition(&Unit{
path: reURL,
source: RedirectSource,
frontUrl: bl.UrlString,
depth: depth + 1,
})
}
func (pool *Pool) doCrawl(bl *pkg.Baseline) {
if !pool.Crawl {
if !pool.Crawl || bl.ReqDepth >= MaxCrawl {
pool.wg.Done()
return
}
@ -523,46 +533,12 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
pool.wg.Done()
return
}
go func() {
defer pool.wg.Done()
for _, u := range bl.URLs {
if strings.HasPrefix(u, "//") {
parsed, err := url.Parse(u)
if err != nil {
continue
}
if parsed.Host != bl.Url.Host || len(parsed.Path) <= 1 {
continue
}
u = parsed.Path
} else if strings.HasPrefix(u, "/") {
// 绝对目录拼接
// 不需要进行处理, 用来跳过下面的判断
} else if strings.HasPrefix(u, "./") {
// "./"相对目录拼接
if bl.Dir {
u = pkg.URLJoin(bl.Url.Path, u[2:])
} else {
u = pkg.URLJoin(path.Dir(bl.Url.Path), u[2:])
}
} else if strings.HasPrefix(u, "../") {
u = path.Join(path.Dir(bl.Url.Path), u)
} else if !strings.HasPrefix(u, "http") {
// 相对目录拼接
if bl.Dir {
u = pkg.URLJoin(bl.Url.Path, u)
} else {
u = pkg.URLJoin(path.Dir(bl.Url.Path), u)
}
} else {
parsed, err := url.Parse(u)
if err != nil {
continue
}
if parsed.Host != bl.Url.Host || len(parsed.Path) <= 1 {
continue
}
u = parsed.Path
if u = FormatURL(bl.Url.Path, u); u == "" || u == pool.url.Path {
continue
}
pool.locker.Lock()
@ -571,14 +547,12 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
} else {
// 通过map去重, 只有新的url才会进入到该逻辑
pool.urls[u] = 1
if bl.ReqDepth < MaxCrawl {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: u[1:],
source: CrawlSource,
depth: bl.ReqDepth + 1,
})
}
pool.wg.Add(1)
pool.addAddition(&Unit{
path: u,
source: CrawlSource,
depth: bl.ReqDepth + 1,
})
}
pool.locker.Unlock()
}
@ -601,7 +575,7 @@ func (pool *Pool) doRule(bl *pkg.Baseline) {
for u := range rule.RunAsStream(pool.AppendRule.Expressions, path.Base(bl.Path)) {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: path.Join(path.Dir(bl.Path), u),
path: Dir(bl.Url.Path) + u,
source: RuleSource,
})
}
@ -613,7 +587,7 @@ func (pool *Pool) doActive() {
for _, u := range pkg.ActivePath {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, u),
path: pool.dir + u[1:],
source: ActiveSource,
})
}
@ -629,7 +603,7 @@ func (pool *Pool) doBak() {
for w := range worder.C {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, w),
path: pool.dir + w,
source: BakSource,
})
}
@ -642,7 +616,7 @@ func (pool *Pool) doBak() {
for w := range worder.C {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, w),
path: pool.dir + w,
source: BakSource,
})
}
@ -653,7 +627,7 @@ func (pool *Pool) doCommonFile() {
for _, u := range mask.SpecialWords["common_file"] {
pool.wg.Add(1)
pool.addAddition(&Unit{
path: safePath(pool.BaseURL, u),
path: pool.dir + u,
source: CommonFileSource,
})
}
@ -719,3 +693,17 @@ func (pool *Pool) Close() {
close(pool.additionCh)
pool.bar.Close()
}
// safePath joins an auto-generated probe path (init, check, common-file
// and similar sources) onto the pool's directory, guaranteeing that no
// "//" appears at the seam.
func (pool *Pool) safePath(u string) string {
	if u == "" {
		// Nothing to append: fall back to the pool URL's own path.
		return pool.url.Path
	}
	// pool.dir is normalized to end with "/" (see NewPool), so strip at
	// most one leading "/" from u before concatenating.
	return pool.dir + strings.TrimPrefix(u, "/")
}

View File

@ -6,6 +6,8 @@ import (
"github.com/chainreactors/words/mask"
"github.com/chainreactors/words/rule"
"io/ioutil"
"net/url"
"path"
"strings"
)
@ -107,14 +109,107 @@ func loadRuleWithFiles(ruleFiles []string, filter string) ([]rule.Expression, er
return rule.Compile(rules.String(), filter).Expressions, nil
}
func safePath(url, path string) string {
urlSlash := strings.HasSuffix(url, "/")
pathSlash := strings.HasPrefix(path, "/")
if !urlSlash && !pathSlash {
return "/" + path
} else if urlSlash && pathSlash {
return path[1:]
// relaPath joins the relative reference u onto base. path.Join is
// deliberately avoided because it collapses repeated slashes, and routes
// containing "////" may be meaningful and must survive the join.
//
// Expected results (base, u -> result):
//
//	""   /a  ->  /a
//	""   a   ->  /a
//	/    ""  ->  /
//	/a/  b   ->  /a/b
//	/a/  /b  ->  /a/b
//	/a   b   ->  /b
//	/a   /b  ->  /b
func relaPath(base, u string) string {
	if u == "" {
		return base
	}
	pathSlash := strings.HasPrefix(u, "/")
	if base == "" {
		// BUG FIX: this branch previously returned u[1:], stripping the
		// leading "/" and yielding "a" for ("", "/a"); per the table
		// above the result must stay rooted at "/".
		if pathSlash {
			return u
		}
		return "/" + u
	}
	if strings.HasSuffix(base, "/") {
		// base is already a directory: append u directly, dropping at
		// most one leading "/" so no "//" forms at the seam.
		if pathSlash {
			return base + u[1:]
		}
		return base + u
	}
	// base names a file-like segment: resolve relative to its directory.
	if pathSlash {
		return Dir(base) + u[1:]
	}
	return Dir(base) + u
}

// Dir safely extracts the directory portion of u, keeping every "/"
// intact (it never collapses "//"); it is NOT a parent-directory
// operation.
//
// Expected results:
//
//	/a   ->  /
//	/a/  ->  /a/
//	a/   ->  a/
//	aaa  ->  /
func Dir(u string) string {
	if strings.HasSuffix(u, "/") {
		// Already a directory.
		return u
	}
	i := strings.LastIndex(u, "/")
	if i == -1 {
		// No slash at all: treat as relative to the root.
		return "/"
	}
	return u[:i+1]
}
// FormatURL normalizes a collected link u into a site-relative path,
// using base (the current page's path) to resolve relative references.
// It returns "" for links that should be discarded: unparsable URLs and
// absolute URLs whose path is empty or just "/".
//
// NOTE(review): for absolute/protocol-relative URLs only the path is
// kept — the host is not compared against the current target here;
// presumably the caller filters foreign hosts. Confirm at call sites.
func FormatURL(base, u string) string {
	switch {
	case strings.HasPrefix(u, "http"), strings.HasPrefix(u, "//"):
		// Absolute or protocol-relative URL: keep only its path,
		// skipping "/" and empty paths.
		parsed, err := url.Parse(u)
		if err != nil || len(parsed.Path) <= 1 {
			return ""
		}
		return parsed.Path
	case strings.HasPrefix(u, "/"):
		// Already an absolute path: no processing needed.
		return u
	case strings.HasPrefix(u, "./"):
		// Explicit same-directory reference.
		return relaPath(base, u[2:])
	case strings.HasPrefix(u, "../"):
		// Parent-directory reference; path.Join resolves the "..".
		return path.Join(Dir(base), u)
	default:
		// Plain relative reference.
		return relaPath(base, u)
	}
}
//func Join(base, u string) string {
// // //././ ../../../a
// base = Dir(base)
// for strings.HasPrefix(u, "../") {
// u = u[3:]
// for strings.HasSuffix(base, "/") {
// // 去掉多余的"/"
// base = base[:len(base)-2]
// }
// if i := strings.LastIndex(base, "/"); i == -1 {
// return "/"
// } else {
// return base[:i+1]
// }
// }
// return base + u
//}

View File

@ -19,15 +19,6 @@ func NewBaseline(u, host string, resp *ihttp.Response) *Baseline {
Status: resp.StatusCode(),
IsValid: true,
}
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
bl.Url = uu
}
bl.Dir = bl.IsDir()
if bl.Url.Host != host {
bl.Host = host
}
header := resp.Header()
bl.Header = make([]byte, len(header))
copy(bl.Header, header)
@ -53,6 +44,16 @@ func NewBaseline(u, host string, resp *ihttp.Response) *Baseline {
}
bl.Raw = append(bl.Header, bl.Body...)
bl.RedirectURL = resp.GetHeader("Location")
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
bl.Url = uu
}
bl.Dir = bl.IsDir()
if bl.Url.Host != host {
bl.Host = host
}
return bl
}
@ -64,10 +65,16 @@ func NewInvalidBaseline(u, host string, resp *ihttp.Response, reason string) *Ba
Reason: reason,
}
// 无效数据也要读取body, 否则keep-alive不生效
resp.Body()
bl.BodyLength = resp.ContentLength()
bl.RedirectURL = string(resp.GetHeader("Location"))
uu, err := url.Parse(u)
if err == nil {
bl.Path = uu.Path
bl.Url = uu
return bl
}
bl.Dir = bl.IsDir()
@ -75,11 +82,6 @@ func NewInvalidBaseline(u, host string, resp *ihttp.Response, reason string) *Ba
bl.Host = host
}
// 无效数据也要读取body, 否则keep-alive不生效
resp.Body()
bl.BodyLength = resp.ContentLength()
bl.RedirectURL = string(resp.GetHeader("Location"))
return bl
}
@ -152,7 +154,7 @@ func (bl *Baseline) CollectURL() {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
u[1] = formatURL(u[1])
if !filterJs(u[1]) {
if u[1] != "" && !filterJs(u[1]) {
bl.URLs = append(bl.URLs, u[1])
}
}
@ -162,7 +164,7 @@ func (bl *Baseline) CollectURL() {
urls := reg.FindAllStringSubmatch(string(bl.Body), -1)
for _, u := range urls {
u[1] = formatURL(u[1])
if !filterUrl(u[1]) {
if u[1] != "" && !filterUrl(u[1]) {
bl.URLs = append(bl.URLs, u[1])
}
}
@ -314,12 +316,11 @@ func (bl *Baseline) Format(probes []string) string {
func (bl *Baseline) ColorString() string {
var line strings.Builder
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "]"))
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "] "))
if bl.FrontURL != "" {
line.WriteString(logs.CyanLine(bl.FrontURL))
line.WriteString(" --> ")
}
line.WriteString(" ")
line.WriteString(logs.GreenLine(bl.UrlString))
if bl.Host != "" {
line.WriteString(" (" + bl.Host + ")")
@ -368,12 +369,11 @@ func (bl *Baseline) ColorString() string {
func (bl *Baseline) String() string {
var line strings.Builder
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "]"))
line.WriteString(logs.GreenLine("[" + GetSourceName(bl.Source) + "] "))
if bl.FrontURL != "" {
line.WriteString(bl.FrontURL)
line.WriteString(" --> ")
}
line.WriteString(" ")
line.WriteString(bl.UrlString)
if bl.Host != "" {
line.WriteString(" (" + bl.Host + ")")

View File

@ -3,16 +3,15 @@ package ihttp
import (
"github.com/valyala/fasthttp"
"net/http"
"strings"
)
// BuildPathRequest constructs a GET request for base+path using the
// selected client implementation (fasthttp or net/http). The two parts
// are concatenated verbatim; the caller is responsible for slash
// handling between them.
func BuildPathRequest(clientType int, base, path string) (*Request, error) {
	target := base + path
	if clientType == FAST {
		freq := fasthttp.AcquireRequest()
		freq.SetRequestURI(target)
		return &Request{FastRequest: freq, ClientType: FAST}, nil
	}
	sreq, err := http.NewRequest("GET", target, nil)
	return &Request{StandardRequest: sreq, ClientType: STANDARD}, err
}
@ -75,15 +74,3 @@ func (r *Request) Host() string {
return ""
}
}
// safeUrlJoin concatenates base and uri, inserting a "/" only when
// neither side supplies one. It does not collapse a doubled "/" when
// both sides have one.
func safeUrlJoin(base, uri string) string {
	if uri == "" {
		// Empty uri: request the base URL unchanged.
		return base
	}
	if strings.HasSuffix(base, "/") || strings.HasPrefix(uri, "/") {
		return base + uri
	}
	return base + "/" + uri
}

View File

@ -29,8 +29,8 @@ var (
}
URLRegexps []*regexp.Regexp = []*regexp.Regexp{
regexp.MustCompile(`=\s{0,6}(https{0,1}:[^\s',"”><;()|*\[]{2,250})`),
regexp.MustCompile(`["']([^\s',"”><;()|*\[]{2,250}\.[a-zA-Z]\w{1,3})["']`),
regexp.MustCompile(`["'](https?:[^\s',"”><;()|*\[]{2,250}?\.[^\s',"”><;()|*\[]{2,250}?)["']`),
regexp.MustCompile(`["']([^\s',"”><.@;()|*\[]{2,250}\.[a-zA-Z]\w{1,4})["']`),
regexp.MustCompile(`["'](https?:[^\s',"”><;()@|*\[]{2,250}?\.[^\s',"”><;()|*\[]{2,250}?)["']`),
regexp.MustCompile(`["']\s{0,6}([#,.]{0,2}/[^\s',"”><;()|*\[]{2,250}?)\s{0,6}["']`),
regexp.MustCompile(`href\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s',"”><;()|*\[]{2,250})|action\s{0,6}=\s{0,6}["'‘“]{0,1}\s{0,6}([^\s'"“><)(]{2,250})`),
}
@ -227,7 +227,7 @@ func FingerDetect(content string) Frameworks {
var (
BadExt = []string{".js", ".css", ".scss", ".,", ".jpeg", ".jpg", ".png", ".gif", ".svg", ".vue", ".ts", ".swf", ".pdf", ".mp4"}
BadURL = []string{";", "}", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path", "*#__PURE__*"}
BadURL = []string{";", "}", "webpack://", "{", "www.w3.org", ".src", ".url", ".att", ".href", "location.href", "javascript:", "location:", ".createObject", ":location", ".path"}
)
func filterJs(u string) bool {
@ -291,17 +291,29 @@ func commonFilter(u string) bool {
return false
}
// URLJoin concatenates base and uri with exactly one "/" between them:
// a doubled slash is collapsed and a missing one is inserted.
func URLJoin(base, uri string) string {
	switch {
	case strings.HasSuffix(base, "/") && strings.HasPrefix(uri, "/"):
		// Both sides provide a slash: drop one.
		return base + uri[1:]
	case strings.HasSuffix(base, "/") || strings.HasPrefix(uri, "/"):
		// Exactly one side provides the slash.
		return base + uri
	default:
		// Neither side has a slash: insert one.
		return base + "/" + uri
	}
}
//func SafeJoin(base, uri string) string {
// baseSlash := strings.HasSuffix(base, "/")
// uriSlash := strings.HasPrefix(uri, "/")
// if (baseSlash && !uriSlash) || (!baseSlash && uriSlash) {
// return base + uri
// } else if baseSlash && uriSlash {
// return base + uri[1:]
// } else {
// return base + "/" + uri
// }
//}
//func SafePath(url, path string) string {
// urlSlash := strings.HasSuffix(url, "/")
// pathSlash := strings.HasPrefix(path, "/")
// if !urlSlash && !pathSlash {
// return "/" + path
// } else if urlSlash && pathSlash {
// return path[1:]
// } else {
// return path
// }
//}
func BakGenerator(domain string) []string {
var possibilities []string