Adjust the global deduplication list and the log output

M09Ic 2023-01-11 11:26:03 +08:00
parent 8233dcefd3
commit a4d912ed4d
2 changed files with 3 additions and 2 deletions


@@ -292,6 +292,7 @@ Loop:
 			continue
 		}
 		if _, ok := pool.urls[unit.path]; ok {
+			logs.Log.Debugf("[%s] duplicate path: %s, skipped", pkg.GetSourceName(unit.source), pool.base+unit.path)
 			pool.wg.Done()
 		} else {
 			pool.urls[unit.path] = struct{}{}
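The added Debugf call makes the dedup decision visible in the logs. As a minimal, self-contained sketch of the pattern this hunk extends (a map[string]struct{} used as a seen-set, with a debug-style log on the duplicate branch), where seen, handle and the plain log package are stand-ins for the project's pool.urls, run loop and logs.Log:

package main

import "log"

// seen plays the role of pool.urls: an empty-struct map used as a set.
var seen = map[string]struct{}{}

// handle mimics the worker-side check: duplicates are logged and dropped,
// new paths are recorded and would then be requested.
func handle(source, base, path string) {
	if _, ok := seen[path]; ok {
		// already seen: log the skip and drop the unit
		log.Printf("[%s] duplicate path: %s, skipped", source, base+path)
		return
	}
	seen[path] = struct{}{}
	// ... issue the request for base+path here
}

func main() {
	handle("crawl", "http://example.com", "/admin")
	handle("crawl", "http://example.com", "/admin") // hits the duplicate branch
}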
@@ -554,7 +555,6 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
 		}
 		// deduplicate via the map; only new urls reach this logic
-		pool.urls[u] = struct{}{}
 		pool.wg.Add(1)
 		pool.addAddition(&Unit{
 			path: u,
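Dropping pool.urls[u] = struct{}{} here means doCrawl no longer marks URLs as seen by itself; with the check in the run loop above, that loop becomes the single owner of the dedup map and doCrawl only enqueues candidates. A rough sketch of that split, assuming a channel-fed worker; enqueue and the anonymous worker goroutine are hypothetical stand-ins for addAddition and the pool's run loop:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	queue := make(chan string, 16)
	seen := map[string]struct{}{} // touched only by the worker below

	// producer side (the doCrawl role): submit only, never write the seen map
	enqueue := func(u string) {
		wg.Add(1)
		queue <- u
	}

	// consumer side (the run-loop role): the single place that checks and records
	go func() {
		for u := range queue {
			if _, ok := seen[u]; ok {
				fmt.Println("duplicate, skipped:", u)
			} else {
				seen[u] = struct{}{}
				fmt.Println("requesting:", u)
			}
			wg.Done()
		}
	}()

	enqueue("/index")
	enqueue("/index")
	enqueue("/login")
	wg.Wait()
}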


@@ -170,10 +170,11 @@ func (bl *Baseline) CollectURL() {
 		}
 	}
+	bl.URLs = RemoveDuplication(bl.URLs)
 	if bl.URLs != nil {
 		bl.Extracteds = append(bl.Extracteds, &fingers.Extracted{
 			Name:          "crawl",
-			ExtractResult: RemoveDuplication(bl.URLs),
+			ExtractResult: bl.URLs,
 		})
 	}
 }
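With this change the collected URLs are deduplicated once, so bl.URLs and the crawl Extracted share the same cleaned slice instead of deduplicating only the copy stored in ExtractResult. The RemoveDuplication helper itself is not part of this diff; an order-preserving version would typically look like the following sketch (an assumed implementation, not necessarily the repo's actual code):

package main

import "fmt"

// RemoveDuplication sketch: order-preserving dedup backed by a set.
// Assumed implementation; the real helper in the project may differ.
func RemoveDuplication(urls []string) []string {
	seen := make(map[string]struct{}, len(urls))
	out := urls[:0] // reuse the backing array, keep first occurrences
	for _, u := range urls {
		if _, ok := seen[u]; ok {
			continue
		}
		seen[u] = struct{}{}
		out = append(out, u)
	}
	return out
}

func main() {
	fmt.Println(RemoveDuplication([]string{"/a", "/b", "/a"})) // prints [/a /b]
}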