调整全局的去重列表以及日志输出

This commit is contained in:
M09Ic 2023-01-11 11:26:03 +08:00
parent 8233dcefd3
commit a4d912ed4d
2 changed files with 3 additions and 2 deletions

View File

@@ -292,6 +292,7 @@ Loop:
continue
}
if _, ok := pool.urls[unit.path]; ok {
logs.Log.Debugf("[%s] duplicate path: %s, skipped", pkg.GetSourceName(unit.source), pool.base+unit.path)
pool.wg.Done()
} else {
pool.urls[unit.path] = struct{}{}
@@ -554,7 +555,6 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
}
// 通过map去重, 只有新的url才会进入到该逻辑
pool.urls[u] = struct{}{}
pool.wg.Add(1)
pool.addAddition(&Unit{
path: u,

View File

@@ -170,10 +170,11 @@ func (bl *Baseline) CollectURL() {
}
}
+bl.URLs = RemoveDuplication(bl.URLs)
if bl.URLs != nil {
bl.Extracteds = append(bl.Extracteds, &fingers.Extracted{
Name: "crawl",
-ExtractResult: RemoveDuplication(bl.URLs),
+ExtractResult: bl.URLs,
})
}
}