mirror of
https://github.com/chainreactors/spray.git
synced 2025-05-06 10:41:21 +00:00
调整全局的去重列表以及日志输出
This commit is contained in:
parent
8233dcefd3
commit
a4d912ed4d
@@ -292,6 +292,7 @@ Loop:
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if _, ok := pool.urls[unit.path]; ok {
|
if _, ok := pool.urls[unit.path]; ok {
|
||||||
|
logs.Log.Debugf("[%s] duplicate path: %s, skipped", pkg.GetSourceName(unit.source), pool.base+unit.path)
|
||||||
pool.wg.Done()
|
pool.wg.Done()
|
||||||
} else {
|
} else {
|
||||||
pool.urls[unit.path] = struct{}{}
|
pool.urls[unit.path] = struct{}{}
|
||||||
@@ -554,7 +555,6 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 通过map去重, 只有新的url才会进入到该逻辑
|
// 通过map去重, 只有新的url才会进入到该逻辑
|
||||||
pool.urls[u] = struct{}{}
|
|
||||||
pool.wg.Add(1)
|
pool.wg.Add(1)
|
||||||
pool.addAddition(&Unit{
|
pool.addAddition(&Unit{
|
||||||
path: u,
|
path: u,
|
||||||
|
@@ -170,10 +170,11 @@ func (bl *Baseline) CollectURL() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bl.URLs = RemoveDuplication(bl.URLs)
|
||||||
if bl.URLs != nil {
|
if bl.URLs != nil {
|
||||||
bl.Extracteds = append(bl.Extracteds, &fingers.Extracted{
|
bl.Extracteds = append(bl.Extracteds, &fingers.Extracted{
|
||||||
Name: "crawl",
|
Name: "crawl",
|
||||||
ExtractResult: RemoveDuplication(bl.URLs),
|
ExtractResult: bl.URLs,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user