From a4d912ed4dd2ba090066b74eac9c53b4cf66076b Mon Sep 17 00:00:00 2001 From: M09Ic Date: Wed, 11 Jan 2023 11:26:03 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4=E5=85=A8=E5=B1=80=E7=9A=84?= =?UTF-8?q?=E5=8E=BB=E9=87=8D=E5=88=97=E8=A1=A8=E4=BB=A5=E5=8F=8A=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/pool.go | 2 +- pkg/baseline.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/pool.go b/internal/pool.go index 72e1856..c07f7e8 100644 --- a/internal/pool.go +++ b/internal/pool.go @@ -292,6 +292,7 @@ Loop: continue } if _, ok := pool.urls[unit.path]; ok { + logs.Log.Debugf("[%s] duplicate path: %s, skipped", pkg.GetSourceName(unit.source), pool.base+unit.path) pool.wg.Done() } else { pool.urls[unit.path] = struct{}{} @@ -554,7 +555,6 @@ func (pool *Pool) doCrawl(bl *pkg.Baseline) { } // 通过map去重, 只有新的url才会进入到该逻辑 - pool.urls[u] = struct{}{} pool.wg.Add(1) pool.addAddition(&Unit{ path: u, diff --git a/pkg/baseline.go b/pkg/baseline.go index 626242e..54646e0 100644 --- a/pkg/baseline.go +++ b/pkg/baseline.go @@ -170,10 +170,11 @@ func (bl *Baseline) CollectURL() { } } + bl.URLs = RemoveDuplication(bl.URLs) if bl.URLs != nil { bl.Extracteds = append(bl.Extracteds, &fingers.Extracted{ Name: "crawl", - ExtractResult: RemoveDuplication(bl.URLs), + ExtractResult: bl.URLs, }) } }