fscan/Plugins/WebTitle.go

380 lines
11 KiB
Go
Raw Normal View History

2020-12-29 17:17:10 +08:00
package Plugins
import (
2021-05-12 10:57:12 +08:00
"compress/gzip"
2021-09-10 22:43:50 +08:00
"crypto/tls"
2020-12-29 17:17:10 +08:00
"fmt"
2021-05-12 10:57:12 +08:00
"io"
2020-12-29 17:17:10 +08:00
"net/http"
"net/url"
2020-12-29 17:17:10 +08:00
"regexp"
"strings"
2021-09-10 22:43:50 +08:00
"time"
2022-02-17 14:37:06 +08:00
"unicode/utf8"
2023-06-05 19:02:55 +03:00
2024-12-18 22:00:18 +08:00
"github.com/shadow1ng/fscan/Common"
2023-06-05 19:02:55 +03:00
"github.com/shadow1ng/fscan/WebScan"
"github.com/shadow1ng/fscan/WebScan/lib"
"golang.org/x/text/encoding/simplifiedchinese"
)
// WebTitle 获取Web标题和指纹信息
2024-12-19 16:15:53 +08:00
func WebTitle(info *Common.HostInfo) error {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("开始获取Web标题初始信息: %+v", info))
// 获取网站标题信息
2023-11-13 11:27:34 +08:00
err, CheckData := GOWebTitle(info)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("GOWebTitle执行完成 - 错误: %v, 检查数据长度: %d", err, len(CheckData)))
2022-02-17 14:37:06 +08:00
info.Infostr = WebScan.InfoCheck(info.Url, &CheckData)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("信息检查完成,获得信息: %v", info.Infostr))
// 检查是否为打印机,避免意外打印
2024-10-25 16:41:19 +08:00
for _, v := range info.Infostr {
if v == "打印机" {
2025-01-04 14:04:41 +08:00
Common.LogDebug("检测到打印机,停止扫描")
2024-10-25 16:41:19 +08:00
return nil
}
}
// 输出错误信息(如果有)
if err != nil {
errlog := fmt.Sprintf("网站标题 %v %v", info.Url, err)
2024-12-18 22:00:18 +08:00
Common.LogError(errlog)
2021-03-30 18:12:54 +08:00
}
2021-03-30 18:12:54 +08:00
return err
}
// GOWebTitle 获取网站标题并处理URL
2024-12-19 16:15:53 +08:00
func GOWebTitle(info *Common.HostInfo) (err error, CheckData []WebScan.CheckDatas) {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("开始处理URL: %s", info.Url))
// 如果URL未指定根据端口生成URL
if info.Url == "" {
2025-01-04 14:04:41 +08:00
Common.LogDebug("URL为空根据端口生成URL")
2022-02-17 14:37:06 +08:00
switch info.Ports {
case "80":
info.Url = fmt.Sprintf("http://%s", info.Host)
2022-02-17 14:37:06 +08:00
case "443":
info.Url = fmt.Sprintf("https://%s", info.Host)
2022-02-17 14:37:06 +08:00
default:
2021-09-10 22:43:50 +08:00
host := fmt.Sprintf("%s:%s", info.Host, info.Ports)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("正在检测主机协议: %s", host))
2024-12-18 22:00:18 +08:00
protocol := GetProtocol(host, Common.Timeout)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("检测到协议: %s", protocol))
2021-09-10 22:43:50 +08:00
info.Url = fmt.Sprintf("%s://%s:%s", protocol, info.Host, info.Ports)
}
2020-12-29 17:17:10 +08:00
} else {
// 处理未指定协议的URL
if !strings.Contains(info.Url, "://") {
2025-01-04 14:04:41 +08:00
Common.LogDebug("URL未包含协议开始检测")
2022-02-17 14:37:06 +08:00
host := strings.Split(info.Url, "/")[0]
2024-12-18 22:00:18 +08:00
protocol := GetProtocol(host, Common.Timeout)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("检测到协议: %s", protocol))
2021-09-10 22:43:50 +08:00
info.Url = fmt.Sprintf("%s://%s", protocol, info.Url)
}
2020-12-29 17:17:10 +08:00
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("协议检测完成后的URL: %s", info.Url))
2021-09-10 22:43:50 +08:00
// 第一次获取URL
2025-01-04 14:04:41 +08:00
Common.LogDebug("第一次尝试访问URL")
2023-11-13 11:27:34 +08:00
err, result, CheckData := geturl(info, 1, CheckData)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("第一次访问结果 - 错误: %v, 返回信息: %s", err, result))
2021-05-14 16:02:22 +08:00
if err != nil && !strings.Contains(err.Error(), "EOF") {
2022-02-17 14:37:06 +08:00
return
2020-12-29 17:17:10 +08:00
}
2021-09-10 20:32:51 +08:00
// 处理URL跳转
if strings.Contains(result, "://") {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("检测到重定向到: %s", result))
2022-02-17 14:37:06 +08:00
info.Url = result
2023-11-13 11:27:34 +08:00
err, result, CheckData = geturl(info, 3, CheckData)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("重定向请求结果 - 错误: %v, 返回信息: %s", err, result))
2022-02-17 14:37:06 +08:00
if err != nil {
return
}
}
// 处理HTTP到HTTPS的升级
2021-09-10 22:43:50 +08:00
if result == "https" && !strings.HasPrefix(info.Url, "https://") {
2025-01-04 14:04:41 +08:00
Common.LogDebug("正在升级到HTTPS")
2021-05-14 16:02:22 +08:00
info.Url = strings.Replace(info.Url, "http://", "https://", 1)
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("升级后的URL: %s", info.Url))
2023-11-13 11:27:34 +08:00
err, result, CheckData = geturl(info, 1, CheckData)
// 处理升级后的跳转
if strings.Contains(result, "://") {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("HTTPS升级后发现重定向到: %s", result))
2022-02-17 14:37:06 +08:00
info.Url = result
2023-11-13 11:27:34 +08:00
err, _, CheckData = geturl(info, 3, CheckData)
if err != nil {
2022-02-17 14:37:06 +08:00
return
}
2020-12-29 17:17:10 +08:00
}
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("GOWebTitle执行完成 - 错误: %v", err))
if err != nil {
2022-02-17 14:37:06 +08:00
return
2020-12-29 17:17:10 +08:00
}
2022-02-17 14:37:06 +08:00
return
2020-12-29 17:17:10 +08:00
}
2024-12-19 16:15:53 +08:00
func geturl(info *Common.HostInfo, flag int, CheckData []WebScan.CheckDatas) (error, string, []WebScan.CheckDatas) {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("geturl开始执行 - URL: %s, 标志位: %d", info.Url, flag))
// 处理目标URL
Url := info.Url
if flag == 2 {
2025-01-04 14:04:41 +08:00
Common.LogDebug("处理favicon.ico URL")
URL, err := url.Parse(Url)
if err == nil {
Url = fmt.Sprintf("%s://%s/favicon.ico", URL.Scheme, URL.Host)
} else {
Url += "/favicon.ico"
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("favicon URL: %s", Url))
}
// 创建HTTP请求
2025-01-04 14:04:41 +08:00
Common.LogDebug("开始创建HTTP请求")
req, err := http.NewRequest("GET", Url, nil)
2022-02-17 14:37:06 +08:00
if err != nil {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("创建HTTP请求失败: %v", err))
2022-02-17 14:37:06 +08:00
return err, "", CheckData
}
// 设置请求头
2024-12-18 22:00:18 +08:00
req.Header.Set("User-agent", Common.UserAgent)
req.Header.Set("Accept", Common.Accept)
2022-02-17 14:37:06 +08:00
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
2024-12-18 22:00:18 +08:00
if Common.Cookie != "" {
req.Header.Set("Cookie", Common.Cookie)
}
2022-02-17 14:37:06 +08:00
req.Header.Set("Connection", "close")
2025-01-04 14:04:41 +08:00
Common.LogDebug("已设置请求头")
// 选择HTTP客户端
2022-02-17 14:37:06 +08:00
var client *http.Client
if flag == 1 {
2025-01-04 14:04:41 +08:00
client = lib.ClientNoRedirect
Common.LogDebug("使用不跟随重定向的客户端")
2022-02-17 14:37:06 +08:00
} else {
2025-01-04 14:04:41 +08:00
client = lib.Client
Common.LogDebug("使用普通客户端")
}
// 检查客户端是否为空
if client == nil {
Common.LogDebug("错误: HTTP客户端为空")
return fmt.Errorf("HTTP客户端未初始化"), "", CheckData
2022-02-17 14:37:06 +08:00
}
// 发送请求
2025-01-04 14:04:41 +08:00
Common.LogDebug("开始发送HTTP请求")
2022-02-17 14:37:06 +08:00
resp, err := client.Do(req)
if err != nil {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("HTTP请求失败: %v", err))
2021-05-14 16:02:22 +08:00
return err, "https", CheckData
2020-12-29 17:17:10 +08:00
}
2022-02-17 14:37:06 +08:00
defer resp.Body.Close()
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("收到HTTP响应状态码: %d", resp.StatusCode))
// 读取响应内容
2022-02-17 14:37:06 +08:00
body, err := getRespBody(resp)
if err != nil {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("读取响应内容失败: %v", err))
2022-02-17 14:37:06 +08:00
return err, "https", CheckData
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("成功读取响应内容,长度: %d", len(body)))
// 保存检查数据
2023-11-13 17:41:54 +08:00
CheckData = append(CheckData, WebScan.CheckDatas{body, fmt.Sprintf("%s", resp.Header)})
2025-01-04 14:04:41 +08:00
Common.LogDebug("已保存检查数据")
// 处理非favicon请求
2022-02-17 14:37:06 +08:00
var reurl string
if flag != 2 {
// 处理编码
2023-11-13 17:41:54 +08:00
if !utf8.Valid(body) {
2025-01-04 14:04:41 +08:00
Common.LogDebug("检测到非UTF8编码尝试GBK解码")
2023-11-13 17:41:54 +08:00
body, _ = simplifiedchinese.GBK.NewDecoder().Bytes(body)
}
// 获取页面信息
title := gettitle(body)
2022-02-17 14:37:06 +08:00
length := resp.Header.Get("Content-Length")
if length == "" {
length = fmt.Sprintf("%v", len(body))
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("提取的标题: %s, 内容长度: %s", title, length))
// 处理重定向
2022-02-17 14:37:06 +08:00
redirURL, err1 := resp.Location()
if err1 == nil {
reurl = redirURL.String()
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("检测到重定向URL: %s", reurl))
2022-02-17 14:37:06 +08:00
}
// 输出结果
result := fmt.Sprintf("网站标题 %-25v 状态码:%-3v 长度:%-6v 标题:%v",
resp.Request.URL, resp.StatusCode, length, title)
2022-02-17 14:37:06 +08:00
if reurl != "" {
result += fmt.Sprintf(" 重定向地址: %s", reurl)
2022-02-17 14:37:06 +08:00
}
2024-12-18 22:00:18 +08:00
Common.LogSuccess(result)
2022-02-17 14:37:06 +08:00
}
// 返回结果
2022-02-17 14:37:06 +08:00
if reurl != "" {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("返回重定向URL: %s", reurl))
2022-02-17 14:37:06 +08:00
return nil, reurl, CheckData
}
if resp.StatusCode == 400 && !strings.HasPrefix(info.Url, "https") {
2025-01-04 14:04:41 +08:00
Common.LogDebug("返回HTTPS升级标志")
2022-02-17 14:37:06 +08:00
return nil, "https", CheckData
}
2025-01-04 14:04:41 +08:00
Common.LogDebug("geturl执行完成无特殊返回")
2022-02-17 14:37:06 +08:00
return nil, "", CheckData
}
2021-05-12 10:57:12 +08:00
// getRespBody 读取HTTP响应体内容
2021-05-12 10:57:12 +08:00
func getRespBody(oResp *http.Response) ([]byte, error) {
2025-01-04 14:04:41 +08:00
Common.LogDebug("开始读取响应体内容")
2021-05-12 10:57:12 +08:00
var body []byte
// 处理gzip压缩的响应
2021-05-12 10:57:12 +08:00
if oResp.Header.Get("Content-Encoding") == "gzip" {
2025-01-04 14:04:41 +08:00
Common.LogDebug("检测到gzip压缩开始解压")
2021-05-12 10:57:12 +08:00
gr, err := gzip.NewReader(oResp.Body)
if err != nil {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("创建gzip解压器失败: %v", err))
2021-05-12 10:57:12 +08:00
return nil, err
}
defer gr.Close()
// 循环读取解压内容
2021-05-12 10:57:12 +08:00
for {
buf := make([]byte, 1024)
n, err := gr.Read(buf)
if err != nil && err != io.EOF {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("读取压缩内容失败: %v", err))
2021-05-12 10:57:12 +08:00
return nil, err
}
if n == 0 {
break
}
body = append(body, buf...)
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("gzip解压完成内容长度: %d", len(body)))
2021-05-12 10:57:12 +08:00
} else {
// 直接读取未压缩的响应
2025-01-04 14:04:41 +08:00
Common.LogDebug("读取未压缩的响应内容")
2023-06-05 19:02:55 +03:00
raw, err := io.ReadAll(oResp.Body)
2021-05-12 10:57:12 +08:00
if err != nil {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("读取响应内容失败: %v", err))
2021-05-12 10:57:12 +08:00
return nil, err
}
body = raw
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("读取完成,内容长度: %d", len(body)))
2021-05-12 10:57:12 +08:00
}
return body, nil
}
2021-09-10 22:43:50 +08:00
// gettitle 从HTML内容中提取网页标题
2022-02-17 14:37:06 +08:00
func gettitle(body []byte) (title string) {
2025-01-04 14:04:41 +08:00
Common.LogDebug("开始提取网页标题")
// 使用正则表达式匹配title标签内容
2024-01-15 16:22:40 +08:00
re := regexp.MustCompile("(?ims)<title.*?>(.*?)</title>")
2022-02-17 14:37:06 +08:00
find := re.FindSubmatch(body)
2022-02-17 14:37:06 +08:00
if len(find) > 1 {
title = string(find[1])
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("找到原始标题: %s", title))
// 清理标题内容
title = strings.TrimSpace(title) // 去除首尾空格
title = strings.Replace(title, "\n", "", -1) // 去除换行
title = strings.Replace(title, "\r", "", -1) // 去除回车
title = strings.Replace(title, "&nbsp;", " ", -1) // 替换HTML空格
// 截断过长的标题
2022-02-17 14:37:06 +08:00
if len(title) > 100 {
2025-01-04 14:04:41 +08:00
Common.LogDebug("标题超过100字符进行截断")
2022-02-17 14:37:06 +08:00
title = title[:100]
}
// 处理空标题
2023-11-13 17:41:54 +08:00
if title == "" {
2025-01-04 14:04:41 +08:00
Common.LogDebug("标题为空,使用双引号代替")
title = "\"\""
2023-11-13 17:41:54 +08:00
}
} else {
2025-01-04 14:04:41 +08:00
Common.LogDebug("未找到标题标签")
title = "无标题"
2022-02-17 14:37:06 +08:00
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("最终标题: %s", title))
2022-02-17 14:37:06 +08:00
return
}
// GetProtocol 检测目标主机的协议类型(HTTP/HTTPS)
2023-11-13 11:27:34 +08:00
func GetProtocol(host string, Timeout int64) (protocol string) {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("开始检测主机协议 - 主机: %s, 超时: %d秒", host, Timeout))
2023-11-13 11:27:34 +08:00
protocol = "http"
// 根据标准端口快速判断协议
2022-02-17 14:37:06 +08:00
if strings.HasSuffix(host, ":80") || !strings.Contains(host, ":") {
2025-01-04 14:04:41 +08:00
Common.LogDebug("检测到HTTP标准端口或无端口使用HTTP协议")
2023-11-13 11:27:34 +08:00
return
} else if strings.HasSuffix(host, ":443") {
2025-01-04 14:04:41 +08:00
Common.LogDebug("检测到HTTPS标准端口使用HTTPS协议")
2023-11-13 11:27:34 +08:00
protocol = "https"
return
2022-02-17 14:37:06 +08:00
}
// 尝试建立TCP连接
2025-01-04 14:04:41 +08:00
Common.LogDebug("尝试建立TCP连接")
2024-12-18 22:00:18 +08:00
socksconn, err := Common.WrapperTcpWithTimeout("tcp", host, time.Duration(Timeout)*time.Second)
2022-05-08 02:19:41 +08:00
if err != nil {
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("TCP连接失败: %v", err))
2022-05-08 02:19:41 +08:00
return
}
// 尝试TLS握手
2025-01-04 14:04:41 +08:00
Common.LogDebug("开始TLS握手")
conn := tls.Client(socksconn, &tls.Config{
MinVersion: tls.VersionTLS10,
InsecureSkipVerify: true,
})
// 确保连接关闭
2021-09-10 22:43:50 +08:00
defer func() {
if conn != nil {
2022-05-08 00:16:58 +08:00
defer func() {
if err := recover(); err != nil {
Common.LogError(fmt.Sprintf("连接关闭时发生错误: %v", err))
2022-05-08 00:16:58 +08:00
}
}()
2025-01-04 14:04:41 +08:00
Common.LogDebug("关闭连接")
2021-09-10 22:43:50 +08:00
conn.Close()
}
}()
// 设置连接超时
2022-05-09 12:27:05 +08:00
conn.SetDeadline(time.Now().Add(time.Duration(Timeout) * time.Second))
// 执行TLS握手
2022-05-09 12:27:05 +08:00
err = conn.Handshake()
2021-09-10 22:43:50 +08:00
if err == nil || strings.Contains(err.Error(), "handshake failure") {
2025-01-04 14:04:41 +08:00
Common.LogDebug("TLS握手成功或握手失败但确认是HTTPS协议")
2021-09-10 22:43:50 +08:00
protocol = "https"
2025-01-04 14:04:41 +08:00
} else {
Common.LogDebug(fmt.Sprintf("TLS握手失败: %v使用HTTP协议", err))
2021-09-10 22:43:50 +08:00
}
2025-01-04 14:04:41 +08:00
Common.LogDebug(fmt.Sprintf("协议检测完成,使用: %s", protocol))
2021-09-10 22:43:50 +08:00
return protocol
}