Add files via upload

2025-05-05 10:17:39 +00:00 · 2025-02-11 15:17:47 +08:00 · 2025-02-11 15:17:47 +08:00 · 30ea01af7c
commit 30ea01af7c
parent 2b2578eb8f
8 changed files with 572 additions and 62 deletions
--- a/README.MD
+++ b/README.MD
@ -4,6 +4,19 @@
 MirrorFlower(镜花)是一款基于 AI 的代码安全审计工具，支持多种编程语言的代码分析，可以帮助开发者快速发现代码中的潜在安全漏洞。支持DeepSeek-R1，ChatGPT-4o等多种大模型。
 ## 更新记录
 ### 2025-02-11
 - 完善了Python代码分析功能：
  - 添加了完整的依赖分析，支持追踪导入关系和别名
  - 增强了函数调用分析，支持类方法和实例方法的调用追踪
  - 添加了变量使用分析，支持追踪全局变量和实例变量
  - 改进了类继承分析，支持多级继承路径分析
 - 优化了分析器架构：
  - 使用访问者模式重构了代码分析逻辑
  - 添加了类型提示和详细文档
  - 改进了错误处理机制
 ## 支持的API接口
 FREEGPTAPI：https://github.com/popjane/free_chatgpt_api
@ -143,11 +156,6 @@ OPENAI_MODEL=your_preferred_model
 uvicorn backend.app:app --reload
 ```
 5. 访问Mirror-Flower
 ```bash
 http://localhost:8000/ui
 ```
 ## 注意事项
 1. 文件大小限制：10MB
--- a/backend/pycache/app.cpython-313.pyc
+++ b/backend/pycache/app.cpython-313.pyc
--- a/code_analyzer.py
+++ b/code_analyzer.py
@ -0,0 +1,281 @@
 import ast
 from typing import Dict, Set, List, Optional, Union
 from pathlib import Path
 from dataclasses import dataclass
@dataclass
class AnalyzerConfig:
    """Settings shared by the analyzers.

    ``ignore_patterns`` defaults to ``None`` (meaning "not configured"), so
    its annotation is ``Optional[List[str]]`` rather than ``List[str]``.
    """

    max_workers: int = 4  # maximum number of threads used for parallel processing
    ignore_patterns: Optional[List[str]] = None  # file patterns to ignore; None when unset
    follow_imports: bool = True  # whether imported modules should be analyzed as well
    max_depth: int = 3  # maximum analysis depth
    encoding: str = 'utf-8'  # file encoding
 class CodeAnalyzer:
    def __init__(self, config: Optional[AnalyzerConfig] = None):
        self.config = config or AnalyzerConfig()
        self.dependencies: Dict[str, Set[str]] = {}
        self.globals: Dict[str, Set[str]] = {}
        self.function_calls: Dict[str, Set[str]] = {}
        self.class_hierarchy: Dict[str, List[str]] = {}
        self.variable_usages: Dict[str, Set[str]] = {}  # 存储变量使用位置
    def _analyze_dependencies(self, content: str, file_path: str) -> None:
        """分析文件的导入依赖关系"""
        tree = ast.parse(content)
        class ImportVisitor(ast.NodeVisitor):
            def __init__(self, analyzer, file_path):
                self.analyzer = analyzer
                self.file_path = file_path
                self.aliases = {}  # 记录导入别名
            def visit_Import(self, node):
                for name in node.names:
                    self.analyzer.dependencies.setdefault(self.file_path, set()).add(name.name)
                    if name.asname:
                        self.aliases[name.asname] = name.name
            def visit_ImportFrom(self, node):
                module = node.module if node.module else ''
                for name in node.names:
                    full_name = f"{module}.{name.name}" if module else name.name
                    self.analyzer.dependencies.setdefault(self.file_path, set()).add(full_name)
                    if name.asname:
                        self.aliases[name.asname] = full_name
            def visit_Name(self, node):
                # 检查是否使用了导入的别名
                if node.id in self.aliases:
                    self.analyzer.dependencies.setdefault(self.file_path, set()).add(self.aliases[node.id])
                self.generic_visit(node)
        visitor = ImportVisitor(self, file_path)
        visitor.visit(tree)
    def _analyze_globals(self, content: str, file_path: str) -> None:
        """分析全局变量"""
        tree = ast.parse(content)
        class GlobalVisitor(ast.NodeVisitor):
            def __init__(self, analyzer, file_path):
                self.analyzer = analyzer
                self.file_path = file_path
                self.current_scope = None
            def visit_Module(self, node):
                old_scope = self.current_scope
                self.current_scope = 'module'
                self.generic_visit(node)
                self.current_scope = old_scope
            def visit_Global(self, node):
                for name in node.names:
                    self.analyzer.globals.setdefault(self.file_path, set()).add(name)
            def visit_Assign(self, node):
                if self.current_scope == 'module' and isinstance(node.targets[0], ast.Name):
                    self.analyzer.globals.setdefault(self.file_path, set()).add(node.targets[0].id)
                self.generic_visit(node)
        visitor = GlobalVisitor(self, file_path)
        visitor.visit(tree)
    def _analyze_function_calls(self, content: str, file_path: str) -> None:
        """分析函数调用关系"""
        tree = ast.parse(content)
        class FunctionCallVisitor(ast.NodeVisitor):
            def __init__(self, analyzer, file_path):
                self.analyzer = analyzer
                self.file_path = file_path
                self.current_function = None
                self.current_class = None
            def visit_ClassDef(self, node):
                old_class = self.current_class
                self.current_class = node.name
                self.generic_visit(node)
                self.current_class = old_class
            def visit_FunctionDef(self, node):
                old_function = self.current_function
                if self.current_class:
                    self.current_function = f"{self.current_class}.{node.name}"
                else:
                    self.current_function = node.name
                self.generic_visit(node)
                self.current_function = old_function
            def visit_Call(self, node):
                if not self.current_function:
                    return
                caller = f"{self.file_path}:{self.current_function}"
                if isinstance(node.func, ast.Name):
                    callee = node.func.id
                elif isinstance(node.func, ast.Attribute):
                    # 处理方法调用
                    if isinstance(node.func.value, ast.Name):
                        callee = f"{node.func.value.id}.{node.func.attr}"
                    else:
                        callee = node.func.attr
                else:
                    return
                self.analyzer.function_calls.setdefault(caller, set()).add(callee)
                self.generic_visit(node)
        visitor = FunctionCallVisitor(self, file_path)
        visitor.visit(tree)
    def _analyze_class_hierarchy(self, content: str, file_path: str) -> None:
        """分析类继承关系"""
        tree = ast.parse(content)
        class ClassVisitor(ast.NodeVisitor):
            def __init__(self, analyzer, file_path):
                self.analyzer = analyzer
                self.file_path = file_path
                self.current_module = None
            def visit_Module(self, node):
                for imp in node.body:
                    if isinstance(imp, ast.ImportFrom):
                        self.current_module = imp.module
                self.generic_visit(node)
            def visit_ClassDef(self, node):
                class_name = f"{self.file_path}:{node.name}"
                bases = []
                for base in node.bases:
                    if isinstance(base, ast.Name):
                        bases.append(base.id)
                    elif isinstance(base, ast.Attribute):
                        # 处理完整的模块路径
                        parts = []
                        current = base
                        while isinstance(current, ast.Attribute):
                            parts.append(current.attr)
                            current = current.value
                        if isinstance(current, ast.Name):
                            parts.append(current.id)
                        bases.append('.'.join(reversed(parts)))
                if bases:
                    self.analyzer.class_hierarchy[class_name] = bases
        visitor = ClassVisitor(self, file_path)
        visitor.visit(tree)
    def _analyze_variable_usage(self, content: str, file_path: str) -> None:
        """分析变量的使用位置"""
        tree = ast.parse(content)
        class VariableVisitor(ast.NodeVisitor):
            def __init__(self, analyzer, file_path):
                self.analyzer = analyzer
                self.file_path = file_path
                self.current_function = None
                self.current_class = None
            def visit_ClassDef(self, node):
                old_class = self.current_class
                self.current_class = node.name
                self.generic_visit(node)
                self.current_class = old_class
            def visit_FunctionDef(self, node):
                old_function = self.current_function
                scope = f"{self.current_class}.{node.name}" if self.current_class else node.name
                self.current_function = scope
                self.generic_visit(node)
                self.current_function = old_function
            def visit_Name(self, node):
                if isinstance(node.ctx, (ast.Load, ast.Store)):
                    scope = f"{self.file_path}:{self.current_function}" if self.current_function else self.file_path
                    self.analyzer.variable_usages.setdefault(node.id, set()).add(scope)
                self.generic_visit(node)
            def visit_Attribute(self, node):
                if isinstance(node.ctx, (ast.Load, ast.Store)) and isinstance(node.value, ast.Name):
                    if node.value.id == 'self' and self.current_class:
                        # 记录实例变量
                        var_name = f"{self.current_class}.{node.attr}"
                        scope = f"{self.file_path}:{self.current_function}"
                        self.analyzer.variable_usages.setdefault(var_name, set()).add(scope)
                self.generic_visit(node)
        visitor = VariableVisitor(self, file_path)
        visitor.visit(tree)
    def get_file_dependencies(self, file_path: str) -> set:
        """获取指定文件的依赖"""
        return self.dependencies.get(file_path, set())
    def get_file_globals(self, file_path: str) -> set:
        """获取指定文件的全局变量"""
        return self.globals.get(file_path, set())
    def get_function_calls(self, function_name: str) -> set:
        """获取指定函数调用的其他函数"""
        return self.function_calls.get(function_name, set())
    def get_class_bases(self, class_name: str) -> list:
        """获取指定类的父类"""
        return self.class_hierarchy.get(class_name, [])
    def get_variable_usages(self, variable_name: str) -> set:
        """获取变量的所有使用位置"""
        return self.variable_usages.get(variable_name, set())
    def analyze_file(self, content: str, file_path: Union[str, Path]) -> None:
        """分析单个文件的所有关系
        Args:
            content (str): 文件内容
            file_path (Union[str, Path]): 文件路径
        Raises:
            SyntaxError: 当文件包含语法错误时
            Exception: 其他分析错误
        """
        if isinstance(file_path, Path):
            file_path = str(file_path)
        try:
            # 分析文件依赖
            self._analyze_dependencies(content, file_path)
            # 分析全局变量
            self._analyze_globals(content, file_path)
            # 分析函数调用关系
            self._analyze_function_calls(content, file_path)
            # 分析类继承关系
            self._analyze_class_hierarchy(content, file_path)
            # 分析变量使用位置
            self._analyze_variable_usage(content, file_path)
        except SyntaxError:
            print(f"语法错误: {file_path}")
        except Exception as e:
            print(f"分析错误 {file_path}: {str(e)}")
    def get_file_analysis(self, file_path: str) -> dict:
        """获取指定文件的完整分析结果"""
        return {
            'dependencies': self.get_file_dependencies(file_path),
            'globals': self.get_file_globals(file_path),
            'function_calls': {
                caller: self.get_function_calls(caller)
                for caller in self.function_calls
                if caller.startswith(f"{file_path}:")
            },
            'class_hierarchy': {
                class_name: self.get_class_bases(class_name)
                for class_name in self.class_hierarchy
                if class_name.startswith(f"{file_path}:")
            }
        } 
--- a/core/analyzers/init.py
+++ b/core/analyzers/init.py
@ -0,0 +1,3 @@
 from .context_analyzer import ContextAnalyzer
 __all__ = ['ContextAnalyzer'] 
--- a/core/analyzers/context_analyzer.py
+++ b/core/analyzers/context_analyzer.py
@ -1,47 +1,230 @@
-class ContextAnalyzer:
+from code_analyzer import CodeAnalyzer
-    def __init__(self):
+from typing import Dict, Set, List, Any, Optional, Union
-        self.file_dependencies = {}
+from pathlib import Path
-        self.global_variables = {}
+from concurrent.futures import ThreadPoolExecutor
-        self.function_calls = {}
+import logging
-        self.class_hierarchy = {}
+from functools import lru_cache
-        
+from tqdm import tqdm
-    def analyze_project_context(self, files):
+import json
-        """分析项目整体上下文"""
+from code_analyzer import AnalyzerConfig
-        for file_path in files:
+
-            self._analyze_file_context(file_path)
+logger = logging.getLogger(__name__)
-            
+
-    def _analyze_file_context(self, file_path):
+class ContextAnalyzer:
-        """分析单个文件的上下文"""
+    def __init__(self, config: Optional[AnalyzerConfig] = None):
-        with open(file_path, 'r') as f:
+        self.code_analyzer = CodeAnalyzer(config)
-            content = f.read()
+        self._cache: Dict[str, bool] = {}  # 缓存分析结果
-            
+        
-        # 分析文件依赖
+    def analyze_project_context(self, files: List[Union[str, Path]]) -> None:
-        self._analyze_dependencies(content, file_path)
+        """分析项目整体上下文
-        
+        
-        # 分析全局变量
+        Args:
-        self._analyze_globals(content, file_path)
+            files: 要分析的文件路径列表
-        
+            
-        # 分析函数调用关系
+        Example:
-        self._analyze_function_calls(content, file_path)
+            analyzer = ContextAnalyzer()
-        
+            analyzer.analyze_project_context(['file1.py', 'file2.py'])
-        # 分析类继承关系
+        """
-        self._analyze_class_hierarchy(content, file_path)
+        with ThreadPoolExecutor(max_workers=self.code_analyzer.config.max_workers) as executor:
-        
+            list(tqdm(
-    def get_call_graph(self, function_name):
+                executor.map(self._analyze_file_context, files),
-        """获取函数调用图"""
+                total=len(files),
-        call_graph = {
+                desc="分析项目文件"
-            'name': function_name,
+            ))
-            'calls': self.function_calls.get(function_name, []),
+            
-            'called_by': self._find_callers(function_name)
+    def _analyze_file_context(self, file_path: Union[str, Path]) -> None:
-        }
+        """分析单个文件的上下文"""
-        return call_graph
+        try:
-        
+            # 检查缓存
-    def get_variable_scope(self, variable_name):
+            if file_path in self._cache:
-        """获取变量作用域"""
+                return
-        if variable_name in self.global_variables:
+            
-            return {
+            # 检查文件是否存在
-                'type': 'global',
+            if not Path(file_path).exists():
-                'defined_in': self.global_variables[variable_name],
+                raise FileNotFoundError(f"文件不存在: {file_path}")
-                'used_in': self._find_variable_usage(variable_name)
+            
-            }
+            # 检查文件是否是 Python 文件
-        return None 
+            if not str(file_path).endswith('.py'):
                raise ValueError(f"不是 Python 文件: {file_path}")
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            self.code_analyzer.analyze_file(content, file_path)
            self._cache[file_path] = True
        except (UnicodeDecodeError, FileNotFoundError, ValueError) as e:
            logger.error(f"分析文件失败 {file_path}: {str(e)}")
        except Exception as e:
            logger.exception(f"未知错误 {file_path}: {str(e)}")
    @lru_cache(maxsize=128)
    def get_call_graph(self, function_name: str) -> Dict[str, Any]:
        """获取函数调用图
        Args:
            function_name: 要分析的函数名
        Returns:
            包含函数调用关系的字典，格式为:
            {
                'name': 函数名,
                'calls': 该函数调用的其他函数集合,
                'called_by': 调用该函数的其他函数集合
            }
        """
        calls = self.code_analyzer.get_function_calls(function_name)
        called_by = set()
        # 查找调用该函数的其他函数
        for caller, callees in self.code_analyzer.function_calls.items():
            if function_name in callees:
                called_by.add(caller)
        return {
            'name': function_name,
            'calls': calls,
            'called_by': called_by
        }
    @lru_cache(maxsize=128)
    def get_variable_scope(self, variable_name: str) -> Optional[Dict[str, Any]]:
        """获取变量作用域"""
        # 在所有文件中查找该变量的定义
        defined_in = set()
        for file_path, globals_set in self.code_analyzer.globals.items():
            if variable_name in globals_set:
                defined_in.add(file_path)
        if defined_in:
            # 获取变量的使用信息
            usage_info = self._find_variable_usage(variable_name)
            return {
                'type': 'global',
                'defined_in': list(defined_in),
                'used_in': usage_info
            }
        return None
    def _find_variable_usage(self, variable_name: str) -> Dict[str, Any]:
        """查找变量的使用位置"""
        usages = self.code_analyzer.get_variable_usages(variable_name)
        # 将使用位置按文件分组
        usage_by_file = {}
        for usage in usages:
            if ':' in usage:
                file_path, function_name = usage.split(':', 1)
                usage_by_file.setdefault(file_path, set()).add(function_name)
            else:
                # 模块级别的使用
                usage_by_file.setdefault(usage, set()).add('module_level')
        return {
            'files': list(usage_by_file.keys()),
            'details': {
                file_path: {
                    'module_level': 'module_level' in functions,
                    'functions': [f for f in functions if f != 'module_level']
                }
                for file_path, functions in usage_by_file.items()
            }
        }
    def get_file_context(self, file_path: str) -> dict:
        """获取文件的完整上下文信息"""
        return {
            'code_analysis': self.code_analyzer.get_file_analysis(file_path)
        }
    def get_project_analysis(self) -> dict:
        """获取项目整体分析结果"""
        return {
            'all_dependencies': self.code_analyzer.dependencies,
            'all_globals': self.code_analyzer.globals,
            'function_call_graph': self.code_analyzer.function_calls,
            'class_hierarchy_graph': self.code_analyzer.class_hierarchy,
            'variable_usage_map': self.code_analyzer.variable_usages
        }
    def clear_cache(self):
        """清除缓存"""
        self._cache.clear()
    def validate_analysis(self) -> List[str]:
        """验证分析结果的完整性和一致性
        Returns:
            发现的问题列表
        """
        issues = []
        # 检查函数调用的一致性
        for caller, callees in self.code_analyzer.function_calls.items():
            if ':' not in caller:
                issues.append(f"无效的调用者格式: {caller}")
        # 检查类继承的有效性
        for class_name, bases in self.code_analyzer.class_hierarchy.items():
            if ':' not in class_name:
                issues.append(f"无效的类名格式: {class_name}")
        # 检查变量使用的有效性
        for var_name, usages in self.code_analyzer.variable_usages.items():
            for usage in usages:
                if ':' not in usage and not usage.endswith('.py'):
                    issues.append(f"无效的变量使用位置: {usage}")
        return issues 
    def clear_analysis(self) -> None:
        """清理所有分析结果"""
        self._cache.clear()
        self.code_analyzer.dependencies.clear()
        self.code_analyzer.globals.clear()
        self.code_analyzer.function_calls.clear()
        self.code_analyzer.class_hierarchy.clear()
        self.code_analyzer.variable_usages.clear()
    def save_analysis(self, output_path: Union[str, Path]) -> None:
        """保存分析结果到文件"""
        result = {
            'dependencies': {k: list(v) for k, v in self.code_analyzer.dependencies.items()},
            'globals': {k: list(v) for k, v in self.code_analyzer.globals.items()},
            'function_calls': {k: list(v) for k, v in self.code_analyzer.function_calls.items()},
            'class_hierarchy': self.code_analyzer.class_hierarchy,
            'variable_usages': {k: list(v) for k, v in self.code_analyzer.variable_usages.items()}
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2)
    def load_analysis(self, input_path: Union[str, Path]) -> None:
        """从文件加载分析结果"""
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        self.code_analyzer.dependencies = {k: set(v) for k, v in data['dependencies'].items()}
        self.code_analyzer.globals = {k: set(v) for k, v in data['globals'].items()}
        self.code_analyzer.function_calls = {k: set(v) for k, v in data['function_calls'].items()}
        self.code_analyzer.class_hierarchy = data['class_hierarchy']
        self.code_analyzer.variable_usages = {k: set(v) for k, v in data['variable_usages'].items()} 
    def get_analysis_stats(self) -> Dict[str, Any]:
        """获取分析结果的统计信息"""
        return {
            'total_files': len(self.code_analyzer.dependencies),
            'total_functions': len({
                func.split(':')[1] 
                for func in self.code_analyzer.function_calls.keys()
            }),
            'total_classes': len(self.code_analyzer.class_hierarchy),
            'total_globals': sum(len(vars) for vars in self.code_analyzer.globals.values()),
            'dependencies_stats': {
                'total': sum(len(deps) for deps in self.code_analyzer.dependencies.values()),
                'by_file': {
                    file: len(deps) 
                    for file, deps in self.code_analyzer.dependencies.items()
                }
            }
        } 
--- a/examples/basic_usage.py
+++ b/examples/basic_usage.py
@ -0,0 +1,24 @@
 from core.analyzers.context_analyzer import ContextAnalyzer
 from code_analyzer import AnalyzerConfig
 def analyze_single_file():
    """Single-file analysis example.

    Analyzes ``example.py`` with the default configuration, prints the
    file-level results and the call graph of ``main``, then discards the
    analysis.
    """
    target = 'example.py'
    analyzer = ContextAnalyzer(AnalyzerConfig())

    # Run the analysis for the one file.
    analyzer.analyze_project_context([target])

    # File-level results.
    print("文件分析结果:", analyzer.get_file_context(target))

    # Call graph of the entry point.
    print("函数调用图:", analyzer.get_call_graph('main'))

    # Clean up.
    analyzer.clear_analysis()
 if __name__ == '__main__':
    analyze_single_file() 
--- a/requirements.txt
+++ b/requirements.txt
@ -1,10 +1 @@
-fastapi>=0.68.0
+tqdm>=4.65.0 
 uvicorn>=0.15.0
 python-multipart>=0.0.5
 openai>=1.0.0
 javalang>=0.13.0
 aiohttp>=3.8.1
 python-dotenv>=0.19.0
 pydantic>=2.0.0
 pydantic-settings>=2.0.0
 php-ast>=1.1.0 
--- a/tests/test_analyzers.py
+++ b/tests/test_analyzers.py
@ -0,0 +1,20 @@
 import unittest
 from pathlib import Path
 from core.analyzers.context_analyzer import ContextAnalyzer
 from code_analyzer import AnalyzerConfig
 class TestContextAnalyzer(unittest.TestCase):
    """End-to-end check of ContextAnalyzer on a small generated file."""

    def setUp(self):
        self.analyzer = ContextAnalyzer(AnalyzerConfig())

    def test_analyze_file(self):
        """Analyzing a file must produce its imports, globals and calls.

        The source is written to a temporary directory so the test does not
        depend on a fixture file being present. A missing file is only
        logged by the analyzer, which would let a bare "is not None" check
        pass without anything having been analyzed.
        """
        import tempfile  # local import keeps this block self-contained

        source = "import os\n\nLIMIT = 10\n\n\ndef main():\n    helper()\n"
        with tempfile.TemporaryDirectory() as tmp_dir:
            test_file = Path(tmp_dir) / 'simple.py'
            test_file.write_text(source, encoding='utf-8')
            self.analyzer.analyze_project_context([test_file])

        file_key = str(test_file)
        context = self.analyzer.get_file_context(file_key)
        self.assertIsNotNone(context)

        analysis = context['code_analysis']
        self.assertIn('os', analysis['dependencies'])
        self.assertIn('LIMIT', analysis['globals'])
        self.assertEqual(
            analysis['function_calls'],
            {f"{file_key}:main": {'helper'}},
        )

    def tearDown(self):
        self.analyzer.clear_analysis()
 if __name__ == '__main__':
    unittest.main()
		`@ -0,0 +1,3 @@`
							`from .context_analyzer import ContextAnalyzer`

							`__all__ = ['ContextAnalyzer']`