# Mirror-Flowers/core/analyzers/context_analyzer.py

from typing import Dict, List, Any, Optional, Union
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
import json
import logging

from tqdm import tqdm

from code_analyzer import CodeAnalyzer, AnalyzerConfig

logger = logging.getLogger(__name__)

class ContextAnalyzer:
    def __init__(self, config: Optional[AnalyzerConfig] = None):
        self.code_analyzer = CodeAnalyzer(config)
        self._cache: Dict[str, bool] = {}  # cache of per-file analysis results

    def analyze_project_context(self, files: List[Union[str, Path]]) -> None:
        """Analyze the overall project context.

        Args:
            files: List of file paths to analyze.

        Example:
            analyzer = ContextAnalyzer()
            analyzer.analyze_project_context(['file1.py', 'file2.py'])
        """
        with ThreadPoolExecutor(max_workers=self.code_analyzer.config.max_workers) as executor:
            # Wrap the map in tqdm for a progress bar; list() drains the
            # iterator so every file is analyzed before the executor exits.
            list(tqdm(
                executor.map(self._analyze_file_context, files),
                total=len(files),
                desc="Analyzing project files"
            ))
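
    # A slightly fuller sketch than the docstring example (illustrative;
    # assumes a ./src directory containing Python sources):
    #
    #     analyzer = ContextAnalyzer()
    #     analyzer.analyze_project_context(list(Path("./src").rglob("*.py")))
    #     print(analyzer.get_analysis_stats())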

    def _analyze_file_context(self, file_path: Union[str, Path]) -> None:
        """Analyze the context of a single file."""
        # Normalize to str so that str and Path arguments share a cache entry.
        key = str(file_path)
        try:
            # Skip files that were already analyzed
            if key in self._cache:
                return
            # Check that the file exists
            if not Path(file_path).exists():
                raise FileNotFoundError(f"File does not exist: {file_path}")
            # Check that the file is a Python file
            if not key.endswith('.py'):
                raise ValueError(f"Not a Python file: {file_path}")
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            self.code_analyzer.analyze_file(content, file_path)
            self._cache[key] = True
        except (UnicodeDecodeError, FileNotFoundError, ValueError) as e:
            logger.error(f"Failed to analyze file {file_path}: {e}")
        except Exception:
            logger.exception(f"Unexpected error while analyzing {file_path}")

    # Note: lru_cache on a method keys entries on (self, function_name) and
    # keeps the instance alive for as long as the cache holds entries.
    @lru_cache(maxsize=128)
    def get_call_graph(self, function_name: str) -> Dict[str, Any]:
        """Get the call graph for a function.

        Args:
            function_name: Name of the function to analyze.

        Returns:
            A dict describing the function's call relationships:
            {
                'name': the function name,
                'calls': set of functions this function calls,
                'called_by': set of functions that call this function
            }
        """
        calls = self.code_analyzer.get_function_calls(function_name)
        called_by = set()
        # Find the functions that call this function
        for caller, callees in self.code_analyzer.function_calls.items():
            if function_name in callees:
                called_by.add(caller)
        return {
            'name': function_name,
            'calls': calls,
            'called_by': called_by
        }
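
    # Illustrative example (hypothetical names; the exact caller-key format,
    # e.g. "app.py:main", depends on how CodeAnalyzer records callers):
    #
    #     graph = analyzer.get_call_graph("process_data")
    #     # {'name': 'process_data',
    #     #  'calls': {'validate', 'transform'},
    #     #  'called_by': {'app.py:main'}}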

    @lru_cache(maxsize=128)
    def get_variable_scope(self, variable_name: str) -> Optional[Dict[str, Any]]:
        """Get the scope of a variable."""
        # Look for definitions of the variable across all analyzed files
        defined_in = set()
        for file_path, globals_set in self.code_analyzer.globals.items():
            if variable_name in globals_set:
                defined_in.add(file_path)
        if defined_in:
            # Collect usage information for the variable
            usage_info = self._find_variable_usage(variable_name)
            return {
                'type': 'global',
                'defined_in': list(defined_in),
                'used_in': usage_info
            }
        return None
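
    # Illustrative example (hypothetical variable and paths):
    #
    #     scope = analyzer.get_variable_scope("CONFIG")
    #     # {'type': 'global',
    #     #  'defined_in': ['settings.py'],
    #     #  'used_in': {'files': [...], 'details': {...}}}
    #     # Returns None when the variable has no module-level definition.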

    def _find_variable_usage(self, variable_name: str) -> Dict[str, Any]:
        """Find where a variable is used."""
        usages = self.code_analyzer.get_variable_usages(variable_name)
        # Group usage locations by file; usages inside functions arrive as
        # "file:function", module-level usages as a bare file path.
        usage_by_file = {}
        for usage in usages:
            if ':' in usage:
                file_path, function_name = usage.split(':', 1)
                usage_by_file.setdefault(file_path, set()).add(function_name)
            else:
                # Module-level usage
                usage_by_file.setdefault(usage, set()).add('module_level')
        return {
            'files': list(usage_by_file.keys()),
            'details': {
                file_path: {
                    'module_level': 'module_level' in functions,
                    'functions': [f for f in functions if f != 'module_level']
                }
                for file_path, functions in usage_by_file.items()
            }
        }

    def get_file_context(self, file_path: str) -> dict:
        """Get the complete context information for a file."""
        return {
            'code_analysis': self.code_analyzer.get_file_analysis(file_path)
        }

    def get_project_analysis(self) -> dict:
        """Get the project-wide analysis results.

        Note: the returned values are live references to the analyzer's
        internal structures, not copies.
        """
        return {
            'all_dependencies': self.code_analyzer.dependencies,
            'all_globals': self.code_analyzer.globals,
            'function_call_graph': self.code_analyzer.function_calls,
            'class_hierarchy_graph': self.code_analyzer.class_hierarchy,
            'variable_usage_map': self.code_analyzer.variable_usages
        }
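
    # Example (illustrative; copy the returned mappings before mutating them):
    #
    #     project = analyzer.get_project_analysis()
    #     for file, deps in project['all_dependencies'].items():
    #         print(file, sorted(deps))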

    def clear_cache(self):
        """Clear the caches."""
        self._cache.clear()
        # Also drop the memoized results so stale call graphs and variable
        # scopes are not served after a re-analysis.
        self.get_call_graph.cache_clear()
        self.get_variable_scope.cache_clear()

    def validate_analysis(self) -> List[str]:
        """Validate the completeness and consistency of the analysis results.

        Returns:
            A list of issues found.
        """
        issues = []
        # Check the consistency of function-call records
        for caller in self.code_analyzer.function_calls:
            if ':' not in caller:
                issues.append(f"Invalid caller format: {caller}")
        # Check the validity of class-inheritance records
        for class_name in self.code_analyzer.class_hierarchy:
            if ':' not in class_name:
                issues.append(f"Invalid class name format: {class_name}")
        # Check the validity of variable-usage records
        for usages in self.code_analyzer.variable_usages.values():
            for usage in usages:
                if ':' not in usage and not usage.endswith('.py'):
                    issues.append(f"Invalid variable usage location: {usage}")
        return issues
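
    # Example (illustrative):
    #
    #     for issue in analyzer.validate_analysis():
    #         logger.warning("analysis issue: %s", issue)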

    def clear_analysis(self) -> None:
        """Clear all analysis results."""
        # Reuse clear_cache() so the lru_cache-backed methods are reset too.
        self.clear_cache()
        self.code_analyzer.dependencies.clear()
        self.code_analyzer.globals.clear()
        self.code_analyzer.function_calls.clear()
        self.code_analyzer.class_hierarchy.clear()
        self.code_analyzer.variable_usages.clear()

    def save_analysis(self, output_path: Union[str, Path]) -> None:
        """Save the analysis results to a file."""
        # Sets are not JSON-serializable, so convert them to lists first.
        result = {
            'dependencies': {k: list(v) for k, v in self.code_analyzer.dependencies.items()},
            'globals': {k: list(v) for k, v in self.code_analyzer.globals.items()},
            'function_calls': {k: list(v) for k, v in self.code_analyzer.function_calls.items()},
            'class_hierarchy': self.code_analyzer.class_hierarchy,
            'variable_usages': {k: list(v) for k, v in self.code_analyzer.variable_usages.items()}
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2)

    def load_analysis(self, input_path: Union[str, Path]) -> None:
        """Load analysis results from a file."""
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Restore the set-valued fields that were serialized as lists.
        self.code_analyzer.dependencies = {k: set(v) for k, v in data['dependencies'].items()}
        self.code_analyzer.globals = {k: set(v) for k, v in data['globals'].items()}
        self.code_analyzer.function_calls = {k: set(v) for k, v in data['function_calls'].items()}
        self.code_analyzer.class_hierarchy = data['class_hierarchy']
        self.code_analyzer.variable_usages = {k: set(v) for k, v in data['variable_usages'].items()}
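
    # Round-trip sketch (illustrative; assumes write access to analysis.json):
    #
    #     analyzer.save_analysis("analysis.json")
    #     fresh = ContextAnalyzer()
    #     fresh.load_analysis("analysis.json")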

    def get_analysis_stats(self) -> Dict[str, Any]:
        """Get summary statistics for the analysis results."""
        return {
            'total_files': len(self.code_analyzer.dependencies),
            'total_functions': len({
                func.split(':', 1)[1]
                for func in self.code_analyzer.function_calls
                if ':' in func  # skip malformed keys instead of raising IndexError
            }),
            'total_classes': len(self.code_analyzer.class_hierarchy),
            'total_globals': sum(len(names) for names in self.code_analyzer.globals.values()),
            'dependencies_stats': {
                'total': sum(len(deps) for deps in self.code_analyzer.dependencies.values()),
                'by_file': {
                    file: len(deps)
                    for file, deps in self.code_analyzer.dependencies.items()
                }
            }
        }
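

# Minimal demo sketch (illustrative; assumes there are .py files under the
# current working directory):
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    demo = ContextAnalyzer()
    demo.analyze_project_context(sorted(Path(".").rglob("*.py")))
    for problem in demo.validate_analysis():
        logger.warning("analysis issue: %s", problem)
    print(json.dumps(demo.get_analysis_stats(), indent=2))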