Spaces:
Sleeping
Sleeping
| """ | |
| pipeline.py — The main DiffContext pipeline. | |
| Connects all stages: parse -> graph -> diff -> blast radius -> score -> select -> compile | |
| Key fixes vs original: | |
| - expanded_deps is now passed into compute_impact_scores so those symbols | |
| are actually scored (previously they were collected but never used). | |
| - Graph is built with a single pass and cached import maps (see graph_builder). | |
| - warn_unknown_symbols is called before any scoring to surface typos early. | |
| """ | |
| import difflib | |
| import logging | |
| import os | |
| import time | |
| from typing import Dict, List, Optional | |
| from .models import ( | |
| RepositoryIndex, ImpactResult, ContextPackage, Symbol, | |
| ) | |
| from .parser import extract_all_symbols | |
| from .graph_builder import build_repository_graph | |
| from .impact.blast_radius import get_blast_radius | |
| from .impact.scoring import compute_impact_scores | |
| from .impact.traversal import expand_dependencies | |
| from .context.selector import select_context | |
| from .context.compiler import compile_context | |
| logger = logging.getLogger(__name__) | |
| def _suggest_similar_symbol(unknown_id: str, known_ids) -> Optional[str]: | |
| """Fuzzy-match an unknown symbol ID against known ones (typo correction).""" | |
| matches = difflib.get_close_matches(unknown_id, known_ids, n=1, cutoff=0.6) | |
| return matches[0] if matches else None | |
def index_repository(repo_path: str) -> RepositoryIndex:
    """
    Phase 1: parse the repository and build its dependency graph.

    `repo_path` is first normalised to an absolute path. The returned
    RepositoryIndex bundles the extracted symbols, the call graph, and the
    list handed to the parser as `broken_files` (the files, if any, that
    failed to parse due to a SyntaxError).
    """
    root = os.path.abspath(repo_path)
    # The parser receives this list via `broken_files=` and the same list
    # object is stored on the index afterwards.
    unparsable: List[str] = []
    return RepositoryIndex(
        symbols=extract_all_symbols(root, broken_files=unparsable),
        graph=build_repository_graph(root),
        broken_files=unparsable,
    )
def warn_unknown_symbols(index: RepositoryIndex, changed_symbols: List[str]) -> List[str]:
    """
    Warn about every entry of `changed_symbols` that the index does not know.

    A symbol counts as unknown when it is neither in `index.graph` nor in
    `index.symbols`. Each unknown ID is logged as a warning, with a
    "did you mean" hint when a similar ID exists among the keys of
    `index.symbols`. Returns the unknown IDs in their input order.
    """
    # Collect all unknown IDs first, then emit the warnings.
    missing: List[str] = []
    for candidate in changed_symbols:
        if candidate in index.graph or candidate in index.symbols:
            continue
        missing.append(candidate)

    for missing_id in missing:
        hint = _suggest_similar_symbol(missing_id, index.symbols.keys())
        if not hint:
            logger.warning(
                "\033[93m'%s' was not found in the index (typo, renamed, or "
                "deleted symbol?). Its blast radius will show as empty, "
                "which does NOT mean the real symbol has no impact.\033[0m",
                missing_id,
            )
            continue
        logger.warning(
            "\033[93m'%s' was not found in the index -- did you mean '%s'? "
            "Its blast radius will show as empty, which does NOT mean "
            "the real symbol has no impact.\033[0m",
            missing_id, hint,
        )
    return missing
def analyze_impact(
    index: RepositoryIndex,
    changed_symbols: List[str],
    max_depth: Optional[int] = 2,
) -> ImpactResult:
    """
    Phase 2: Given changed symbols, compute blast radius and impact scores.

    Steps:
      1. Warn about changed symbols that are missing from the index.
      2. Collect the blast radius of every changed symbol present in the graph.
      3. Expand dependencies, seeded with the changed symbols plus the
         collected blast radius, limited by `max_depth`.
      4. Score, passing the expanded dependencies as `expanded_deps` so those
         nodes are scored as well (previously they were computed and
         discarded).

    Returns an ImpactResult whose `blast_radius` holds each blast-radius
    entry once, in first-seen order, so the result is reproducible from one
    run to the next.
    """
    warn_unknown_symbols(index, changed_symbols)

    # -- Blast radius (reverse graph / callers) ----------------------------
    blast_radii: Dict[str, List[str]] = {}
    all_blast: List[str] = []
    for sym_id in changed_symbols:
        # Symbols missing from the graph are skipped here; they were already
        # reported by warn_unknown_symbols above.
        if sym_id in index.graph:
            radius = get_blast_radius(index.graph, sym_id)
            blast_radii[sym_id] = radius
            all_blast.extend(radius)

    # -- Forward dependency expansion --------------------------------------
    # Seed with changed + blast so we also pull in what callers depend on.
    deps = expand_dependencies(
        index.graph,
        changed_symbols + all_blast,
        max_depth=max_depth,
    )

    # -- Impact scoring ----------------------------------------------------
    # Pass the expanded deps so they get scored (previously ignored).
    scores = compute_impact_scores(
        index.graph,
        changed_symbols,
        blast_radii,
        expanded_deps=deps,
    )

    return ImpactResult(
        changed=changed_symbols,
        # dict.fromkeys de-duplicates while keeping first-seen order; going
        # through set() gave an arbitrary order that could differ between
        # processes because string hashing is randomized.
        blast_radius=list(dict.fromkeys(all_blast)),
        dependencies=deps,
        scores=scores,
    )
def compile(
    index: RepositoryIndex,
    impact: ImpactResult,
    max_tokens: Optional[int] = 10000,
    notes: Optional[str] = None,
) -> ContextPackage:
    """
    Phase 3: Select symbols and compile them into LLM context.

    First asks `select_context` which symbols to keep (it is given
    `max_tokens`) and which were dropped, then hands both lists to
    `compile_context` together with the graph, the index's broken files
    (reported as `skipped_files`) and the optional `notes`.

    NOTE: this function's name shadows the builtin `compile` inside this
    module; the name is kept because callers rely on it.
    """
    kept_ids, left_out = select_context(
        index.symbols,
        impact.scores,
        impact.changed,
        max_tokens=max_tokens,
    )
    package = compile_context(
        index.symbols,
        kept_ids,
        impact.changed,
        impact.scores,
        graph=index.graph,
        dropped_ids=left_out,
        skipped_files=index.broken_files,
        notes=notes,
    )
    return package
def run_pipeline(
    repo_path: str,
    changed_symbols: List[str],
    max_depth: Optional[int] = 2,
    max_tokens: Optional[int] = 10000,
) -> ContextPackage:
    """
    Run the whole pipeline in a single call.

    Chains the three phases: index the repository at `repo_path`, analyze
    the impact of `changed_symbols` (forwarding `max_depth`), then compile
    the result into a ContextPackage (forwarding `max_tokens`).
    """
    repo_index = index_repository(repo_path)
    impact_result = analyze_impact(repo_index, changed_symbols, max_depth=max_depth)
    # `compile` here is this module's phase-3 function, not the builtin.
    return compile(repo_index, impact_result, max_tokens=max_tokens)