# skill-engine/analyzer/github_fetcher.py
# Aman Githala
# Deploying AI Engine
# 677f286
import os
from collections import deque

from github import Auth
from github import Github
def fetch_user_data(username: str, token: str):
    """Summarise a GitHub user's most recently updated repositories.

    Authenticates with ``token``, looks up ``username`` and reads the first
    10 entries of the user's repository listing, ordered by last update
    (newest first).

    Args:
        username: GitHub login whose repositories are listed.
        token: Access token used to authenticate the API client.

    Returns:
        A list of dicts, one per repository, with the keys ``name``,
        ``description``, ``language``, ``updated_at``, ``created_at``,
        ``stars``, ``size`` and ``object`` (the repository object itself,
        kept so callers can make further API calls on it). If anything
        fails, the error is printed and an empty list is returned; partial
        results are discarded.
    """
    try:
        client = Github(auth=Auth.Token(token))
        account = client.get_user(username)
        # Cap the listing at the 10 most recently updated repositories.
        recent = account.get_repos(sort="updated", direction="desc")[:10]
        return [
            {
                "name": entry.name,
                "description": entry.description,
                "language": entry.language,
                "updated_at": entry.updated_at,
                "created_at": entry.created_at,
                "stars": entry.stargazers_count,
                "size": entry.size,
                "object": entry,
            }
            for entry in recent
        ]
    except Exception as e:
        # Best-effort boundary: report the problem and hand back nothing.
        print(f"Error fetching GitHub data: {e}")
        return []
# Directory names that are never descended into (dependency / build trees).
_SKIPPED_DIR_NAMES = frozenset(
    {"node_modules", "venv", ".git", "build", "dist", "vendor", "ios", "android"}
)


def fetch_file_content(
    repo_object,
    extension_filter_list,
    *,
    max_files=3,
    max_depth=3,
    max_dirs_scanned=20,
):
    """Collect the decoded text of a few matching files from a repository.

    Walks the repository breadth-first from its root, calling
    ``repo_object.get_contents(path)`` once per directory, and gathers files
    whose path ends with one of the given extensions. The walk is strictly
    bounded so that large repositories cannot trigger an unbounded number of
    requests.

    Args:
        repo_object: Object exposing ``get_contents(path)``, which returns an
            iterable of entries with ``type``, ``path``, ``name`` and (for
            files) ``decoded_content`` attributes.
        extension_filter_list: Path suffixes to accept, e.g. ``[".py"]``.
            An empty or ``None`` filter matches nothing.
        max_files: Stop as soon as this many files have been collected.
        max_depth: Deepest directory level to list; the root is level 0.
        max_dirs_scanned: Maximum number of directories to list in total.

    Returns:
        A list of file contents as ``str``, in discovery order. Only files
        that decode as UTF-8 and are longer than 50 and shorter than 100,000
        characters are kept. Directories that cannot be listed and files that
        cannot be read or decoded are skipped. An unexpected error ends the
        walk early, is printed, and whatever was collected so far is returned.
    """
    files_content = []
    suffixes = tuple(extension_filter_list or ())
    if not suffixes or max_files <= 0:
        # Nothing can match, so do not spend any requests walking the tree.
        return files_content

    # Queue of (path, depth) pairs awaiting a listing; FIFO gives BFS order.
    pending = deque([("", 0)])
    scanned_count = 0
    try:
        # Checking the budget before taking the next directory makes the
        # limit exact: at most max_dirs_scanned listings are requested.
        while pending and scanned_count < max_dirs_scanned:
            current_path, depth = pending.popleft()
            scanned_count += 1
            try:
                contents = repo_object.get_contents(current_path)
            except Exception:
                # Unlistable directory (e.g. permission denied or empty).
                # KeyboardInterrupt/SystemExit are not Exception subclasses
                # and still propagate.
                continue

            for entry in contents:
                if entry.type == "dir":
                    # Prune here rather than after dequeuing: directories
                    # below max_depth would never be listed anyway.
                    if depth < max_depth and entry.name not in _SKIPPED_DIR_NAMES:
                        pending.append((entry.path, depth + 1))
                    continue
                if entry.type != "file" or not entry.path.endswith(suffixes):
                    continue
                try:
                    decoded = entry.decoded_content.decode("utf-8")
                except Exception:
                    # Unreadable or non-UTF-8 file: not useful as source text.
                    continue
                # Drop near-empty files and very large ones.
                if 50 < len(decoded) < 100000:
                    files_content.append(decoded)
                    if len(files_content) >= max_files:
                        return files_content
    except Exception as e:
        # Best-effort: keep what was gathered, but do not hide the failure.
        print(f"Error scanning repository contents: {e}")
    return files_content