2026-02-08 04:53:31 -08:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
"""
|
2026-02-09 00:21:54 -08:00
|
|
|
|
Lists the longest and shortest code files in the project, and counts duplicated function names across files. Useful for identifying potential refactoring targets and enforcing code size guidelines.
|
2026-02-08 04:53:31 -08:00
|
|
|
|
Threshold can be set to warn about files longer or shorter than a certain number of lines.
|
2026-02-09 11:34:18 -08:00
|
|
|
|
|
|
|
|
|
|
CI mode (--compare-to): Only warns about files that grew past threshold compared to a base ref.
|
|
|
|
|
|
Use --strict to exit non-zero on violations for CI gating.
|
2026-02-09 13:41:36 -08:00
|
|
|
|
|
|
|
|
|
|
GitHub Actions: when GITHUB_ACTIONS=true, emits ::error annotations on flagged files
|
|
|
|
|
|
and writes a Markdown job summary to $GITHUB_STEP_SUMMARY (if set).
|
2026-02-08 04:53:31 -08:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
import re
|
2026-02-09 11:34:18 -08:00
|
|
|
|
import sys
|
|
|
|
|
|
import subprocess
|
2026-02-08 04:53:31 -08:00
|
|
|
|
import argparse
|
|
|
|
|
|
from pathlib import Path
|
2026-02-09 11:34:18 -08:00
|
|
|
|
from typing import List, Tuple, Dict, Set, Optional
|
2026-02-08 04:53:31 -08:00
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
|
|
|
|
|
|
# File extensions to consider as code files.
# Compared against Path.suffix.lower() in find_code_files, so entries here
# must be lowercase (matching is effectively case-insensitive).
CODE_EXTENSIONS = {
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".mjs",
    ".cjs",  # TypeScript/JavaScript
    ".swift",  # macOS/iOS
    ".kt",
    ".java",  # Android
    ".py",
    ".sh",  # Scripts
}
|
|
|
|
|
|
|
|
|
|
|
|
# Directories to skip.
# Matched by bare directory name at any depth: find_code_files prunes these
# from os.walk's dirnames so the walker never descends into them.
SKIP_DIRS = {
    "node_modules",
    ".git",
    "dist",
    "build",
    "coverage",
    "__pycache__",
    ".turbo",
    "out",
    ".worktrees",
    "vendor",
    "Pods",
    "DerivedData",
    ".gradle",
    ".idea",
    "Swabble",  # Separate Swift package
    "skills",  # Standalone skill scripts
    ".pi",  # Pi editor extensions
}
|
|
|
|
|
|
|
|
|
|
|
|
# Filename patterns to skip in short-file warnings (barrel exports, stubs).
# Exact filename matches:
SKIP_SHORT_PATTERNS = {
    "index.js",
    "index.ts",
    "postinstall.js",
}

# Filename suffixes likewise exempt from short-file warnings
# (presumably thin CLI entry-point shims — confirm against usage further down).
SKIP_SHORT_SUFFIXES = ("-cli.ts",)
|
2026-02-08 04:53:31 -08:00
|
|
|
|
|
2026-02-09 13:18:51 -08:00
|
|
|
|
# Function names to skip in duplicate detection.
# Only list names so generic they're expected to appear independently in many modules.
# Do NOT use prefix-based skipping — it hides real duplication (e.g. formatDuration,
# stripPrefix, parseConfig are specific enough to flag).
SKIP_DUPLICATE_FUNCTIONS = {
    # Lifecycle / framework plumbing
    "main",
    "init",
    "setup",
    "teardown",
    "cleanup",
    "dispose",
    "destroy",
    "open",
    "close",
    "connect",
    "disconnect",
    "execute",
    "run",
    "start",
    "stop",
    "render",
    "update",
    "refresh",
    "reset",
    "clear",
    "flush",
    # Too-short / too-generic identifiers
    "text",
    "json",
    "pad",
    "mask",
    "digest",
    "confirm",
    "intro",
    "outro",
    "exists",
    "send",
    "receive",
    "listen",
    "log",
    "warn",
    "error",
    "info",
    "help",
    "version",
    "config",
    "configure",
    "describe",
    "test",
    "action",
}

# Test-file name suffixes excluded from duplicate detection
# (checked with str.endswith in find_duplicate_functions).
SKIP_DUPLICATE_FILE_PATTERNS = (".test.ts", ".test.tsx", ".spec.ts")
|
2026-02-08 04:53:31 -08:00
|
|
|
|
|
|
|
|
|
|
# Known packages in the monorepo.
# Top-level directory names recognized by get_package; files under any other
# top-level directory are attributed to "root".
PACKAGES = {"src", "apps", "extensions", "packages", "scripts", "ui", "test", "docs"}
|
2026-02-08 04:53:31 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_package(file_path: Path, root_dir: Path) -> str:
    """Get the package name for a file, or 'root' if at top level.

    Files outside root_dir (relative_to raises ValueError) also map to 'root'.
    """
    try:
        top_level = file_path.relative_to(root_dir).parts[0]
    except (ValueError, IndexError):
        # Outside root_dir, or the path *is* root_dir (no parts at all).
        return "root"
    return top_level if top_level in PACKAGES else "root"
|
2026-02-08 04:53:31 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def count_lines(file_path: Path) -> int:
    """Count the number of lines in a file.

    Returns 0 for files that cannot be opened; undecodable bytes are ignored
    rather than raising.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as stream:
            total = 0
            for _ in stream:
                total += 1
        return total
    except Exception:
        return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_code_files(root_dir: Path) -> List[Tuple[Path, int]]:
    """Find all code files and their line counts.

    Walks root_dir recursively, skipping SKIP_DIRS, and returns
    (path, line_count) pairs for every file whose extension is in
    CODE_EXTENSIONS.
    """
    results: List[Tuple[Path, int]] = []

    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Prune in place so os.walk never descends into skipped directories.
        dirnames[:] = [name for name in dirnames if name not in SKIP_DIRS]

        for name in filenames:
            candidate = Path(dirpath) / name
            if candidate.suffix.lower() not in CODE_EXTENSIONS:
                continue
            results.append((candidate, count_lines(candidate)))

    return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Regex patterns for TypeScript functions (exported and internal).
# MULTILINE so ^ anchors each pattern to the start of a line — indented
# (nested/class) definitions are deliberately not captured.
TS_FUNCTION_PATTERNS = [
    # export function name(...) or function name(...)
    re.compile(r"^(?:export\s+)?(?:async\s+)?function\s+(\w+)", re.MULTILINE),
    # export const name = or const name =
    # Arrow functions only: "(args) =>" or a single bare parameter "x =>".
    re.compile(
        r"^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>", re.MULTILINE
    ),
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_functions(file_path: Path) -> Set[str]:
    """Extract function names from a TypeScript file.

    Non-TypeScript files and unreadable files yield an empty set.
    """
    suffix = file_path.suffix.lower()
    if suffix != ".ts" and suffix != ".tsx":
        return set()

    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
            source = handle.read()
    except Exception:
        return set()

    return extract_functions_from_content(source)
|
2026-02-08 04:53:31 -08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-09 19:57:13 -08:00
|
|
|
|
def find_duplicate_functions(
    files: List[Tuple[Path, int]], root_dir: Path
) -> Dict[str, List[Path]]:
    """Find function names that appear in multiple files.

    Test files (SKIP_DUPLICATE_FILE_PATTERNS) and deliberately-generic names
    (SKIP_DUPLICATE_FUNCTIONS) are excluded. A name duplicated only across
    distinct independent packages — extensions/<name>, apps/<name>, ui/ —
    is ignored: those are treated as separate codebases, so the same name in
    extensions/telegram and extensions/discord is expected, not duplication.

    Returns a mapping of function name -> list of files containing it
    (only names with 2+ qualifying occurrences).
    """

    # Hoisted out of the per-name loop: the original defined this closure on
    # every iteration, rebuilding it for each candidate duplicate.
    def get_independent_package(p: Path) -> Optional[str]:
        """Return the independent-package key for a path, or None for core code."""
        try:
            parts = p.relative_to(root_dir).parts
        except ValueError:
            return None
        # extensions/<name> and apps/<name> are each independent.
        if len(parts) >= 2 and parts[0] in ("extensions", "apps"):
            return f"{parts[0]}/{parts[1]}"
        # ui/ is a single independent package (browser frontend).
        if len(parts) >= 1 and parts[0] == "ui":
            return "ui"
        return None

    function_locations: Dict[str, List[Path]] = defaultdict(list)

    for file_path, _ in files:
        # Skip test files for duplicate detection.
        if any(file_path.name.endswith(pat) for pat in SKIP_DUPLICATE_FILE_PATTERNS):
            continue

        for func in extract_functions(file_path):
            # Skip known common function names.
            if func in SKIP_DUPLICATE_FUNCTIONS:
                continue
            function_locations[func].append(file_path)

    result: Dict[str, List[Path]] = {}
    for name, paths in function_locations.items():
        if len(paths) < 2:
            continue

        package_keys = set()
        has_core = False
        for p in paths:
            pkg = get_independent_package(p)
            if pkg:
                package_keys.add(pkg)
            else:
                has_core = True

        # Skip if ALL instances live in *different* independent packages
        # (no core code involved, and no package appears twice).
        if not has_core and len(package_keys) == len(paths):
            continue
        result[name] = paths

    return result
|
2026-02-08 04:53:31 -08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-09 11:34:18 -08:00
|
|
|
|
def validate_git_ref(root_dir: Path, ref: str) -> bool:
    """Return True if `ref` resolves to a valid git object inside root_dir.

    Any failure to run git (missing binary, bad cwd, etc.) yields False.
    """
    try:
        proc = subprocess.run(
            ["git", "rev-parse", "--verify", ref],
            capture_output=True,
            cwd=root_dir,
            encoding="utf-8",
        )
    except Exception:
        return False
    return proc.returncode == 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_file_content_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[str]:
    """Get content of a file at a specific git ref. Returns None if file doesn't exist at ref."""
    try:
        # Git expects forward-slash paths regardless of platform.
        git_path = str(file_path.relative_to(root_dir)).replace("\\", "/")
        proc = subprocess.run(
            ["git", "show", f"{ref}:{git_path}"],
            capture_output=True,
            cwd=root_dir,
            encoding="utf-8",
            errors="ignore",
        )
        if proc.returncode == 0:
            return proc.stdout

        stderr = proc.stderr.strip()
        # "does not exist" / "exists on disk, but not in" mean the file simply
        # isn't present at that ref — an expected, quiet outcome.
        if "does not exist" in stderr or "exists on disk" in stderr:
            return None
        # Anything else (bad ref, broken git) is a genuine failure worth noting.
        if stderr:
            print(f"⚠️ git show error for {git_path}: {stderr}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"⚠️ failed to read {file_path} at {ref}: {e}", file=sys.stderr)
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_line_count_at_ref(file_path: Path, root_dir: Path, ref: str) -> Optional[int]:
    """Get line count of a file at a specific git ref. Returns None if file doesn't exist at ref."""
    content = get_file_content_at_ref(file_path, root_dir, ref)
    return None if content is None else len(content.splitlines())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_functions_from_content(content: str) -> Set[str]:
    """Extract function names from TypeScript content string.

    Applies every pattern in TS_FUNCTION_PATTERNS and collects group 1
    (the function name) of each match.
    """
    names: Set[str] = set()
    for pattern in TS_FUNCTION_PATTERNS:
        names.update(match.group(1) for match in pattern.finditer(content))
    return names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_changed_files(root_dir: Path, compare_ref: str) -> Set[str]:
    """Get set of files changed between compare_ref and HEAD (relative paths with forward slashes)."""
    try:
        proc = subprocess.run(
            ["git", "diff", "--name-only", compare_ref, "HEAD"],
            capture_output=True,
            cwd=root_dir,
            encoding="utf-8",
            errors="ignore",
        )
    except Exception:
        return set()
    if proc.returncode != 0:
        # Treat diff failures (bad ref, not a repo) as "nothing changed".
        return set()
    changed: Set[str] = set()
    for raw in proc.stdout.splitlines():
        name = raw.strip()
        if name:
            changed.add(name)
    return changed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_duplicate_regressions(
    files: List[Tuple[Path, int]],
    root_dir: Path,
    compare_ref: str,
) -> Dict[str, List[Path]]:
    """
    Find new duplicate function names that didn't exist at the base ref.
    Only checks functions in files that changed to keep CI fast.
    Returns dict of function_name -> list of current file paths, only for
    duplicates that are new (weren't duplicated at compare_ref).
    """
    # Build current duplicate map
    current_dupes = find_duplicate_functions(files, root_dir)
    if not current_dupes:
        return {}

    # Get changed files to scope the comparison
    changed_files = get_changed_files(root_dir, compare_ref)
    if not changed_files:
        return {}  # Nothing changed, no new duplicates possible

    # Only check duplicate functions that involve at least one changed file
    relevant_dupes: Dict[str, List[Path]] = {}
    for func_name, paths in current_dupes.items():
        # Backslash→slash so Windows paths match git's forward-slash output.
        involves_changed = any(
            str(p.relative_to(root_dir)).replace("\\", "/") in changed_files
            for p in paths
        )
        if involves_changed:
            relevant_dupes[func_name] = paths

    if not relevant_dupes:
        return {}

    # For relevant duplicates, check if they were already duplicated at base ref
    # Only need to read base versions of files involved in these duplicates
    files_to_check: Set[Path] = set()
    for paths in relevant_dupes.values():
        files_to_check.update(paths)

    # Re-run the same extraction pipeline against the base-ref file contents,
    # applying the same generic-name skip list so both sides are comparable.
    base_function_locations: Dict[str, List[Path]] = defaultdict(list)
    for file_path in files_to_check:
        if file_path.suffix.lower() not in {".ts", ".tsx"}:
            continue
        content = get_file_content_at_ref(file_path, root_dir, compare_ref)
        if content is None:
            # File didn't exist at the base ref — it can't contribute there.
            continue
        functions = extract_functions_from_content(content)
        for func in functions:
            if func in SKIP_DUPLICATE_FUNCTIONS:
                continue
            base_function_locations[func].append(file_path)

    base_dupes = {
        name for name, paths in base_function_locations.items() if len(paths) > 1
    }

    # Return only new duplicates
    return {
        name: paths for name, paths in relevant_dupes.items() if name not in base_dupes
    }
|
2026-02-09 11:34:18 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_threshold_regressions(
    files: List[Tuple[Path, int]],
    root_dir: Path,
    compare_ref: str,
    threshold: int,
) -> Tuple[List[Tuple[Path, int, Optional[int]]], List[Tuple[Path, int, int]]]:
    """
    Find files that crossed the threshold or grew while already over it.

    Returns two lists:
    - crossed: (path, current_lines, base_lines) for files that newly crossed the threshold
    - grew: (path, current_lines, base_lines) for files already over threshold that got larger
    """
    crossed: List[Tuple[Path, int, Optional[int]]] = []
    grew: List[Tuple[Path, int, int]] = []

    for path, current in files:
        if current < threshold:
            # Under the limit now — never a regression.
            continue

        previous = get_line_count_at_ref(path, root_dir, compare_ref)
        if previous is None or previous < threshold:
            # Brand-new file, or it was under the limit at the base ref.
            crossed.append((path, current, previous))
        elif current > previous:
            # Was already over the limit and kept growing.
            grew.append((path, current, previous))

    return crossed, grew
|
2026-02-09 11:34:18 -08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-09 13:41:36 -08:00
|
|
|
|
def _write_github_summary(
    summary_path: str,
    crossed: List[Tuple[Path, int, Optional[int]]],
    grew: List[Tuple[Path, int, int]],
    new_dupes: Dict[str, List[Path]],
    root_dir: Path,
    threshold: int,
    compare_ref: str,
) -> None:
    """Write a Markdown job summary to $GITHUB_STEP_SUMMARY.

    Appends (does not overwrite) to the file at summary_path, emitting one
    Markdown table per violation category (threshold crossings, growth of
    already-large files, new duplicate function names) followed by a
    collapsible "How to fix" section. Write failures are reported to stderr
    but never raised.
    """
    lines: List[str] = []
    lines.append("## Code Size Check Failed\n")
    lines.append("> ⚠️ **DO NOT trash the code base!** The goal is maintainability.\n")

    if crossed:
        lines.append(
            f"### {len(crossed)} file(s) crossed the {threshold}-line threshold\n"
        )
        lines.append("| File | Before | After | Delta |")
        lines.append("|------|-------:|------:|------:|")
        for file_path, current, base in crossed:
            # Forward slashes so paths render consistently on all platforms.
            rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
            # base is None for files that didn't exist at the compare ref.
            before = f"{base:,}" if base is not None else "new"
            lines.append(
                f"| `{rel}` | {before} | {current:,} | +{current - (base or 0):,} |"
            )
        lines.append("")

    if grew:
        lines.append(f"### {len(grew)} already-large file(s) grew larger\n")
        lines.append("| File | Before | After | Delta |")
        lines.append("|------|-------:|------:|------:|")
        for file_path, current, base in grew:
            rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
            lines.append(f"| `{rel}` | {base:,} | {current:,} | +{current - base:,} |")
        lines.append("")

    if new_dupes:
        lines.append(f"### {len(new_dupes)} new duplicate function name(s)\n")
        lines.append("| Function | Files |")
        lines.append("|----------|-------|")
        for func_name in sorted(new_dupes.keys()):
            paths = new_dupes[func_name]
            # chr(92) is a backslash: f-string expressions couldn't contain a
            # literal backslash before Python 3.12.
            file_list = ", ".join(
                f"`{str(p.relative_to(root_dir)).replace(chr(92), '/')}`" for p in paths
            )
            lines.append(f"| `{func_name}` | {file_list} |")
        lines.append("")

    lines.append("<details><summary>How to fix</summary>\n")
    lines.append("- Split large files into smaller, focused modules")
    lines.append("- Extract helpers, types, or constants into separate files")
    lines.append("- See `AGENTS.md` for guidelines (~500–700 LOC target)")
    lines.append(f"- This check compares your PR against `{compare_ref}`")
    lines.append(
        f"- Only code files are checked: {', '.join(f'`{e}`' for e in sorted(CODE_EXTENSIONS))}"
    )
    lines.append("- Docs, test names, and config files are **not** affected")
    lines.append("\n</details>")

    try:
        # Append mode: other workflow steps may have written their own summary.
        with open(summary_path, "a", encoding="utf-8") as f:
            f.write("\n".join(lines) + "\n")
    except Exception as e:
        print(f"⚠️ Failed to write job summary: {e}", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
def main():
|
|
|
|
|
|
parser = argparse.ArgumentParser(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
description="Analyze code files: list longest/shortest files, find duplicate function names"
|
2026-02-08 04:53:31 -08:00
|
|
|
|
)
|
|
|
|
|
|
parser.add_argument(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
"-t",
|
|
|
|
|
|
"--threshold",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
type=int,
|
|
|
|
|
|
default=1000,
|
2026-02-09 19:57:13 -08:00
|
|
|
|
help="Warn about files longer than this many lines (default: 1000)",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
)
|
|
|
|
|
|
parser.add_argument(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
"--min-threshold",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
type=int,
|
|
|
|
|
|
default=10,
|
2026-02-09 19:57:13 -08:00
|
|
|
|
help="Warn about files shorter than this many lines (default: 10)",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
)
|
|
|
|
|
|
parser.add_argument(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
"-n",
|
|
|
|
|
|
"--top",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
type=int,
|
|
|
|
|
|
default=20,
|
2026-02-09 19:57:13 -08:00
|
|
|
|
help="Show top N longest files (default: 20)",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
)
|
|
|
|
|
|
parser.add_argument(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
"-b",
|
|
|
|
|
|
"--bottom",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
type=int,
|
|
|
|
|
|
default=10,
|
2026-02-09 19:57:13 -08:00
|
|
|
|
help="Show bottom N shortest files (default: 10)",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
)
|
|
|
|
|
|
parser.add_argument(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
"-d",
|
|
|
|
|
|
"--directory",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
type=str,
|
2026-02-09 19:57:13 -08:00
|
|
|
|
default=".",
|
|
|
|
|
|
help="Directory to scan (default: current directory)",
|
2026-02-08 04:53:31 -08:00
|
|
|
|
)
|
2026-02-09 11:34:18 -08:00
|
|
|
|
parser.add_argument(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
"--compare-to",
|
2026-02-09 11:34:18 -08:00
|
|
|
|
type=str,
|
|
|
|
|
|
default=None,
|
2026-02-09 19:57:13 -08:00
|
|
|
|
help="Git ref to compare against (e.g., origin/main). Only warn about files that grew past threshold.",
|
2026-02-09 11:34:18 -08:00
|
|
|
|
)
|
|
|
|
|
|
parser.add_argument(
|
2026-02-09 19:57:13 -08:00
|
|
|
|
"--strict",
|
|
|
|
|
|
action="store_true",
|
|
|
|
|
|
help="Exit with non-zero status if any violations found (for CI)",
|
2026-02-09 11:34:18 -08:00
|
|
|
|
)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
args = parser.parse_args()
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
root_dir = Path(args.directory).resolve()
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-09 11:34:18 -08:00
|
|
|
|
# CI delta mode: only show regressions
|
|
|
|
|
|
if args.compare_to:
|
|
|
|
|
|
print(f"\n📂 Scanning: {root_dir}")
|
|
|
|
|
|
print(f"🔍 Comparing to: {args.compare_to}\n")
|
|
|
|
|
|
|
|
|
|
|
|
if not validate_git_ref(root_dir, args.compare_to):
|
|
|
|
|
|
print(f"❌ Invalid git ref: {args.compare_to}", file=sys.stderr)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
" Make sure the ref exists (e.g. run 'git fetch origin <branch>')",
|
|
|
|
|
|
file=sys.stderr,
|
|
|
|
|
|
)
|
2026-02-09 11:34:18 -08:00
|
|
|
|
sys.exit(2)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-09 11:34:18 -08:00
|
|
|
|
files = find_code_files(root_dir)
|
|
|
|
|
|
violations = False
|
|
|
|
|
|
|
|
|
|
|
|
# Check file length regressions
|
2026-02-09 19:57:13 -08:00
|
|
|
|
crossed, grew = find_threshold_regressions(
|
|
|
|
|
|
files, root_dir, args.compare_to, args.threshold
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-09 11:51:51 -08:00
|
|
|
|
if crossed:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f"⚠️ {len(crossed)} file(s) crossed {args.threshold} line threshold:\n"
|
|
|
|
|
|
)
|
2026-02-09 11:51:51 -08:00
|
|
|
|
for file_path, current, base in crossed:
|
2026-02-09 11:34:18 -08:00
|
|
|
|
relative_path = file_path.relative_to(root_dir)
|
|
|
|
|
|
if base is None:
|
|
|
|
|
|
print(f" {relative_path}: {current:,} lines (new file)")
|
|
|
|
|
|
else:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})"
|
|
|
|
|
|
)
|
2026-02-09 11:34:18 -08:00
|
|
|
|
print()
|
|
|
|
|
|
violations = True
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"✅ No files crossed {args.threshold} line threshold")
|
|
|
|
|
|
|
2026-02-09 11:51:51 -08:00
|
|
|
|
if grew:
|
|
|
|
|
|
print(f"⚠️ {len(grew)} already-large file(s) grew larger:\n")
|
|
|
|
|
|
for file_path, current, base in grew:
|
|
|
|
|
|
relative_path = file_path.relative_to(root_dir)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f" {relative_path}: {base:,} → {current:,} lines (+{current - base:,})"
|
|
|
|
|
|
)
|
2026-02-09 11:51:51 -08:00
|
|
|
|
print()
|
|
|
|
|
|
violations = True
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"✅ No already-large files grew")
|
|
|
|
|
|
|
2026-02-09 11:34:18 -08:00
|
|
|
|
# Check new duplicate function names
|
|
|
|
|
|
new_dupes = find_duplicate_regressions(files, root_dir, args.compare_to)
|
|
|
|
|
|
|
|
|
|
|
|
if new_dupes:
|
|
|
|
|
|
print(f"⚠️ {len(new_dupes)} new duplicate function name(s):\n")
|
|
|
|
|
|
for func_name in sorted(new_dupes.keys()):
|
|
|
|
|
|
paths = new_dupes[func_name]
|
|
|
|
|
|
print(f" {func_name}:")
|
|
|
|
|
|
for path in paths:
|
|
|
|
|
|
print(f" {path.relative_to(root_dir)}")
|
|
|
|
|
|
print()
|
|
|
|
|
|
violations = True
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"✅ No new duplicate function names")
|
|
|
|
|
|
|
|
|
|
|
|
print()
|
|
|
|
|
|
if args.strict and violations:
|
2026-02-09 13:41:36 -08:00
|
|
|
|
# Emit GitHub Actions file annotations so violations appear inline in the PR diff
|
2026-02-09 19:57:13 -08:00
|
|
|
|
in_gha = os.environ.get("GITHUB_ACTIONS") == "true"
|
2026-02-09 13:41:36 -08:00
|
|
|
|
if in_gha:
|
|
|
|
|
|
for file_path, current, base in crossed:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
|
2026-02-09 13:41:36 -08:00
|
|
|
|
if base is None:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f"::error file={rel},title=File over {args.threshold} lines::{rel} is {current:,} lines (new file). Split into smaller modules."
|
|
|
|
|
|
)
|
2026-02-09 13:41:36 -08:00
|
|
|
|
else:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f"::error file={rel},title=File crossed {args.threshold} lines::{rel} grew from {base:,} to {current:,} lines (+{current - base:,}). Split into smaller modules."
|
|
|
|
|
|
)
|
2026-02-09 13:41:36 -08:00
|
|
|
|
for file_path, current, base in grew:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
rel = str(file_path.relative_to(root_dir)).replace("\\", "/")
|
|
|
|
|
|
print(
|
|
|
|
|
|
f"::error file={rel},title=Large file grew larger::{rel} is already {base:,} lines and grew to {current:,} (+{current - base:,}). Consider refactoring."
|
|
|
|
|
|
)
|
2026-02-09 13:41:36 -08:00
|
|
|
|
for func_name in sorted(new_dupes.keys()):
|
|
|
|
|
|
for p in new_dupes[func_name]:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
rel = str(p.relative_to(root_dir)).replace("\\", "/")
|
|
|
|
|
|
print(
|
|
|
|
|
|
f"::error file={rel},title=Duplicate function '{func_name}'::Function '{func_name}' appears in multiple files. Centralize or rename."
|
|
|
|
|
|
)
|
2026-02-09 13:41:36 -08:00
|
|
|
|
|
|
|
|
|
|
# Write GitHub Actions job summary (visible in the Actions check details)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
|
2026-02-09 13:41:36 -08:00
|
|
|
|
if summary_path:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
_write_github_summary(
|
|
|
|
|
|
summary_path,
|
|
|
|
|
|
crossed,
|
|
|
|
|
|
grew,
|
|
|
|
|
|
new_dupes,
|
|
|
|
|
|
root_dir,
|
|
|
|
|
|
args.threshold,
|
|
|
|
|
|
args.compare_to,
|
|
|
|
|
|
)
|
2026-02-09 13:41:36 -08:00
|
|
|
|
|
2026-02-09 13:18:51 -08:00
|
|
|
|
# Print actionable summary so contributors know what to do
|
|
|
|
|
|
print("─" * 60)
|
|
|
|
|
|
print("❌ Code size check failed\n")
|
2026-02-09 14:30:36 -08:00
|
|
|
|
print(" ⚠️ DO NOT just trash the code base!")
|
|
|
|
|
|
print(" The goal is maintainability.\n")
|
2026-02-09 13:18:51 -08:00
|
|
|
|
if crossed:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f" {len(crossed)} file(s) grew past the {args.threshold}-line limit."
|
|
|
|
|
|
)
|
2026-02-09 13:18:51 -08:00
|
|
|
|
if grew:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f" {len(grew)} file(s) already over {args.threshold} lines got larger."
|
|
|
|
|
|
)
|
2026-02-09 13:18:51 -08:00
|
|
|
|
print()
|
|
|
|
|
|
print(" How to fix:")
|
|
|
|
|
|
print(" • Split large files into smaller, focused modules")
|
|
|
|
|
|
print(" • Extract helpers, types, or constants into separate files")
|
|
|
|
|
|
print(" • See AGENTS.md for guidelines (~500-700 LOC target)")
|
|
|
|
|
|
print()
|
|
|
|
|
|
print(f" This check compares your PR against {args.compare_to}.")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f" Only code files are checked ({', '.join(sorted(e for e in CODE_EXTENSIONS))})."
|
|
|
|
|
|
)
|
2026-02-09 13:18:51 -08:00
|
|
|
|
print(" Docs, tests names, and config files are not affected.")
|
|
|
|
|
|
print("─" * 60)
|
2026-02-09 11:34:18 -08:00
|
|
|
|
sys.exit(1)
|
2026-02-09 13:18:51 -08:00
|
|
|
|
elif args.strict:
|
|
|
|
|
|
print("─" * 60)
|
|
|
|
|
|
print("✅ Code size check passed — no files exceed thresholds.")
|
|
|
|
|
|
print("─" * 60)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-09 11:34:18 -08:00
|
|
|
|
return
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print(f"\n📂 Scanning: {root_dir}\n")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Find and sort files by line count
|
|
|
|
|
|
files = find_code_files(root_dir)
|
|
|
|
|
|
files_desc = sorted(files, key=lambda x: x[1], reverse=True)
|
|
|
|
|
|
files_asc = sorted(files, key=lambda x: x[1])
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Show top N longest files
|
2026-02-09 19:57:13 -08:00
|
|
|
|
top_files = files_desc[: args.top]
|
|
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print(f"📊 Top {min(args.top, len(top_files))} longest code files:\n")
|
|
|
|
|
|
print(f"{'Lines':>8} {'File'}")
|
|
|
|
|
|
print("-" * 60)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
long_warnings = []
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
for file_path, line_count in top_files:
|
|
|
|
|
|
relative_path = file_path.relative_to(root_dir)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Check if over threshold
|
|
|
|
|
|
if line_count >= args.threshold:
|
|
|
|
|
|
marker = " ⚠️"
|
|
|
|
|
|
long_warnings.append((relative_path, line_count))
|
|
|
|
|
|
else:
|
|
|
|
|
|
marker = ""
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print(f"{line_count:>8} {relative_path}{marker}")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Show bottom N shortest files
|
2026-02-09 19:57:13 -08:00
|
|
|
|
bottom_files = files_asc[: args.bottom]
|
|
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print(f"\n📉 Bottom {min(args.bottom, len(bottom_files))} shortest code files:\n")
|
|
|
|
|
|
print(f"{'Lines':>8} {'File'}")
|
|
|
|
|
|
print("-" * 60)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
short_warnings = []
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
for file_path, line_count in bottom_files:
|
|
|
|
|
|
relative_path = file_path.relative_to(root_dir)
|
|
|
|
|
|
filename = file_path.name
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Skip known barrel exports and stubs
|
2026-02-09 19:57:13 -08:00
|
|
|
|
is_expected_short = filename in SKIP_SHORT_PATTERNS or any(
|
|
|
|
|
|
filename.endswith(suffix) for suffix in SKIP_SHORT_SUFFIXES
|
2026-02-08 04:53:31 -08:00
|
|
|
|
)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Check if under threshold
|
|
|
|
|
|
if line_count <= args.min_threshold and not is_expected_short:
|
|
|
|
|
|
marker = " ⚠️"
|
|
|
|
|
|
short_warnings.append((relative_path, line_count))
|
|
|
|
|
|
else:
|
|
|
|
|
|
marker = ""
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print(f"{line_count:>8} {relative_path}{marker}")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Summary
|
|
|
|
|
|
total_files = len(files)
|
|
|
|
|
|
total_lines = sum(count for _, count in files)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print("-" * 60)
|
|
|
|
|
|
print(f"\n📈 Summary:")
|
|
|
|
|
|
print(f" Total code files: {total_files:,}")
|
|
|
|
|
|
print(f" Total lines: {total_lines:,}")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f" Average lines/file: {total_lines // total_files if total_files else 0:,}"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Per-package breakdown
|
|
|
|
|
|
package_stats: dict[str, dict] = {}
|
|
|
|
|
|
for file_path, line_count in files:
|
|
|
|
|
|
pkg = get_package(file_path, root_dir)
|
|
|
|
|
|
if pkg not in package_stats:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
package_stats[pkg] = {"files": 0, "lines": 0}
|
|
|
|
|
|
package_stats[pkg]["files"] += 1
|
|
|
|
|
|
package_stats[pkg]["lines"] += line_count
|
|
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print(f"\n📦 Per-package breakdown:\n")
|
|
|
|
|
|
print(f"{'Package':<15} {'Files':>8} {'Lines':>10} {'Avg':>8}")
|
|
|
|
|
|
print("-" * 45)
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
|
|
|
|
|
for pkg in sorted(
|
|
|
|
|
|
package_stats.keys(), key=lambda p: package_stats[p]["lines"], reverse=True
|
|
|
|
|
|
):
|
2026-02-08 04:53:31 -08:00
|
|
|
|
stats = package_stats[pkg]
|
2026-02-09 19:57:13 -08:00
|
|
|
|
avg = stats["lines"] // stats["files"] if stats["files"] else 0
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print(f"{pkg:<15} {stats['files']:>8,} {stats['lines']:>10,} {avg:>8,}")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Long file warnings
|
|
|
|
|
|
if long_warnings:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f"\n⚠️ Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):"
|
|
|
|
|
|
)
|
2026-02-08 04:53:31 -08:00
|
|
|
|
for path, count in long_warnings:
|
|
|
|
|
|
print(f" - {path} ({count:,} lines)")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"\n✅ No files exceed {args.threshold} lines")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Short file warnings
|
|
|
|
|
|
if short_warnings:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f"\n⚠️ Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):"
|
|
|
|
|
|
)
|
2026-02-08 04:53:31 -08:00
|
|
|
|
for path, count in short_warnings:
|
|
|
|
|
|
print(f" - {path} ({count} lines)")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"\n✅ No files are {args.min_threshold} lines or less")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
# Duplicate function names
|
|
|
|
|
|
duplicates = find_duplicate_functions(files, root_dir)
|
|
|
|
|
|
if duplicates:
|
2026-02-09 19:57:13 -08:00
|
|
|
|
print(
|
|
|
|
|
|
f"\n⚠️ Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):"
|
|
|
|
|
|
)
|
2026-02-08 04:53:31 -08:00
|
|
|
|
for func_name in sorted(duplicates.keys()):
|
|
|
|
|
|
paths = duplicates[func_name]
|
|
|
|
|
|
print(f" - {func_name}:")
|
|
|
|
|
|
for path in paths:
|
|
|
|
|
|
print(f" {path.relative_to(root_dir)}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"\n✅ No duplicate function names")
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-08 04:53:31 -08:00
|
|
|
|
print()
|
2026-02-09 19:57:13 -08:00
|
|
|
|
|
2026-02-09 11:34:18 -08:00
|
|
|
|
# Exit with error if --strict and there are violations
|
|
|
|
|
|
if args.strict and long_warnings:
|
|
|
|
|
|
sys.exit(1)
|
2026-02-08 04:53:31 -08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-09 19:57:13 -08:00
|
|
|
|
# Script entry point: run the code-size analysis only when executed
# directly (e.g. `python check_code_size.py`), not when imported.
if __name__ == "__main__":
    main()
|