Mirror of https://github.com/trailofbits/algo.git (synced 2025-09-10 22:13:17 +02:00)
* chore: Conservative dependency updates for security
  - Update Ansible from 9.1.0 to 9.2.0 (one minor version bump only)
  - Update Jinja2 to ~3.1.6 to fix CVE-2025-27516 (critical security fix)
  - Pin netaddr to 1.3.0 (current stable version)

  This is a minimal, conservative update focused on:
  1. Critical security fix for Jinja2
  2. Minor ansible update for bug fixes
  3. Pinning netaddr to prevent surprises

  No changes to Ansible collections - keeping them unpinned for now.

* fix: Address linter issues (ruff, yamllint, shellcheck)
  - Fixed ruff configuration by moving linter settings to the [tool.ruff.lint] section
  - Fixed ruff code issues:
    - Moved imports to top of files (E402)
    - Removed unused variables or commented them out
    - Updated string formatting from % to .format()
    - Replaced dict() calls with literals
    - Fixed assert False usage in tests
  - Fixed yamllint issues:
    - Added missing newlines at end of files
    - Removed trailing spaces
    - Added document start markers (---) to YAML files
    - Fixed 'on:' truthy warnings in GitHub workflows
  - Fixed shellcheck issues:
    - Properly quoted variables in shell scripts
    - Fixed the A && B || C pattern with proper if/then/else
    - Improved FreeBSD rc script quoting

  All linters now pass without errors related to our code changes.

* fix: Additional yamllint fixes for GitHub workflows
  - Added document start markers (---) to test-effectiveness.yml
  - Fixed 'on:' truthy warning by quoting as 'on:'
  - Removed trailing spaces from main.yml
  - Added missing newline at end of test-effectiveness.yml
234 lines · 8 KiB · Python · Executable file
#!/usr/bin/env python3
"""
Track test effectiveness by analyzing CI failures and correlating with issues/PRs
This helps identify which tests actually catch bugs vs just failing randomly
"""
import json
import subprocess
from collections import defaultdict
from datetime import datetime, timedelta
from pathlib import Path

def get_github_api_data(endpoint):
    """Fetch data from GitHub API"""
    cmd = ['gh', 'api', endpoint]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"Error fetching {endpoint}: {result.stderr}")
        return None
    return json.loads(result.stdout)
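
# Illustrative usage of get_github_api_data (not part of the original script; assumes
# the `gh` CLI is installed and authenticated, and uses an endpoint of the same form
# as those built below):
#   runs = get_github_api_data('/repos/trailofbits/algo/actions/runs?per_page=5')
#   if runs:
#       print(runs['total_count'], 'workflow runs returned')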


def analyze_workflow_runs(repo_owner, repo_name, days_back=30):
    """Analyze workflow runs to find test failures"""
    since = (datetime.now() - timedelta(days=days_back)).isoformat()

    # Get failed workflow runs created since the cutoff
    runs = get_github_api_data(
        f'/repos/{repo_owner}/{repo_name}/actions/runs?created=>{since}&status=failure'
    )

    if not runs:
        return {}

    test_failures = defaultdict(list)

    for run in runs.get('workflow_runs', []):
        # Get jobs for this run
        jobs = get_github_api_data(
            f'/repos/{repo_owner}/{repo_name}/actions/runs/{run["id"]}/jobs'
        )

        if not jobs:
            continue

        for job in jobs.get('jobs', []):
            if job['conclusion'] == 'failure':
                # Try to extract which test failed
                logs_url = job.get('logs_url')
                if logs_url:
                    # Map the job name to a test category (full log parsing is not implemented)
                    test_name = extract_failed_test(job['name'], run['id'])
                    if test_name:
                        test_failures[test_name].append({
                            'run_id': run['id'],
                            'run_number': run['run_number'],
                            'date': run['created_at'],
                            'branch': run['head_branch'],
                            'commit': run['head_sha'][:7],
                            'pr': extract_pr_number(run)
                        })

    return test_failures
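
# Shape of the mapping returned by analyze_workflow_runs (illustrative values only):
#   {'docker_tests': [{'run_id': 123456789, 'run_number': 42,
#                      'date': '2025-01-01T00:00:00Z', 'branch': 'main',
#                      'commit': 'abc1234', 'pr': 101}]}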


def extract_failed_test(job_name, run_id):
    """Extract test name from job - this is simplified"""
    # Map job names to test categories
    job_to_tests = {
        'Basic sanity tests': 'test_basic_sanity',
        'Ansible syntax check': 'ansible_syntax',
        'Docker build test': 'docker_tests',
        'Configuration generation test': 'config_generation',
        'Ansible dry-run validation': 'ansible_dry_run'
    }
    return job_to_tests.get(job_name, job_name)


def extract_pr_number(run):
    """Extract PR number from workflow run"""
    for pr in run.get('pull_requests', []):
        return pr['number']
    return None


def correlate_with_issues(repo_owner, repo_name, test_failures):
    """Correlate test failures with issues/PRs that fixed them"""
    correlations = defaultdict(lambda: {'caught_bugs': 0, 'false_positives': 0})

    for test_name, failures in test_failures.items():
        for failure in failures:
            if failure['pr']:
                # Check if PR was merged (indicating it fixed a real issue)
                pr = get_github_api_data(
                    f'/repos/{repo_owner}/{repo_name}/pulls/{failure["pr"]}'
                )

                if pr and pr.get('merged'):
                    # Check PR title/body for bug indicators
                    # (the API returns null for an empty body, so guard before lower())
                    title = (pr.get('title') or '').lower()
                    body = (pr.get('body') or '').lower()

                    bug_keywords = ['fix', 'bug', 'error', 'issue', 'broken', 'fail']
                    is_bug_fix = any(keyword in title or keyword in body
                                     for keyword in bug_keywords)

                    if is_bug_fix:
                        correlations[test_name]['caught_bugs'] += 1
                    else:
                        correlations[test_name]['false_positives'] += 1

    return correlations


def generate_effectiveness_report(test_failures, correlations):
    """Generate test effectiveness report"""
    report = []
    report.append("# Test Effectiveness Report")
    report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Summary
    report.append("## Summary")
    total_failures = sum(len(f) for f in test_failures.values())
    report.append(f"- Total test failures: {total_failures}")
    report.append(f"- Unique tests that failed: {len(test_failures)}")
    report.append("")

    # Effectiveness scores
    report.append("## Test Effectiveness Scores")
    report.append("| Test | Failures | Caught Bugs | False Positives | Effectiveness |")
    report.append("|------|----------|-------------|-----------------|---------------|")

    scores = []
    for test_name, failures in test_failures.items():
        failure_count = len(failures)
        caught = correlations[test_name]['caught_bugs']
        false_pos = correlations[test_name]['false_positives']

        # Calculate effectiveness (bugs caught / total failures)
        if failure_count > 0:
            effectiveness = caught / failure_count
        else:
            effectiveness = 0

        scores.append((test_name, failure_count, caught, false_pos, effectiveness))

    # Sort by effectiveness
    scores.sort(key=lambda x: x[4], reverse=True)

    for test_name, failures, caught, false_pos, effectiveness in scores:
        report.append(f"| {test_name} | {failures} | {caught} | {false_pos} | {effectiveness:.1%} |")

    # Recommendations
    report.append("\n## Recommendations")

    for test_name, failures, caught, false_pos, effectiveness in scores:
        if effectiveness < 0.2 and failures > 5:
            report.append(f"- ⚠️ Consider improving or removing `{test_name}` (only {effectiveness:.0%} effective)")
        elif effectiveness > 0.8:
            report.append(f"- ✅ `{test_name}` is highly effective ({effectiveness:.0%})")

    return '\n'.join(report)
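
# Illustrative snippet of the markdown this function emits (numbers are made up):
#   ## Test Effectiveness Scores
#   | Test | Failures | Caught Bugs | False Positives | Effectiveness |
#   |------|----------|-------------|-----------------|---------------|
#   | docker_tests | 4 | 3 | 1 | 75.0% |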


def save_metrics(test_failures, correlations):
    """Save metrics to JSON for historical tracking"""
    metrics_file = Path('.metrics/test-effectiveness.json')
    metrics_file.parent.mkdir(exist_ok=True)

    # Load existing metrics
    if metrics_file.exists():
        with open(metrics_file) as f:
            historical = json.load(f)
    else:
        historical = []

    # Add current metrics
    current = {
        'date': datetime.now().isoformat(),
        'test_failures': {
            test: len(failures) for test, failures in test_failures.items()
        },
        'effectiveness': {
            test: {
                'caught_bugs': data['caught_bugs'],
                'false_positives': data['false_positives'],
                'score': data['caught_bugs'] / (data['caught_bugs'] + data['false_positives'])
                if (data['caught_bugs'] + data['false_positives']) > 0 else 0
            }
            for test, data in correlations.items()
        }
    }

    historical.append(current)

    # Keep last 12 months of data
    cutoff = datetime.now() - timedelta(days=365)
    historical = [
        h for h in historical
        if datetime.fromisoformat(h['date']) > cutoff
    ]

    with open(metrics_file, 'w') as f:
        json.dump(historical, f, indent=2)


if __name__ == '__main__':
    # Configure these for your repo
    REPO_OWNER = 'trailofbits'
    REPO_NAME = 'algo'

    print("Analyzing test effectiveness...")

    # Analyze last 30 days of CI runs
    test_failures = analyze_workflow_runs(REPO_OWNER, REPO_NAME, days_back=30)

    # Correlate with issues/PRs
    correlations = correlate_with_issues(REPO_OWNER, REPO_NAME, test_failures)

    # Generate report
    report = generate_effectiveness_report(test_failures, correlations)

    print("\n" + report)

    # Save report
    report_file = Path('.metrics/test-effectiveness-report.md')
    report_file.parent.mkdir(exist_ok=True)
    with open(report_file, 'w') as f:
        f.write(report)

    print(f"\nReport saved to: {report_file}")

    # Save metrics for tracking
    save_metrics(test_failures, correlations)

    print("Metrics saved to: .metrics/test-effectiveness.json")
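
# A minimal way to run this locally (assuming the GitHub CLI `gh` is installed and
# authenticated, e.g. via `gh auth login`); the report and JSON metrics are written
# under .metrics/ in the current working directory. The path below is illustrative:
#   python3 scripts/track_test_effectiveness.py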