#!/usr/bin/env python3
"""
Completed Files Scanner
Scans all files in docs/completed/ for analysis
"""

import os
import json
from pathlib import Path
from datetime import datetime

# Maximum number of characters of file content kept in 'content_preview'.
_PREVIEW_CHARS = 300


def scan_completed_files(completed_dir):
    """Scan every markdown file under *completed_dir* and summarize it.

    All ``*.md`` files are collected recursively, except files named
    ``README.md``. Files are visited in sorted path order so that the
    resulting report is stable between runs.

    Args:
        completed_dir: Path (str or Path) of the directory to scan.

    Returns:
        ``{'error': 'Completed directory not found'}`` when the directory
        does not exist. Otherwise a dict with the keys:

        * ``total_files_scanned`` - number of entries in ``all_files``.
        * ``categories_found`` - number of distinct categories.
        * ``category_summary`` - maps each category to a dict holding its
          ``files`` list, ``total_files`` count and ``total_size`` in bytes.
        * ``all_files`` - one dict per file. A file that was read
          successfully has ``file_path``, ``relative_path``, ``category``,
          ``filename``, ``file_size``, ``content_length``,
          ``last_modified`` and ``content_preview``. A file that could not
          be processed has ``file_path``, ``relative_path``, ``filename``,
          ``error`` and the category ``'error'``.
    """
    completed_path = Path(completed_dir)

    if not completed_path.exists():
        return {'error': 'Completed directory not found'}

    files = []

    # Find all markdown files; sorted() makes the report order deterministic
    # (rglob yields entries in arbitrary filesystem order).
    for md_file in sorted(completed_path.rglob('*.md')):
        if not md_file.is_file() or md_file.name == 'README.md':
            continue

        # Path relative to the scanned directory; its first component is the
        # category, files directly in the root fall into 'general'.
        relative_path = md_file.relative_to(completed_path)

        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()

            # Single stat() call serves both the size and the mtime.
            file_stat = md_file.stat()
            last_modified = datetime.fromtimestamp(file_stat.st_mtime).isoformat()
        except (OSError, ValueError, OverflowError) as e:
            # Best effort: one bad file must not abort the whole scan.
            # OSError covers unreadable files and stat failures, ValueError
            # covers invalid UTF-8 (UnicodeDecodeError), OverflowError covers
            # an mtime that fromtimestamp() cannot represent.
            files.append({
                'file_path': str(md_file),
                'relative_path': str(relative_path),
                'category': 'error',
                'filename': md_file.name,
                'error': str(e)
            })
            continue

        if len(relative_path.parts) > 1:
            category = relative_path.parts[0]
        else:
            category = 'general'

        if len(content) > _PREVIEW_CHARS:
            content_preview = content[:_PREVIEW_CHARS] + '...'
        else:
            content_preview = content

        files.append({
            'file_path': str(md_file),
            'relative_path': str(relative_path),
            'category': category,
            'filename': md_file.name,
            'file_size': file_stat.st_size,
            'content_length': len(content),
            'last_modified': last_modified,
            'content_preview': content_preview
        })

    # Categorize files
    category_summary = {}
    for file_info in files:
        summary = category_summary.setdefault(
            file_info['category'],
            {'files': [], 'total_files': 0, 'total_size': 0},
        )
        summary['files'].append(file_info)
        summary['total_files'] += 1
        # Error entries carry no 'file_size'; they count as 0 bytes.
        summary['total_size'] += file_info.get('file_size', 0)

    return {
        'total_files_scanned': len(files),
        'categories_found': len(category_summary),
        'category_summary': category_summary,
        'all_files': files
    }


def main():
    """Scan the completed-docs directory, save the report and print a summary.

    The scan result is always written to the JSON output file. If the scan
    reported an error (the directory does not exist), the error is printed
    to stderr and the process exits with status 1 instead of printing the
    summary.
    """
    completed_dir = '/opt/aitbc/docs/completed'
    output_file = 'completed_files_scan.json'

    scan_results = scan_completed_files(completed_dir)

    # Save results
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(scan_results, f, indent=2)

    # An error result has none of the summary keys used below, so report it
    # and stop here rather than failing with a KeyError.
    if 'error' in scan_results:
        raise SystemExit(f"Error: {scan_results['error']}: {completed_dir}")

    # Print summary
    print("Completed files scan complete:")
    print(f"  Total files scanned: {scan_results['total_files_scanned']}")
    print(f"  Categories found: {scan_results['categories_found']}")
    print("")
    print("Files by category:")
    for category, summary in scan_results['category_summary'].items():
        print(f"  {category}: {summary['total_files']} files, {summary['total_size']} bytes")


if __name__ == "__main__":
    main()