Files
aitbc/workspace/planning-analysis/analyze_content.py
AITBC System 6cb51c270c docs(planning): clean up next milestone document and remove completion markers
- Remove excessive completion checkmarks and status markers throughout document
- Consolidate redundant sections on completed features
- Streamline executive summary and current status sections
- Focus content on upcoming quick wins and active tasks
- Remove duplicate phase completion listings
- Clean up success metrics and KPI sections
- Maintain essential planning information while reducing noise
2026-03-08 13:42:14 +01:00

250 lines
7.7 KiB
Python

#!/usr/bin/env python3
"""
Content Analyzer for Documentation
Analyzes completed files to determine documentation conversion strategy
"""
import json
import re
from pathlib import Path
def extract_documentation_metadata(content, filename):
    """Extract the metadata that drives documentation conversion for one file.

    Args:
        content: Full text of the (markdown) file being analysed.
        filename: Name of the file; used as the fallback title and as an
            additional hint when classifying the document type.

    Returns:
        A dict with the keys ``title``, ``type``, ``category``, ``keywords``,
        ``sections``, ``has_implementation_details``, ``has_technical_specs``,
        ``has_status_info`` and ``completion_indicators``.
    """
    metadata = {
        # Fallback title derived from the file name; replaced below when the
        # content has a level-1 heading.
        'title': filename.replace('.md', '').replace('_', ' ').title(),
        'type': 'general',
        'category': 'general',
        'keywords': [],
        'sections': [],
        'has_implementation_details': False,
        'has_technical_specs': False,
        'has_status_info': False,
        'completion_indicators': [],
    }

    # Prefer the first level-1 heading as the title.
    title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    if title_match:
        metadata['title'] = title_match.group(1).strip()

    # Text of every heading (levels 1-6), in document order.
    metadata['sections'] = re.findall(r'^#{1,6}\s+(.+)$', content, re.MULTILINE)

    # Each flag is set when any of its patterns occurs anywhere in the
    # content, case-insensitively. These are substring matches by design.
    flag_patterns = {
        'has_implementation_details': (
            r'implementation',
            r'architecture',
            r'technical',
            r'specification',
            r'design',
            r'code',
            r'api',
            r'endpoint',
            r'service',
        ),
        'has_technical_specs': (
            r'```',
            r'config',
            r'setup',
            r'deployment',
            r'infrastructure',
            r'security',
            r'performance',
        ),
        'has_status_info': (
            r'status',
            r'complete',
            r'operational',
            r'deployed',
            r'working',
            r'functional',
        ),
    }
    for flag, patterns in flag_patterns.items():
        metadata[flag] = any(
            re.search(pattern, content, re.IGNORECASE) for pattern in patterns
        )

    # Completion indicators: bold markers first, then the bare words.
    # The bare-word patterns also match inside the bold markers, so a single
    # "**COMPLETE**" yields two entries ("**COMPLETE**" and "COMPLETE").
    completion_patterns = (
        r'\s*\*\*COMPLETE\*\*',
        r'\s*\*\*IMPLEMENTED\*\*',
        r'\s*\*\*OPERATIONAL\*\*',
        r'\s*\*\*DEPLOYED\*\*',
        r'\s*\*\*WORKING\*\*',
        r'\s*\*\*FUNCTIONAL\*\*',
        r'\s*\*\*ACHIEVED\*\*',
        r'\s*COMPLETE\s*',
        r'\s*IMPLEMENTED\s*',
        r'\s*OPERATIONAL\s*',
        r'\s*DEPLOYED\s*',
        r'\s*WORKING\s*',
        r'\s*FUNCTIONAL\s*',
        r'\s*ACHIEVED\s*',
    )
    for pattern in completion_patterns:
        # The patterns swallow surrounding whitespace (including newlines);
        # strip it so only the marker text itself is recorded.
        metadata['completion_indicators'].extend(
            match.strip()
            for match in re.findall(pattern, content, re.IGNORECASE)
        )

    # Keywords are whole-word matches against the entire lower-cased content.
    # The last four entries are the triggers determine_target_category()
    # looks for that were previously never extracted.
    keyword_candidates = (
        'cli',
        'backend',
        'infrastructure',
        'security',
        'exchange',
        'blockchain',
        'analytics',
        'marketplace',
        'maintenance',
        'implementation',
        'testing',
        'api',
        'service',
        'trading',
        'wallet',
        'network',
        'deployment',
        'command',
        'firewall',
        'monitoring',
        'requirements',
    )
    lowered_content = content.lower()
    metadata['keywords'] = [
        keyword
        for keyword in keyword_candidates
        if re.search(r'\b' + keyword + r'\b', lowered_content)
    ]

    # Document type: the first marker found in the title or the file name
    # wins; anything else stays 'general'.
    lowered_title = metadata['title'].lower()
    lowered_filename = filename.lower()
    type_markers = (
        ('analysis', 'analysis'),
        ('implementation', 'implementation'),
        ('summary', 'summary'),
        ('test', 'testing'),
    )
    for marker, doc_type in type_markers:
        if marker in lowered_title or marker in lowered_filename:
            metadata['type'] = doc_type
            break

    return metadata
def analyze_files_for_documentation(scan_file):
    """Analyse every scanned file and recommend a documentation conversion.

    Args:
        scan_file: Path to a JSON scan report with an ``all_files`` list.
            Entries that already carry an ``error`` key are skipped; the
            remaining entries must provide ``file_path``, ``filename`` and
            ``category``.

    Returns:
        A dict with ``total_files_analyzed`` (number of result entries),
        ``action_summary`` (recommended action -> count, with failed files
        counted under ``'unknown'``) and ``analysis_results`` (one dict per
        processed entry).

    Raises:
        OSError: If the scan report cannot be opened.
        json.JSONDecodeError: If the scan report is not valid JSON.
        KeyError: If the scan report has no ``all_files`` key.
    """
    # Read the report as UTF-8, matching how the analysed files are read
    # below, so non-ASCII paths do not depend on the platform locale.
    with open(scan_file, 'r', encoding='utf-8') as f:
        scan_results = json.load(f)

    analysis_results = []
    for file_info in scan_results['all_files']:
        if 'error' in file_info:
            continue
        try:
            with open(file_info['file_path'], 'r', encoding='utf-8') as f:
                content = f.read()
            metadata = extract_documentation_metadata(content, file_info['filename'])
            analysis_results.append({
                **file_info,
                'documentation_metadata': metadata,
                'recommended_action': determine_action(metadata),
                'target_category': determine_target_category(
                    metadata, file_info['category']
                ),
            })
        except Exception as e:
            # Best effort per file: record the failure and continue with the
            # remaining entries instead of aborting the whole run.
            analysis_results.append({
                **file_info,
                'error': f"Analysis failed: {e}",
            })

    # Summarize by action; failed entries have no recommended action.
    action_summary = {}
    for result in analysis_results:
        action = result.get('recommended_action', 'unknown')
        action_summary[action] = action_summary.get(action, 0) + 1

    return {
        'total_files_analyzed': len(analysis_results),
        'action_summary': action_summary,
        'analysis_results': analysis_results,
    }
def determine_action(metadata):
    """Pick the conversion action recommended for a file's metadata.

    Technical content takes precedence over status content, which in turn
    takes precedence over the document type.
    """
    is_technical = (
        metadata['has_implementation_details'] or metadata['has_technical_specs']
    )
    if is_technical:
        return 'convert_to_technical_doc'

    carries_status = metadata['has_status_info'] or metadata['completion_indicators']
    if carries_status:
        return 'convert_to_status_doc'

    # Only the document type is left to decide between the remaining actions.
    doc_type = metadata['type']
    type_actions = (
        ('analysis', 'convert_to_analysis_doc'),
        ('summary', 'convert_to_summary_doc'),
    )
    for known_type, action in type_actions:
        if doc_type == known_type:
            return action
    return 'convert_to_general_doc'
def determine_target_category(metadata, current_category):
    """Choose the target category in the main docs/ tree for a file.

    Keyword rules are evaluated in a fixed priority order and the first rule
    with a matching keyword wins. Without a keyword match the document type
    decides, and everything else lands in 'general'. ``current_category`` is
    accepted but does not influence the result.
    """
    # Ordered (category, trigger keywords) rules; earlier rules win.
    keyword_rules = (
        ('cli', ('cli', 'command')),
        ('backend', ('backend', 'api', 'service')),
        ('infrastructure', ('infrastructure', 'network', 'deployment')),
        ('security', ('security', 'firewall')),
        ('exchange', ('exchange', 'trading', 'marketplace')),
        ('blockchain', ('blockchain', 'wallet')),
        ('analytics', ('analytics', 'monitoring')),
        ('maintenance', ('maintenance', 'requirements')),
    )
    found_keywords = metadata['keywords']
    for category, triggers in keyword_rules:
        for trigger in triggers:
            if trigger in found_keywords:
                return category

    # No keyword rule applied: fall back to the document type.
    doc_type = metadata['type']
    if doc_type == 'implementation':
        return 'implementation'
    if doc_type == 'testing':
        return 'testing'
    return 'general'
if __name__ == "__main__":
    # Input scan report and output location, both relative to the working
    # directory.
    scan_file = 'completed_files_scan.json'
    output_file = 'content_analysis_results.json'

    analysis_results = analyze_files_for_documentation(scan_file)

    # Persist the full analysis as indented JSON.
    with open(output_file, 'w') as out:
        json.dump(analysis_results, out, indent=2)

    # Console report: overall count, then one line per recommended action.
    total = analysis_results['total_files_analyzed']
    print("Content analysis complete:")
    print(f"  Total files analyzed: {total}")
    print()
    print("Recommended actions:")
    for action, count in analysis_results['action_summary'].items():
        print(f"  {action}: {count} files")