- Remove excessive completion checkmarks and status markers throughout document - Consolidate redundant sections on completed features - Streamline executive summary and current status sections - Focus content on upcoming quick wins and active tasks - Remove duplicate phase completion listings - Clean up success metrics and KPI sections - Maintain essential planning information while reducing noise
250 lines
7.7 KiB
Python
250 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Content Analyzer for Documentation
|
|
Analyzes completed files to determine documentation conversion strategy
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
def extract_documentation_metadata(content, filename):
|
|
"""Extract metadata for documentation conversion"""
|
|
metadata = {
|
|
'title': filename.replace('.md', '').replace('_', ' ').title(),
|
|
'type': 'analysis',
|
|
'category': 'general',
|
|
'keywords': [],
|
|
'sections': [],
|
|
'has_implementation_details': False,
|
|
'has_technical_specs': False,
|
|
'has_status_info': False,
|
|
'completion_indicators': []
|
|
}
|
|
|
|
# Extract title from first heading
|
|
title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
|
|
if title_match:
|
|
metadata['title'] = title_match.group(1).strip()
|
|
|
|
# Find sections
|
|
section_matches = re.findall(r'^#{1,6}\s+(.+)$', content, re.MULTILINE)
|
|
metadata['sections'] = section_matches
|
|
|
|
# Check for implementation details
|
|
impl_patterns = [
|
|
r'implementation',
|
|
r'architecture',
|
|
r'technical',
|
|
r'specification',
|
|
r'design',
|
|
r'code',
|
|
r'api',
|
|
r'endpoint',
|
|
r'service'
|
|
]
|
|
|
|
metadata['has_implementation_details'] = any(
|
|
re.search(pattern, content, re.IGNORECASE) for pattern in impl_patterns
|
|
)
|
|
|
|
# Check for technical specs
|
|
tech_patterns = [
|
|
r'```',
|
|
r'config',
|
|
r'setup',
|
|
r'deployment',
|
|
r'infrastructure',
|
|
r'security',
|
|
r'performance'
|
|
]
|
|
|
|
metadata['has_technical_specs'] = any(
|
|
re.search(pattern, content, re.IGNORECASE) for pattern in tech_patterns
|
|
)
|
|
|
|
# Check for status information
|
|
status_patterns = [
|
|
r'status',
|
|
r'complete',
|
|
r'operational',
|
|
r'deployed',
|
|
r'working',
|
|
r'functional'
|
|
]
|
|
|
|
metadata['has_status_info'] = any(
|
|
re.search(pattern, content, re.IGNORECASE) for pattern in status_patterns
|
|
)
|
|
|
|
# Find completion indicators
|
|
completion_patterns = [
|
|
r'✅\s*\*\*COMPLETE\*\*',
|
|
r'✅\s*\*\*IMPLEMENTED\*\*',
|
|
r'✅\s*\*\*OPERATIONAL\*\*',
|
|
r'✅\s*\*\*DEPLOYED\*\*',
|
|
r'✅\s*\*\*WORKING\*\*',
|
|
r'✅\s*\*\*FUNCTIONAL\*\*',
|
|
r'✅\s*\*\*ACHIEVED\*\*',
|
|
r'✅\s*COMPLETE\s*',
|
|
r'✅\s*IMPLEMENTED\s*',
|
|
r'✅\s*OPERATIONAL\s*',
|
|
r'✅\s*DEPLOYED\s*',
|
|
r'✅\s*WORKING\s*',
|
|
r'✅\s*FUNCTIONAL\s*',
|
|
r'✅\s*ACHIEVED\s*'
|
|
]
|
|
|
|
for pattern in completion_patterns:
|
|
matches = re.findall(pattern, content, re.IGNORECASE)
|
|
if matches:
|
|
metadata['completion_indicators'].extend(matches)
|
|
|
|
# Extract keywords from sections and title
|
|
all_text = content.lower()
|
|
keyword_patterns = [
|
|
r'cli',
|
|
r'backend',
|
|
r'infrastructure',
|
|
r'security',
|
|
r'exchange',
|
|
r'blockchain',
|
|
r'analytics',
|
|
r'marketplace',
|
|
r'maintenance',
|
|
r'implementation',
|
|
r'testing',
|
|
r'api',
|
|
r'service',
|
|
r'trading',
|
|
r'wallet',
|
|
r'network',
|
|
r'deployment'
|
|
]
|
|
|
|
for pattern in keyword_patterns:
|
|
if re.search(r'\b' + pattern + r'\b', all_text):
|
|
metadata['keywords'].append(pattern)
|
|
|
|
# Determine documentation type
|
|
if 'analysis' in metadata['title'].lower() or 'analysis' in filename.lower():
|
|
metadata['type'] = 'analysis'
|
|
elif 'implementation' in metadata['title'].lower() or 'implementation' in filename.lower():
|
|
metadata['type'] = 'implementation'
|
|
elif 'summary' in metadata['title'].lower() or 'summary' in filename.lower():
|
|
metadata['type'] = 'summary'
|
|
elif 'test' in metadata['title'].lower() or 'test' in filename.lower():
|
|
metadata['type'] = 'testing'
|
|
else:
|
|
metadata['type'] = 'general'
|
|
|
|
return metadata
|
|
|
|
def analyze_files_for_documentation(scan_file):
|
|
"""Analyze files for documentation conversion"""
|
|
|
|
with open(scan_file, 'r') as f:
|
|
scan_results = json.load(f)
|
|
|
|
analysis_results = []
|
|
|
|
for file_info in scan_results['all_files']:
|
|
if 'error' in file_info:
|
|
continue
|
|
|
|
try:
|
|
with open(file_info['file_path'], 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
metadata = extract_documentation_metadata(content, file_info['filename'])
|
|
|
|
analysis_result = {
|
|
**file_info,
|
|
'documentation_metadata': metadata,
|
|
'recommended_action': determine_action(metadata),
|
|
'target_category': determine_target_category(metadata, file_info['category'])
|
|
}
|
|
|
|
analysis_results.append(analysis_result)
|
|
|
|
except Exception as e:
|
|
analysis_results.append({
|
|
**file_info,
|
|
'error': f"Analysis failed: {str(e)}"
|
|
})
|
|
|
|
# Summarize by action
|
|
action_summary = {}
|
|
for result in analysis_results:
|
|
action = result.get('recommended_action', 'unknown')
|
|
if action not in action_summary:
|
|
action_summary[action] = 0
|
|
action_summary[action] += 1
|
|
|
|
return {
|
|
'total_files_analyzed': len(analysis_results),
|
|
'action_summary': action_summary,
|
|
'analysis_results': analysis_results
|
|
}
|
|
|
|
def determine_action(metadata):
|
|
"""Determine the recommended action for the file"""
|
|
|
|
if metadata['has_implementation_details'] or metadata['has_technical_specs']:
|
|
return 'convert_to_technical_doc'
|
|
elif metadata['has_status_info'] or metadata['completion_indicators']:
|
|
return 'convert_to_status_doc'
|
|
elif metadata['type'] == 'analysis':
|
|
return 'convert_to_analysis_doc'
|
|
elif metadata['type'] == 'summary':
|
|
return 'convert_to_summary_doc'
|
|
else:
|
|
return 'convert_to_general_doc'
|
|
|
|
def determine_target_category(metadata, current_category):
|
|
"""Determine the best target category in main docs/"""
|
|
|
|
# Check keywords for specific categories
|
|
keywords = metadata['keywords']
|
|
|
|
if any(kw in keywords for kw in ['cli', 'command']):
|
|
return 'cli'
|
|
elif any(kw in keywords for kw in ['backend', 'api', 'service']):
|
|
return 'backend'
|
|
elif any(kw in keywords for kw in ['infrastructure', 'network', 'deployment']):
|
|
return 'infrastructure'
|
|
elif any(kw in keywords for kw in ['security', 'firewall']):
|
|
return 'security'
|
|
elif any(kw in keywords for kw in ['exchange', 'trading', 'marketplace']):
|
|
return 'exchange'
|
|
elif any(kw in keywords for kw in ['blockchain', 'wallet']):
|
|
return 'blockchain'
|
|
elif any(kw in keywords for kw in ['analytics', 'monitoring']):
|
|
return 'analytics'
|
|
elif any(kw in keywords for kw in ['maintenance', 'requirements']):
|
|
return 'maintenance'
|
|
elif metadata['type'] == 'implementation':
|
|
return 'implementation'
|
|
elif metadata['type'] == 'testing':
|
|
return 'testing'
|
|
else:
|
|
return 'general'
|
|
|
|
if __name__ == "__main__":
|
|
scan_file = 'completed_files_scan.json'
|
|
output_file = 'content_analysis_results.json'
|
|
|
|
analysis_results = analyze_files_for_documentation(scan_file)
|
|
|
|
# Save results
|
|
with open(output_file, 'w') as f:
|
|
json.dump(analysis_results, f, indent=2)
|
|
|
|
# Print summary
|
|
print(f"Content analysis complete:")
|
|
print(f" Total files analyzed: {analysis_results['total_files_analyzed']}")
|
|
print("")
|
|
print("Recommended actions:")
|
|
for action, count in analysis_results['action_summary'].items():
|
|
print(f" {action}: {count} files")
|