Update authentication system, database models, and UI components
analyze_db_legacy.py | 355 (new file)

@@ -0,0 +1,355 @@
#!/usr/bin/env python3
"""
Database Legacy Data Analysis Script

Analyzes the database for legacy data that doesn't match the current
authentication implementation.
"""

import re
import sys
from datetime import datetime, timedelta

from sqlmodel import Session, select

from database import engine
from models import User, UserQuota, UploadLog, DBSession, PublicStream


def validate_email_format(email):
    """Validate email format using a simplified RFC 5322-style regex."""
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return re.match(pattern, email) is not None

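# Illustrative behavior of the simplified pattern above (it is not a full
# RFC 5322 validator; e.g. quoted local parts are rejected even though they
# are technically legal):
#   validate_email_format("alice@example.com")       -> True
#   validate_email_format("alice@sub.example.co.uk") -> True
#   validate_email_format("alice")                   -> False
#   validate_email_format("alice@localhost")         -> False (no dot in domain)

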
def analyze_user_table():
    """Analyze User table for legacy data issues"""
    print("\n=== ANALYZING USER TABLE ===")
    issues = []

    with Session(engine) as session:
        users = session.exec(select(User)).all()
        print(f"Total users: {len(users)}")

        for user in users:
            user_issues = []

            # Check if email (primary key) is valid email format
            if not validate_email_format(user.email):
                user_issues.append(f"Invalid email format: {user.email}")

            # Check if username is also email format (current requirement)
            if not validate_email_format(user.username):
                user_issues.append(f"Username not in email format: {user.username}")

            # Check if email and username match (should be same after migration)
            if user.email != user.username:
                user_issues.append(f"Email/username mismatch: email={user.email}, username={user.username}")

            # Check for missing or empty display_name
            if not user.display_name or user.display_name.strip() == "":
                user_issues.append("Empty display_name")

            # Check for very old tokens (potential security issue)
            if user.token_created < datetime.utcnow() - timedelta(days=30):
                user_issues.append(f"Very old token (created: {user.token_created})")

            # Check for unconfirmed users
            if not user.confirmed:
                user_issues.append("Unconfirmed user")

            if user_issues:
                issues.append({
                    'email': user.email,
                    'username': user.username,
                    'issues': user_issues,
                })

    print(f"Users with issues: {len(issues)}")
    for issue in issues:
        print(f"  User {issue['email']}:")
        for problem in issue['issues']:
            print(f"    - {problem}")

    return issues

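# Each record returned by analyze_user_table() has this shape (values
# illustrative):
#   {'email': 'a@example.com', 'username': 'a@example.com',
#    'issues': ['Empty display_name', 'Unconfirmed user']}

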
def analyze_session_table():
    """Analyze DBSession table for legacy data issues"""
    print("\n=== ANALYZING SESSION TABLE ===")
    issues = []

    with Session(engine) as session:
        sessions = session.exec(select(DBSession)).all()
        print(f"Total sessions: {len(sessions)}")

        active_sessions = [s for s in sessions if s.is_active]
        expired_sessions = [s for s in sessions if s.expires_at < datetime.utcnow()]
        old_sessions = [s for s in sessions if s.created_at < datetime.utcnow() - timedelta(days=7)]

        print(f"Active sessions: {len(active_sessions)}")
        print(f"Expired sessions: {len(expired_sessions)}")
        print(f"Sessions older than 7 days: {len(old_sessions)}")

        for db_session in sessions:
            session_issues = []

            # Check if user_id is in email format (current requirement)
            if not validate_email_format(db_session.user_id):
                session_issues.append(f"user_id not in email format: {db_session.user_id}")

            # Check for expired but still active sessions
            if db_session.is_active and db_session.expires_at < datetime.utcnow():
                session_issues.append(f"Expired but still marked active (expires: {db_session.expires_at})")

            # Check for very old sessions that should be cleaned up
            if db_session.created_at < datetime.utcnow() - timedelta(days=30):
                session_issues.append(f"Very old session (created: {db_session.created_at})")

            # Check for sessions with 1-hour expiry (old system)
            session_duration = db_session.expires_at - db_session.created_at
            if session_duration < timedelta(hours=2):  # Less than 2 hours indicates old 1-hour sessions
                session_issues.append(f"Short session duration: {session_duration} (should be 24h)")

            if session_issues:
                issues.append({
                    'token': db_session.token[:10] + '...',
                    'user_id': db_session.user_id,
                    'created_at': db_session.created_at,
                    'expires_at': db_session.expires_at,
                    'issues': session_issues,
                })

    print(f"Sessions with issues: {len(issues)}")
    for issue in issues:
        print(f"  Session {issue['token']} (user: {issue['user_id']}):")
        for problem in issue['issues']:
            print(f"    - {problem}")

    return issues

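# A minimal cleanup sketch for the "expired but still marked active" finding
# above. Assumptions: deactivating (not deleting) rows is acceptable, and the
# DBSession fields are exactly those used in the analysis. Not wired into main().
def deactivate_expired_sessions():
    """Mark sessions past their expires_at as inactive and return the count."""
    with Session(engine) as session:
        stale = session.exec(
            select(DBSession).where(
                DBSession.is_active == True,  # noqa: E712 -- SQL expression, not a bool test
                DBSession.expires_at < datetime.utcnow(),
            )
        ).all()
        for db_session in stale:
            db_session.is_active = False
            session.add(db_session)
        session.commit()
        return len(stale)

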
def analyze_quota_table():
    """Analyze UserQuota table for legacy data issues"""
    print("\n=== ANALYZING USER QUOTA TABLE ===")
    issues = []

    with Session(engine) as session:
        quotas = session.exec(select(UserQuota)).all()
        print(f"Total quota records: {len(quotas)}")

        for quota in quotas:
            quota_issues = []

            # Check if uid is in email format (current requirement)
            if not validate_email_format(quota.uid):
                quota_issues.append(f"UID not in email format: {quota.uid}")

            # Check for negative storage
            if quota.storage_bytes < 0:
                quota_issues.append(f"Negative storage: {quota.storage_bytes}")

            # Check for excessive storage (over 100MB limit)
            if quota.storage_bytes > 100 * 1024 * 1024:
                quota_issues.append(f"Storage over 100MB limit: {quota.storage_bytes / (1024*1024):.1f}MB")

            if quota_issues:
                issues.append({
                    'uid': quota.uid,
                    'storage_bytes': quota.storage_bytes,
                    'issues': quota_issues,
                })

    print(f"Quota records with issues: {len(issues)}")
    for issue in issues:
        print(f"  Quota {issue['uid']} ({issue['storage_bytes']} bytes):")
        for problem in issue['issues']:
            print(f"    - {problem}")

    return issues

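# Hedged sketch: recompute a user's storage from UploadLog so quota drift (the
# negative/over-limit findings above) can be reconciled. Assumes
# UploadLog.size_bytes is authoritative; not called by main().
def recompute_storage_bytes(uid):
    """Sum size_bytes over a user's UploadLog rows."""
    with Session(engine) as session:
        sizes = session.exec(
            select(UploadLog.size_bytes).where(UploadLog.uid == uid)
        ).all()
        return sum(sizes)

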
def analyze_upload_log_table():
    """Analyze UploadLog table for legacy data issues"""
    print("\n=== ANALYZING UPLOAD LOG TABLE ===")
    issues = []

    with Session(engine) as session:
        uploads = session.exec(select(UploadLog)).all()
        print(f"Total upload records: {len(uploads)}")

        for upload in uploads:
            upload_issues = []

            # Check if uid is in email format (current requirement)
            if not validate_email_format(upload.uid):
                upload_issues.append(f"UID not in email format: {upload.uid}")

            # Check for missing processed_filename
            if not upload.processed_filename:
                upload_issues.append("Missing processed_filename")

            # Check for negative file size
            if upload.size_bytes < 0:
                upload_issues.append(f"Negative file size: {upload.size_bytes}")

            # Check for very old uploads
            if upload.created_at < datetime.utcnow() - timedelta(days=365):
                upload_issues.append(f"Very old upload (created: {upload.created_at})")

            if upload_issues:
                issues.append({
                    'id': upload.id,
                    'uid': upload.uid,
                    'filename': upload.filename,
                    'created_at': upload.created_at,
                    'issues': upload_issues,
                })

    print(f"Upload records with issues: {len(issues)}")
    for issue in issues:
        print(f"  Upload {issue['id']} (user: {issue['uid']}, file: {issue['filename']}):")
        for problem in issue['issues']:
            print(f"    - {problem}")

    return issues


def analyze_public_stream_table():
    """Analyze PublicStream table for legacy data issues"""
    print("\n=== ANALYZING PUBLIC STREAM TABLE ===")
    issues = []

    with Session(engine) as session:
        streams = session.exec(select(PublicStream)).all()
        print(f"Total public stream records: {len(streams)}")

        for stream in streams:
            stream_issues = []

            # Check if uid is in email format (current requirement)
            if not validate_email_format(stream.uid):
                stream_issues.append(f"UID not in email format: {stream.uid}")

            # Check if username is also email format (should match uid)
            if stream.username and not validate_email_format(stream.username):
                stream_issues.append(f"Username not in email format: {stream.username}")

            # Check if uid and username match (should be same after migration)
            if stream.username and stream.uid != stream.username:
                stream_issues.append(f"UID/username mismatch: uid={stream.uid}, username={stream.username}")

            # Check for negative storage
            if stream.storage_bytes < 0:
                stream_issues.append(f"Negative storage: {stream.storage_bytes}")

            # Check for missing display_name
            if not stream.display_name or stream.display_name.strip() == "":
                stream_issues.append("Empty display_name")

            if stream_issues:
                issues.append({
                    'uid': stream.uid,
                    'username': stream.username,
                    'display_name': stream.display_name,
                    'issues': stream_issues,
                })

    print(f"Public stream records with issues: {len(issues)}")
    for issue in issues:
        print(f"  Stream {issue['uid']} (username: {issue['username']}):")
        for problem in issue['issues']:
            print(f"    - {problem}")

    return issues


def check_referential_integrity():
    """Check for referential integrity issues between tables"""
    print("\n=== CHECKING REFERENTIAL INTEGRITY ===")
    issues = []

    with Session(engine) as session:
        # Get all unique UIDs from each table
        users = session.exec(select(User.email)).all()
        user_usernames = session.exec(select(User.username)).all()
        quotas = session.exec(select(UserQuota.uid)).all()
        uploads = session.exec(select(UploadLog.uid)).all()
        streams = session.exec(select(PublicStream.uid)).all()
        sessions = session.exec(select(DBSession.user_id)).all()

        user_emails = set(users)
        user_usernames_set = set(user_usernames)
        quota_uids = set(quotas)
        upload_uids = set(uploads)
        stream_uids = set(streams)
        session_uids = set(sessions)

        print(f"Unique user emails: {len(user_emails)}")
        print(f"Unique user usernames: {len(user_usernames_set)}")
        print(f"Unique quota UIDs: {len(quota_uids)}")
        print(f"Unique upload UIDs: {len(upload_uids)}")
        print(f"Unique stream UIDs: {len(stream_uids)}")
        print(f"Unique session user_ids: {len(session_uids)}")

        # Check for orphaned records
        orphaned_quotas = quota_uids - user_emails
        orphaned_uploads = upload_uids - user_emails
        orphaned_streams = stream_uids - user_emails
        orphaned_sessions = session_uids - user_usernames_set  # Sessions use username as user_id

        if orphaned_quotas:
            issues.append(f"Orphaned quota records (no matching user): {orphaned_quotas}")

        if orphaned_uploads:
            issues.append(f"Orphaned upload records (no matching user): {orphaned_uploads}")

        if orphaned_streams:
            issues.append(f"Orphaned stream records (no matching user): {orphaned_streams}")

        if orphaned_sessions:
            issues.append(f"Orphaned session records (no matching user): {orphaned_sessions}")

        # Check for users without quota records
        users_without_quota = user_emails - quota_uids
        if users_without_quota:
            issues.append(f"Users without quota records: {users_without_quota}")

        # Check for users without stream records
        users_without_streams = user_emails - stream_uids
        if users_without_streams:
            issues.append(f"Users without stream records: {users_without_streams}")

    print(f"Referential integrity issues: {len(issues)}")
    for issue in issues:
        print(f"  - {issue}")

    return issues

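# The set differences above are the in-Python equivalent of SQL anti-joins,
# e.g. for orphaned quota rows (table/column names here follow the models
# above; the actual SQL names depend on the SQLModel metadata):
#   SELECT uid FROM userquota
#   WHERE uid NOT IN (SELECT email FROM "user");

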
def main():
    """Run complete database legacy analysis"""
    print("=== DATABASE LEGACY DATA ANALYSIS ===")
    print(f"Analysis started at: {datetime.utcnow()}")

    all_issues = {}

    try:
        all_issues['users'] = analyze_user_table()
        all_issues['sessions'] = analyze_session_table()
        all_issues['quotas'] = analyze_quota_table()
        all_issues['uploads'] = analyze_upload_log_table()
        all_issues['streams'] = analyze_public_stream_table()
        all_issues['integrity'] = check_referential_integrity()

        # Summary (every analyzer returns a list of issue records)
        print("\n=== SUMMARY ===")
        total_issues = sum(len(issues) for issues in all_issues.values())
        print(f"Total issues found: {total_issues}")

        for table, issues in all_issues.items():
            if issues:
                print(f"  {table}: {len(issues)} issues")

        if total_issues == 0:
            print("✅ No legacy data issues found! Database is clean.")
        else:
            print("⚠️ Legacy data issues found. Consider running cleanup scripts.")

    except Exception as e:
        print(f"❌ Error during analysis: {e}")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
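# Usage: the analysis pass is read-only (the cleanup sketches above are never
# invoked automatically):
#   python3 analyze_db_legacy.py
# Exits 0 when the analysis completes, 1 if any analyzer raises.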