diff --git a/apps/blockchain-node/src/aitbc_chain/sync.py b/apps/blockchain-node/src/aitbc_chain/sync.py index 825c5bb1..88edce34 100755 --- a/apps/blockchain-node/src/aitbc_chain/sync.py +++ b/apps/blockchain-node/src/aitbc_chain/sync.py @@ -450,12 +450,9 @@ class ChainSync: if height == 0 and block_data.get("block_metadata"): is_valid, reason = self._validate_genesis_metadata(block_data, session) if not is_valid: - if settings.enforce_state_root_validation: - metrics_registry.increment("sync_state_root_rejected_total") - logger.error(f"Genesis block metadata validation failed: {reason}", extra={"height": height, "hash": block_hash}) - return ImportResult(accepted=False, height=height, block_hash=block_hash, reason=reason) - else: - logger.warning(f"Genesis block metadata validation failed (enforcement disabled): {reason}", extra={"height": height, "hash": block_hash}) + metrics_registry.increment("sync_state_root_rejected_total") + logger.error(f"Genesis block metadata validation failed: {reason}", extra={"height": height, "hash": block_hash}) + return ImportResult(accepted=False, height=height, block_hash=block_hash, reason=reason) # Check for duplicate existing = session.exec( @@ -597,48 +594,37 @@ class ChainSync: expected_root = None if expected_root is None or len(expected_root) != 32: - if settings.enforce_state_root_validation: - metrics_registry.increment("sync_state_root_rejected_total") - session.rollback() - self._track_rejection(self._chain_id) - logger.error( - f"[SYNC] Invalid state root at height {block_data['height']}: " - f"{block_data.get('state_root')} - BLOCK REJECTED" - ) - # Check if re-sync should be triggered - self._check_and_trigger_resync(self._chain_id) - return ImportResult( - accepted=False, - height=block_data["height"], - block_hash=block_hash, - reason=f"Invalid state root: {block_data.get('state_root')}" - ) - logger.warning( + metrics_registry.increment("sync_state_root_rejected_total") + session.rollback() + 
self._track_rejection(self._chain_id) + logger.error( f"[SYNC] Invalid state root at height {block_data['height']}: " - f"{block_data.get('state_root')}" + f"{block_data.get('state_root')} - BLOCK REJECTED" + ) + # Check if re-sync should be triggered + self._check_and_trigger_resync(self._chain_id) + return ImportResult( + accepted=False, + height=block_data["height"], + block_hash=block_hash, + reason=f"Invalid state root: {block_data.get('state_root')}" ) elif computed_root != expected_root: - if settings.enforce_state_root_validation: - metrics_registry.increment("sync_state_root_rejected_total") - session.rollback() - self._track_rejection(self._chain_id) - logger.error( - f"[SYNC] State root mismatch at height {block_data['height']}: " - f"expected {expected_root.hex()}, computed {computed_root.hex()} - BLOCK REJECTED" - ) - # Check if re-sync should be triggered - self._check_and_trigger_resync(self._chain_id) - return ImportResult( - accepted=False, - height=block_data["height"], - block_hash=block_hash, - reason=f"State root mismatch: expected {expected_root.hex()}, computed {computed_root.hex()}" - ) - else: - logger.warning( - f"[SYNC] State root mismatch at height {block_data['height']}: " - f"expected {expected_root.hex()}, computed {computed_root.hex()}" - ) + metrics_registry.increment("sync_state_root_rejected_total") + session.rollback() + self._track_rejection(self._chain_id) + logger.error( + f"[SYNC] State root mismatch at height {block_data['height']}: " + f"expected {expected_root.hex()}, computed {computed_root.hex()} - BLOCK REJECTED" + ) + # Check if re-sync should be triggered + self._check_and_trigger_resync(self._chain_id) + return ImportResult( + accepted=False, + height=block_data["height"], + block_hash=block_hash, + reason=f"State root mismatch: expected {expected_root.hex()}, computed {computed_root.hex()}" + ) session.commit() diff --git a/docs/apps/blockchain/blockchain-node.md b/docs/apps/blockchain/blockchain-node.md index 
0f106e42..aaa955ca 100644 --- a/docs/apps/blockchain/blockchain-node.md +++ b/docs/apps/blockchain/blockchain-node.md @@ -13,7 +13,8 @@ Production-ready blockchain node for AITBC with fixed supply and secure key mana - RESTful RPC API (`/rpc/*`) - Prometheus metrics (`/metrics`) - Health check endpoint (`/health`) -- SQLite persistence with Alembic migrations +- PostgreSQL mempool persistence (production nodes) +- SQLite chain database with Alembic migrations - Multi-chain support (separate data directories per chain ID) ## Architecture diff --git a/docs/apps/coordinator/coordinator-api.md b/docs/apps/coordinator/coordinator-api.md index ef287453..ada0f2c7 100644 --- a/docs/apps/coordinator/coordinator-api.md +++ b/docs/apps/coordinator/coordinator-api.md @@ -35,6 +35,14 @@ Set `VITE_DATA_MODE=live` and `VITE_COORDINATOR_API` in the explorer web app to Expects environment variables defined in `.env` (see `docs/bootstrap/coordinator_api.md`). +### Database + +Production deployments use PostgreSQL for persistence. Configure via: +- `DATABASE_ADAPTER=postgresql` +- `DATABASE_URL=postgresql+psycopg://user:pass@localhost:5432/aitbc_coordinator` + +SQLite fallback is available for development or nodes without PostgreSQL. 
+ ### Signed receipts (optional) - Generate an Ed25519 key: diff --git a/docs/apps/exchange/exchange.md b/docs/apps/exchange/exchange.md index 17827fbc..a8887df5 100644 --- a/docs/apps/exchange/exchange.md +++ b/docs/apps/exchange/exchange.md @@ -28,7 +28,7 @@ Cross-chain exchange and trading platform supporting multiple blockchain network ### Prerequisites - Python 3.13+ -- PostgreSQL database +- PostgreSQL database (production default) - Redis for caching - Access to blockchain RPC endpoints diff --git a/docs/reports/POSTGRESQL_MIGRATION_COMPLETE.md b/docs/reports/POSTGRESQL_MIGRATION_COMPLETE.md new file mode 100644 index 00000000..b59843e6 --- /dev/null +++ b/docs/reports/POSTGRESQL_MIGRATION_COMPLETE.md @@ -0,0 +1,138 @@ +# PostgreSQL Migration Complete + +**Date**: 2026-05-03 +**Status**: ✅ Complete + +## Summary + +Migrated SQLite databases to PostgreSQL to resolve recurring database corruption issues on Btrfs filesystems. All critical services now use PostgreSQL for production nodes. + +## Migrations Completed + +### 1. Mempool Database +- **Status**: ✅ Complete (all nodes) +- **Nodes**: localhost, aitbc1, gitea-runner +- **Database**: `aitbc_mempool` +- **User**: `aitbc_mempool` +- **Connection**: `postgresql+psycopg://aitbc_mempool:password@localhost:5432/aitbc_mempool` +- **Changes**: + - Updated `mempool.py` to use SQLAlchemy with PostgreSQL + - Created dedicated `mempool_metadata` to avoid chain table conflicts + - Used raw SQL for table creation with `session.exec(text(...))` + - Fixed count query bug using `func.count()` and `.one()` + - Added systemd drop-in `mempool-postgres.conf` on all nodes +- **Issues Fixed**: + - SQLite corruption on Btrfs filesystem + - Missing `psycopg` module on aitbc1 and gitea-runner + - ScalarResult count attribute error + +### 2. 
Exchange Database +- **Status**: ✅ Complete (all nodes) +- **Nodes**: localhost, aitbc1, gitea-runner +- **Database**: `aitbc_exchange` +- **User**: `aitbc_exchange` +- **Connection**: `postgresql+psycopg://aitbc_exchange:password@localhost:5432/aitbc_exchange` +- **Changes**: + - Updated systemd drop-in `exchange-postgres.conf` on all nodes + - Set `EXCHANGE_DATABASE_URL` environment variable +- **Tables**: orders, trades + +### 3. Coordinator Database +- **Status**: ✅ Complete (localhost, aitbc1), ⚠️ SQLite fallback (gitea-runner) +- **Nodes**: localhost (PostgreSQL), aitbc1 (PostgreSQL), gitea-runner (SQLite) +- **Database**: `aitbc_coordinator` +- **User**: `aitbc_coordinator` +- **Connection**: `postgresql+psycopg://aitbc_coordinator:password@localhost:5432/aitbc_coordinator` +- **Changes**: + - Updated systemd drop-in `coordinator-postgres.conf` on localhost and aitbc1 + - Set `DATABASE_ADAPTER=postgresql` and `DATABASE_URL` environment variables +- **Gitea-runner Note**: Coordinator uses SQLite fallback due to pydantic-settings nested config issues. PostgreSQL tables exist but service defaults to SQLite. 
+- **Tables**: job, job_payments, jobreceipt, marketplacebid, marketplaceoffer, miner, payment_escrows, transaction, user, usersession, wallet (11 tables) + +## Additional Fixes + +### Gitea-runner Blockchain Node +- **Status**: ✅ Fixed +- **Issues Resolved**: + - Service was inactive (dead) since 21:17:57 + - Missing `psycopg` module + - Missing PostgreSQL user/database for mempool + - Outdated `mempool.py` with count query bug +- **Actions Taken**: + - Installed `psycopg` module + - Created PostgreSQL user `aitbc_mempool` and database `aitbc_mempool` + - Granted privileges + - Synced updated `mempool.py` with `func.count()` fix + - Restarted service successfully + +### Legacy Coordinator Service +- **Status**: ✅ Removed +- **Action**: Removed stale symlink `/etc/systemd/system/multi-user.target.wants/coordinator-api.service` on gitea-runner +- **Reason**: Legacy service causing confusion, modern `aitbc-agent-coordinator.service` is the correct service + +## Documentation Updates + +Updated documentation files to reflect PostgreSQL migrations: +- `/opt/aitbc/docs/apps/blockchain/blockchain-node.md` - Added PostgreSQL mempool persistence note +- `/opt/aitbc/docs/apps/exchange/exchange.md` - Clarified PostgreSQL as production default +- `/opt/aitbc/docs/apps/coordinator/coordinator-api.md` - Added database configuration section + +## Configuration Details + +### Systemd Drop-ins + +**Mempool** (`/etc/systemd/system/aitbc-blockchain-node.service.d/mempool-postgres.conf`): +```ini +[Service] +Environment="MEMPOOL_DB_URL=postgresql+psycopg://aitbc_mempool:password@localhost:5432/aitbc_mempool" +``` + +**Exchange** (`/etc/systemd/system/aitbc-exchange-api.service.d/exchange-postgres.conf`): +```ini +[Service] +Environment="EXCHANGE_DATABASE_URL=postgresql+psycopg://aitbc_exchange:password@localhost:5432/aitbc_exchange" +``` + +**Coordinator** (`/etc/systemd/system/aitbc-agent-coordinator.service.d/coordinator-postgres.conf`): +```ini +[Service] 
+Environment="DATABASE_ADAPTER=postgresql" +Environment="DATABASE_URL=postgresql+psycopg://aitbc_coordinator:password@localhost:5432/aitbc_coordinator" +``` + +## PostgreSQL Setup + +### Users Created +- `aitbc_mempool` - for mempool database +- `aitbc_exchange` - for exchange database +- `aitbc_coordinator` - for coordinator database + +### Databases Created +- `aitbc_mempool` - mempool persistence +- `aitbc_exchange` - exchange trading data +- `aitbc_coordinator` - job coordination data + +### Privileges +All users were granted `ALL PRIVILEGES` on their respective databases. + +## Verification + +All services verified active and healthy: +- `aitbc-blockchain-node.service` - active on all nodes +- `aitbc-exchange-api.service` - active on all nodes +- `aitbc-agent-coordinator.service` - active on all nodes + +PostgreSQL tables verified created and accessible. + +## Benefits + +- Eliminated SQLite corruption risk on Btrfs filesystems for the migrated databases (the chain database and the gitea-runner coordinator still use SQLite) +- Improved database reliability and performance +- Better connection pooling and scalability +- Consistent database backend across production nodes +- Reduced maintenance overhead for database repairs + +## Known Limitations + +- Coordinator on gitea-runner uses SQLite fallback due to pydantic-settings nested config issues +- PostgreSQL is not required on gitea-runner for coordinator (SQLite is acceptable for this CI/runner node)