chore: enhance .gitignore and remove obsolete documentation files

- Reorganize .gitignore with categorized sections for better maintainability
- Add comprehensive ignore patterns for Python, Node.js, databases, logs, and build artifacts
- Add project-specific ignore rules for coordinator, explorer, and deployment files
- Remove outdated documentation: BITCOIN-WALLET-SETUP.md, LOCAL_ASSETS_SUMMARY.md, README-CONTAINER-DEPLOYMENT.md, README-DOMAIN-DEPLOYMENT.md
218 lines
7.0 KiB
Python
Executable File
218 lines
7.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
GPU Access Test - Check if miner can access local GPU resources
|
|
"""
|
|
|
|
import argparse
|
|
import subprocess
|
|
import json
|
|
import time
|
|
import psutil
|
|
|
|
def check_nvidia_gpu():
    """Check NVIDIA GPU availability by querying ``nvidia-smi``.

    Runs ``nvidia-smi`` asking for each GPU's name, total memory, free
    memory and utilization as header-less CSV, then prints a short summary
    for every GPU line that has all four fields.

    Returns:
        bool: True when ``nvidia-smi`` exits with status 0 and lists at
        least one GPU. False when the executable cannot be found, exits
        with a non-zero status, or succeeds without listing any GPU.
    """
    print("🔍 Checking NVIDIA GPU...")

    try:
        result = subprocess.run(
            [
                "nvidia-smi",
                "--query-gpu=name,memory.total,memory.free,utilization.gpu",
                "--format=csv,noheader,nounits",
            ],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        print("❌ nvidia-smi not found - NVIDIA drivers not installed")
        return False

    if result.returncode != 0:
        print("❌ nvidia-smi command failed")
        return False

    # Drop blank lines: "".split("\n") yields [""], which would otherwise be
    # counted as one GPU when the command succeeds but prints nothing.
    lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    if not lines:
        print("❌ nvidia-smi reported no GPUs")
        return False

    print(f"✅ NVIDIA GPU(s) Found: {len(lines)}")

    for i, line in enumerate(lines, 1):
        # Split from the right so a comma inside the model name cannot shift
        # the three trailing numeric fields.
        parts = [field.strip() for field in line.rsplit(",", 3)]
        if len(parts) == 4:
            name, total_mem, free_mem, util = parts
            print(f"\n GPU {i}:")
            print(f" 📦 Model: {name}")
            print(f" 💾 Memory: {free_mem}/{total_mem} MB free")
            print(f" ⚡ Utilization: {util}%")

    return True
|
|
|
|
def check_cuda():
    """Check for CUDA devices through the NVML bindings (``pynvml``).

    Initializes NVML, prints the name and free/total memory (in MB) of every
    device it enumerates, and shuts NVML down again before returning.

    Returns:
        bool: True when NVML initializes and reports at least one device.
        False when ``pynvml`` cannot be imported, when any NVML call raises,
        or when NVML reports zero devices.
    """
    print("\n🔍 Checking CUDA...")

    try:
        import pynvml
    except ImportError:
        print("⚠️ pynvml not installed - install with: pip install pynvml")
        return False

    try:
        pynvml.nvmlInit()
    except Exception as e:
        print(f"❌ CUDA error: {e}")
        return False

    try:
        device_count = pynvml.nvmlDeviceGetCount()
        if device_count <= 0:
            # An initialized driver with nothing attached is not a usable GPU.
            print("❌ CUDA error: no devices reported by NVML")
            return False

        print(f"✅ CUDA Available - {device_count} device(s)")

        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            # Depending on the pynvml release the name arrives as bytes or
            # as str; only bytes has .decode().
            name = pynvml.nvmlDeviceGetName(handle)
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)

            print(f"\n CUDA Device {i}:")
            print(f" 📦 Name: {name}")
            print(f" 💾 Memory: {memory_info.free // 1024**2}/{memory_info.total // 1024**2} MB free")

        return True
    except Exception as e:
        print(f"❌ CUDA error: {e}")
        return False
    finally:
        # Pair the successful nvmlInit() with a shutdown; a failure here must
        # not mask the result computed above.
        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            print(f"⚠️ NVML shutdown failed: {e}")
|
|
|
|
def check_pytorch():
    """Report whether PyTorch is installed and can see CUDA devices.

    Prints the PyTorch version and whether CUDA is available. When it is,
    also prints the CUDA version, the device count and, for each device, its
    name, total memory in MB and compute capability.

    Returns:
        bool: the result of ``torch.cuda.is_available()``, or False when
        importing torch raises ImportError.
    """
    print("\n🔍 Checking PyTorch CUDA...")

    try:
        import torch

        print(f"✅ PyTorch Installed: {torch.__version__}")
        cuda_ready = torch.cuda.is_available()
        print(f" CUDA Available: {cuda_ready}")

        if not cuda_ready:
            return cuda_ready

        print(f" CUDA Version: {torch.version.cuda}")
        gpu_total = torch.cuda.device_count()
        print(f" GPU Count: {gpu_total}")

        for index in range(gpu_total):
            info = torch.cuda.get_device_properties(index)
            mem_mb = info.total_memory // 1024**2
            print(f"\n PyTorch GPU {index}:")
            print(f" 📦 Name: {info.name}")
            print(f" 💾 Memory: {mem_mb} MB")
            print(f" Compute: {info.major}.{info.minor}")

        return cuda_ready

    except ImportError:
        print("❌ PyTorch not installed - install with: pip install torch")
        return False
|
|
|
|
def run_gpu_stress_test(duration=10):
    """Keep the CUDA device busy with matrix multiplications for a while.

    Repeatedly creates two random 1000x1000 tensors on the CUDA device,
    multiplies them and synchronizes, until ``duration`` seconds have
    elapsed.

    Args:
        duration: how long to keep looping, in seconds.

    Returns:
        bool: True when the loop finishes; False when CUDA is unavailable or
        any exception (including a failed torch import) is raised.
    """
    print(f"\n🔥 Running GPU Stress Test ({duration}s)...")

    try:
        import torch

        if not torch.cuda.is_available():
            print("❌ CUDA not available for stress test")
            return False

        gpu = torch.device('cuda')
        shape = (1000, 1000)

        print(" ⚡ Performing matrix multiplications...")
        began = time.time()

        while time.time() - began < duration:
            # Fresh operands every round, allocated directly on the device.
            lhs = torch.randn(shape, device=gpu)
            rhs = torch.randn(shape, device=gpu)
            product = lhs.mm(rhs)

            # Block until the kernel has actually finished.
            torch.cuda.synchronize()

        print("✅ Stress test completed successfully")
        return True

    except Exception as err:
        print(f"❌ Stress test failed: {err}")
        return False
|
|
|
|
def check_system_resources():
    """Print a snapshot of CPU, RAM and root-filesystem usage via psutil.

    CPU usage is sampled over a one-second interval; RAM is shown in MB and
    disk usage of ``/`` in GB. Nothing is returned.
    """
    mib = 1024**2
    gib = 1024**3

    print("\n💻 System Resources:")

    # CPU
    load = psutil.cpu_percent(interval=1)
    print(f" 🖥️ CPU Usage: {load}%")
    logical_cores = psutil.cpu_count()
    physical_cores = psutil.cpu_count(logical=False)
    print(f" 🧠 CPU Cores: {logical_cores} logical, {physical_cores} physical")

    # Memory
    ram = psutil.virtual_memory()
    ram_used_mb = ram.used // mib
    ram_total_mb = ram.total // mib
    print(f" 💾 RAM: {ram_used_mb}/{ram_total_mb} MB used ({ram.percent}%)")

    # Disk
    root_fs = psutil.disk_usage('/')
    disk_used_gb = root_fs.used // gib
    disk_total_gb = root_fs.total // gib
    print(f" 💿 Disk: {disk_used_gb}/{disk_total_gb} GB used")
|
|
|
|
def _check_miner_service():
    """Print whether the ``aitbc-gpu-miner`` systemd unit is active."""
    print("\n🔍 Checking miner service...")
    try:
        result = subprocess.run(
            ["systemctl", "is-active", "aitbc-gpu-miner"],
            capture_output=True,
            text=True,
        )
    except (OSError, subprocess.SubprocessError):
        # Best effort: systemctl may be missing or not runnable here. Only
        # launch failures are swallowed, so Ctrl-C and SystemExit propagate.
        print("⚠️ Could not check miner service status")
        return

    if result.stdout.strip() == "active":
        print("✅ Miner service is running")
    else:
        print("⚠️ Miner service is not running")
        print(" Start with: sudo systemctl start aitbc-gpu-miner")


def main():
    """Command-line entry point: run every check and print a summary.

    Options:
        --stress N  run the GPU stress test for N seconds (default 0 = off).
        --all       also run the stress test; lasts 10 seconds unless
                    --stress gives a positive duration.

    Prints system resources, runs the nvidia-smi, NVML and PyTorch checks,
    summarizes whether any of them found a GPU, and finally reports the
    state of the miner systemd service.
    """
    parser = argparse.ArgumentParser(description="GPU Access Test for AITBC Miner")
    parser.add_argument("--stress", type=int, default=0, help="Run stress test for N seconds")
    parser.add_argument("--all", action="store_true", help="Run all tests including stress")

    args = parser.parse_args()

    print("🚀 AITBC GPU Access Test")
    print("=" * 60)

    # Check system resources
    check_system_resources()

    # Check GPU availability
    has_nvidia = check_nvidia_gpu()
    has_cuda = check_cuda()
    has_pytorch = check_pytorch()

    # Summary
    print("\n📊 SUMMARY")
    print("=" * 60)

    if has_nvidia or has_cuda or has_pytorch:
        print("✅ GPU is available for mining!")

        if args.stress > 0 or args.all:
            # --all without an explicit positive --stress falls back to 10 s.
            run_gpu_stress_test(args.stress if args.stress > 0 else 10)

        print("\n💡 Miner can run GPU-intensive tasks:")
        print(" • Model inference (LLaMA, Stable Diffusion)")
        print(" • Training jobs")
        print(" • Batch processing")

    else:
        print("❌ No GPU available - miner will run in CPU-only mode")
        print("\n💡 To enable GPU mining:")
        print(" 1. Install NVIDIA drivers")
        print(" 2. Install CUDA toolkit")
        print(" 3. Install PyTorch with CUDA: pip install torch")

    # Check if miner service is running
    _check_miner_service()
|
|
|
|
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|