Add custom nodes, Civitai loras (LFS), and vast.ai setup script
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Has been cancelled
Execution Tests / test (macos-latest) (push) Has been cancelled
Execution Tests / test (ubuntu-latest) (push) Has been cancelled
Execution Tests / test (windows-latest) (push) Has been cancelled
Test server launches without errors / test (push) Has been cancelled
Unit Tests / test (macos-latest) (push) Has been cancelled
Unit Tests / test (ubuntu-latest) (push) Has been cancelled
Unit Tests / test (windows-2022) (push) Has been cancelled
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Has been cancelled
Execution Tests / test (macos-latest) (push) Has been cancelled
Execution Tests / test (ubuntu-latest) (push) Has been cancelled
Execution Tests / test (windows-latest) (push) Has been cancelled
Test server launches without errors / test (push) Has been cancelled
Unit Tests / test (macos-latest) (push) Has been cancelled
Unit Tests / test (ubuntu-latest) (push) Has been cancelled
Unit Tests / test (windows-2022) (push) Has been cancelled
Includes 30 custom nodes committed directly, 7 Civitai-exclusive loras stored via Git LFS, and a setup script that installs all dependencies and downloads HuggingFace-hosted models on vast.ai. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
300
custom_nodes/ComfyUI-Crystools/general/gpu.py
Normal file
300
custom_nodes/ComfyUI-Crystools/general/gpu.py
Normal file
@@ -0,0 +1,300 @@
|
||||
import torch
|
||||
import comfy.model_management
|
||||
from ..core import logger
|
||||
import os
|
||||
import platform
|
||||
|
||||
def is_jetson() -> bool:
    """
    Report whether this interpreter appears to be running on an NVIDIA Jetson board.

    The device-tree model string in ``/proc/device-tree/model`` is consulted
    first. If reading or handling it fails for any reason, the kernel release
    reported by ``platform.release()`` is checked for "tegra" instead.

    Returns:
        True when the model string contains "NVIDIA" or, on the fallback
        path, when the platform release contains "tegra" (case-insensitive);
        False otherwise.
    """
    try:
        with open('/proc/device-tree/model', 'r') as model_file:
            device_model = model_file.read().strip()
        logger.info(f"Device model: {device_model}")
        return "NVIDIA" in device_model
    except Exception:
        # Intentionally not reported as a warning: on machines that are not
        # Jetson boards this failure is expected and can be ignored.
        # Fall back to inspecting the kernel release string.
        kernel_release = platform.release()
        logger.info(f"Platform release: {kernel_release}")

        looks_like_tegra = 'tegra' in kernel_release.lower()
        if looks_like_tegra:
            logger.info("Detected 'tegra' in platform release. Assuming Jetson device.")
        else:
            logger.info("JETSON: Not detected.")
        return looks_like_tegra
|
||||
|
||||
# Evaluated once, at import time (this runs the detection and its logging as a
# module-level side effect). CGPUInfo.__init__ reads this flag to choose
# between the jtop and pynvml backends.
IS_JETSON = is_jetson()
|
||||
|
||||
class CGPUInfo:
    """
    This class is responsible for getting information from GPU (ONLY).

    Two monitoring backends are supported and chosen in ``__init__``: ``jtop``
    when the module-level ``IS_JETSON`` flag is true, ``pynvml`` otherwise.
    Every ``device*`` / ``system*`` method dispatches on ``pynvmlLoaded`` and
    ``jtopLoaded`` and returns a neutral value when neither backend is loaded.
    """
    # The values below are class-level DEFAULTS only. Scalars are rebound on
    # the instance when they change; the list attributes are replaced with
    # fresh per-instance lists in __init__ so instances never share (and
    # never append to) the same list objects.
    cuda = False
    pynvmlLoaded = False
    jtopLoaded = False
    anygpuLoaded = False
    cudaAvailable = False
    torchDevice = 'cpu'
    cudaDevice = 'cpu'
    cudaDevicesFound = 0
    switchGPU = True
    switchVRAM = True
    switchTemperature = True
    gpus = []
    gpusUtilization = []
    gpusVRAM = []
    gpusTemperature = []
    # Backend handles; stay None unless the matching backend was imported.
    pynvml = None
    jtopInstance = None

    def __init__(self):
        """
        Load the monitoring backend, enumerate devices and record torch state.

        Failures to import or initialise a backend are logged and leave GPU
        monitoring disabled; they do not raise.
        """
        self._resetDeviceLists()

        if IS_JETSON:
            self._initJtop()
        else:
            self._initPynvml()

        self.anygpuLoaded = self.pynvmlLoaded or self.jtopLoaded

        try:
            self.torchDevice = comfy.model_management.get_torch_device_name(comfy.model_management.get_torch_device())
        except Exception as e:
            logger.error('Could not pick default device. ' + str(e))

        # Query the backend once; returns 0 when no backend is loaded.
        deviceCount = self.deviceGetCount()

        if self.pynvmlLoaded and not self.jtopLoaded and not deviceCount:
            logger.warning('No GPU detected, disabling GPU monitoring.')
            self.anygpuLoaded = False
            self.pynvmlLoaded = False
            self.jtopLoaded = False

        if self.anygpuLoaded:
            if deviceCount > 0:
                self.cudaDevicesFound = deviceCount

                logger.info("GPU/s:")

                for deviceIndex in range(self.cudaDevicesFound):
                    deviceHandle = self.deviceGetHandleByIndex(deviceIndex)
                    gpuName = self.deviceGetName(deviceHandle, deviceIndex)

                    logger.info(f"{deviceIndex}) {gpuName}")

                    self.gpus.append({
                        'index': deviceIndex,
                        'name': gpuName,
                    })

                    # Same index as gpus, with default values
                    self.gpusUtilization.append(True)
                    self.gpusVRAM.append(True)
                    self.gpusTemperature.append(True)

                self.cuda = True
                logger.info(self.systemGetDriverVersion())
            else:
                logger.warning('No GPU with CUDA detected.')
        else:
            logger.warning('No GPU monitoring libraries available.')

        self.cudaDevice = 'cpu' if self.torchDevice == 'cpu' else 'cuda'
        self.cudaAvailable = torch.cuda.is_available()

        if self.cuda and self.cudaAvailable and self.torchDevice == 'cpu':
            logger.warning('CUDA is available, but torch is using CPU.')

    def _resetDeviceLists(self):
        """Give this instance its own empty device lists (not the class-level ones)."""
        self.gpus = []
        self.gpusUtilization = []
        self.gpusVRAM = []
        self.gpusTemperature = []

    def _initJtop(self):
        """Import and start jtop; sets ``jtopLoaded`` only if start() succeeded."""
        try:
            from jtop import jtop
            self.jtopInstance = jtop()
            self.jtopInstance.start()
            self.jtopLoaded = True
            logger.info('jtop initialized on Jetson device.')
        except ImportError as e:
            logger.error('jtop is not installed. ' + str(e))
        except Exception as e:
            logger.error('Could not initialize jtop. ' + str(e))

    def _initPynvml(self):
        """Import pynvml and call nvmlInit(); sets ``pynvmlLoaded`` only on success."""
        try:
            import pynvml
            self.pynvml = pynvml
            self.pynvml.nvmlInit()
            self.pynvmlLoaded = True
            logger.info('pynvml (NVIDIA) initialized.')
        except ImportError as e:
            logger.error('pynvml is not installed. ' + str(e))
        except Exception as e:
            logger.error('Could not init pynvml (NVIDIA). ' + str(e))

    def getInfo(self) -> list:
        """Return the list of ``{'index', 'name'}`` dicts collected in ``__init__``."""
        logger.debug('Getting GPUs info...')
        return self.gpus

    def getStatus(self) -> dict:
        """
        Return the current status of every monitored device.

        Returns:
            ``{'device_type': str, 'gpus': list[dict]}``. When ``cudaDevice``
            is ``'cpu'`` the list holds one placeholder entry with every
            metric set to -1. Otherwise it holds one entry per detected
            device, or is empty when monitoring is unavailable. Each entry
            has the keys ``gpu_utilization``, ``gpu_temperature``,
            ``vram_total``, ``vram_used`` and ``vram_used_percent``; a value
            of -1 means the metric was not read.
        """
        if self.cudaDevice == 'cpu':
            return {
                'device_type': 'cpu',
                'gpus': [{
                    'gpu_utilization': -1,
                    'gpu_temperature': -1,
                    'vram_total': -1,
                    'vram_used': -1,
                    'vram_used_percent': -1,
                }],
            }

        gpus = []
        if self.anygpuLoaded and self.cuda and self.cudaAvailable:
            for deviceIndex in range(self.cudaDevicesFound):
                gpus.append(self._getDeviceStatus(deviceIndex))

        return {
            'device_type': self.cudaDevice,
            'gpus': gpus,
        }

    def _getDeviceStatus(self, deviceIndex) -> dict:
        """
        Read utilization, memory and temperature for one device.

        A metric that raises is logged and its global switch is turned off,
        so it is not queried again for any device on later calls.
        """
        deviceHandle = self.deviceGetHandleByIndex(deviceIndex)

        gpuUtilization = -1
        vramPercent = -1
        vramUsed = -1
        vramTotal = -1
        gpuTemperature = -1

        # GPU Utilization
        if self.switchGPU and self.gpusUtilization[deviceIndex]:
            try:
                gpuUtilization = self.deviceGetUtilizationRates(deviceHandle)
            except Exception as e:
                logger.error('Could not get GPU utilization. ' + str(e))
                logger.error('Monitor of GPU is turning off.')
                self.switchGPU = False

        # VRAM
        if self.switchVRAM and self.gpusVRAM[deviceIndex]:
            try:
                memory = self.deviceGetMemoryInfo(deviceHandle)
                vramUsed = memory['used']
                vramTotal = memory['total']

                # Skip the percentage when the total is zero or None.
                if vramTotal:
                    vramPercent = vramUsed / vramTotal * 100
            except Exception as e:
                logger.error('Could not get GPU memory info. ' + str(e))
                self.switchVRAM = False

        # Temperature
        if self.switchTemperature and self.gpusTemperature[deviceIndex]:
            try:
                gpuTemperature = self.deviceGetTemperature(deviceHandle)
            except Exception as e:
                logger.error('Could not get GPU temperature. Turning off this feature. ' + str(e))
                self.switchTemperature = False

        return {
            'gpu_utilization': gpuUtilization,
            'gpu_temperature': gpuTemperature,
            'vram_total': vramTotal,
            'vram_used': vramUsed,
            'vram_used_percent': vramPercent,
        }

    def deviceGetCount(self) -> int:
        """Return the number of devices: NVML's count, 1 for jtop, 0 with no backend."""
        if self.pynvmlLoaded:
            return self.pynvml.nvmlDeviceGetCount()
        elif self.jtopLoaded:
            # For Jetson devices, we assume there's one GPU
            return 1
        else:
            return 0

    def deviceGetHandleByIndex(self, index):
        """Return the backend handle for ``index`` (the index itself on jtop, 0 with no backend)."""
        if self.pynvmlLoaded:
            return self.pynvml.nvmlDeviceGetHandleByIndex(index)
        elif self.jtopLoaded:
            return index  # On Jetson, index acts as handle
        else:
            return 0

    def deviceGetName(self, deviceHandle, deviceIndex) -> str:
        """
        Return a display name for the device.

        Never raises for backend errors: failures are logged and a
        placeholder name is returned. Returns '' when no backend is loaded.
        ``deviceIndex`` is accepted for interface compatibility and unused.
        """
        if self.pynvmlLoaded:
            gpuName = 'Unknown GPU'

            try:
                gpuName = self.pynvml.nvmlDeviceGetName(deviceHandle)
                try:
                    gpuName = gpuName.decode('utf-8', errors='ignore')
                except AttributeError:
                    # Already a str (no .decode); use it as-is.
                    pass
            except UnicodeDecodeError as e:
                gpuName = 'Unknown GPU (decoding error)'
                logger.error(f"UnicodeDecodeError: {e}")
            except Exception as e:
                # Mirror the jtop branch: a name lookup failure must not
                # abort construction, so log it and use the placeholder.
                gpuName = 'Unknown GPU'
                logger.error('Could not get GPU name. ' + str(e))

            return gpuName
        elif self.jtopLoaded:
            # The name is taken to be the first key of self.jtopInstance.gpu
            try:
                gpu_info = self.jtopInstance.gpu
                return next(iter(gpu_info.keys()))
            except Exception as e:
                logger.error('Could not get GPU name. ' + str(e))
                return 'Unknown GPU'
        else:
            return ''

    def systemGetDriverVersion(self) -> str:
        """Return a human-readable driver version line for logging."""
        if self.pynvmlLoaded:
            return f'NVIDIA Driver: {self.pynvml.nvmlSystemGetDriverVersion()}'
        elif self.jtopLoaded:
            # No direct method to get driver version from jtop
            return 'NVIDIA Driver: unknown'
        else:
            return 'Driver unknown'

    def deviceGetUtilizationRates(self, deviceHandle):
        """
        Return GPU utilization for the device.

        pynvml: the ``gpu`` field of ``nvmlDeviceGetUtilizationRates``
        (errors propagate to the caller). jtop: the ``'GPU'`` entry of
        ``stats``, or -1 if missing or on error. No backend: 0.
        """
        if self.pynvmlLoaded:
            return self.pynvml.nvmlDeviceGetUtilizationRates(deviceHandle).gpu
        elif self.jtopLoaded:
            # GPU utilization from jtop stats
            try:
                return self.jtopInstance.stats.get('GPU', -1)
            except Exception as e:
                logger.error('Could not get GPU utilization. ' + str(e))
                return -1
        else:
            return 0

    def deviceGetMemoryInfo(self, deviceHandle) -> dict:
        """
        Return ``{'total': ..., 'used': ...}`` memory figures for the device.

        On jtop the values come from the ``'RAM'`` entry of ``memory``
        (keys ``'tot'`` and ``'used'``).
        NOTE(review): the jtop figures are passed through unconverted and may
        not be in the same unit as NVML's - confirm before comparing them.
        """
        if self.pynvmlLoaded:
            mem = self.pynvml.nvmlDeviceGetMemoryInfo(deviceHandle)
            return {'total': mem.total, 'used': mem.used}
        elif self.jtopLoaded:
            mem_data = self.jtopInstance.memory['RAM']
            return {'total': mem_data['tot'], 'used': mem_data['used']}
        else:
            return {'total': 1, 'used': 1}

    def deviceGetTemperature(self, deviceHandle):
        """
        Return the GPU temperature for the device.

        pynvml: ``nvmlDeviceGetTemperature`` with ``NVML_TEMPERATURE_GPU``
        (errors propagate to the caller). jtop: the ``'Temp gpu'`` entry of
        ``stats``, or -1 if missing or on error. No backend: 0.
        """
        if self.pynvmlLoaded:
            return self.pynvml.nvmlDeviceGetTemperature(deviceHandle, self.pynvml.NVML_TEMPERATURE_GPU)
        elif self.jtopLoaded:
            try:
                return self.jtopInstance.stats.get('Temp gpu', -1)
            except Exception as e:
                logger.error('Could not get GPU temperature. ' + str(e))
                return -1
        else:
            return 0

    def close(self):
        """Close the jtop connection if one was started; otherwise do nothing."""
        if self.jtopLoaded and self.jtopInstance is not None:
            self.jtopInstance.close()
|
||||
Reference in New Issue
Block a user