Speed up LoRA compute and lower memory usage by doing it in fp16. (#11161)

This commit is contained in:
comfyanonymous
2025-12-06 15:36:20 -08:00
committed by GitHub
parent 7ac7d69d94
commit 50ca97e776
2 changed files with 17 additions and 2 deletions

View File

@@ -614,10 +614,11 @@ class ModelPatcher:
if key not in self.backup:
self.backup[key] = collections.namedtuple('Dimension', ['weight', 'inplace_update'])(weight.to(device=self.offload_device, copy=inplace_update), inplace_update)
temp_dtype = comfy.model_management.lora_compute_dtype(device_to)
if device_to is not None:
temp_weight = comfy.model_management.cast_to_device(weight, device_to, torch.float32, copy=True)
temp_weight = comfy.model_management.cast_to_device(weight, device_to, temp_dtype, copy=True)
else:
temp_weight = weight.to(torch.float32, copy=True)
temp_weight = weight.to(temp_dtype, copy=True)
if convert_func is not None:
temp_weight = convert_func(temp_weight, inplace=True)