Speed up LoRA compute and lower memory usage by doing it in fp16. (#11161)

This commit is contained in:
comfyanonymous
2025-12-06 15:36:20 -08:00
committed by GitHub
parent 7ac7d69d94
commit 50ca97e776
2 changed files with 17 additions and 2 deletions

View File

@@ -614,10 +614,11 @@ class ModelPatcher:
if key not in self.backup:
self.backup[key] = collections.namedtuple('Dimension', ['weight', 'inplace_update'])(weight.to(device=self.offload_device, copy=inplace_update), inplace_update)
temp_dtype = comfy.model_management.lora_compute_dtype(device_to)
if device_to is not None:
temp_weight = comfy.model_management.cast_to_device(weight, device_to, torch.float32, copy=True)
temp_weight = comfy.model_management.cast_to_device(weight, device_to, temp_dtype, copy=True)
else:
temp_weight = weight.to(torch.float32, copy=True)
temp_weight = weight.to(temp_dtype, copy=True)
if convert_func is not None:
temp_weight = convert_func(temp_weight, inplace=True)