Optimize NVFP4 LoRA application. (#11866)

This changes the results slightly, but it also speeds things up considerably.
2026-01-13 21:49:38 -08:00
parent 712cca36a1
commit 6165c38cb5
3 changed files with 49 additions and 11 deletions
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -104,7 +104,7 @@ class TensorCoreNVFP4Layout(_CKNvfp4Layout):
        needs_padding = padded_shape != orig_shape

        if stochastic_rounding > 0:
-            qdata, block_scale = comfy.float.stochastic_round_quantize_nvfp4(tensor, scale, pad_16x=needs_padding, seed=stochastic_rounding)
+            qdata, block_scale = comfy.float.stochastic_round_quantize_nvfp4_by_block(tensor, scale, pad_16x=needs_padding, seed=stochastic_rounding)
        else:
            qdata, block_scale = ck.quantize_nvfp4(tensor, scale, pad_16x=needs_padding)