Optimize nvfp4 lora applying. (#11866)

This changes the results slightly, but it also speeds things up a lot.
This commit is contained in:
comfyanonymous
2026-01-13 21:49:38 -08:00
committed by GitHub
parent 712cca36a1
commit 6165c38cb5
3 changed files with 49 additions and 11 deletions

View File

@@ -104,7 +104,7 @@ class TensorCoreNVFP4Layout(_CKNvfp4Layout):
needs_padding = padded_shape != orig_shape
if stochastic_rounding > 0:
qdata, block_scale = comfy.float.stochastic_round_quantize_nvfp4(tensor, scale, pad_16x=needs_padding, seed=stochastic_rounding)
qdata, block_scale = comfy.float.stochastic_round_quantize_nvfp4_by_block(tensor, scale, pad_16x=needs_padding, seed=stochastic_rounding)
else:
qdata, block_scale = ck.quantize_nvfp4(tensor, scale, pad_16x=needs_padding)