Kandinsky5 model support (#10988)

* Add Kandinsky5 model support

lite and pro T2V tested to work

* Update kandinsky5.py

* Fix fp8

* Fix fp8_scaled text encoder

* Add transformer_options for attention

* Code cleanup, optimizations, use fp32 for all layers originally at fp32

* ImageToVideo -node

* Fix I2V, add necessary latent post process nodes

* Support text to image model

* Support block replace patches (SLG mostly)

* Support official LoRAs

* Don't scale RoPE for lite model as that just doesn't work...

* Update supported_models.py

* Rever RoPE scaling to simpler one

* Fix typo

* Handle latent dim difference for image model in the VAE instead

* Add node to use different prompts for clip_l and qwen25_7b

* Reduce peak VRAM usage a bit

* Further reduce peak VRAM consumption by chunking ffn

* Update chunking

* Update memory_usage_factor

* Code cleanup, don't force the fp32 layers as it has minimal effect

* Allow for stronger changes with first frames normalization

Default values are too weak for any meaningful changes, these should probably be exposed as advanced node options when that's available.

* Add image model's own chat template, remove unused image2video template

* Remove hard error in ReplaceVideoLatentFrames -node

* Update kandinsky5.py

* Update supported_models.py

* Fix typos in prompt template

They were now fixed in the original repository as well

* Update ReplaceVideoLatentFrames

Add tooltips
Make source optional
Better handle negative index

* Rename NormalizeVideoLatentFrames -node

For bit better clarity what it does

* Fix NormalizeVideoLatentStart node out on non-op
This commit is contained in:
Jukka Seppänen
2025-12-06 05:20:22 +02:00
committed by GitHub
parent bed12674a1
commit fd109325db
11 changed files with 791 additions and 3 deletions

View File

@@ -4,7 +4,7 @@ import torch
import nodes
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
import logging
def reshape_latent_to(target_shape, latent, repeat_batch=True):
if latent.shape[1:] != target_shape[1:]:
@@ -388,6 +388,42 @@ class LatentOperationSharpen(io.ComfyNode):
return luminance * sharpened
return io.NodeOutput(sharpen)
class ReplaceVideoLatentFrames(io.ComfyNode):
@classmethod
def define_schema(cls):
return io.Schema(
node_id="ReplaceVideoLatentFrames",
category="latent/batch",
inputs=[
io.Latent.Input("destination", tooltip="The destination latent where frames will be replaced."),
io.Latent.Input("source", optional=True, tooltip="The source latent providing frames to insert into the destination latent. If not provided, the destination latent is returned unchanged."),
io.Int.Input("index", default=0, min=-nodes.MAX_RESOLUTION, max=nodes.MAX_RESOLUTION, step=1, tooltip="The starting latent frame index in the destination latent where the source latent frames will be placed. Negative values count from the end."),
],
outputs=[
io.Latent.Output(),
],
)
@classmethod
def execute(cls, destination, index, source=None) -> io.NodeOutput:
if source is None:
return io.NodeOutput(destination)
dest_frames = destination["samples"].shape[2]
source_frames = source["samples"].shape[2]
if index < 0:
index = dest_frames + index
if index > dest_frames:
logging.warning(f"ReplaceVideoLatentFrames: Index {index} is out of bounds for destination latent frames {dest_frames}.")
return io.NodeOutput(destination)
if index + source_frames > dest_frames:
logging.warning(f"ReplaceVideoLatentFrames: Source latent frames {source_frames} do not fit within destination latent frames {dest_frames} at the specified index {index}.")
return io.NodeOutput(destination)
s = source.copy()
s_source = source["samples"]
s_destination = destination["samples"].clone()
s_destination[:, :, index:index + s_source.shape[2]] = s_source
s["samples"] = s_destination
return io.NodeOutput(s)
class LatentExtension(ComfyExtension):
@override
@@ -405,6 +441,7 @@ class LatentExtension(ComfyExtension):
LatentApplyOperationCFG,
LatentOperationTonemapReinhard,
LatentOperationSharpen,
ReplaceVideoLatentFrames
]