Add custom nodes, Civitai LoRAs (LFS), and vast.ai setup script

Includes 30 custom nodes committed directly, 7 Civitai-exclusive
LoRAs stored via Git LFS, and a setup script that installs all
dependencies and downloads HuggingFace-hosted models on vast.ai.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
commit f09734b0ee
parent 2b70ab9ad0
Date: 2026-02-09 00:55:26 +00:00

2274 changed files with 748556 additions and 3 deletions
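
The setup script itself is not among the hunks below (these are all
vendored BLIP config files). As a rough illustration of the
model-download step the commit message describes, a minimal Python
sketch, assuming huggingface_hub is installed; the repo ID, filename,
and target directory are placeholders, not the script's real manifest:

# Hypothetical sketch of the setup script's model-download step.
from pathlib import Path
from huggingface_hub import hf_hub_download

MODELS_DIR = Path("ComfyUI/models")  # assumed ComfyUI layout

# Placeholder (repo_id, filename, subdir) entries, not the real manifest.
MODELS = [
    ("runwayml/stable-diffusion-v1-5", "v1-5-pruned-emaonly.safetensors", "checkpoints"),
]

for repo_id, filename, subdir in MODELS:
    dest = MODELS_DIR / subdir
    dest.mkdir(parents=True, exist_ok=True)
    print(hf_hub_download(repo_id=repo_id, filename=filename, local_dir=dest))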


@@ -0,0 +1,21 @@
{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 30522,
  "encoder_width": 768,
  "add_cross_attention": true
}
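
This is the BERT text-encoder config BLIP uses (bert-base-uncased
dimensions with cross-attention enabled). transformers can load it
directly; a sketch, with the local filename assumed:

# Sketch: load the config above with transformers (path assumed).
from transformers import BertConfig

config = BertConfig.from_json_file("bert_config.json")
print(config.hidden_size)          # 768
print(config.add_cross_attention)  # True
# Non-standard keys such as encoder_width survive as plain attributes.
print(config.encoder_width)        # 768

A second copy of this config appears further down with vocab_size
30524 rather than 30522, consistent with BLIP reserving two extra
special tokens on top of the bert-base-uncased vocabulary.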


@@ -0,0 +1,33 @@
image_root: '/export/share/datasets/vision/coco/images/'
ann_root: 'annotation'
coco_gt_root: 'annotation/coco_gt'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
# size of vit model; base or large
vit: 'base'
vit_grad_ckpt: False
vit_ckpt_layer: 0
batch_size: 32
init_lr: 1e-5
# vit: 'large'
# vit_grad_ckpt: True
# vit_ckpt_layer: 5
# batch_size: 16
# init_lr: 2e-6
image_size: 384
# generation configs
max_length: 20
min_length: 5
num_beams: 3
prompt: 'a picture of '
# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 5
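
The bottom half of this file drives beam-search caption generation. A
sketch of how those values are typically consumed, assuming PyYAML and
a BLIP-style model whose generate() accepts these keyword arguments (a
hypothetical signature):

# Sketch: feed the generation settings above into a captioner.
import yaml

def generate_captions(model, images, cfg_path="caption_coco.yaml"):
    # cfg_path and model.generate()'s signature are assumptions.
    config = yaml.safe_load(open(cfg_path))
    return model.generate(
        images,                          # batch preprocessed at image_size 384
        sample=False,                    # beam search, not sampling
        num_beams=config["num_beams"],   # 3
        max_length=config["max_length"], # 20 tokens
        min_length=config["min_length"], # 5 tokens
    )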


@@ -0,0 +1,21 @@
{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 30524,
  "encoder_width": 768,
  "add_cross_attention": true
}


@@ -0,0 +1,21 @@
image_root: '/export/share/datasets/vision/NLVR2/'
ann_root: 'annotation'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_nlvr.pth'
#size of vit model; base or large
vit: 'base'
batch_size_train: 16
batch_size_test: 64
vit_grad_ckpt: False
vit_ckpt_layer: 0
max_epoch: 15
image_size: 384
# optimizer
weight_decay: 0.05
init_lr: 3e-5
min_lr: 0


@@ -0,0 +1,15 @@
image_root: '/export/share/datasets/vision/nocaps/'
ann_root: 'annotation'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
vit: 'base'
batch_size: 32
image_size: 384
max_length: 20
min_length: 5
num_beams: 3
prompt: 'a picture of '


@@ -0,0 +1,27 @@
train_file: ['/export/share/junnan-li/VL_pretrain/annotation/coco_karpathy_train.json',
             '/export/share/junnan-li/VL_pretrain/annotation/vg_caption.json',
            ]
laion_path: ''
# size of vit model; base or large
vit: 'base'
vit_grad_ckpt: False
vit_ckpt_layer: 0
image_size: 224
batch_size: 75
queue_size: 57600
alpha: 0.4
# optimizer
weight_decay: 0.05
init_lr: 3e-4
min_lr: 1e-6
warmup_lr: 1e-6
lr_decay_rate: 0.9
max_epoch: 20
warmup_steps: 3000
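
The optimizer keys encode a two-phase schedule: linear warmup from
warmup_lr to init_lr over warmup_steps, then per-epoch exponential
decay by lr_decay_rate, floored at min_lr. A small sketch under that
reading (which matches BLIP's usual warmup/step schedule):

# Learning-rate schedule implied by the values above (a sketch).
def warmup_lr(step, warmup_steps=3000, warmup_from=1e-6, init_lr=3e-4):
    # Linear ramp from warmup_lr to init_lr over the first warmup_steps.
    return warmup_from + (init_lr - warmup_from) * min(step, warmup_steps) / warmup_steps

def step_lr(epoch, init_lr=3e-4, min_lr=1e-6, decay_rate=0.9):
    # Exponential per-epoch decay, never below min_lr.
    return max(min_lr, init_lr * decay_rate ** epoch)

# step_lr(19) ~ 3e-4 * 0.9**19 ~ 4.05e-5 at the last of max_epoch: 20 epochs.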


@@ -0,0 +1,34 @@
image_root: '/export/share/datasets/vision/coco/images/'
ann_root: 'annotation'
dataset: 'coco'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth'
# size of vit model; base or large
vit: 'base'
batch_size_train: 32
batch_size_test: 64
vit_grad_ckpt: True
vit_ckpt_layer: 4
init_lr: 1e-5
# vit: 'large'
# batch_size_train: 16
# batch_size_test: 32
# vit_grad_ckpt: True
# vit_ckpt_layer: 12
# init_lr: 5e-6
image_size: 384
queue_size: 57600
alpha: 0.4
k_test: 256
negative_all_rank: True
# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 6
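
k_test: 256 sets up two-stage retrieval evaluation: the cheap
image-text contrastive (ITC) similarity matrix shortlists the top-k
candidates per query, and only those pairs get the expensive
image-text matching (ITM) forward pass. A sketch of that pattern with
torch; itm_score is a hypothetical callable standing in for the ITM head:

# Two-stage retrieval scoring (sketch; itm_score is hypothetical).
import torch

def rerank(sim_matrix, itm_score, k_test=256):
    scores = torch.full_like(sim_matrix, -100.0)  # non-candidates stay low
    for i, row in enumerate(sim_matrix):
        topk_sim, topk_idx = row.topk(k_test)
        # itm_score(query_idx, candidate_idxs) -> tensor of ITM logits
        scores[i, topk_idx] = itm_score(i, topk_idx) + topk_sim
    return scores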


@@ -0,0 +1,34 @@
image_root: '/export/share/datasets/vision/flickr30k/'
ann_root: 'annotation'
dataset: 'flickr'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth'
# size of vit model; base or large
vit: 'base'
batch_size_train: 32
batch_size_test: 64
vit_grad_ckpt: True
vit_ckpt_layer: 4
init_lr: 1e-5
# vit: 'large'
# batch_size_train: 16
# batch_size_test: 32
# vit_grad_ckpt: True
# vit_ckpt_layer: 10
# init_lr: 5e-6
image_size: 384
queue_size: 57600
alpha: 0.4
k_test: 128
negative_all_rank: False
# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 6


@@ -0,0 +1,12 @@
video_root: '/export/share/dongxuli/data/msrvtt_retrieval/videos'
ann_root: 'annotation'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth'
# size of vit model; base or large
vit: 'base'
batch_size: 64
k_test: 128
image_size: 384
num_frm_test: 8
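
num_frm_test: 8 means each test video is reduced to a fixed number of
frames before going through the image encoder. A sketch of the uniform
sampling that implies, assuming frames are already decoded into a
(T, C, H, W) tensor:

# Evenly spaced frame sampling (sketch).
import torch

def sample_frames(video, num_frm_test=8):
    t = video.shape[0]
    idx = torch.linspace(0, t - 1, steps=num_frm_test).long()
    return video[idx]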


@@ -0,0 +1,25 @@
vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #followed by train2014/
vg_root: '/export/share/datasets/vision/visual-genome/' #followed by image/
train_files: ['vqa_train','vqa_val','vg_qa']
ann_root: 'annotation'
# set pretrained as a file path or an url
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'
# size of vit model; base or large
vit: 'base'
batch_size_train: 16
batch_size_test: 32
vit_grad_ckpt: False
vit_ckpt_layer: 0
init_lr: 2e-5
image_size: 480
k_test: 128
inference: 'rank'
# optimizer
weight_decay: 0.05
min_lr: 0
max_epoch: 10
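
inference: 'rank' selects answer ranking instead of free-form
generation: candidates from a fixed answer list are shortlisted by
first-token likelihood, and only the top k_test are scored as full
sequences. A sketch of that two-stage ranking; score_full is a
hypothetical callable standing in for the second stage:

# VQA 'rank' inference pattern (sketch; score_full is hypothetical).
import torch

def rank_answers(first_token_logprobs, score_full, k_test=128):
    _, topk_idx = first_token_logprobs.topk(k_test)  # cheap shortlist
    full_scores = score_full(topk_idx)               # full-answer log-probs
    return topk_idx[full_scores.argmax()]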