Add custom nodes, Civitai loras (LFS), and vast.ai setup script
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Has been cancelled
Execution Tests / test (macos-latest) (push) Has been cancelled
Execution Tests / test (ubuntu-latest) (push) Has been cancelled
Execution Tests / test (windows-latest) (push) Has been cancelled
Test server launches without errors / test (push) Has been cancelled
Unit Tests / test (macos-latest) (push) Has been cancelled
Unit Tests / test (ubuntu-latest) (push) Has been cancelled
Unit Tests / test (windows-2022) (push) Has been cancelled
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Has been cancelled
Execution Tests / test (macos-latest) (push) Has been cancelled
Execution Tests / test (ubuntu-latest) (push) Has been cancelled
Execution Tests / test (windows-latest) (push) Has been cancelled
Test server launches without errors / test (push) Has been cancelled
Unit Tests / test (macos-latest) (push) Has been cancelled
Unit Tests / test (ubuntu-latest) (push) Has been cancelled
Unit Tests / test (windows-2022) (push) Has been cancelled
Includes 30 custom nodes committed directly, 7 Civitai-exclusive loras stored via Git LFS, and a setup script that installs all dependencies and downloads HuggingFace-hosted models on vast.ai.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"architectures": [
|
||||
"BertModel"
|
||||
],
|
||||
"attention_probs_dropout_prob": 0.1,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.1,
|
||||
"hidden_size": 768,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_norm_eps": 1e-12,
|
||||
"max_position_embeddings": 512,
|
||||
"model_type": "bert",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"pad_token_id": 0,
|
||||
"type_vocab_size": 2,
|
||||
"vocab_size": 30522,
|
||||
"encoder_width": 768,
|
||||
"add_cross_attention": true
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
image_root: '/export/share/datasets/vision/coco/images/'
|
||||
ann_root: 'annotation'
|
||||
coco_gt_root: 'annotation/coco_gt'
|
||||
|
||||
# set pretrained to a file path or a URL
|
||||
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
|
||||
|
||||
# size of vit model; base or large
|
||||
vit: 'base'
|
||||
vit_grad_ckpt: False
|
||||
vit_ckpt_layer: 0
|
||||
batch_size: 32
|
||||
init_lr: 1e-5
|
||||
|
||||
# vit: 'large'
|
||||
# vit_grad_ckpt: True
|
||||
# vit_ckpt_layer: 5
|
||||
# batch_size: 16
|
||||
# init_lr: 2e-6
|
||||
|
||||
image_size: 384
|
||||
|
||||
# generation configs
|
||||
max_length: 20
|
||||
min_length: 5
|
||||
num_beams: 3
|
||||
prompt: 'a picture of '
|
||||
|
||||
# optimizer
|
||||
weight_decay: 0.05
|
||||
min_lr: 0
|
||||
max_epoch: 5
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"architectures": [
|
||||
"BertModel"
|
||||
],
|
||||
"attention_probs_dropout_prob": 0.1,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.1,
|
||||
"hidden_size": 768,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_norm_eps": 1e-12,
|
||||
"max_position_embeddings": 512,
|
||||
"model_type": "bert",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"pad_token_id": 0,
|
||||
"type_vocab_size": 2,
|
||||
"vocab_size": 30524,
|
||||
"encoder_width": 768,
|
||||
"add_cross_attention": true
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
image_root: '/export/share/datasets/vision/NLVR2/'
|
||||
ann_root: 'annotation'
|
||||
|
||||
# set pretrained to a file path or a URL
|
||||
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_nlvr.pth'
|
||||
|
||||
# size of vit model; base or large
|
||||
vit: 'base'
|
||||
batch_size_train: 16
|
||||
batch_size_test: 64
|
||||
vit_grad_ckpt: False
|
||||
vit_ckpt_layer: 0
|
||||
max_epoch: 15
|
||||
|
||||
image_size: 384
|
||||
|
||||
# optimizer
|
||||
weight_decay: 0.05
|
||||
init_lr: 3e-5
|
||||
min_lr: 0
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
image_root: '/export/share/datasets/vision/nocaps/'
|
||||
ann_root: 'annotation'
|
||||
|
||||
# set pretrained to a file path or a URL
|
||||
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
|
||||
|
||||
vit: 'base'
|
||||
batch_size: 32
|
||||
|
||||
image_size: 384
|
||||
|
||||
max_length: 20
|
||||
min_length: 5
|
||||
num_beams: 3
|
||||
prompt: 'a picture of '
|
||||
@@ -0,0 +1,27 @@
|
||||
train_file: ['/export/share/junnan-li/VL_pretrain/annotation/coco_karpathy_train.json',
|
||||
'/export/share/junnan-li/VL_pretrain/annotation/vg_caption.json',
|
||||
]
|
||||
laion_path: ''
|
||||
|
||||
# size of vit model; base or large
|
||||
vit: 'base'
|
||||
vit_grad_ckpt: False
|
||||
vit_ckpt_layer: 0
|
||||
|
||||
image_size: 224
|
||||
batch_size: 75
|
||||
|
||||
queue_size: 57600
|
||||
alpha: 0.4
|
||||
|
||||
# optimizer
|
||||
weight_decay: 0.05
|
||||
init_lr: 3e-4
|
||||
min_lr: 1e-6
|
||||
warmup_lr: 1e-6
|
||||
lr_decay_rate: 0.9
|
||||
max_epoch: 20
|
||||
warmup_steps: 3000
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
image_root: '/export/share/datasets/vision/coco/images/'
|
||||
ann_root: 'annotation'
|
||||
dataset: 'coco'
|
||||
|
||||
# set pretrained to a file path or a URL
|
||||
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth'
|
||||
|
||||
# size of vit model; base or large
|
||||
|
||||
vit: 'base'
|
||||
batch_size_train: 32
|
||||
batch_size_test: 64
|
||||
vit_grad_ckpt: True
|
||||
vit_ckpt_layer: 4
|
||||
init_lr: 1e-5
|
||||
|
||||
# vit: 'large'
|
||||
# batch_size_train: 16
|
||||
# batch_size_test: 32
|
||||
# vit_grad_ckpt: True
|
||||
# vit_ckpt_layer: 12
|
||||
# init_lr: 5e-6
|
||||
|
||||
image_size: 384
|
||||
queue_size: 57600
|
||||
alpha: 0.4
|
||||
k_test: 256
|
||||
negative_all_rank: True
|
||||
|
||||
# optimizer
|
||||
weight_decay: 0.05
|
||||
min_lr: 0
|
||||
max_epoch: 6
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
image_root: '/export/share/datasets/vision/flickr30k/'
|
||||
ann_root: 'annotation'
|
||||
dataset: 'flickr'
|
||||
|
||||
# set pretrained to a file path or a URL
|
||||
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth'
|
||||
|
||||
# size of vit model; base or large
|
||||
|
||||
vit: 'base'
|
||||
batch_size_train: 32
|
||||
batch_size_test: 64
|
||||
vit_grad_ckpt: True
|
||||
vit_ckpt_layer: 4
|
||||
init_lr: 1e-5
|
||||
|
||||
# vit: 'large'
|
||||
# batch_size_train: 16
|
||||
# batch_size_test: 32
|
||||
# vit_grad_ckpt: True
|
||||
# vit_ckpt_layer: 10
|
||||
# init_lr: 5e-6
|
||||
|
||||
image_size: 384
|
||||
queue_size: 57600
|
||||
alpha: 0.4
|
||||
k_test: 128
|
||||
negative_all_rank: False
|
||||
|
||||
# optimizer
|
||||
weight_decay: 0.05
|
||||
min_lr: 0
|
||||
max_epoch: 6
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
video_root: '/export/share/dongxuli/data/msrvtt_retrieval/videos'
|
||||
ann_root: 'annotation'
|
||||
|
||||
# set pretrained to a file path or a URL
|
||||
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth'
|
||||
|
||||
# size of vit model; base or large
|
||||
vit: 'base'
|
||||
batch_size: 64
|
||||
k_test: 128
|
||||
image_size: 384
|
||||
num_frm_test: 8
|
||||
@@ -0,0 +1,25 @@
|
||||
vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #followed by train2014/
|
||||
vg_root: '/export/share/datasets/vision/visual-genome/' #followed by image/
|
||||
train_files: ['vqa_train','vqa_val','vg_qa']
|
||||
ann_root: 'annotation'
|
||||
|
||||
# set pretrained to a file path or a URL
|
||||
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'
|
||||
|
||||
# size of vit model; base or large
|
||||
vit: 'base'
|
||||
batch_size_train: 16
|
||||
batch_size_test: 32
|
||||
vit_grad_ckpt: False
|
||||
vit_ckpt_layer: 0
|
||||
init_lr: 2e-5
|
||||
|
||||
image_size: 480
|
||||
|
||||
k_test: 128
|
||||
inference: 'rank'
|
||||
|
||||
# optimizer
|
||||
weight_decay: 0.05
|
||||
min_lr: 0
|
||||
max_epoch: 10
|
||||
Reference in New Issue
Block a user