Files
jaidaken f09734b0ee
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Has been cancelled
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Has been cancelled
Execution Tests / test (macos-latest) (push) Has been cancelled
Execution Tests / test (ubuntu-latest) (push) Has been cancelled
Execution Tests / test (windows-latest) (push) Has been cancelled
Test server launches without errors / test (push) Has been cancelled
Unit Tests / test (macos-latest) (push) Has been cancelled
Unit Tests / test (ubuntu-latest) (push) Has been cancelled
Unit Tests / test (windows-2022) (push) Has been cancelled
Add custom nodes, Civitai loras (LFS), and vast.ai setup script
Includes 30 custom nodes committed directly, 7 Civitai-exclusive
loras stored via Git LFS, and a setup script that installs all
dependencies and downloads HuggingFace-hosted models on vast.ai.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 00:56:42 +00:00

86 lines
3.0 KiB
Python

import logging
import re
import urllib.parse
import urllib.request
from pathlib import Path
# Configure root logging at import time so the module-level log calls below
# produce output. NOTE(review): libraries normally leave logging config to the
# application; presumably intentional since this file doubles as a script — confirm.
logging.basicConfig(level=logging.INFO)
def _fetch_hf_html(repo_id: str, folder_path: str, timeout: float = 30.0) -> str:
    """Fetch the HTML of a HuggingFace dataset tree page.

    Args:
        repo_id: Dataset repository id, e.g. ``"user/dataset"``.
        folder_path: Folder path inside the repo, relative to the repo root.
        timeout: Socket timeout in seconds (new keyword, backward-compatible;
            the original call could hang indefinitely on a stalled connection).

    Returns:
        The page body decoded as UTF-8.

    Raises:
        urllib.error.URLError: On network failure or a non-2xx HTTP status.
    """
    url = f"https://huggingface.co/datasets/{repo_id}/tree/main/{folder_path}"
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8")
def list_hf_subfolders(repo_id: str, folder_path: str) -> list[str]:
    """List subfolder paths under a HuggingFace dataset folder.

    Scrapes the tree page's hrefs of the form
    ``/datasets/<repo_id>/tree/main/<folder_path>/<name>``.

    Args:
        repo_id: Dataset repository id, e.g. ``"user/dataset"``.
        folder_path: Folder within the repo to inspect.

    Returns:
        Sorted, de-duplicated subfolder paths relative to the repo root,
        or an empty list on any fetch failure (best-effort, logged).
    """
    try:
        html = _fetch_hf_html(repo_id, folder_path)
    except Exception:
        # Best-effort: log the full traceback and let the caller proceed
        # with an empty result instead of crashing.
        logging.exception(f"Failed to list subfolders in {folder_path}")
        return []
    # re.escape: repo ids/paths may contain regex metacharacters (".", "+", …)
    # that would otherwise act as wildcards in the pattern.
    pattern = (
        rf'/datasets/{re.escape(repo_id)}/tree/main/'
        rf'({re.escape(folder_path)}/[^"/?]+)'
    )
    return sorted(set(re.findall(pattern, html)))
def list_hf_files(
    repo_id: str,
    folder_path: str,
    extensions: tuple[str, ...] = (".jpg", ".jpeg", ".png", ".webp"),
) -> list[str]:
    """List image file paths in a HuggingFace dataset folder.

    Scrapes the tree page's blob hrefs and keeps files whose name ends with
    one of ``extensions``.

    Args:
        repo_id: Dataset repository id, e.g. ``"user/dataset"``.
        folder_path: Folder within the repo to inspect.
        extensions: File suffixes to accept.

    Returns:
        Percent-decoded file paths relative to the repo root, or an empty
        list on any fetch failure (best-effort, logged).
    """
    try:
        html = _fetch_hf_html(repo_id, folder_path)
    except Exception:
        logging.exception(f"Failed to list files in {folder_path}")
        return []
    # Escape every dynamic fragment: an unescaped "." in ".jpg" matches ANY
    # character, so e.g. "photoXjpg" would previously slip through.
    ext_alt = "|".join(re.escape(ext) for ext in extensions)
    # (?:...) keeps the alternation non-capturing so findall yields plain
    # strings instead of (path, extension) tuples.
    pattern = (
        rf'/datasets/{re.escape(repo_id)}/blob/main/'
        rf'({re.escape(folder_path)}/[^"]+?(?:{ext_alt}))'
    )
    # Hrefs are percent-encoded; unquote to recover the real file path.
    return [urllib.parse.unquote(path) for path in re.findall(pattern, html)]
def download_test_images(save_dir: Path, repo_folder: str, repo_id: str) -> Path:
    """Download an image folder tree from a HuggingFace dataset repo.

    Discovers the subfolders of ``repo_folder``, lists their image files, and
    downloads each into ``save_dir``, preserving the folder structure. Files
    that already exist locally are skipped.

    Args:
        save_dir: Local directory to download into (created as needed).
        repo_folder: Top-level folder in the dataset repo, e.g. "test_images".
        repo_id: Dataset repository id, e.g. ``"user/dataset"``.

    Returns:
        ``save_dir`` normalized to a :class:`Path` (also on the early-exit
        "nothing found" branches, for a consistent return type).
    """
    save_dir_path = Path(save_dir)

    # Discover all subfolders and collect files.
    subfolders = list_hf_subfolders(repo_id, repo_folder)
    if not subfolders:
        logging.warning(f"No subfolders found in {repo_folder}")
        return save_dir_path
    all_files = [f for folder in subfolders for f in list_hf_files(repo_id, folder)]
    if not all_files:
        logging.warning(f"No image files found in {repo_folder}")
        return save_dir_path
    logging.info(f"Found {len(all_files)} files from {len(subfolders)} folders")

    # Download files, preserving folder structure.
    downloaded = 0
    skipped = 0
    for file_path in all_files:
        relative_path = Path(file_path).relative_to(repo_folder)
        save_path = save_dir_path / relative_path
        if save_path.exists():
            logging.info(f"Skipping {relative_path} (already exists)")
            skipped += 1
            continue
        save_path.parent.mkdir(parents=True, exist_ok=True)
        # list_hf_files returns percent-DECODED paths; re-quote so names with
        # spaces or special characters form a valid URL ("/" kept as-is).
        url = (
            f"https://huggingface.co/datasets/{repo_id}/resolve/main/"
            f"{urllib.parse.quote(file_path)}"
        )
        logging.info(f"Downloading {relative_path}...")
        try:
            urllib.request.urlretrieve(url, save_path)
        except Exception:
            # Remove a partially-written file; otherwise a rerun would skip
            # the truncated file as "already exists".
            save_path.unlink(missing_ok=True)
            raise
        downloaded += 1
    logging.info(f"Downloaded {downloaded} files, skipped {skipped} existing files")
    return save_dir_path
if __name__ == "__main__":
    # Script entry point: ensure INFO-level logging, then pull the default
    # test-image set into ./test/test_images/.
    logging.basicConfig(level=logging.INFO)
    download_test_images(
        save_dir=Path("./test/test_images/"),
        repo_folder="test_images",
        repo_id="ssitu/ultimatesdupscale_test",
    )