Assets Part 2 - add more endpoints (#12125)
This commit is contained in:
281
tests-unit/assets_test/test_uploads.py
Normal file
281
tests-unit/assets_test/test_uploads.py
Normal file
@@ -0,0 +1,281 @@
|
||||
import json
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import requests
|
||||
import pytest
|
||||
|
||||
|
||||
def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, make_asset_bytes):
    """Uploading identical bytes must dedupe on content hash.

    First upload creates the asset (201, created_new=True); re-uploading the
    same bytes under the same name returns the existing reference (200); and
    re-uploading under a new name returns a *new* reference to the same asset.
    """
    name = "dup_a.safetensors"
    tags = ["models", "checkpoints", "unit-tests", "alpha"]
    meta = {"purpose": "dup"}
    data = make_asset_bytes(name)

    files = {"file": (name, data, "application/octet-stream")}
    form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)}
    r1 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
    a1 = r1.json()
    assert r1.status_code == 201, a1
    assert a1["created_new"] is True

    # Second upload with the same data and name should return created_new == False and the same asset
    files = {"file": (name, data, "application/octet-stream")}
    form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)}
    r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
    a2 = r2.json()
    assert r2.status_code == 200, a2
    assert a2["created_new"] is False
    assert a2["asset_hash"] == a1["asset_hash"]
    assert a2["id"] == a1["id"]  # same (old) reference

    # Third upload with the same data but new name should return created_new == False and a new AssetReference
    # (was reusing r2 here, which made failure output ambiguous)
    files = {"file": (name, data, "application/octet-stream")}
    form = {"tags": json.dumps(tags), "name": name + "_d", "user_metadata": json.dumps(meta)}
    r3 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
    a3 = r3.json()
    assert r3.status_code == 200, a3
    assert a3["created_new"] is False
    assert a3["asset_hash"] == a1["asset_hash"]
    assert a3["id"] != a1["id"]  # new reference, same underlying asset
|
||||
|
||||
|
||||
def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_base: str):
    """The upload endpoint must accept a bare hash (no file part) for known content."""
    upload_url = api_base + "/api/assets"

    # Seed a small file first so the server already knows the hash.
    seed_name = "fastpath_seed.safetensors"
    seed_tags = ["models", "checkpoints", "unit-tests"]
    seed_files = {"file": (seed_name, b"B" * 1024, "application/octet-stream")}
    seed_form = {"tags": json.dumps(seed_tags), "name": seed_name, "user_metadata": json.dumps({})}
    seed_resp = http.post(upload_url, data=seed_form, files=seed_files, timeout=120)
    seed_body = seed_resp.json()
    assert seed_resp.status_code == 201, seed_body
    known_hash = seed_body["asset_hash"]

    # Now POST /api/assets with only hash and no file
    hash_only_fields = [
        ("hash", (None, known_hash)),
        ("tags", (None, json.dumps(seed_tags))),
        ("name", (None, "fastpath_copy.safetensors")),
        ("user_metadata", (None, json.dumps({"purpose": "copy"}))),
    ]
    copy_resp = http.post(upload_url, files=hash_only_fields, timeout=120)
    copy_body = copy_resp.json()
    assert copy_resp.status_code == 200, copy_body  # fast path returns 200 with created_new == False
    assert copy_body["created_new"] is False
    assert copy_body["asset_hash"] == known_hash
|
||||
|
||||
|
||||
def test_upload_fastpath_with_known_hash_and_file(
    http: requests.Session, api_base: str
):
    """When both a file part and a known hash are sent, the server must drain
    the file and create the reference from the existing content (HTTP 200)."""
    url = api_base + "/api/assets"

    # Seed
    seed_form = {
        "tags": json.dumps(["models", "checkpoints", "unit-tests", "fp"]),
        "name": "seed.safetensors",
        "user_metadata": json.dumps({}),
    }
    seed_resp = http.post(
        url,
        data=seed_form,
        files={"file": ("seed.safetensors", b"C" * 128, "application/octet-stream")},
        timeout=120,
    )
    seed_body = seed_resp.json()
    assert seed_resp.status_code == 201, seed_body
    content_hash = seed_body["asset_hash"]

    # Send both file and hash of existing content -> server must drain file and create from hash (200)
    copy_form = {
        "hash": content_hash,
        "tags": json.dumps(["models", "checkpoints", "unit-tests", "fp"]),
        "name": "copy_from_hash.safetensors",
        "user_metadata": json.dumps({}),
    }
    copy_resp = http.post(
        url,
        data=copy_form,
        files={"file": ("ignored.bin", b"ignored" * 10, "application/octet-stream")},
        timeout=120,
    )
    copy_body = copy_resp.json()
    assert copy_resp.status_code == 200, copy_body
    assert copy_body["created_new"] is False
    assert copy_body["asset_hash"] == content_hash
|
||||
|
||||
|
||||
def test_upload_multiple_tags_fields_are_merged(http: requests.Session, api_base: str):
    """Repeated 'tags' form fields (CSV form and JSON form) are merged server-side."""
    form_fields = [
        ("tags", "models,checkpoints"),
        ("tags", json.dumps(["unit-tests", "alpha"])),
        ("name", "merge.safetensors"),
        ("user_metadata", json.dumps({"u": 1})),
    ]
    upload_resp = http.post(
        api_base + "/api/assets",
        data=form_fields,
        files={"file": ("merge.safetensors", b"B" * 256, "application/octet-stream")},
        timeout=120,
    )
    created = upload_resp.json()
    assert upload_resp.status_code in (200, 201), created
    asset_id = created["id"]

    # Verify all tags are present on the resource
    detail_resp = http.get(f"{api_base}/api/assets/{asset_id}", timeout=120)
    detail = detail_resp.json()
    assert detail_resp.status_code == 200, detail
    assert {"models", "checkpoints", "unit-tests", "alpha"}.issubset(set(detail["tags"]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("root", ["input", "output"])
def test_concurrent_upload_identical_bytes_different_names(
    root: str,
    http: requests.Session,
    api_base: str,
    make_asset_bytes,
):
    """
    Two concurrent uploads of identical bytes but different names.
    Expect a single Asset (same hash), two AssetInfo rows, and exactly one created_new=True.
    """
    scope = f"concupload-{uuid.uuid4().hex[:6]}"
    name1, name2 = "cu_a.bin", "cu_b.bin"
    data = make_asset_bytes("concurrent", 4096)
    tags = [root, "unit-tests", scope]

    def _do_upload(args):
        # Each worker uses its own Session: requests.Session is not thread-safe.
        url, form_data, files_data = args
        with requests.Session() as s:
            return s.post(url, data=form_data, files=files_data, timeout=120)

    url = api_base + "/api/assets"
    form1 = {"tags": json.dumps(tags), "name": name1, "user_metadata": json.dumps({})}
    files1 = {"file": (name1, data, "application/octet-stream")}
    form2 = {"tags": json.dumps(tags), "name": name2, "user_metadata": json.dumps({})}
    files2 = {"file": (name2, data, "application/octet-stream")}

    with ThreadPoolExecutor(max_workers=2) as executor:
        # executor.map yields completed results (Response objects), not Future
        # instances — the old name "futures" was misleading.
        responses = list(executor.map(_do_upload, [(url, form1, files1), (url, form2, files2)]))
    r1, r2 = responses

    b1, b2 = r1.json(), r2.json()
    assert r1.status_code in (200, 201), b1
    assert r2.status_code in (200, 201), b2
    assert b1["asset_hash"] == b2["asset_hash"]
    assert b1["id"] != b2["id"]

    # Exactly one of the two racing uploads may claim creation of the asset.
    created_flags = sorted([bool(b1.get("created_new")), bool(b2.get("created_new"))])
    assert created_flags == [False, True]

    rl = http.get(
        api_base + "/api/assets",
        params={"include_tags": f"unit-tests,{scope}", "sort": "name"},
        timeout=120,
    )
    bl = rl.json()
    assert rl.status_code == 200, bl
    names = [a["name"] for a in bl.get("assets", [])]
    assert set([name1, name2]).issubset(names)
|
||||
|
||||
|
||||
def test_create_from_hash_endpoint_404(http: requests.Session, api_base: str):
    """Creating a reference from an unknown content hash must yield ASSET_NOT_FOUND."""
    unknown_hash_payload = {
        "hash": "blake3:" + "0" * 64,
        "name": "nonexistent.bin",
        "tags": ["models", "checkpoints", "unit-tests"],
    }
    resp = http.post(api_base + "/api/assets/from-hash", json=unknown_hash_payload, timeout=120)
    payload = resp.json()
    assert resp.status_code == 404
    assert payload["error"]["code"] == "ASSET_NOT_FOUND"
|
||||
|
||||
|
||||
def test_upload_zero_byte_rejected(http: requests.Session, api_base: str):
    """A zero-byte file upload is rejected with EMPTY_UPLOAD."""
    resp = http.post(
        api_base + "/api/assets",
        data={
            "tags": json.dumps(["models", "checkpoints", "unit-tests", "edge"]),
            "name": "empty.safetensors",
            "user_metadata": json.dumps({}),
        },
        files={"file": ("empty.safetensors", b"", "application/octet-stream")},
        timeout=120,
    )
    payload = resp.json()
    assert resp.status_code == 400
    assert payload["error"]["code"] == "EMPTY_UPLOAD"
|
||||
|
||||
|
||||
def test_upload_invalid_root_tag_rejected(http: requests.Session, api_base: str):
    """An upload whose first tag is not a recognized root is rejected as INVALID_BODY."""
    resp = http.post(
        api_base + "/api/assets",
        data={
            "tags": json.dumps(["not-a-root", "whatever"]),
            "name": "badroot.bin",
            "user_metadata": json.dumps({}),
        },
        files={"file": ("badroot.bin", b"A" * 64, "application/octet-stream")},
        timeout=120,
    )
    payload = resp.json()
    assert resp.status_code == 400
    assert payload["error"]["code"] == "INVALID_BODY"
|
||||
|
||||
|
||||
def test_upload_user_metadata_must_be_json(http: requests.Session, api_base: str):
    """A user_metadata field that is not valid JSON is rejected as INVALID_BODY."""
    resp = http.post(
        api_base + "/api/assets",
        data={
            "tags": json.dumps(["models", "checkpoints", "unit-tests", "edge"]),
            "name": "badmeta.bin",
            "user_metadata": "{not json}",
        },
        files={"file": ("badmeta.bin", b"A" * 128, "application/octet-stream")},
        timeout=120,
    )
    payload = resp.json()
    assert resp.status_code == 400
    assert payload["error"]["code"] == "INVALID_BODY"
|
||||
|
||||
|
||||
def test_upload_requires_multipart(http: requests.Session, api_base: str):
    """A JSON (non-multipart) POST to the upload endpoint yields 415."""
    resp = http.post(api_base + "/api/assets", json={"foo": "bar"}, timeout=120)
    payload = resp.json()
    assert resp.status_code == 415
    assert payload["error"]["code"] == "UNSUPPORTED_MEDIA_TYPE"
|
||||
|
||||
|
||||
def test_upload_missing_file_and_hash(http: requests.Session, api_base: str):
    """A multipart POST with neither a file part nor a hash yields MISSING_FILE."""
    multipart_fields = [
        ("tags", (None, json.dumps(["models", "checkpoints", "unit-tests"]))),
        ("name", (None, "x.safetensors")),
    ]
    resp = http.post(api_base + "/api/assets", files=multipart_fields, timeout=120)
    payload = resp.json()
    assert resp.status_code == 400
    assert payload["error"]["code"] == "MISSING_FILE"
|
||||
|
||||
|
||||
def test_upload_models_unknown_category(http: requests.Session, api_base: str):
    """An unrecognized category under the 'models' root is rejected as INVALID_BODY."""
    resp = http.post(
        api_base + "/api/assets",
        data={
            "tags": json.dumps(["models", "no_such_category", "unit-tests"]),
            "name": "m.safetensors",
        },
        files={"file": ("m.safetensors", b"A" * 128, "application/octet-stream")},
        timeout=120,
    )
    payload = resp.json()
    assert resp.status_code == 400
    assert payload["error"]["code"] == "INVALID_BODY"
    assert payload["error"]["message"].startswith("unknown models category")
|
||||
|
||||
|
||||
def test_upload_models_requires_category(http: requests.Session, api_base: str):
    """The bare 'models' root with no category tag is rejected as INVALID_BODY."""
    resp = http.post(
        api_base + "/api/assets",
        data={
            "tags": json.dumps(["models"]),
            "name": "nocat.safetensors",
            "user_metadata": json.dumps({}),
        },
        files={"file": ("nocat.safetensors", b"A" * 64, "application/octet-stream")},
        timeout=120,
    )
    payload = resp.json()
    assert resp.status_code == 400
    assert payload["error"]["code"] == "INVALID_BODY"
|
||||
|
||||
|
||||
def test_upload_tags_traversal_guard(http: requests.Session, api_base: str):
    """A '..' path segment smuggled into the tags list must be rejected (400)."""
    resp = http.post(
        api_base + "/api/assets",
        data={
            "tags": json.dumps(["models", "checkpoints", "unit-tests", "..", "zzz"]),
            "name": "evil.safetensors",
        },
        files={"file": ("evil.safetensors", b"A" * 256, "application/octet-stream")},
        timeout=120,
    )
    payload = resp.json()
    assert resp.status_code == 400
    assert payload["error"]["code"] in ("BAD_REQUEST", "INVALID_BODY")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("root", ["input", "output"])
def test_duplicate_upload_same_display_name_does_not_clobber(
    root: str,
    http: requests.Session,
    api_base: str,
    asset_factory,
    make_asset_bytes,
):
    """
    Two uploads use the same tags and the same display name but different bytes.
    With hash-based filenames, they must NOT overwrite each other. Both assets
    remain accessible and serve their original content.
    """
    scope = f"dup-path-{uuid.uuid4().hex[:6]}"
    display_name = "same_display.bin"
    shared_tags = [root, "unit-tests", scope]

    payload_v1 = make_asset_bytes(scope + "-v1", 1536)
    payload_v2 = make_asset_bytes(scope + "-v2", 2048)

    first = asset_factory(display_name, shared_tags, {}, payload_v1)
    second = asset_factory(display_name, shared_tags, {}, payload_v2)

    assert first["id"] != second["id"]
    assert first["asset_hash"] != second["asset_hash"]  # different content
    assert first["name"] == second["name"] == display_name

    # Both must be independently retrievable and serve their own bytes
    for asset, expected_bytes in ((first, payload_v1), (second, payload_v2)):
        resp = http.get(f"{api_base}/api/assets/{asset['id']}/content", timeout=120)
        body = resp.content
        assert resp.status_code == 200
        assert body == expected_bytes
|
||||
Reference in New Issue
Block a user