# pip install -r requirements.txt
# python download_tooth_dataset.py
import requests
import zipfile
import shutil
import os, glob
import os.path as osp
from tqdm.auto import tqdm

# MorphoSource API key. Set the MORPHOSOURCE_API_KEY environment variable to
# supply it without editing (and accidentally committing) the secret; when the
# variable is unset the placeholder below is used, so it can still be replaced
# in place as before.
API_KEY = os.environ.get(
    "MORPHOSOURCE_API_KEY", "ENTER_YOUR_MORPHOSOURCE_API_KEY_HERE"
)
# Where the raw downloads are written.
DOWNLOAD_DIR = "./downloads"
# Where the extracted .ply files are written.
PLY_DIR = "./plys"

# Ensure download directory exists
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# JSON body sent with every download request (usage statement and agreement).
payload = {
    "use_statement": "I will use this data as part of a research project that wishes to automatically analyse the complexity of primate teeth.",
    "use_categories": ["Research"],
    "use_category_other": "Studying algorithms that estimate the complexity of shapes.",
    "agreements_accepted": True,
}

# Headers for the JSON request that asks for a download URL.
headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": API_KEY,
}

# Headers for fetching the file itself: authorization only, no JSON
# content negotiation.
headers2 = {
    "Authorization": API_KEY,
}

# Load the file IDs, one per line. Surrounding whitespace (including a stray
# '\r' from CRLF files) is stripped and blank lines are skipped, so an empty
# or trailing line never turns into a request for an empty ID.
with open("tooth_filenames.txt", "r") as f:
    filenames = [line.strip() for line in f if line.strip()]

# download each file

# (connect, read) timeouts in seconds, so a stalled connection raises instead
# of hanging the whole run indefinitely.
REQUEST_TIMEOUT = (10, 120)


def _filename_from_content_disposition(content_disposition, fallback):
    """Return a safe local filename taken from a Content-Disposition header.

    Args:
        content_disposition: Raw header value, or None when the header is
            absent.
        fallback: Name to use when the header carries no usable filename.

    Returns:
        The bare filename (no directory components), or ``fallback``.
    """
    if not content_disposition:
        return fallback
    _, found, rest = content_disposition.partition("filename=")
    if not found:
        return fallback
    rest = rest.strip()
    if rest.startswith('"'):
        # Quoted value: everything up to the closing quote.
        name = rest[1:].partition('"')[0]
    else:
        # Bare token: ends at the next parameter separator.
        name = rest.partition(";")[0].strip()
    # The header is server-controlled; keep only the last path component so
    # the file cannot be written outside the download directory.
    name = osp.basename(name.replace("\\", "/"))
    if name in ("", ".", ".."):
        return fallback
    return name


for fileid in tqdm(filenames, total=len(filenames), desc="Files"):
    url = f"https://www.morphosource.org/api/download/{fileid}"

    # Ask the API for a download URL for this ID.
    response = requests.post(
        url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT
    )
    # Surface HTTP errors here rather than as a confusing KeyError or JSON
    # decode error on the next line.
    response.raise_for_status()
    download_url = response.json()["response"]["media"]["download_url"]

    with requests.get(
        download_url, headers=headers2, stream=True, timeout=REQUEST_TIMEOUT
    ) as r:
        r.raise_for_status()

        # A missing or zero Content-Length gives tqdm an unknown total.
        total_size = int(r.headers.get("Content-Length", 0)) or None

        # Prefer the server-provided name; fall back to "<id>.bin".
        filename = _filename_from_content_disposition(
            r.headers.get("Content-Disposition"), fallback=f"{fileid}.bin"
        )
        filepath = os.path.join(DOWNLOAD_DIR, filename)

        # Stream into a ".part" file and rename it only once complete, so an
        # interrupted download never leaves a truncated file under its final
        # name (which a later "*.zip" glob would otherwise pick up).
        partial_path = filepath + ".part"
        with open(partial_path, "wb") as f, tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=filename,
            leave=False,
        ) as bar:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
                    bar.update(len(chunk))

        os.replace(partial_path, filepath)

# Extract the single .ply mesh from every downloaded archive into PLY_DIR.
zip_filepaths = glob.glob(osp.join(DOWNLOAD_DIR, '*.zip'))
os.makedirs(PLY_DIR, exist_ok=True)

for zip_path in tqdm(zip_filepaths, total=len(zip_filepaths)):
    with zipfile.ZipFile(zip_path, 'r') as archive:
        # Each archive is expected to hold exactly one .ply
        # (matched case-insensitively by extension).
        meshes = list(
            filter(lambda entry: entry.lower().endswith('.ply'), archive.namelist())
        )
        mesh_count = len(meshes)
        if mesh_count != 1:
            raise RuntimeError(f'Expected 1 ply in {zip_path}, found {mesh_count}')

        (member,) = meshes

        # Write the mesh flat into PLY_DIR, dropping any folders that
        # appear in the archive member's path.
        target = osp.join(PLY_DIR, osp.basename(member))
        with archive.open(member) as src, open(target, 'wb') as dst:
            shutil.copyfileobj(src, dst)

print('Done!')