| |
import contextlib
import io
import math
import os
from typing import Tuple, Dict, Any

import numpy as np
from PIL import Image, ImageOps
|
|
| import torch |
| from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler |
| from transformers import logging as hf_logging |
| hf_logging.set_verbosity_error() |
|
|
| |
| from controlnet_aux import OpenposeDetector |
|
|
| |
| from rembg import remove |
|
|
| |
# Hugging Face checkpoint ids: the base Stable Diffusion model and the
# OpenPose-conditioned ControlNet used to constrain the generation.
MODEL_ID = "runwayml/stable-diffusion-v1-5"
CONTROLNET_ID = "lllyasviel/sd-controlnet-openpose"
# Prefer GPU when available; models, dtype choice and generators follow this.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Module-level singletons, built lazily on first use (loading is expensive).
_PIPELINE = None
_OP_DETECTOR = None
|
|
def get_openpose_detector():
    """Return the module-wide OpenPose detector, creating it on first call."""
    global _OP_DETECTOR
    detector = _OP_DETECTOR
    if detector is None:
        detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
        _OP_DETECTOR = detector
    return detector
|
|
def load_pipeline():
    """
    Load the ControlNet + Stable Diffusion pipeline once and cache it.

    Uses half precision on CUDA and full precision on CPU, swaps the
    scheduler for UniPCMultistep, and moves the pipeline to DEVICE.
    """
    global _PIPELINE
    if _PIPELINE is None:
        dtype = torch.float16 if DEVICE == "cuda" else torch.float32

        controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=dtype)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            MODEL_ID,
            controlnet=controlnet,
            safety_checker=None,
            torch_dtype=dtype,
        )
        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

        if DEVICE == "cuda":
            # Attention slicing trades a little speed for lower VRAM use.
            pipe.enable_attention_slicing()
        pipe.to(DEVICE)

        _PIPELINE = pipe
    return _PIPELINE
|
|
def remove_background(pil_img: Image.Image) -> Image.Image:
    """
    Strip the background from the garment photo using rembg.

    The image is round-tripped through PNG bytes because rembg operates on
    encoded image data. Returns an RGBA image whose alpha masks the background.
    """
    buffer = io.BytesIO()
    pil_img.convert("RGBA").save(buffer, format="PNG")
    result_bytes = remove(buffer.getvalue())
    return Image.open(io.BytesIO(result_bytes)).convert("RGBA")
|
|
def simple_align_garment_to_model(model_img: Image.Image, garment_rgba: Image.Image, pose_keypoints=None) -> Image.Image:
    """
    Roughly place the garment over the model photo.

    When shoulder keypoints are available, the garment is scaled from the
    estimated shoulder distance and pasted over the torso; otherwise it is
    scaled to half the model's width and pasted centered. This is only an
    initialization -- SD + ControlNet does the refinement.

    Args:
        model_img: photo of the person (any mode; converted to RGBA).
        garment_rgba: garment image with an alpha channel
            (e.g. from remove_background).
        pose_keypoints: optional dict with "left_shoulder" / "right_shoulder"
            entries as (x, y) pixel pairs; missing/empty dicts fall back to
            the centered placement.

    Returns:
        RGBA image of the model with the garment composited on top.
    """
    model = model_img.convert("RGBA")
    Wm, Hm = model.size
    Wg, Hg = garment_rgba.size

    ls = rs = None
    if pose_keypoints:
        ls = pose_keypoints.get("left_shoulder")
        rs = pose_keypoints.get("right_shoulder")

    if ls and rs:
        try:
            shoulder_dist = math.hypot(rs[0] - ls[0], rs[1] - ls[1])
            # The garment is slightly wider than the shoulders themselves.
            target_w = int(shoulder_dist * 1.4)
            scale = max(0.1, target_w / Wg)
            new_size = (max(1, int(Wg * scale)), max(1, int(Hg * scale)))
            g_resized = garment_rgba.resize(new_size, resample=Image.LANCZOS)
            center_x = int((ls[0] + rs[0]) / 2)
            top_y = int((ls[1] + rs[1]) / 1.8)
            pos = (max(0, center_x - new_size[0] // 2), max(0, top_y - new_size[1] // 6))
            canvas = model.copy()
            canvas.paste(g_resized, pos, g_resized)
            return canvas
        except (TypeError, IndexError, ValueError):
            # Malformed keypoints: fall through to the centered placement
            # (previously a broad except that hid real errors).
            pass

    # Fallback: garment at half the model's width, roughly on the torso.
    target_w = int(Wm * 0.5)
    scale = target_w / Wg
    new_size = (max(1, int(Wg * scale)), max(1, int(Hg * scale)))
    g_resized = garment_rgba.resize(new_size, resample=Image.LANCZOS)
    pos = ((Wm - new_size[0]) // 2, int(Hm * 0.28))
    canvas = model.copy()
    canvas.paste(g_resized, pos, g_resized)
    return canvas
|
|
def extract_pose_and_keypoints(model_img: Image.Image) -> Tuple[Image.Image, Dict[str, Tuple[int, int]]]:
    """
    Generate an OpenPose conditioning image for the model photo.

    Uses controlnet_aux.OpenposeDetector to render the pose map. Keypoint
    extraction (shoulders, etc.) is not implemented yet, so the returned
    dict is always empty; callers must cope with that
    (simple_align_garment_to_model falls back to a centered placement).

    Returns:
        (pose_image, keypoints) -- pose_image is RGB. If the detector fails,
        a white image of the same size and an empty dict are returned so the
        pipeline stays usable.
    """
    detector = get_openpose_detector()
    try:
        pose_image = detector(model_img).convert("RGB")
    except Exception:
        # Best-effort fallback: a blank conditioning image instead of
        # propagating detector failures.
        return Image.new("RGB", model_img.size, (255, 255, 255)), {}

    # TODO: parse shoulder keypoints out of the detector result so garment
    # alignment can use them; for now the dict is intentionally empty.
    return pose_image, {}
|
|
def run_pipeline(model_image: Image.Image, garment_image: Image.Image, prompt_extra: str = "") -> Tuple[Image.Image, Dict[str, Any]]:
    """
    Run the full virtual try-on pipeline.

    Steps:
      1) extract the pose map from the model photo
      2) remove the garment's background and roughly align it on the model
      3) use the composite as the init image for SD + ControlNet, with the
         pose map as the conditioning image

    Args:
        model_image: photo of the person.
        garment_image: photo of the garment.
        prompt_extra: optional text appended to the base prompt; when empty,
            a default garment-preservation phrase is used.

    Returns:
        (result_image, info) -- info records model ids, sampler settings and
        the random seed actually used.
    """
    # Cap the longest side at 768 and snap both sides down to multiples of 8:
    # Stable Diffusion requires width/height divisible by 8.
    max_side = 768
    model_img = model_image.convert("RGB")
    W, H = model_img.size
    scale = max_side / max(W, H) if max(W, H) > max_side else 1.0
    new_w = max(8, int(W * scale) // 8 * 8)
    new_h = max(8, int(H * scale) // 8 * 8)
    if (new_w, new_h) != (W, H):
        model_img = model_img.resize((new_w, new_h), Image.LANCZOS)

    # Garment with background removed (RGBA), pose map + (possibly empty)
    # keypoints, and the initial rough composite used as the init image.
    garment_rgba = remove_background(garment_image)
    pose_map, keypoints = extract_pose_and_keypoints(model_img)
    init_composite = simple_align_garment_to_model(model_img, garment_rgba, pose_keypoints=keypoints)

    pipe = load_pipeline()

    prompt = ("photo-realistic fashion try-on, ultra detailed, high resolution, realistic lighting. "
              + (prompt_extra or "garment applied on person, preserve texture and zippers, realistic folds."))

    init_image = init_composite.convert("RGB")
    control_image = pose_map.convert("RGB")

    num_inference_steps = 20
    guidance_scale = 7.5
    strength = 0.75

    # Draw a random seed, keep it so it can be reported back to the caller.
    seed = torch.randint(0, 2**31 - 1, (1,)).item()
    generator = torch.Generator(device=DEVICE).manual_seed(seed)

    # autocast only makes sense on CUDA; on CPU run with no context instead
    # of the fragile torch.cpu.amp.autocast(enabled=False) form.
    amp_ctx = torch.autocast(device_type="cuda") if DEVICE == "cuda" else contextlib.nullcontext()

    try:
        with amp_ctx:
            out = pipe(
                prompt=prompt,
                image=init_image,
                control_image=control_image,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                strength=strength,
                generator=generator,
            )
        result_img = out.images[0]
    except TypeError:
        # Older diffusers releases use different keyword names for the
        # init / conditioning images.
        out = pipe(
            prompt=prompt,
            init_image=init_image,
            controlnet_conditioning_image=control_image,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            strength=strength,
            generator=generator,
        )
        result_img = out.images[0]

    info = {
        "model_id": MODEL_ID,
        "controlnet_id": CONTROLNET_ID,
        "steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "strength": strength,
        "seed": seed,
    }
    return result_img, info