| | import gradio as gr |
| | import os |
| | import torch |
| |
|
| | import torch |
| | from PIL import Image |
| | from diffusers import ( |
| | AutoencoderKL, |
| | ) |
| |
|
| | from transformers import CLIPTextModel, CLIPTokenizer |
| | from apdepth.marigold_pipeline import MarigoldPipeline |
| | from apdepth.modules.unet_2d_condition import UNet2DConditionModel |
| |
|
def load_example(example_images):
    """Identity passthrough for Gradio example selection.

    Gradio calls this with the example image chosen in the gallery; it is
    returned unchanged so the value flows straight into the input component.
    """
    return example_images
| |
|
| |
|
# ---- Model setup (runs once at import time) ----

# Prefer GPU when available; the assembled pipeline is moved there below.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "developy/ApDepth"

# Full precision everywhere.
# NOTE(review): fp16 would halve memory on CUDA — confirm the checkpoint
# tolerates it before switching.
torch_dtype = torch.float32

# Load each pipeline component from the Hugging Face Hub repo's subfolders.
# NOTE(review): `allow_pickle` is not a documented `from_pretrained` kwarg in
# diffusers — verify it is honored rather than silently ignored.
vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder="vae", torch_dtype=torch_dtype, allow_pickle=False)
unet = UNet2DConditionModel.from_pretrained(model_repo_id, subfolder="unet", torch_dtype=torch_dtype, allow_pickle=False)
text_encoder = CLIPTextModel.from_pretrained(model_repo_id, subfolder="text_encoder", torch_dtype=torch_dtype)
# NOTE(review): a tokenizer has no weights, so `torch_dtype` is presumably
# ignored here — harmless but confirm.
tokenizer = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer", torch_dtype=torch_dtype)
pipe = MarigoldPipeline(vae=vae, unet=unet, text_encoder=text_encoder, tokenizer=tokenizer)

# xformers attention is an optional memory/speed optimization; fall back
# silently when the package is absent (ModuleNotFoundError is a subclass of
# ImportError, so this catch covers the missing-package case).
try:
    pipe.enable_xformers_memory_efficient_attention()
except ImportError:
    pass

pipe = pipe.to(device)
| |
|
| |
|
| | |
def infer(
    input_image,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the ApDepth pipeline on one image and return the colorized depth map.

    Args:
        input_image: PIL image supplied by the Gradio input component.
        progress: Gradio progress tracker; track_tqdm mirrors the pipeline's
            internal tqdm bars into the UI.

    Returns:
        A PIL image with the depth map rendered via the "Spectral" colormap.
    """
    run_options = dict(
        processing_res=768,
        match_input_res=True,
        batch_size=1,
        color_map="Spectral",
        show_progress_bar=True,
        resample_method="bilinear",
    )
    result = pipe(input_image, **run_options)

    colored_depth: Image.Image = result.depth_colored
    return colored_depth
| |
|
| |
|
| | |
# Hard-coded example gallery paths.
# NOTE(review): this list appears unused — the Examples widget below is
# populated from os.listdir('example') instead. Confirm before removing.
example_images = [
    "example/00.jpg",
    "example/01.jpg",
    "example/02.jpg",
    "example/03.jpg",
    "example/04.jpg",
    "example/05.jpg",
    "example/06.jpg",
    "example/07.jpg",
    "example/08.jpg",
]
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
# Component-level CSS: cap the image containers to the viewport so large
# inputs/outputs do not overflow the page, and fix the download button height.
css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
#download {
    height: 62px;
}
"""

# Markdown rendered at the top of the demo page.
title = "# ApDepth"
description = """**Official demo for ApDepth**(We provide models trained using Depth Anything v2-base here, as the Hugging Face space is limited to 1GB.).
Please refer to our [website](https://haruko386.github.io/research/) for more details."""
| |
|
| |
|
# ---- Gradio UI layout and wiring ----
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown(" ### Depth Estimation with ApDepth.")

    # Input image on the left, predicted depth on the right.
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="pil", elem_id="img-display-input")
        with gr.Column():
            depth_map = gr.Image(label="Depth Image", type="pil", interactive=False, elem_id="depth-map")

    compute_button = gr.Button(value="Compute Depth")

    # Clicking the button runs the full depth-estimation pipeline on the
    # current input image and shows the colorized result.
    compute_button.click(
        fn=infer,
        inputs=[input_image],
        outputs=[depth_map]
    )

    # Build the example gallery from whatever files are actually on disk
    # (sorted for a stable display order). This ignores the hard-coded
    # `example_images` list defined above.
    # NOTE(review): without cache_examples=True, selecting an example
    # presumably only fills the input and `infer` runs on button click —
    # confirm this is the intended UX.
    example_files = os.listdir('example')
    example_files.sort()
    example_files = [os.path.join('example', filename) for filename in example_files]
    examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_map], fn=infer)

# share=True opens a public Gradio tunnel in addition to the local server.
demo.queue().launch(share=True)
| |
|