Spaces:
Running
Running
import gradio as gr
import torch
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel, Field
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# --- Application and model setup: executed once, when the module is imported ---

# Hub repository that holds the fine-tuned classifier weights.
repo_id = "yezdata/EmCoder"

app = FastAPI(title="EmCoder API & UI")

# NOTE(review): the tokenizer is loaded from the base checkpoint rather than
# from `repo_id`; presumably the fine-tuned repo reuses that vocabulary --
# confirm.
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")

# NOTE(review): trust_remote_code=True runs Python code shipped in the hub
# repository; consider pinning a revision so the executed code cannot change
# underneath the service.
model = AutoModelForSequenceClassification.from_pretrained(
    repo_id,
    trust_remote_code=True,
)
model.eval()
def compute_binary_entropy(p: torch.Tensor, eps: float = 1e-9) -> torch.Tensor:
    """Return the element-wise binary (Bernoulli) entropy of ``p`` in bits.

    Evaluates ``-(p * log2(p) + (1 - p) * log2(1 - p))`` after clamping ``p``
    into ``[eps, 1 - eps]`` so that neither logarithm receives zero.

    Args:
        p: Tensor of probabilities, any shape.
        eps: Requested clamping margin. For floating-point inputs it is raised
            to the machine epsilon of ``p``'s dtype when it is smaller than
            that: ``1 - 1e-9`` rounds to exactly ``1.0`` in float32, which
            would leave ``p == 1`` unclamped and yield ``0 * log2(0) = NaN``.

    Returns:
        Tensor of entropies with the same shape as ``p``.
    """
    if p.is_floating_point():
        # Keep both clamp bounds representable in the tensor's precision.
        eps = max(eps, torch.finfo(p.dtype).eps)
    p = torch.clamp(p, min=eps, max=1.0 - eps)
    q = 1.0 - p
    return -(p * torch.log2(p) + q * torch.log2(q))
def compute_uncertainty(probs_samples: torch.Tensor, mean_probs: torch.Tensor) -> dict:
    """Split predictive uncertainty into aleatoric and epistemic parts.

    Args:
        probs_samples: Per-sample probabilities; reduced over dimension 0,
            so presumably shaped (n_samples, num_labels).
        mean_probs: Probabilities averaged over the samples, presumably
            shaped (num_labels,).

    Returns:
        Dict with three tensors:
        ``"total"`` is the entropy of the mean prediction, ``"aleatoric"`` is
        the mean of the per-sample entropies (expected entropy), and
        ``"epistemic"`` is their difference (mutual information), floored at
        zero.
    """
    predictive_entropy = compute_binary_entropy(mean_probs)
    expected_entropy = compute_binary_entropy(probs_samples).mean(dim=0)

    # The difference is floored at zero so rounding cannot make it negative.
    mutual_information = (predictive_entropy - expected_entropy).clamp(min=0.0)

    return {
        "total": predictive_entropy,
        "aleatoric": expected_entropy,
        "epistemic": mutual_information,
    }
class PredictRequest(BaseModel):
    """Request body describing one prediction job."""

    # Raw input text to classify.
    text: str
    # When true, the Monte Carlo path is used and uncertainty is reported.
    monte_carlo: bool = False
    # Number of stochastic forward passes for the Monte Carlo path. At least
    # one sample is required because the results are averaged over the sample
    # dimension; non-positive values are rejected at validation time instead
    # of failing inside the model. No upper bound is enforced here.
    n_samples: int = Field(default=10, ge=1)
def predict_api(request: PredictRequest):
    """Classify ``request.text`` and return per-label sigmoid probabilities.

    In standard mode a single forward pass is run and each label maps to
    ``{"probability": ...}``. In Monte Carlo mode ``model.mc_forward`` is
    called with ``request.n_samples`` and each label maps to its mean
    probability plus the total/epistemic/aleatoric uncertainty values.

    Args:
        request: Text to classify together with the mode switches.

    Returns:
        A dict with a ``"mode"`` key (``"standard"`` or ``"monte_carlo"``), a
        ``"predictions"`` mapping keyed by label name and, in Monte Carlo
        mode, the ``"n_samples"`` that was used.

    NOTE(review): no route decorator is visible on this function in the
    reviewed text although the UI advertises a ``/predict`` endpoint; confirm
    that it is registered on ``app``.
    """
    tokens = tokenizer(request.text, return_tensors="pt")
    forward_kwargs = {
        "input_ids": tokens["input_ids"],
        "attention_mask": tokens["attention_mask"],
    }
    labels = model.config.id2label
    label_ids = range(model.config.num_labels)

    if not request.monte_carlo:
        with torch.no_grad():
            result = model(**forward_kwargs)
        # squeeze(0) drops the leading axis (a single text is submitted).
        probs = torch.sigmoid(result.logits.squeeze(0))
        return {
            "mode": "standard",
            "predictions": {
                labels[idx]: {"probability": float(probs[idx])} for idx in label_ids
            },
        }

    with torch.no_grad():
        result = model.mc_forward(n_samples=request.n_samples, **forward_kwargs)

    # Assumes mc_forward returns logits shaped (n_samples, 1, num_labels), so
    # squeeze(1) leaves (n_samples, num_labels) -- TODO confirm against the
    # remote model code.
    probs_samples = torch.sigmoid(result.logits.squeeze(1))
    mean_probs = probs_samples.mean(dim=0)
    uncertainty = compute_uncertainty(
        probs_samples=probs_samples, mean_probs=mean_probs
    )

    def describe(idx):
        # One label's report entry for the Monte Carlo response.
        return {
            "mean_probability": float(mean_probs[idx]),
            "uncertainty": {
                "total_entropy": float(uncertainty["total"][idx]),
                "epistemic": float(uncertainty["epistemic"][idx]),
                "aleatoric": float(uncertainty["aleatoric"][idx]),
            },
        }

    return {
        "mode": "monte_carlo",
        "n_samples": request.n_samples,
        "predictions": {labels[idx]: describe(idx) for idx in label_ids},
    }
def health_check():
    """Return a constant payload signalling that the service is up.

    NOTE(review): no route decorator is visible on this function in the
    reviewed text; confirm that it is registered on ``app``.
    """
    status_payload = {"status": "healthy"}
    return status_payload
def gradio_predict(text, monte_carlo, n_samples):
    """Run a prediction for the UI and format it as table updates.

    Args:
        text: Input text from the textbox.
        monte_carlo: Checkbox state; truthy selects the Monte Carlo report.
        n_samples: Slider value, converted to ``int`` for the request.

    Returns:
        A pair of ``gr.update`` objects, ordered (standard table, Monte Carlo
        table). The table for the active mode is filled with rows sorted by
        descending probability and made visible; the other one is emptied and
        hidden.
    """
    payload = PredictRequest(
        text=text, monte_carlo=bool(monte_carlo), n_samples=int(n_samples)
    )
    result = predict_api(payload)

    # The probability is stored under a different key in each mode.
    prob_field = "mean_probability" if monte_carlo else "probability"
    ranked = sorted(
        result["predictions"].items(),
        key=lambda entry: entry[1][prob_field],
        reverse=True,
    )

    hidden = gr.update(value=[], visible=False)

    if not monte_carlo:
        rows = [
            [label, f"{scores['probability'] * 100:.2f}%"]
            for label, scores in ranked
        ]
        return gr.update(value=rows, visible=True), hidden

    rows = [
        [
            label,
            f"{scores['mean_probability'] * 100:.2f}%",
            f"{scores['uncertainty']['total_entropy']:.4f}",
            f"{scores['uncertainty']['epistemic']:.4f}",
            f"{scores['uncertainty']['aleatoric']:.4f}",
        ]
        for label, scores in ranked
    ]
    return hidden, gr.update(value=rows, visible=True)
# --- Gradio front end: inputs on the left, result tables on the right ---
with gr.Blocks(title="EmCoder - Probabilistic Emotion Recognition") as ui:
    # Page header with links to the API, its docs and the model card.
    gr.Markdown("# EmCoder - Probabilistic Emotion Recognition")
    gr.Markdown(
        "### 🛜 API Endpoint: https://yezdata-emcoder-api-ui.hf.space/predict | "
        "[📋 API Docs](/docs) | "
        "[🤗 Model Hub Card](https://huggingface.co/yezdata/EmCoder)\n\n"
        "Live API service and graphical interface demonstrating **EmCoder's** epistemic and aleatoric "
        "uncertainty decomposition via Monte Carlo Dropout across **28 multi-label emotion classes**."
    )

    with gr.Row():
        # Left column: text input and inference options.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Input text",
                placeholder="Input text for classification...",
                lines=3,
            )
            use_mc = gr.Checkbox(
                label="Use Monte Carlo Dropout (Uncertainty Estimation)",
                value=False,
            )
            mc_samples_slider = gr.Slider(
                minimum=5,
                maximum=50,
                value=10,
                step=1,
                label="MC samples",
            )
            submit_btn = gr.Button("Analyze Emotions", variant="primary")

        # Right column: one table per mode. Only the standard table is
        # visible at start-up; the click handler toggles visibility.
        with gr.Column(scale=2):
            output_table_standard = gr.DataFrame(
                headers=["Emotion", "Probability"],
                datatype=["str", "str"],
                label="Prediction Report",
                visible=True,
            )
            output_table_mc = gr.DataFrame(
                headers=[
                    "Emotion",
                    "Probability (Mean)",
                    "Total Uncertainty (Entropy)",
                    "Epistemic (Model Knowledge)",
                    "Aleatoric (Data Noise)",
                ],
                datatype=["str", "str", "str", "str", "str"],
                label="Prediction & Bayesian Uncertainty Report",
                visible=False,
            )

    # The output order must match the tuple returned by gradio_predict:
    # (standard table, Monte Carlo table).
    submit_btn.click(
        fn=gradio_predict,
        inputs=[input_text, use_mc, mc_samples_slider],
        outputs=[output_table_standard, output_table_mc],
    )
# Serve the Gradio UI from the root path of the FastAPI application.
app = gr.mount_gradio_app(app, ui, path="/")


if __name__ == "__main__":
    # NOTE(review): the "main:app" import string presumably requires this
    # file to be importable as `main` -- confirm the file name.
    # NOTE(review): reload=True looks like a development setting; confirm it
    # is intended for the deployed service.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )