"""EmCoder inference service: a FastAPI JSON API with a Gradio UI mounted on top.

Exposes ``POST /predict`` (standard or Monte Carlo inference over the
EmCoder multi-label classifier) and ``GET /health``, and serves an
interactive Gradio front end at ``/``.
"""

import torch
import gradio as gr
from fastapi import FastAPI
import uvicorn
from pydantic import BaseModel, Field
from transformers import AutoTokenizer, AutoModelForSequenceClassification

app = FastAPI(title="EmCoder API & UI")

repo_id = "yezdata/EmCoder"
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
# NOTE(review): trust_remote_code=True executes Python code shipped in the
# model repository; keep it only while `repo_id` points at a trusted repo.
model = AutoModelForSequenceClassification.from_pretrained(
    repo_id, trust_remote_code=True
)
model.eval()


def compute_binary_entropy(p: torch.Tensor, eps: float = 1e-9) -> torch.Tensor:
    """Return the element-wise binary (Bernoulli) entropy of ``p`` in bits.

    Args:
        p: Tensor of probabilities in ``[0, 1]``; any shape.
        eps: Clamping margin that keeps ``p`` away from 0 and 1 so the
            logarithms stay finite. It is raised to the machine epsilon of
            ``p``'s dtype when it is smaller than that.

    Returns:
        A tensor with the same shape as ``p`` holding
        ``-(p*log2(p) + (1-p)*log2(1-p))``.
    """
    if not p.is_floating_point():
        p = p.to(torch.get_default_dtype())
    # In float32, 1.0 - 1e-9 rounds to exactly 1.0, so the upper clamp would be
    # a no-op and a saturated probability (p == 1.0) would produce
    # 0 * log2(0) = NaN. Use a margin the dtype can actually represent.
    safe_eps = max(eps, torch.finfo(p.dtype).eps)
    p = torch.clamp(p, min=safe_eps, max=1.0 - safe_eps)
    return -(p * torch.log2(p) + (1.0 - p) * torch.log2(1.0 - p))


def compute_uncertainty(probs_samples: torch.Tensor, mean_probs: torch.Tensor) -> dict:
    """Decompose predictive uncertainty into aleatoric and epistemic parts.

    Args:
        probs_samples: Per-sample probabilities, reduced over ``dim=0``;
            the caller passes shape ``(n_samples, num_labels)``.
        mean_probs: Probabilities averaged over the samples.

    Returns:
        Dict with tensors under the keys ``"total"`` (entropy of the mean),
        ``"aleatoric"`` (mean of the per-sample entropies) and
        ``"epistemic"`` (their difference, clamped to be non-negative).
    """
    # Total uncertainty: entropy of the averaged prediction.
    total_unc = compute_binary_entropy(mean_probs)  # (num_labels,)

    # Aleatoric (expected entropy): average entropy of the individual samples.
    aleatoric_unc = compute_binary_entropy(probs_samples).mean(dim=0)  # (num_labels,)

    # Epistemic (mutual information). Clamped because rounding error (and the
    # eps clamping above) can push the difference slightly below zero.
    epistemic_unc = torch.clamp(total_unc - aleatoric_unc, min=0.0)

    return {"total": total_unc, "aleatoric": aleatoric_unc, "epistemic": epistemic_unc}


class PredictRequest(BaseModel):
    """Request body for ``POST /predict``."""

    # Text to classify.
    text: str
    # When true, run Monte Carlo inference and report uncertainty estimates.
    monte_carlo: bool = False
    # Number of stochastic forward passes; at least one sample is required.
    # NOTE(review): there is no upper bound, so a client can request an
    # arbitrarily expensive computation - consider capping this.
    n_samples: int = Field(10, ge=1)


@app.post("/predict")
def predict_api(request: PredictRequest):
    """Classify ``request.text`` and return per-label probabilities.

    In standard mode each label maps to ``{"probability": ...}``. In Monte
    Carlo mode each label maps to its mean probability across the samples plus
    total/epistemic/aleatoric uncertainty values.
    """
    # truncation=True caps the input at the tokenizer's maximum length so an
    # over-long text cannot overflow the model's context window.
    encoded = tokenizer(request.text, return_tensors="pt", truncation=True)
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    id2label = model.config.id2label
    label_names = [id2label[i] for i in range(model.config.num_labels)]

    if not request.monte_carlo:
        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # Drop the batch dimension (single input) and convert once to floats.
        probs = torch.sigmoid(outputs.logits.squeeze(0)).tolist()
        return {
            "mode": "standard",
            "predictions": {
                name: {"probability": probs[i]} for i, name in enumerate(label_names)
            },
        }

    with torch.no_grad():
        outputs = model.mc_forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            n_samples=request.n_samples,
        )

    # Presumably logits are (n_samples, batch=1, num_labels); squeezing dim 1
    # leaves one row per Monte Carlo sample.
    probs_samples = torch.sigmoid(outputs.logits.squeeze(1))  # (n_samples, num_labels)
    mean_probs = probs_samples.mean(dim=0)  # (num_labels,)
    unc_dict = compute_uncertainty(probs_samples=probs_samples, mean_probs=mean_probs)

    mean_values = mean_probs.tolist()
    total_values = unc_dict["total"].tolist()
    epistemic_values = unc_dict["epistemic"].tolist()
    aleatoric_values = unc_dict["aleatoric"].tolist()

    predictions = {
        name: {
            "mean_probability": mean_values[i],
            "uncertainty": {
                "total_entropy": total_values[i],
                "epistemic": epistemic_values[i],
                "aleatoric": aleatoric_values[i],
            },
        }
        for i, name in enumerate(label_names)
    }
    return {
        "mode": "monte_carlo",
        "n_samples": request.n_samples,
        "predictions": predictions,
    }


@app.get("/health")
def health_check():
    """Liveness probe; always reports a healthy status."""
    return {"status": "healthy"}


def gradio_predict(text, monte_carlo, n_samples):
    """Run a prediction for the Gradio UI and format it as table rows.

    Returns a pair of ``gr.update`` objects for the standard table and the
    Monte Carlo table; only the table matching the selected mode is made
    visible and filled, with rows sorted by descending probability.
    """
    use_mc_mode = bool(monte_carlo)
    request_data = PredictRequest(
        text=text, monte_carlo=use_mc_mode, n_samples=int(n_samples)
    )
    predictions = predict_api(request_data)["predictions"]

    prob_key = "mean_probability" if use_mc_mode else "probability"
    ranked = sorted(
        predictions.items(), key=lambda item: item[1][prob_key], reverse=True
    )

    if use_mc_mode:
        mc_rows = [
            [
                label_name,
                f"{metrics['mean_probability'] * 100:.2f}%",
                f"{metrics['uncertainty']['total_entropy']:.4f}",
                f"{metrics['uncertainty']['epistemic']:.4f}",
                f"{metrics['uncertainty']['aleatoric']:.4f}",
            ]
            for label_name, metrics in ranked
        ]
        return (
            gr.update(value=[], visible=False),
            gr.update(value=mc_rows, visible=True),
        )

    standard_rows = [
        [label_name, f"{metrics['probability'] * 100:.2f}%"]
        for label_name, metrics in ranked
    ]
    return (
        gr.update(value=standard_rows, visible=True),
        gr.update(value=[], visible=False),
    )


with gr.Blocks(title="EmCoder - Probabilistic Emotion Recognition") as ui:
    gr.Markdown("# EmCoder - Probabilistic Emotion Recognition")
    gr.Markdown(
        "### 🛜 API Endpoint: https://yezdata-emcoder-api-ui.hf.space/predict | "
        "[📋 API Docs](/docs) | "
        "[🤗 Model Hub Card](https://huggingface.co/yezdata/EmCoder)\n\n"
        "Live API service and graphical interface demonstrating **EmCoder's** epistemic and aleatoric "
        "uncertainty decomposition via Monte Carlo Dropout across **28 multi-label emotion classes**."
    )

    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Input text",
                placeholder="Input text for classification...",
                lines=3,
            )
            use_mc = gr.Checkbox(
                label="Use Monte Carlo Dropout (Uncertainty Estimation)", value=False
            )
            mc_samples_slider = gr.Slider(
                minimum=5, maximum=50, value=10, step=1, label="MC samples"
            )
            submit_btn = gr.Button("Analyze Emotions", variant="primary")

        # Right column: two result tables; only one is visible at a time.
        with gr.Column(scale=2):
            output_table_standard = gr.DataFrame(
                headers=["Emotion", "Probability"],
                datatype=["str", "str"],
                label="Prediction Report",
                visible=True,
            )
            output_table_mc = gr.DataFrame(
                headers=[
                    "Emotion",
                    "Probability (Mean)",
                    "Total Uncertainty (Entropy)",
                    "Epistemic (Model Knowledge)",
                    "Aleatoric (Data Noise)",
                ],
                datatype=["str", "str", "str", "str", "str"],
                label="Prediction & Bayesian Uncertainty Report",
                visible=False,
            )

    submit_btn.click(
        fn=gradio_predict,
        inputs=[input_text, use_mc, mc_samples_slider],
        outputs=[output_table_standard, output_table_mc],
    )

# Mount the UI last so the API routes registered above keep priority over "/".
app = gr.mount_gradio_app(app, ui, path="/")

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)