emcoder-api-ui / main.py
yezdata's picture
switch automodel for automodelforseqclass
071037c
Raw
History Blame Contribute Delete
6.85 kB
import gradio as gr
import torch
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel, Field
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# FastAPI application object; the JSON routes below are registered on it.
app = FastAPI(title="EmCoder API & UI")
# Hugging Face Hub repository the classifier is loaded from.
repo_id = "yezdata/EmCoder"
# The tokenizer comes from the ModernBERT base repository rather than repo_id.
# NOTE(review): presumably EmCoder shares ModernBERT's vocabulary — confirm.
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
# NOTE(review): trust_remote_code=True allows transformers to execute Python
# modelling code shipped in the Hub repository, and no revision is pinned
# here — consider pinning a commit so the executed code cannot change silently.
model = AutoModelForSequenceClassification.from_pretrained(
    repo_id, trust_remote_code=True
)
# Switch the model to evaluation mode once at import time, before any request.
model.eval()
def compute_binary_entropy(p: torch.Tensor, eps: float = 1e-9) -> torch.Tensor:
    """Return the element-wise binary (Bernoulli) entropy of ``p`` in bits.

    Args:
        p: Tensor of probabilities; values are clamped into ``[eps, 1 - eps]``
            before the logarithms are taken.
        eps: Requested clamping margin. It is raised to the machine epsilon of
            ``p``'s dtype when it is smaller than that.

    Returns:
        A tensor shaped like ``p`` holding
        ``-(p * log2(p) + (1 - p) * log2(1 - p))``.
    """
    if p.dtype.is_floating_point:
        # A margin below the dtype's resolution is a no-op: in float32,
        # ``1.0 - 1e-9`` rounds back to 1.0, so p == 1.0 would survive the
        # clamp and produce 0 * log2(0) = NaN further down.
        eps = max(eps, torch.finfo(p.dtype).eps)
    p = torch.clamp(p, min=eps, max=1.0 - eps)
    return -(p * torch.log2(p) + (1.0 - p) * torch.log2(1.0 - p))
def compute_uncertainty(probs_samples: torch.Tensor, mean_probs: torch.Tensor) -> dict:
    """Decompose predictive uncertainty into total, aleatoric and epistemic parts.

    Args:
        probs_samples: Per-sample probabilities with the samples along dim 0
            (the caller in this file passes ``(n_samples, num_labels)``).
        mean_probs: Probabilities averaged over the samples.

    Returns:
        A dict with tensors under the keys ``"total"``, ``"aleatoric"`` and
        ``"epistemic"``.
    """
    # Total: entropy of the averaged prediction.
    predictive_entropy = compute_binary_entropy(mean_probs)

    # Aleatoric: expected entropy, i.e. per-sample entropies averaged over dim 0.
    expected_entropy = compute_binary_entropy(probs_samples).mean(dim=0)

    # Epistemic: mutual information, the gap between the two, floored at zero.
    information_gain = torch.clamp(predictive_entropy - expected_entropy, min=0.0)

    return {
        "total": predictive_entropy,
        "aleatoric": expected_entropy,
        "epistemic": information_gain,
    }
class PredictRequest(BaseModel):
    """Request body accepted by the ``/predict`` endpoint."""

    # Raw text to classify.
    text: str
    # When true, the Monte Carlo branch of the endpoint is used and uncertainty
    # figures are returned alongside the probabilities.
    monte_carlo: bool = False
    # Number of Monte Carlo samples. Rejected at validation time when below 1,
    # so a zero or negative count never reaches the model or the sample mean.
    n_samples: int = Field(default=10, ge=1)
@app.post("/predict")
def predict_api(request: PredictRequest):
    """Classify ``request.text`` and return per-label sigmoid probabilities.

    When ``request.monte_carlo`` is set, the model's ``mc_forward`` is called
    with ``request.n_samples`` samples and every label additionally reports
    total, epistemic and aleatoric uncertainty. Otherwise a single regular
    forward pass is used.

    Returns:
        A dict with ``"mode"`` (``"monte_carlo"`` or ``"standard"``) and
        ``"predictions"`` keyed by label name; Monte Carlo responses also
        carry ``"n_samples"``.
    """
    tokens = tokenizer(request.text, return_tensors="pt")
    ids = tokens["input_ids"]
    mask = tokens["attention_mask"]
    label_names = model.config.id2label

    if not request.monte_carlo:
        with torch.no_grad():
            forward_out = model(input_ids=ids, attention_mask=mask)
            # Drop dim 0 so labels can be indexed directly
            # (presumably a batch of one, since a single text is encoded).
            label_probs = torch.sigmoid(forward_out.logits.squeeze(0))
        return {
            "mode": "standard",
            "predictions": {
                label_names[idx]: {"probability": float(label_probs[idx])}
                for idx in range(model.config.num_labels)
            },
        }

    with torch.no_grad():
        mc_out = model.mc_forward(
            input_ids=ids,
            attention_mask=mask,
            n_samples=request.n_samples,
        )
        # Drop dim 1 so samples sit on dim 0
        # (assumes logits are (n_samples, 1, num_labels) — TODO confirm).
        sampled_probs = torch.sigmoid(mc_out.logits.squeeze(1))
        averaged_probs = sampled_probs.mean(dim=0)
        uncertainty = compute_uncertainty(
            probs_samples=sampled_probs, mean_probs=averaged_probs
        )

    per_label = {}
    for idx in range(model.config.num_labels):
        per_label[label_names[idx]] = {
            "mean_probability": float(averaged_probs[idx]),
            "uncertainty": {
                "total_entropy": float(uncertainty["total"][idx]),
                "epistemic": float(uncertainty["epistemic"][idx]),
                "aleatoric": float(uncertainty["aleatoric"][idx]),
            },
        }

    return {
        "mode": "monte_carlo",
        "n_samples": request.n_samples,
        "predictions": per_label,
    }
@app.get("/health")
def health_check():
    """Liveness probe: unconditionally report a healthy status."""
    return dict(status="healthy")
def gradio_predict(text, monte_carlo, n_samples):
    """Run a prediction for the Gradio UI and format it as table rows.

    Args:
        text: Text entered by the user.
        monte_carlo: Truthy to request the Monte Carlo branch.
        n_samples: Sample count from the UI; converted with ``int``.

    Returns:
        A pair of ``gr.update`` objects for the standard table and the Monte
        Carlo table, in that order. The table matching the selected mode is
        filled and made visible; the other is emptied and hidden.
    """
    mc_enabled = bool(monte_carlo)
    payload = PredictRequest(
        text=text, monte_carlo=mc_enabled, n_samples=int(n_samples)
    )
    result = predict_api(payload)

    # Rank labels from most to least probable, using the field the mode provides.
    prob_key = "mean_probability" if mc_enabled else "probability"
    ranked = sorted(
        result["predictions"].items(),
        key=lambda entry: entry[1][prob_key],
        reverse=True,
    )

    if not mc_enabled:
        table = [
            [name, f"{stats['probability'] * 100:.2f}%"] for name, stats in ranked
        ]
        return (
            gr.update(value=table, visible=True),
            gr.update(value=[], visible=False),
        )

    table = []
    for name, stats in ranked:
        unc = stats["uncertainty"]
        table.append(
            [
                name,
                f"{stats['mean_probability'] * 100:.2f}%",
                f"{unc['total_entropy']:.4f}",
                f"{unc['epistemic']:.4f}",
                f"{unc['aleatoric']:.4f}",
            ]
        )
    return (
        gr.update(value=[], visible=False),
        gr.update(value=table, visible=True),
    )
# Gradio front end: input controls in one column, result tables in the other.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened — confirm it matches the intended layout.
with gr.Blocks(title="EmCoder - Probabilistic Emotion Recognition") as ui:
    gr.Markdown("# EmCoder - Probabilistic Emotion Recognition")
    # Header line with the endpoint URL, API docs link and model card link,
    # followed by a short description of the demo.
    gr.Markdown(
        "### 🛜 API Endpoint: https://yezdata-emcoder-api-ui.hf.space/predict | "
        "[📋 API Docs](/docs) | "
        "[🤗 Model Hub Card](https://huggingface.co/yezdata/EmCoder)\n\n"
        "Live API service and graphical interface demonstrating **EmCoder's** epistemic and aleatoric "
        "uncertainty decomposition via Monte Carlo Dropout across **28 multi-label emotion classes**."
    )
    with gr.Row():
        # Input column: text, Monte Carlo toggle, sample count and submit button.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Input text",
                placeholder="Input text for classification...",
                lines=3,
            )
            use_mc = gr.Checkbox(
                label="Use Monte Carlo Dropout (Uncertainty Estimation)", value=False
            )
            # Sample count offered by the UI is limited to 5..50 in steps of 1.
            mc_samples_slider = gr.Slider(
                minimum=5, maximum=50, value=10, step=1, label="MC samples"
            )
            submit_btn = gr.Button("Analyze Emotions", variant="primary")
        # Output column: two tables, of which gradio_predict shows exactly one.
        with gr.Column(scale=2):
            # Shown for standard predictions; visible initially.
            output_table_standard = gr.DataFrame(
                headers=["Emotion", "Probability"],
                datatype=["str", "str"],
                label="Prediction Report",
                visible=True,
            )
            # Shown for Monte Carlo predictions; hidden initially.
            output_table_mc = gr.DataFrame(
                headers=[
                    "Emotion",
                    "Probability (Mean)",
                    "Total Uncertainty (Entropy)",
                    "Epistemic (Model Knowledge)",
                    "Aleatoric (Data Noise)",
                ],
                datatype=["str", "str", "str", "str", "str"],
                label="Prediction & Bayesian Uncertainty Report",
                visible=False,
            )
    # Clicking the button calls gradio_predict; its two return values update
    # the standard table and the Monte Carlo table, in that order.
    submit_btn.click(
        fn=gradio_predict,
        inputs=[input_text, use_mc, mc_samples_slider],
        outputs=[output_table_standard, output_table_mc],
    )
# Mount the Gradio UI on the FastAPI app at the root path and rebind `app`
# to the returned application.
# NOTE(review): presumably /predict and /health keep resolving because they
# were registered before this mount — confirm.
app = gr.mount_gradio_app(app, ui, path="/")
if __name__ == "__main__":
    # Running this file directly serves "main:app" on all interfaces, port 8000.
    # NOTE(review): reload=True is uvicorn's auto-reload option, normally a
    # development aid — confirm it is intended for the deployed service.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)