ror-12 committed
Commit 44db53f · verified · Parent: 932a82f

Update app.py

Files changed (1)
app.py +53 -45
app.py CHANGED
@@ -1,45 +1,53 @@
- # Use Full Python 3.10 (Includes system tools)
- FROM python:3.10
-
- WORKDIR /app
-
- # 1. Install System Dependencies
- # 'wget': To download files directly (Bypasses huggingface-cli errors)
- # 'poppler-utils': For PDF conversion
- # 'libgl1': For Vision
- RUN apt-get update && apt-get install -y \
-     wget \
-     poppler-utils \
-     libgl1 \
-     libglib2.0-0 \
-     && rm -rf /var/lib/apt/lists/*
-
- # 2. Install Python Libraries (Pre-built Wheels)
- RUN pip install --no-cache-dir --prefer-binary \
-     "llama-cpp-python>=0.3.1" \
-     --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
-
- RUN pip install --no-cache-dir \
-     huggingface_hub \
-     pdf2image \
-     python-multipart \
-     uvicorn \
-     fastapi \
-     pillow
-
- # 3. Create Model Directory
- RUN mkdir -p /app/model
-
- # 4. Download Model (Direct WGET - No Cache Errors)
- # Model: Llava 1.6 Mistral 7B (Q4_K_M) - High Accuracy
- RUN wget -O /app/model/model.gguf https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q4_K_M.gguf
-
- # 5. Download Projector (Direct WGET)
- # This is the vision adapter for Llava 1.6
- RUN wget -O /app/model/mmproj.gguf https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf
-
- # 6. Copy App Code
- COPY app.py .
-
- # 7. Run Server
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ from fastapi import FastAPI, UploadFile, File
+ from llama_cpp import Llama
+ from llama_cpp.llama_chat_format import Llava15ChatHandler
+ from pdf2image import convert_from_bytes
+ import io
+ from PIL import Image
+
+ app = FastAPI()
+
+ print("⏳ Loading Llava 1.6 Model...")
+
+ # 1. Initialize Vision Handler
+ # The Dockerfile saved the vision projector here:
+ chat_handler = Llava15ChatHandler(clip_model_path="/app/model/mmproj.gguf")
+
+ # 2. Initialize Model
+ llm = Llama(
+     model_path="/app/model/model.gguf",
+     chat_handler=chat_handler,
+     n_ctx=2048,
+     n_gpu_layers=0,  # Force CPU
+     verbose=True
+ )
+ print("✅ Model Loaded Successfully!")
+
+ @app.post("/extract")
+ async def extract_text(file: UploadFile = File(...)):
+     # --- Image Processing ---
+     if file.filename.lower().endswith('.pdf'):
+         pdf_bytes = await file.read()
+         images = convert_from_bytes(pdf_bytes)
+         image = images[0]  # Only the first page is processed
+     else:
+         image_data = await file.read()
+         image = Image.open(io.BytesIO(image_data))
+
+     temp_path = "/tmp/temp_doc.jpg"
+     image.convert("RGB").save(temp_path)  # JPEG cannot store alpha; convert first
+
+     # --- Prompt ---
+     messages = [
+         {"role": "system", "content": "You are an AI that extracts text from images."},
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image_url", "image_url": {"url": f"file://{temp_path}"}},
+                 {"type": "text", "text": "Extract all text from this image. Output in Markdown format."}
+             ]
+         }
+     ]
+
+     response = llm.create_chat_completion(messages=messages, max_tokens=1500)
+     return {"filename": file.filename, "content": response["choices"][0]["message"]["content"]}