ror-12 committed
Commit 44db53f · verified · Parent: 932a82f

Update app.py

Files changed (1)
app.py +53 -45
app.py CHANGED
@@ -1,45 +1,53 @@
- # Use Full Python 3.10 (Includes system tools)
- FROM python:3.10
-
- WORKDIR /app
-
- # 1. Install System Dependencies
- # 'wget': To download files directly (Bypasses huggingface-cli errors)
- # 'poppler-utils': For PDF conversion
- # 'libgl1': For Vision
- RUN apt-get update && apt-get install -y \
-     wget \
-     poppler-utils \
-     libgl1 \
-     libglib2.0-0 \
-     && rm -rf /var/lib/apt/lists/*
-
- # 2. Install Python Libraries (Pre-built Wheels)
- RUN pip install --no-cache-dir --prefer-binary \
-     "llama-cpp-python>=0.3.1" \
-     --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
-
- RUN pip install --no-cache-dir \
-     huggingface_hub \
-     pdf2image \
-     python-multipart \
-     uvicorn \
-     fastapi \
-     pillow
-
- # 3. Create Model Directory
- RUN mkdir -p /app/model
-
- # 4. Download Model (Direct WGET - No Cache Errors)
- # Model: Llava 1.6 Mistral 7B (Q4_K_M) - High Accuracy
- RUN wget -O /app/model/model.gguf https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q4_K_M.gguf
-
- # 5. Download Projector (Direct WGET)
- # This is the vision adapter for Llava 1.6
- RUN wget -O /app/model/mmproj.gguf https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf
-
- # 6. Copy App Code
- COPY app.py .
-
- # 7. Run Server
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ from fastapi import FastAPI, UploadFile, File
+ from llama_cpp import Llama
+ from llama_cpp.llama_chat_format import Llava15ChatHandler
+ from pdf2image import convert_from_bytes
+ import io
+ from PIL import Image
+
+ app = FastAPI()
+
+ print("⏳ Loading Llava 1.6 Model...")
+
+ # 1. Initialize Vision Handler
+ # The Dockerfile saved the vision projector here:
+ chat_handler = Llava15ChatHandler(clip_model_path="/app/model/mmproj.gguf")
+
+ # 2. Initialize Model
+ llm = Llama(
+     model_path="/app/model/model.gguf",
+     chat_handler=chat_handler,
+     n_ctx=2048,
+     n_gpu_layers=0,  # Force CPU
+     verbose=True
+ )
+ print("✅ Model Loaded Successfully!")
+
+ @app.post("/extract")
+ async def extract_text(file: UploadFile = File(...)):
+     # --- Image Processing ---
+     if file.filename.lower().endswith('.pdf'):
+         pdf_bytes = await file.read()
+         images = convert_from_bytes(pdf_bytes)
+         image = images[0]  # Only the first page is processed
+     else:
+         image_data = await file.read()
+         image = Image.open(io.BytesIO(image_data))
+
+     temp_path = "/tmp/temp_doc.jpg"
+     image.convert("RGB").save(temp_path)  # JPEG cannot store alpha; convert first
+
+     # --- Prompt ---
+     messages = [
+         {"role": "system", "content": "You are an AI that extracts text from images."},
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image_url", "image_url": {"url": f"file://{temp_path}"}},
+                 {"type": "text", "text": "Extract all text from this image. Output in Markdown format."}
+             ]
+         }
+     ]
+
+     response = llm.create_chat_completion(messages=messages, max_tokens=1500)
+     return {"filename": file.filename, "content": response["choices"][0]["message"]["content"]}