97 lines
3.2 KiB
Python
97 lines
3.2 KiB
Python
import os
|
|
import logging
|
|
from typing import Dict, Any, Optional
|
|
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
|
|
from fastapi.responses import JSONResponse
|
|
from magic_doc.docconv import DocConverter, S3Config
|
|
import tempfile
|
|
import shutil
|
|
|
|
# Route INFO-and-above records through the root logger's default handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ASGI application object that the route decorators in this module attach to.
app = FastAPI(title="MagicDoc API", version="1.0.0")

# Single DocConverter instance shared by all requests; no S3 config is supplied.
converter = DocConverter(s3_config=None)
|
|
|
|
@app.get("/health")
async def health_check():
    """Return a static payload indicating that the service is up."""
    payload = {"status": "healthy", "service": "magicdoc-api"}
    return payload
|
|
|
|
@app.post("/file_parse")
async def parse_file(
    files: UploadFile = File(...),
    output_dir: str = Form("./output"),
    lang_list: str = Form("ch"),
    backend: str = Form("pipeline"),
    parse_method: str = Form("auto"),
    formula_enable: bool = Form(True),
    table_enable: bool = Form(True),
    return_md: bool = Form(True),
    return_middle_json: bool = Form(False),
    return_model_output: bool = Form(False),
    return_content_list: bool = Form(False),
    return_images: bool = Form(False),
    start_page_id: int = Form(0),
    end_page_id: int = Form(99999)
):
    """
    Parse an uploaded document and convert it to markdown.

    The upload is spooled to a temporary file on disk, handed to the shared
    ``converter``, and the temporary file is removed afterwards whether or
    not the conversion succeeded.

    Args:
        files: The uploaded document (multipart form field ``files``).
        output_dir, lang_list, backend, parse_method, formula_enable,
        table_enable, return_md, return_middle_json, return_model_output,
        return_content_list, return_images, start_page_id, end_page_id:
            Accepted so that clients written against the Mineru API
            interface can call this endpoint unchanged. None of them is
            read by this implementation.

    Returns:
        JSONResponse whose body carries the markdown under the keys
        ``markdown``, ``md``, ``content`` and ``text``, together with
        ``time_cost``, ``filename`` and ``status``.

    Raises:
        HTTPException: With status 500 if saving the upload, converting it,
            or cleaning up the temporary file fails.
    """
    try:
        logger.info(f"Processing file: {files.filename}")

        # The upload may carry no filename; fall back to an empty string so
        # extracting the suffix cannot fail with a TypeError.
        suffix = os.path.splitext(files.filename or "")[1]

        temp_file_path = None
        try:
            # delete=False keeps the file on disk after the handle is closed
            # so the converter can open it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                # Record the path before copying so that a failed copy is
                # still cleaned up by the finally clause below.
                temp_file_path = temp_file.name
                shutil.copyfileobj(files.file, temp_file)

            # NOTE(review): this call is synchronous inside an async handler
            # and will hold the event loop for the duration of the
            # conversion; conv_timeout is presumably in seconds - confirm.
            markdown_content, time_cost = converter.convert(temp_file_path, conv_timeout=300)

            logger.info(f"Successfully converted {files.filename} to markdown in {time_cost:.2f}s")

            # Return response compatible with Mineru API
            response = {
                "markdown": markdown_content,
                "md": markdown_content,  # Alternative field name
                "content": markdown_content,  # Alternative field name
                "text": markdown_content,  # Alternative field name
                "time_cost": time_cost,
                "filename": files.filename,
                "status": "success"
            }

            return JSONResponse(content=response)

        finally:
            # Clean up the temporary file if it was created.
            if temp_file_path is not None and os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception(f"Error processing file {files.filename}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") from e
|
|
|
|
@app.get("/")
async def root():
    """Describe the service: name, version, summary and available endpoints."""
    endpoints = {
        "health": "/health",
        "file_parse": "/file_parse",
    }
    return {
        "service": "MagicDoc API",
        "version": "1.0.0",
        "description": "Document to Markdown conversion service using Magic-Doc",
        "endpoints": endpoints,
    }
|
|
|
|
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the module is run directly.
    from uvicorn import run as serve

    # Listen on all interfaces, port 8000.
    serve(app, host="0.0.0.0", port=8000)
|