"""HTTP API that converts uploaded documents to Markdown using Magic-Doc."""

import logging
import os
import shutil
import tempfile
from typing import Dict, Any, Optional

from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse
from magic_doc.docconv import DocConverter, S3Config

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="MagicDoc API", version="1.0.0")

# Global converter instance, shared by every request.
converter = DocConverter(s3_config=None)

# Timeout (seconds) passed to ``converter.convert`` as ``conv_timeout``.
CONVERSION_TIMEOUT_SECONDS = 300


@app.get("/health")
async def health_check():
    """Health check endpoint."""
    return {"status": "healthy", "service": "magicdoc-api"}


@app.post("/file_parse")
async def parse_file(
    files: UploadFile = File(...),
    output_dir: str = Form("./output"),
    lang_list: str = Form("ch"),
    backend: str = Form("pipeline"),
    parse_method: str = Form("auto"),
    formula_enable: bool = Form(True),
    table_enable: bool = Form(True),
    return_md: bool = Form(True),
    return_middle_json: bool = Form(False),
    return_model_output: bool = Form(False),
    return_content_list: bool = Form(False),
    return_images: bool = Form(False),
    start_page_id: int = Form(0),
    end_page_id: int = Form(99999),
):
    """Parse an uploaded document and convert it to markdown.

    Compatible with the Mineru API interface. Only ``files`` is used by this
    implementation; every other form field is accepted so that Mineru-style
    requests validate, but none of them is read by the conversion below.

    Args:
        files: The uploaded document. Its extension (if any) is kept as the
            suffix of the temporary file handed to the converter.

    Returns:
        A JSON response holding the markdown under the keys ``markdown``,
        ``md``, ``content`` and ``text``, plus ``time_cost``, ``filename``
        and ``status``.

    Raises:
        HTTPException: With status 500 if saving, converting or cleaning up
            the upload fails.
    """
    try:
        logger.info("Processing file: %s", files.filename)

        # The upload may carry no filename; fall back to an empty suffix
        # instead of letting os.path.splitext(None) raise TypeError.
        suffix = os.path.splitext(files.filename or "")[1]

        temp_file_path: Optional[str] = None
        try:
            # Save the uploaded content to a temporary file on disk. The path
            # is recorded *before* copying so that a failed copy is still
            # cleaned up by the ``finally`` below.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                temp_file_path = temp_file.name
                shutil.copyfileobj(files.file, temp_file)

            # Convert file to markdown using magic-doc.
            # NOTE(review): this is a synchronous call inside an async
            # endpoint, so the event loop is occupied while it runs. Consider
            # a threadpool once DocConverter's thread-safety is confirmed.
            markdown_content, time_cost = converter.convert(
                temp_file_path, conv_timeout=CONVERSION_TIMEOUT_SECONDS
            )

            logger.info(
                "Successfully converted %s to markdown in %.2fs",
                files.filename,
                time_cost,
            )

            # Return response compatible with Mineru API
            response = {
                "markdown": markdown_content,
                "md": markdown_content,  # Alternative field name
                "content": markdown_content,  # Alternative field name
                "text": markdown_content,  # Alternative field name
                "time_cost": time_cost,
                "filename": files.filename,
                "status": "success",
            }
            return JSONResponse(content=response)
        finally:
            # Clean up the temporary file on success and on failure alike.
            if temp_file_path is not None and os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception("Error processing file %s: %s", files.filename, e)
        raise HTTPException(
            status_code=500, detail=f"Error processing file: {str(e)}"
        ) from e


@app.get("/")
async def root():
    """Root endpoint with service information."""
    return {
        "service": "MagicDoc API",
        "version": "1.0.0",
        "description": "Document to Markdown conversion service using Magic-Doc",
        "endpoints": {
            "health": "/health",
            "file_parse": "/file_parse",
        },
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)