# Source: legal-doc-masker/mineru/test_mineru_api.py
# (repository viewer metadata: 106 lines, 3.5 KiB, Python)

#!/usr/bin/env python3
"""
Test script for Mineru API endpoints
"""
import requests
import json
from pathlib import Path
# Base URL of the Mineru API. The test functions in this script append their
# endpoint path (/health, /parse, /download/<path>) to this prefix.
BASE_URL = "http://localhost:8000/api/v1/mineru"
def test_health_check(timeout: float = 10.0) -> None:
    """Call the health check endpoint and print the outcome.

    Sends a GET request to ``{BASE_URL}/health`` and prints the HTTP status
    code followed by the response body.

    Args:
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests`` waits indefinitely, so a hung server would block
            the whole script.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection refused, timeout, ...).
    """
    print("Testing health check...")
    response = requests.get(f"{BASE_URL}/health", timeout=timeout)
    print(f"Status: {response.status_code}")
    try:
        body = response.json()
    except ValueError:
        # The body is not valid JSON (for example an HTML error page);
        # show the raw text instead of crashing the script.
        body = response.text
    print(f"Response: {body}")
    print()
def test_parse_document(file_path: str, timeout: float = 600.0) -> None:
    """Upload a document to the parse endpoint and print the result.

    POSTs the file as multipart form data to ``{BASE_URL}/parse`` together
    with a fixed set of query parameters. On HTTP 200 the reported file name,
    output directory and generated outputs are printed; for any other status
    the raw response text is printed.

    Args:
        file_path: Path of the local file to upload. It is always sent with
            the ``application/pdf`` content type.
        timeout: Seconds to wait for the server before giving up. The
            default is deliberately generous; raise it if parsing a large
            document takes longer.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection refused, timeout, ...).
    """
    print(f"Testing document parsing with file: {file_path}")

    source = Path(file_path)
    # is_file() rather than exists(): a directory would pass exists() and
    # then make open() fail below.
    if not source.is_file():
        print(f"Error: File {file_path} not found")
        return

    # Query parameters forwarded to the parse endpoint.
    params = {
        'lang': 'ch',
        'backend': 'pipeline',
        'method': 'auto',
        'formula_enable': True,
        'table_enable': True,
        'draw_layout_bbox': True,
        'draw_span_bbox': True,
        'dump_md': True,
        'dump_middle_json': True,
        'dump_model_output': True,
        'dump_orig_pdf': True,
        'dump_content_list': True,
        'make_md_mode': 'MM_MD',
    }

    # The request must be sent while the file handle is still open, because
    # requests reads the upload body from it.
    with source.open('rb') as f:
        files = {'file': (source.name, f, 'application/pdf')}
        response = requests.post(
            f"{BASE_URL}/parse",
            files=files,
            params=params,
            timeout=timeout,
        )

    print(f"Status: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print("Parse successful!")
        print(f"File name: {result['file_name']}")
        print(f"Output directory: {result['output_directory']}")
        print("Generated outputs:")
        for output_type, output_path in result['outputs'].items():
            print(f" - {output_type}: {output_path}")
    else:
        print(f"Error: {response.text}")
    print()
def test_download_file(file_path: str, timeout: float = 60.0) -> None:
    """Download a file from the download endpoint and save it locally.

    Sends a GET request to ``{BASE_URL}/download/<file_path>``. On HTTP 200
    the body is written to ``downloaded_<basename>`` in the current working
    directory; for any other status the raw response text is printed.

    Args:
        file_path: Server-side path of the file, relative to the download
            endpoint (not percent-encoded; encoding is applied here).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection refused, timeout, ...).
    """
    from urllib.parse import quote

    print(f"Testing file download: {file_path}")

    # Percent-encode the path so characters such as '?' or '#' in a file name
    # are not interpreted as the start of a query string or fragment.
    # quote() leaves '/' untouched, so directory separators survive.
    url = f"{BASE_URL}/download/{quote(file_path)}"
    response = requests.get(url, timeout=timeout)
    print(f"Status: {response.status_code}")

    if response.status_code == 200:
        # Save the downloaded file under its base name in the working directory.
        output_filename = f"downloaded_{Path(file_path).name}"
        with open(output_filename, 'wb') as f:
            f.write(response.content)
        print(f"File downloaded successfully as: {output_filename}")
    else:
        print(f"Error: {response.text}")
    print()
if __name__ == "__main__":
    # Banner
    print("Mineru API Test Script")
    print("=" * 50)

    # The health check needs no input, so it is the only test run by default.
    test_health_check()

    # Document parsing requires a PDF supplied by the user.
    # Uncomment the call below and point it at your own file:
    # test_parse_document("path/to/your/document.pdf")

    # File download requires a path returned by an earlier parse run:
    # test_download_file("some_uuid/document_name.md")

    print("Test completed!")

    # Follow-up instructions, emitted one per line.
    next_steps = (
        "\nTo test document parsing:",
        "1. Uncomment the test_parse_document line above",
        "2. Provide a valid PDF file path",
        "3. Run the script again",
        "\nTo test file download:",
        "1. First run a parse operation to get file paths",
        "2. Use the output paths from the parse result",
        "3. Uncomment and modify the test_download_file line",
    )
    print(*next_steps, sep="\n")