106 lines
3.5 KiB
Python
106 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script for Mineru API endpoints
|
|
"""
|
|
|
|
import requests
|
|
import json
|
|
from pathlib import Path
|
|
|
|
# API base URL
|
|
# Shared prefix for every request in this script; the tests below append
# "/health", "/parse" and "/download/<path>" to it.
BASE_URL = "http://localhost:8000/api/v1/mineru"
|
|
|
|
def test_health_check(timeout: float = 10.0) -> None:
    """Call the health endpoint and print its status code and response body.

    Args:
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block indefinitely when the server accepts the
            connection but never answers.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection refused, timeout, ...). Left uncaught on purpose so
            an unreachable server is reported as a failure.
    """
    print("Testing health check...")
    response = requests.get(f"{BASE_URL}/health", timeout=timeout)
    print(f"Status: {response.status_code}")

    try:
        body = response.json()
    except ValueError:
        # The body is not JSON (for example an HTML error page); show the raw
        # text instead of aborting with a decoding traceback.
        body = response.text
    print(f"Response: {body}")
    print()
|
|
|
|
def test_parse_document(file_path: str, timeout: float = 600.0) -> None:
    """Upload a document to the parse endpoint and print the outcome.

    On HTTP 200 the JSON reply's ``file_name``, ``output_directory`` and each
    entry of ``outputs`` are printed; for any other status the raw response
    text is printed.

    Args:
        file_path: Path of the local file to upload. If it does not point at
            a regular file an error line is printed and nothing is sent.
        timeout: Seconds to wait for the server. The default is generous
            because the request only returns once parsing has finished;
            without any timeout the call could block forever.

    Raises:
        requests.RequestException: If the request cannot be completed.
    """
    print(f"Testing document parsing with file: {file_path}")

    source = Path(file_path)
    # is_file() also rejects directories, which exists() would accept only
    # for the subsequent open() to fail.
    if not source.is_file():
        print(f"Error: File {file_path} not found")
        return

    # Options sent as query-string parameters alongside the upload.
    params = {
        'lang': 'ch',
        'backend': 'pipeline',
        'method': 'auto',
        'formula_enable': True,
        'table_enable': True,
        'draw_layout_bbox': True,
        'draw_span_bbox': True,
        'dump_md': True,
        'dump_middle_json': True,
        'dump_model_output': True,
        'dump_orig_pdf': True,
        'dump_content_list': True,
        'make_md_mode': 'MM_MD',
    }

    # The request must run inside the ``with`` block: requests reads from the
    # handle while building the multipart body, so it has to stay open.
    with source.open('rb') as f:
        # NOTE: the part is always declared as application/pdf, whatever the
        # file actually contains.
        files = {'file': (source.name, f, 'application/pdf')}
        response = requests.post(
            f"{BASE_URL}/parse",
            files=files,
            params=params,
            timeout=timeout,
        )

    print(f"Status: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print("Parse successful!")
        print(f"File name: {result['file_name']}")
        print(f"Output directory: {result['output_directory']}")
        print("Generated outputs:")
        for output_type, output_path in result['outputs'].items():
            print(f" - {output_type}: {output_path}")
    else:
        print(f"Error: {response.text}")
    print()
|
|
|
|
def test_download_file(file_path: str, timeout: float = 60.0) -> None:
    """Fetch a file from the download endpoint and save it locally.

    On HTTP 200 the body is written to ``downloaded_<name>`` in the current
    working directory, where ``<name>`` is the last component of
    ``file_path``; for any other status the raw response text is printed.

    Args:
        file_path: Server-side path of the file, given as plain
            (not percent-encoded) text.
        timeout: Seconds to wait for the server; without one the call could
            block indefinitely.

    Raises:
        requests.RequestException: If the request cannot be completed.
    """
    from urllib.parse import quote

    print(f"Testing file download: {file_path}")

    # Percent-encode the path so characters such as '#', '?' or spaces stay
    # part of the URL path instead of being read as a fragment or query.
    # quote() leaves '/' untouched by default, so directory separators survive.
    url = f"{BASE_URL}/download/{quote(file_path)}"
    response = requests.get(url, timeout=timeout)
    print(f"Status: {response.status_code}")

    if response.status_code == 200:
        # Save the payload next to the script under a prefixed name.
        output_filename = f"downloaded_{Path(file_path).name}"
        with open(output_filename, 'wb') as f:
            f.write(response.content)
        print(f"File downloaded successfully as: {output_filename}")
    else:
        print(f"Error: {response.text}")
    print()
|
|
|
|
if __name__ == "__main__":
    # Banner
    separator = "=" * 50
    print("Mineru API Test Script")
    print(separator)

    # The health probe is the only check that runs without extra setup.
    test_health_check()

    # Document parsing needs a local PDF: enable the call below and point it
    # at your own file.
    # test_parse_document("path/to/your/document.pdf")

    # Downloading needs a path reported by an earlier parse run.
    # test_download_file("some_uuid/document_name.md")

    print("Test completed!")

    # Follow-up instructions, printed one per line in this order.
    next_steps = (
        "\nTo test document parsing:",
        "1. Uncomment the test_parse_document line above",
        "2. Provide a valid PDF file path",
        "3. Run the script again",
        "\nTo test file download:",
        "1. First run a parse operation to get file paths",
        "2. Use the output paths from the parse result",
        "3. Uncomment and modify the test_download_file line",
    )
    for step in next_steps:
        print(step)