# legal-doc-masker/src/document_handlers/processors/md_processor.py
#
# 40 lines
# 1.6 KiB
# Python

import os
from document_handlers.document_processor import DocumentProcessor
from services.ollama_client import OllamaClient
import logging
from config.settings import settings
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class MarkdownDocumentProcessor(DocumentProcessor):
    """Document processor that reads and writes markdown files as UTF-8 text.

    The instance stores an input path, an output path and an ``OllamaClient``
    configured from ``settings``. The client is created here but is not used
    by any method defined in this class.
    """

    def __init__(self, input_path: str, output_path: str):
        """Store the file paths and build the Ollama client.

        Args:
            input_path: Path of the markdown file to read.
            output_path: Path the processed content is written to.
        """
        super().__init__()  # Call parent class's __init__
        self.input_path = input_path
        self.output_path = output_path
        self.ollama_client = OllamaClient(
            model_name=settings.OLLAMA_MODEL,
            base_url=settings.OLLAMA_API_URL,
        )

    def read_content(self) -> str:
        """Read markdown content from file.

        Returns:
            The full text of ``self.input_path`` decoded as UTF-8.

        Raises:
            Exception: Any error raised while opening or reading the file is
                logged and then re-raised unchanged.
        """
        try:
            with open(self.input_path, 'r', encoding='utf-8') as file:
                content = file.read()
        except Exception as e:
            logger.error(f"Error reading markdown file {self.input_path}: {e}")
            raise
        else:
            logger.info(f"Successfully read markdown content from {self.input_path}")
            return content

    def save_content(self, content: str) -> None:
        """Save processed markdown content.

        Creates the parent directory of ``self.output_path`` when the path has
        one, then writes ``content`` to the file as UTF-8, overwriting any
        existing file.

        Args:
            content: Text to write.

        Raises:
            Exception: Any error raised while creating the directory or
                writing the file is logged and then re-raised unchanged.
        """
        try:
            # Ensure output directory exists. A bare filename has an empty
            # directory component, and os.makedirs("") raises
            # FileNotFoundError, so only create it when there is one.
            output_dir = os.path.dirname(self.output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(self.output_path, 'w', encoding='utf-8') as file:
                file.write(content)
        except Exception as e:
            logger.error(f"Error saving content to {self.output_path}: {e}")
            raise
        else:
            logger.info(f"Successfully saved masked content to {self.output_path}")