legal-doc-masker/src/document_handlers/processors/txt_processor.py

27 lines
1.1 KiB
Python

from document_handlers.document_processor import DocumentProcessor
from services.ollama_client import OllamaClient
import logging
from prompts.masking_prompts import get_masking_prompt
from config.settings import settings
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class TxtDocumentProcessor(DocumentProcessor):
    """Document processor for UTF-8 plain-text files.

    Reads the text at ``input_path``, wraps it in the prompt produced by
    ``get_masking_prompt``, sends that prompt to an ``OllamaClient`` and
    writes the text it is given to ``output_path``.
    """

    def __init__(self, input_path: str, output_path: str) -> None:
        """Store the file paths and create the Ollama client.

        Args:
            input_path: Path of the text file to read.
            output_path: Path the processed text is written to.
        """
        self.input_path = input_path
        self.output_path = output_path
        # Model name and endpoint are taken from the project settings object.
        self.ollama_client = OllamaClient(
            model_name=settings.OLLAMA_MODEL,
            base_url=settings.OLLAMA_API_URL,
        )

    def read_content(self) -> str:
        """Return the entire contents of ``input_path`` decoded as UTF-8."""
        with open(self.input_path, 'r', encoding='utf-8') as file:
            return file.read()

    def process_content(self, content: str) -> str:
        """Run ``content`` through the masking prompt and return the reply.

        Args:
            content: Text to embed in the prompt built by
                ``get_masking_prompt``.

        Returns:
            Whatever ``self.ollama_client.generate`` returns for that prompt.
        """
        formatted_prompt = get_masking_prompt(content)
        response = self.ollama_client.generate(formatted_prompt)
        # Lazy %-style arguments: the (potentially large) response is only
        # formatted into the message when DEBUG logging is actually enabled.
        logger.debug("Processed content: %s", response)
        return response

    def save_content(self, content: str) -> None:
        """Write ``content`` to ``output_path`` as UTF-8.

        The file is opened in ``'w'`` mode, so an existing file at that path
        is overwritten.
        """
        with open(self.output_path, 'w', encoding='utf-8') as file:
            file.write(content)