# legal-doc-masker/backend/app/core/services/ollama_client.py

import logging
from typing import Any, Dict, Optional

import requests

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class OllamaClient:
    """Small HTTP client for an Ollama server.

    Wraps the ``/api/generate`` and ``/api/show`` endpoints. Every request is
    a JSON ``POST`` sent through ``requests`` to ``base_url`` for the model
    name given at construction time.
    """

    # Markers that delimit a model's "thinking" preamble in a response.
    _THINK_OPEN = "<think>"
    _THINK_CLOSE = "</think>"

    def __init__(
        self,
        model_name: str,
        base_url: str = "http://localhost:11434",
        timeout: Optional[float] = None,
    ):
        """Initialize Ollama client.

        Args:
            model_name (str): Name of the Ollama model to use
            base_url (str): Root URL of the Ollama server. Trailing slashes
                are removed so endpoint paths can be appended safely.
            timeout (Optional[float]): Value passed as ``timeout`` to every
                ``requests.post`` call. ``None`` (the default) means no
                timeout is applied, so a request may wait indefinitely.
        """
        self.model_name = model_name
        # Avoid "//api/..." URLs when the caller passes "http://host:port/".
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.headers = {"Content-Type": "application/json"}

    @staticmethod
    def _strip_think(text: str) -> str:
        """Return ``text`` without its leading ``<think>...</think>`` section.

        Everything up to and including the first ``</think>`` is discarded
        when the text contains ``<think>``; the remainder is kept in full,
        even if it contains further ``</think>`` markers. If there is no
        opening tag, or no closing tag, the whole text is kept. The result
        is always whitespace-stripped.
        """
        if OllamaClient._THINK_OPEN in text:
            _, closing_tag, answer = text.partition(OllamaClient._THINK_CLOSE)
            # An empty separator means "</think>" was never found.
            if closing_tag:
                text = answer
        return text.strip()

    def _post_json(self, url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``payload`` as JSON to ``url`` and return the decoded JSON body.

        Raises whatever ``requests`` raises, including the ``HTTPError``
        produced by ``raise_for_status`` for 4xx/5xx responses.
        """
        response = requests.post(
            url, json=payload, headers=self.headers, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()

    def generate(self, prompt: str, strip_think: bool = True) -> str:
        """Send a prompt to the model and return the generated text.

        The request is made with ``"stream": False``, so the reply arrives as
        a single JSON object whose ``"response"`` field holds the text.

        Args:
            prompt (str): The prompt text to send to the model
            strip_think (bool): When True, drop a leading
                ``<think>...</think>`` section and strip surrounding
                whitespace. When False, return the response text unmodified.

        Returns:
            str: Response text from the model ("" if the reply has no
                ``"response"`` value)

        Raises:
            requests.exceptions.RequestException: If the API call fails
        """
        url = f"{self.base_url}/api/generate"
        payload = {
            "model": self.model_name,
            "prompt": prompt,
            "stream": False,
        }

        try:
            logger.debug("Sending request to Ollama API: %s", url)
            result = self._post_json(url, payload)
        except requests.exceptions.RequestException as e:
            logger.error("Error calling Ollama API: %s", e)
            raise

        # NOTE: the full response payload is written to the log at DEBUG level.
        logger.debug("Received response from Ollama API: %s", result)

        # "or" also covers a present-but-null "response" value.
        text = result.get("response") or ""
        if strip_think:
            return self._strip_think(text)
        return text

    def get_model_info(self) -> Dict[str, Any]:
        """Get information about the current model.

        Posts ``{"name": <model_name>}`` to ``/api/show``.

        Returns:
            Dict[str, Any]: Decoded JSON body of the server's reply

        Raises:
            requests.exceptions.RequestException: If the API call fails
        """
        url = f"{self.base_url}/api/show"
        payload = {"name": self.model_name}

        try:
            return self._post_json(url, payload)
        except requests.exceptions.RequestException as e:
            logger.error("Error getting model info: %s", e)
            raise