feat: 增加构建mapping的日志

This commit is contained in:
oliviamn 2025-07-14 22:24:43 +08:00
parent daf316bb92
commit 1ba4f3cc02
3 changed files with 18 additions and 7 deletions

View File

@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \
 # Copy requirements first to leverage Docker cache
 COPY requirements.txt .
-RUN pip install huggingface_hub
+# RUN pip install huggingface_hub
 # RUN wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models_hf.py
 # RUN wget https://raw.githubusercontent.com/opendatalab/MinerU/refs/heads/release-1.3.1/scripts/download_models_hf.py -O download_models_hf.py
@@ -20,7 +20,7 @@ RUN pip install huggingface_hub
 RUN pip install --no-cache-dir -r requirements.txt
-RUN pip install -U magic-pdf[full]
+# RUN pip install -U magic-pdf[full]
 # Copy the rest of the application

View File

@@ -90,9 +90,11 @@ class LLMResponseValidator:
 """
 try:
 validate(instance=response, schema=cls.ENTITY_EXTRACTION_SCHEMA)
+logger.debug(f"Entity extraction validation passed for response: {response}")
 return True
 except ValidationError as e:
-logger.warning(f"Entity extraction validation error: {e}")
+logger.warning(f"Entity extraction validation failed: {e}")
+logger.warning(f"Response that failed validation: {response}")
 return False
 @classmethod
@@ -108,9 +110,16 @@ class LLMResponseValidator:
 """
 try:
 validate(instance=response, schema=cls.ENTITY_LINKAGE_SCHEMA)
-return cls._validate_linkage_content(response)
+content_valid = cls._validate_linkage_content(response)
+if content_valid:
+logger.debug(f"Entity linkage validation passed for response: {response}")
+return True
+else:
+logger.warning(f"Entity linkage content validation failed for response: {response}")
+return False
 except ValidationError as e:
-logger.warning(f"Entity linkage validation error: {e}")
+logger.warning(f"Entity linkage validation failed: {e}")
+logger.warning(f"Response that failed validation: {response}")
 return False
 @classmethod
@@ -126,9 +135,11 @@ class LLMResponseValidator:
 """
 try:
 validate(instance=response, schema=cls.REGEX_ENTITY_SCHEMA)
+logger.debug(f"Regex entity validation passed for response: {response}")
 return True
 except ValidationError as e:
-logger.warning(f"Regex entity validation error: {e}")
+logger.warning(f"Regex entity validation failed: {e}")
+logger.warning(f"Response that failed validation: {response}")
 return False
 @classmethod

View File

@@ -28,5 +28,5 @@ requests==2.28.1
 python-docx>=0.8.11
 PyPDF2>=3.0.0
 pandas>=2.0.0
-magic-pdf[full]
+# magic-pdf[full]
 jsonschema>=4.20.0