# legal-doc-masker/backend/validate_refactoring.py
#
# 214 lines
# 7.0 KiB
# Python

"""
Validation script for the refactored NerProcessor.
"""
import sys
import os
# Add this script's directory to the Python path (presumably so the `app`
# package imported by the checks below resolves -- confirm it lives next to
# this file). abspath() guards against a relative __file__, whose dirname can
# be '' and would then mean "whatever the working directory is at import time"
# rather than a fixed location.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def test_imports():
    """Check that every module touched by the refactoring can be imported.

    The targets are imported one after another and a "✓" line is printed for
    each success. The first failure prints a "✗" line carrying the underlying
    error and stops the check.

    Returns:
        bool: True when every target imported, False on the first failure.
    """
    import importlib  # local import: only this check needs it

    print("Testing imports...")

    handlers_pkg = "app.core.document_handlers"
    # (label used in the messages, module path below handlers_pkg,
    #  names that must be importable from that module)
    targets = (
        ("BaseMasker", "maskers.base_masker", ("BaseMasker",)),
        (
            "name maskers",
            "maskers.name_masker",
            ("ChineseNameMasker", "EnglishNameMasker"),
        ),
        ("IDMasker", "maskers.id_masker", ("IDMasker",)),
        ("CaseMasker", "maskers.case_masker", ("CaseMasker",)),
        ("CompanyMasker", "maskers.company_masker", ("CompanyMasker",)),
        ("AddressMasker", "maskers.address_masker", ("AddressMasker",)),
        ("MaskerFactory", "masker_factory", ("MaskerFactory",)),
        (
            "BusinessNameExtractor",
            "extractors.business_name_extractor",
            ("BusinessNameExtractor",),
        ),
        (
            "AddressExtractor",
            "extractors.address_extractor",
            ("AddressExtractor",),
        ),
        (
            "NerProcessorRefactored",
            "ner_processor_refactored",
            ("NerProcessorRefactored",),
        ),
    )

    for label, module_name, names in targets:
        module_path = f"{handlers_pkg}.{module_name}"
        try:
            module = importlib.import_module(module_path)
            for name in names:
                if not hasattr(module, name):
                    # Mirror the error `from module import name` would give.
                    raise ImportError(
                        f"cannot import name {name!r} from {module_path!r}"
                    )
        except Exception as e:
            print(f"✗ Failed to import {label}: {e}")
            return False
        # The success line starts with a capital ("Name maskers"), the failure
        # line does not ("name maskers"); the other labels are unaffected.
        print(f"✓ {label[:1].upper() + label[1:]} imported successfully")
    return True


# Script-style check: it reports through its return value, so keep pytest from
# collecting it as a test if this module is ever passed to pytest.
test_imports.__test__ = False
def test_masker_functionality():
    """Run one sample input through each basic masker and check the output.

    Each masker class is imported, instantiated without arguments and its
    ``mask()`` method is called on a fixed sample. Three maskers must return
    an exact string; CaseMasker only has to contain the expected fragment.
    The first failure (import error, exception, or wrong output) prints a "✗"
    line and stops the check.

    The comparisons raise AssertionError explicitly instead of using the
    ``assert`` statement, so they still run under ``python -O``.

    Returns:
        bool: True when every masker produced the expected output, False on
        the first failure.
    """
    import importlib  # local import: only this check needs it

    print("\nTesting masker functionality...")

    maskers_pkg = "app.core.document_handlers.maskers"
    # (class name, module below maskers_pkg, sample input, expected text,
    #  True = result must equal it / False = result must contain it)
    cases = (
        ("ChineseNameMasker", "name_masker", "李强", "李Q", True),
        ("EnglishNameMasker", "name_masker", "John Smith", "J*** S***", True),
        (
            "IDMasker",
            "id_masker",
            "310103198802080000",
            "310103XXXXXXXXXXXX",
            True,
        ),
        ("CaseMasker", "case_masker", "(2022)京 03 民终 3852 号", "***号", False),
    )

    for class_name, module_name, sample, expected, exact in cases:
        try:
            module = importlib.import_module(f"{maskers_pkg}.{module_name}")
            masker = getattr(module, class_name)()
            result = masker.mask(sample)
            if exact:
                if result != expected:
                    raise AssertionError(
                        f"Expected '{expected}', got '{result}'"
                    )
            elif expected not in result:
                raise AssertionError(
                    f"Expected '{expected}' in result, got '{result}'"
                )
        except Exception as e:
            print(f"✗ {class_name} test failed: {e}")
            return False
        print(f"✓ {class_name} works correctly")
    return True


# Script-style check: it reports through its return value, so keep pytest from
# collecting it as a test if this module is ever passed to pytest.
test_masker_functionality.__test__ = False
def test_factory():
    """Check that MaskerFactory builds a ChineseNameMasker for 'chinese_name'.

    The type check raises AssertionError explicitly instead of using the
    ``assert`` statement, so it still runs under ``python -O``.

    Returns:
        bool: True when the factory returned a ChineseNameMasker instance,
        False if the import, the factory call or the type check failed.
    """
    print("\nTesting masker factory...")
    try:
        from app.core.document_handlers.masker_factory import MaskerFactory
        from app.core.document_handlers.maskers.name_masker import ChineseNameMasker

        masker = MaskerFactory.create_masker('chinese_name')
        if not isinstance(masker, ChineseNameMasker):
            raise AssertionError(
                f"Expected ChineseNameMasker, got {type(masker)}"
            )
    except Exception as e:
        print(f"✗ MaskerFactory test failed: {e}")
        return False
    print("✓ MaskerFactory works correctly")
    return True


# Script-style check: it reports through its return value, so keep pytest from
# collecting it as a test if this module is ever passed to pytest.
test_factory.__test__ = False
def test_processor_initialization():
    """Try to build a NerProcessorRefactored and inspect its ``maskers``.

    The processor must be non-None, expose a ``maskers`` attribute and hold
    at least one masker. The checks raise AssertionError explicitly instead
    of using the ``assert`` statement, so they still run under ``python -O``.

    A failure is reported but deliberately does not fail the validation,
    because construction may fail when Ollama is not running.

    Returns:
        bool: Always True.
    """
    print("\nTesting processor initialization...")
    try:
        from app.core.document_handlers.ner_processor_refactored import NerProcessorRefactored

        processor = NerProcessorRefactored()
        if processor is None:
            raise AssertionError("Processor should not be None")
        if not hasattr(processor, 'maskers'):
            raise AssertionError("Processor should have maskers attribute")
        if len(processor.maskers) == 0:
            raise AssertionError("Processor should have at least one masker")
    except Exception as e:
        print(f"✗ NerProcessorRefactored initialization failed: {e}")
        # This might fail if Ollama is not running, which is expected
        print(" (This is expected if Ollama is not running)")
        return True  # Don't fail the validation for this
    print("✓ NerProcessorRefactored initializes correctly")
    return True


# Script-style check: it reports through its return value, so keep pytest from
# collecting it as a test if this module is ever passed to pytest.
test_processor_initialization.__test__ = False
def main():
    """Run all validation checks and print an overall verdict.

    The checks run in a fixed order (imports, masker functionality, factory,
    processor initialization) and every one of them runs even if an earlier
    one failed.

    Returns:
        bool: True when every check reported success, otherwise False.
    """
    rule = "=" * 50
    print("Validating refactored NerProcessor...")
    print(rule)

    checks = (
        test_imports,
        test_masker_functionality,
        test_factory,
        test_processor_initialization,
    )
    # A list, not a generator: every check must run before the results are
    # combined, so all() cannot short-circuit past a later check.
    outcomes = [check() for check in checks]
    success = all(outcomes)

    print("\n" + rule)
    if not success:
        print("✗ Some validation tests failed.")
        print("Please check the errors above.")
        return success
    print("✓ All validation tests passed!")
    print("The refactored code is working correctly.")
    return success
if __name__ == "__main__":
    # main() returns True on success; turn that into the process exit status
    # so shells and CI jobs can detect a failed validation.
    sys.exit(0 if main() else 1)