# Listing metadata: 214 lines, 7.0 KiB, Python
"""
Validation script for the refactored NerProcessor.
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
# Add the current directory to the Python path
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
|
|
def test_imports():
    """Check that every refactored module and its public classes import.

    Each target is imported independently, so one broken module does not
    hide problems in the modules listed after it: every failure is printed
    before the function returns.

    Returns:
        bool: True if every target imported, False if at least one failed.
    """
    import importlib

    print("Testing imports...")

    package = "app.core.document_handlers"
    # (module path relative to ``package``, names it must expose, report label)
    targets = (
        ("maskers.base_masker", ("BaseMasker",), "BaseMasker"),
        (
            "maskers.name_masker",
            ("ChineseNameMasker", "EnglishNameMasker"),
            "name maskers",
        ),
        ("maskers.id_masker", ("IDMasker",), "IDMasker"),
        ("maskers.case_masker", ("CaseMasker",), "CaseMasker"),
        ("maskers.company_masker", ("CompanyMasker",), "CompanyMasker"),
        ("maskers.address_masker", ("AddressMasker",), "AddressMasker"),
        ("masker_factory", ("MaskerFactory",), "MaskerFactory"),
        (
            "extractors.business_name_extractor",
            ("BusinessNameExtractor",),
            "BusinessNameExtractor",
        ),
        ("extractors.address_extractor", ("AddressExtractor",), "AddressExtractor"),
        (
            "ner_processor_refactored",
            ("NerProcessorRefactored",),
            "NerProcessorRefactored",
        ),
    )

    all_ok = True
    for relative_path, names, label in targets:
        module_path = f"{package}.{relative_path}"
        try:
            module = importlib.import_module(module_path)
            for name in names:
                # Mirror ``from module import name``: a missing name is an
                # import failure, not just a missing module.
                if not hasattr(module, name):
                    raise ImportError(
                        f"cannot import name {name!r} from {module_path!r}"
                    )
        except Exception as e:
            # Broad on purpose: importing a module runs arbitrary top-level
            # code, and any error there counts as a failed import.
            print(f"✗ Failed to import {label}: {e}")
            all_ok = False
        else:
            # Success lines start with a capital ("Name maskers"); class-name
            # labels already do, so only the first character is touched.
            print(f"✓ {label[:1].upper() + label[1:]} imported successfully")

    return all_ok
|
|
|
|
|
|
def test_masker_functionality():
    """Run one known input through each masker and compare the output.

    Every masker is checked even if an earlier one fails, and each failure
    is printed. The comparisons raise explicitly instead of using ``assert``
    so they still run when Python is started with ``-O``.

    Returns:
        bool: True if all maskers produced the expected output, else False.
    """
    import importlib

    print("\nTesting masker functionality...")

    package = "app.core.document_handlers.maskers"
    # (module, class name, input text, expected text, exact match required)
    cases = (
        ("name_masker", "ChineseNameMasker", "李强", "李Q", True),
        ("name_masker", "EnglishNameMasker", "John Smith", "J*** S***", True),
        (
            "id_masker",
            "IDMasker",
            "310103198802080000",
            "310103XXXXXXXXXXXX",
            True,
        ),
        # The case number is only checked for its masked tail, not in full.
        ("case_masker", "CaseMasker", "(2022)京 03 民终 3852 号", "***号", False),
    )

    all_ok = True
    for module_name, class_name, text, expected, exact in cases:
        try:
            module = importlib.import_module(f"{package}.{module_name}")
            masker = getattr(module, class_name)()
            result = masker.mask(text)
            if exact:
                if result != expected:
                    raise AssertionError(f"Expected '{expected}', got '{result}'")
            elif expected not in result:
                raise AssertionError(
                    f"Expected '{expected}' in result, got '{result}'"
                )
        except Exception as e:
            # Broad on purpose: import, construction and masking errors are
            # all reported the same way as a failed check.
            print(f"✗ {class_name} test failed: {e}")
            all_ok = False
        else:
            print(f"✓ {class_name} works correctly")

    return all_ok
|
|
|
|
|
|
def test_factory():
    """Check that MaskerFactory builds a ChineseNameMasker for 'chinese_name'.

    The type check raises explicitly rather than using ``assert`` so it is
    still enforced when Python runs with ``-O``.

    Returns:
        bool: True if the factory returned a ChineseNameMasker instance,
        False if the import, the factory call, or the type check failed.
    """
    print("\nTesting masker factory...")

    try:
        from app.core.document_handlers.masker_factory import MaskerFactory
        from app.core.document_handlers.maskers.name_masker import ChineseNameMasker

        masker = MaskerFactory.create_masker('chinese_name')
        if not isinstance(masker, ChineseNameMasker):
            raise AssertionError(f"Expected ChineseNameMasker, got {type(masker)}")
    except Exception as e:
        # Broad on purpose: any import or factory error is a failed check.
        print(f"✗ MaskerFactory test failed: {e}")
        return False

    print("✓ MaskerFactory works correctly")
    return True
|
|
|
|
|
|
def test_processor_initialization():
    """Check that NerProcessorRefactored can be built and exposes maskers.

    Import or construction errors are reported but tolerated (True is
    returned), because that step is expected to fail when Ollama is not
    running. Once an instance exists, its checks are enforced: a processor
    without a non-empty ``maskers`` attribute fails the validation instead
    of being reported as an Ollama problem.

    Returns:
        bool: False only when a processor was built but failed a check;
        True otherwise.
    """
    print("\nTesting processor initialization...")

    try:
        from app.core.document_handlers.ner_processor_refactored import NerProcessorRefactored

        processor = NerProcessorRefactored()
    except Exception as e:
        print(f"✗ NerProcessorRefactored initialization failed: {e}")
        # This might fail if Ollama is not running, which is expected
        print(" (This is expected if Ollama is not running)")
        return True  # Don't fail the validation for this

    # Kept out of the tolerant block above so a broken processor is not
    # mistaken for a missing Ollama service. Explicit raises (not ``assert``)
    # keep the checks active under ``python -O``.
    try:
        if processor is None:
            raise AssertionError("Processor should not be None")
        if not hasattr(processor, 'maskers'):
            raise AssertionError("Processor should have maskers attribute")
        if not len(processor.maskers) > 0:
            raise AssertionError("Processor should have at least one masker")
    except Exception as e:
        print(f"✗ NerProcessorRefactored initialization failed: {e}")
        return False

    print("✓ NerProcessorRefactored initializes correctly")
    return True
|
|
|
|
|
|
def main():
    """Run the four validation stages in order and print an overall verdict.

    Returns:
        bool: True when every stage returned a truthy value, else False.
    """
    divider = "=" * 50
    print("Validating refactored NerProcessor...")
    print(divider)

    stages = (
        test_imports,                   # Test imports
        test_masker_functionality,      # Test functionality
        test_factory,                   # Test factory
        test_processor_initialization,  # Test processor initialization
    )
    # Built as a list so every stage runs even after an earlier one failed.
    outcomes = [stage() for stage in stages]
    success = all(outcomes)

    print("\n" + divider)
    if not success:
        print("✗ Some validation tests failed.")
        print("Please check the errors above.")
    else:
        print("✓ All validation tests passed!")
        print("The refactored code is working correctly.")

    return success
|
|
|
|
|
|
if __name__ == "__main__":
    # Turn the validation result into the process exit status (0 = passed,
    # 1 = failed) so shells and CI jobs can detect a failed validation.
    sys.exit(0 if main() else 1)
|