130 lines
4.4 KiB
Python
130 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
Test file for address masking functionality
"""
|
|
|
|
import pytest
|
|
import sys
|
|
import os
|
|
|
|
# Add the backend directory to the Python path for imports
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from app.core.document_handlers.ner_processor import NerProcessor
|
|
|
|
|
|
def test_address_masking():
    """Print masked output beside the expected mask for sample addresses.

    Every sample address is passed to ``NerProcessor._mask_address`` and the
    original, actual and expected strings are printed for manual comparison.
    Nothing is asserted here: the exact results may vary due to LLM
    extraction, so the output is meant to be inspected by eye.
    """
    ner = NerProcessor()
    divider = "-" * 50

    # Pairs of (input address, mask expected by the requirements).
    samples = (
        ("上海市静安区恒丰路66号白云大厦1607室", "上海市静安区HF路**号BY大厦****室"),
        ("北京市朝阳区建国路88号SOHO现代城A座1001室", "北京市朝阳区JG路**号SOHO现代城A座****室"),
        ("广州市天河区珠江新城花城大道123号富力中心B座2001室", "广州市天河区珠江新城HC大道**号FL中心B座****室"),
        ("深圳市南山区科技园南区深南大道9988号腾讯大厦T1栋15楼", "深圳市南山区科技园南区SN大道**号TX大厦T1栋**楼"),
    )

    for source_text, wanted in samples:
        actual = ner._mask_address(source_text)
        # Verification is visual only; see the docstring for why.
        print(f"Original: {source_text}")
        print(f"Masked: {actual}")
        print(f"Expected: {wanted}")
        print(divider)
|
|
|
|
|
|
def test_address_component_extraction():
    """Print extracted address components beside the expected ones.

    Every sample address is passed to
    ``NerProcessor._extract_address_components`` and the result is printed
    together with the expected component dict. Nothing is asserted: the exact
    results may vary due to LLM extraction, so the output is for manual
    verification.
    """
    ner = NerProcessor()
    divider = "-" * 50

    # Keys of the expected dict, in the order they are printed.
    fields = ("road_name", "house_number", "building_name", "community_name")

    # Pairs of (input address, expected values aligned with ``fields``).
    samples = (
        ("上海市静安区恒丰路66号白云大厦1607室", ("恒丰路", "66", "白云大厦", "")),
        ("北京市朝阳区建国路88号SOHO现代城A座1001室", ("建国路", "88", "SOHO现代城", "")),
    )

    for text, values in samples:
        wanted = dict(zip(fields, values))
        found = ner._extract_address_components(text)
        print(f"Address: {text}")
        print(f"Extracted components: {found}")
        print(f"Expected: {wanted}")
        print(divider)
|
|
|
|
|
|
def test_regex_fallback():
    """Check that the regex fallback extractor returns every expected key.

    Runs ``NerProcessor._extract_address_components_with_regex`` on one sample
    address, prints the result, and asserts that each component key plus
    ``confidence`` is present. Only key presence is validated, not the values.
    """
    sample = "上海市静安区恒丰路66号白云大厦1607室"
    found = NerProcessor()._extract_address_components_with_regex(sample)

    print(f"Address: {sample}")
    print(f"Regex extracted components: {found}")

    # Basic validation: each key must exist in the result.
    required_keys = (
        "road_name",
        "house_number",
        "building_name",
        "community_name",
        "confidence",
    )
    for key in required_keys:
        assert key in found
|
|
|
|
|
|
def test_json_validation_for_address():
    """Check ``LLMResponseValidator.validate_address_extraction`` on three payloads.

    Cases covered:
      * a complete payload, which must be accepted;
      * a payload without ``community_name``, which must be rejected;
      * a payload whose ``road_name`` is an int, which must be rejected.

    The assertions rely on truthiness rather than ``== True`` / ``== False``
    (PEP 8: do not compare boolean values with ``==``).
    """
    # Imported inside the test, as in the original, so importing this module
    # does not require the validator module.
    from app.core.utils.llm_validator import LLMResponseValidator

    validate = LLMResponseValidator.validate_address_extraction

    # Valid response: every field present.
    valid_response = {
        "road_name": "恒丰路",
        "house_number": "66",
        "building_name": "白云大厦",
        "community_name": "",
        "confidence": 0.9,
    }
    assert validate(valid_response)

    # Invalid response: "community_name" is missing.
    invalid_response = {
        "road_name": "恒丰路",
        "house_number": "66",
        "building_name": "白云大厦",
        "confidence": 0.9,
    }
    assert not validate(invalid_response)

    # Invalid response: "road_name" has the wrong type (int, not str).
    invalid_response2 = {
        "road_name": 123,
        "house_number": "66",
        "building_name": "白云大厦",
        "community_name": "",
        "confidence": 0.9,
    }
    assert not validate(invalid_response2)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run every check in a fixed order, printing a blank
    # line between consecutive checks so their output stays readable.
    print("Testing Address Masking Functionality")
    print("=" * 50)

    checks = (
        test_regex_fallback,
        test_json_validation_for_address,
        test_address_component_extraction,
        test_address_masking,
    )
    for position, check in enumerate(checks):
        if position:
            # Separator goes between checks only, not before the first one.
            print()
        check()
|