#!/usr/bin/env python3
"""Test file for address masking functionality.

Exercises ``NerProcessor`` address masking, address component extraction
(LLM-backed and regex fallback), and ``LLMResponseValidator`` address
validation. The module can be collected by pytest or run directly as a
script, in which case each test is invoked in turn and prints its results.
"""

import pytest
import sys
import os

# Add the backend directory to the Python path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.core.document_handlers.ner_processor import NerProcessor


def test_address_masking():
    """Print masked addresses next to the expected output for manual review.

    No assertion is made on the masked value: the exact results may vary due
    to LLM extraction, so they are only printed for verification.
    """
    processor = NerProcessor()

    # Test cases based on the requirements: (original address, expected mask).
    test_cases = [
        ("上海市静安区恒丰路66号白云大厦1607室", "上海市静安区HF路**号BY大厦****室"),
        ("北京市朝阳区建国路88号SOHO现代城A座1001室", "北京市朝阳区JG路**号SOHO现代城A座****室"),
        ("广州市天河区珠江新城花城大道123号富力中心B座2001室", "广州市天河区珠江新城HC大道**号FL中心B座****室"),
        ("深圳市南山区科技园南区深南大道9988号腾讯大厦T1栋15楼", "深圳市南山区科技园南区SN大道**号TX大厦T1栋**楼"),
    ]

    for original_address, expected_masked in test_cases:
        masked = processor._mask_address(original_address)
        print(f"Original: {original_address}")
        print(f"Masked: {masked}")
        print(f"Expected: {expected_masked}")
        print("-" * 50)
        # Note: The exact results may vary due to LLM extraction, so we'll
        # just print for verification.


def test_address_component_extraction():
    """Print extracted address components next to the expected components.

    No assertion is made on the extracted value: the exact results may vary
    due to LLM extraction, so they are only printed for verification.
    """
    processor = NerProcessor()

    # Each case pairs an address with the components expected from it.
    test_cases = [
        (
            "上海市静安区恒丰路66号白云大厦1607室",
            {
                "road_name": "恒丰路",
                "house_number": "66",
                "building_name": "白云大厦",
                "community_name": "",
            },
        ),
        (
            "北京市朝阳区建国路88号SOHO现代城A座1001室",
            {
                "road_name": "建国路",
                "house_number": "88",
                "building_name": "SOHO现代城",
                "community_name": "",
            },
        ),
    ]

    for address, expected_components in test_cases:
        components = processor._extract_address_components(address)
        print(f"Address: {address}")
        print(f"Extracted components: {components}")
        print(f"Expected: {expected_components}")
        print("-" * 50)
        # Note: The exact results may vary due to LLM extraction, so we'll
        # just print for verification.


def test_regex_fallback():
    """Check that the regex fallback returns every expected component key."""
    processor = NerProcessor()

    # Test regex extraction (fallback method)
    test_address = "上海市静安区恒丰路66号白云大厦1607室"
    components = processor._extract_address_components_with_regex(test_address)

    print(f"Address: {test_address}")
    print(f"Regex extracted components: {components}")

    # Basic validation: only the presence of each key is checked, not its value.
    required_keys = (
        "road_name",
        "house_number",
        "building_name",
        "community_name",
        "confidence",
    )
    for key in required_keys:
        assert key in components, f"missing key in regex result: {key}"


def test_json_validation_for_address():
    """Check the validator accepts a complete response and rejects bad ones."""
    from app.core.utils.llm_validator import LLMResponseValidator

    # Test valid JSON response
    valid_response = {
        "road_name": "恒丰路",
        "house_number": "66",
        "building_name": "白云大厦",
        "community_name": "",
        "confidence": 0.9,
    }
    assert LLMResponseValidator.validate_address_extraction(valid_response)

    # Test invalid JSON response (missing required field "community_name")
    invalid_response = {
        "road_name": "恒丰路",
        "house_number": "66",
        "building_name": "白云大厦",
        "confidence": 0.9,
    }
    assert not LLMResponseValidator.validate_address_extraction(invalid_response)

    # Test invalid JSON response (wrong type: "road_name" is an int)
    invalid_response2 = {
        "road_name": 123,
        "house_number": "66",
        "building_name": "白云大厦",
        "community_name": "",
        "confidence": 0.9,
    }
    assert not LLMResponseValidator.validate_address_extraction(invalid_response2)


if __name__ == "__main__":
    print("Testing Address Masking Functionality")
    print("=" * 50)

    # Deterministic checks run first, then the print-only LLM-backed ones.
    test_regex_fallback()
    print()
    test_json_validation_for_address()
    print()
    test_address_component_extraction()
    print()
    test_address_masking()