#!/usr/bin/env python3
"""
Test file for ID and social credit code masking functionality
"""

import os
import sys

import pytest

# Add the backend directory to the Python path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Must come after the sys.path tweak above, hence not at the very top.
from app.core.document_handlers.ner_processor import NerProcessor  # noqa: E402

# Entity type labels handed to the processor; these are runtime data and are
# kept verbatim.
_ID_NUMBER_TYPE = '身份证号'
_SOCIAL_CREDIT_CODE_TYPE = '社会信用代码'


def _mask_entity(processor, text, entity_type, default=None):
    """Run a single entity through the masking pipeline and return its mask.

    Args:
        processor: The ``NerProcessor`` instance under test.
        text: The entity text to mask.
        entity_type: The entity type label stored under the ``'type'`` key.
        default: Value returned when the mapping has no entry for ``text``.
            When ``None`` (the default), ``text`` itself is returned.

    Returns:
        The value stored for ``text`` in the mapping produced by
        ``processor._generate_masked_mapping``, or the fallback.
    """
    mapping = processor._generate_masked_mapping(
        [{'text': text, 'type': entity_type}],
        {'entity_groups': []},
    )
    return mapping.get(text, text if default is None else default)


def test_id_number_masking():
    """Test ID number masking with the new rules.

    This test only prints the comparison; it does not assert, so it cannot
    fail on a mismatch.
    """
    processor = NerProcessor()

    # Test cases based on the requirements
    test_cases = [
        ("310103198802080000", "310103XXXXXXXXXXXX"),
        ("110101199001011234", "110101XXXXXXXXXXXX"),
        ("440301199505151234", "440301XXXXXXXXXXXX"),
        ("320102198712345678", "320102XXXXXXXXXXXX"),
        ("12345", "12345"),  # Edge case: too short
    ]

    for original_id, expected_masked in test_cases:
        # Test the masking through the full pipeline
        masked = _mask_entity(processor, original_id, _ID_NUMBER_TYPE)

        print(f"Original ID: {original_id}")
        print(f"Masked ID: {masked}")
        print(f"Expected: {expected_masked}")
        print(f"Match: {masked == expected_masked}")
        print("-" * 50)


def test_social_credit_code_masking():
    """Test social credit code masking with the new rules.

    This test only prints the comparison; it does not assert.

    NOTE(review): the expected values below are not mutually consistent --
    the first keeps a 6-character prefix while the next three keep 7.
    Confirm the intended rule before turning the printed "Match" into an
    assertion.
    """
    processor = NerProcessor()

    # Test cases based on the requirements
    test_cases = [
        ("9133021276453538XT", "913302XXXXXXXXXXXX"),
        ("91110000100000000X", "9111000XXXXXXXXXXX"),
        ("914403001922038216", "9144030XXXXXXXXXXX"),
        ("91310000132209458G", "9131000XXXXXXXXXXX"),
        ("123456", "123456"),  # Edge case: too short
    ]

    for original_code, expected_masked in test_cases:
        # Test the masking through the full pipeline
        masked = _mask_entity(processor, original_code, _SOCIAL_CREDIT_CODE_TYPE)

        print(f"Original Code: {original_code}")
        print(f"Masked Code: {masked}")
        print(f"Expected: {expected_masked}")
        print(f"Match: {masked == expected_masked}")
        print("-" * 50)


def test_edge_cases():
    """Test edge cases for ID and social credit code masking.

    Each input is masked once as an ID number and once as a social credit
    code, and both results are printed. The second element of each pair is
    not compared or printed.
    """
    processor = NerProcessor()

    # Test edge cases
    edge_cases = [
        ("", ""),  # Empty string
        ("123", "123"),  # Too short for ID
        ("123456", "123456"),  # Too short for social credit code
        ("123456789012345678901234567890", "123456XXXXXXXXXXXXXXXXXX"),  # Very long ID
    ]

    for original, _expected in edge_cases:
        # Test ID number
        masked_id = _mask_entity(processor, original, _ID_NUMBER_TYPE)

        # Test social credit code
        masked_code = _mask_entity(processor, original, _SOCIAL_CREDIT_CODE_TYPE)

        print(f"Original: {original}")
        print(f"ID Masked: {masked_id}")
        print(f"Code Masked: {masked_code}")
        print("-" * 30)


def test_mixed_entities():
    """Test masking with mixed entity types.

    All entities are passed to the processor in one call and the resulting
    mapping is printed per entity; nothing is asserted.
    """
    processor = NerProcessor()

    # Create mixed entities
    entities = [
        {'text': '310103198802080000', 'type': '身份证号'},
        {'text': '9133021276453538XT', 'type': '社会信用代码'},
        {'text': '李强', 'type': '人名'},
        {'text': '上海盒马网络科技有限公司', 'type': '公司名称'},
    ]

    linkage = {'entity_groups': []}

    # Test the masking through the full pipeline
    mapping = processor._generate_masked_mapping(entities, linkage)

    print("Mixed Entities Test:")
    print("=" * 30)

    for entity in entities:
        original = entity['text']
        entity_type = entity['type']
        # Fall back to the original text when the mapping has no entry.
        masked = mapping.get(original, original)

        print(f"{entity_type}: {original} -> {masked}")


def test_id_masking():
    """Test ID number and social credit code masking.

    Unlike the other tests in this file, this one asserts on the results.
    A missing mapping entry yields ``''`` here, so the assertions fail
    rather than silently comparing against the unmasked input.
    """
    processor = NerProcessor()

    # Test ID number masking
    masked_id = _mask_entity(
        processor, '310103198802080000', _ID_NUMBER_TYPE, default=''
    )

    # Test social credit code masking
    masked_code = _mask_entity(
        processor, '9133021276453538XT', _SOCIAL_CREDIT_CODE_TYPE, default=''
    )

    # Verify the masking rules
    assert masked_id.startswith('310103')  # First 6 digits preserved
    assert masked_id.endswith('XXXXXXXXXXXX')  # Last 12 characters are X
    assert len(masked_id) == 18  # Total length preserved

    # NOTE(review): this prefix check covers 6 characters, and together with
    # the 12-X suffix and length 18 it pins a 6-character prefix. Other test
    # data in this file expects 7 preserved characters for social credit
    # codes -- confirm which rule is intended.
    assert masked_code.startswith('913302')  # First 6 characters checked
    assert masked_code.endswith('XXXXXXXXXXXX')  # Last 12 characters are X
    assert len(masked_code) == 18  # Total length preserved

    print(f"ID masking: 310103198802080000 -> {masked_id}")
    print(f"Code masking: 9133021276453538XT -> {masked_code}")


if __name__ == "__main__":
    print("Testing ID and Social Credit Code Masking")
    print("=" * 50)

    test_id_number_masking()
    print()
    test_social_credit_code_masking()
    print()
    test_edge_cases()
    print()
    test_mixed_entities()
    print()
    # The only test with assertions; run it too so the script can fail.
    test_id_masking()