170 lines
6.0 KiB
Python
170 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test file for ID and social credit code masking functionality
|
|
"""
|
|
|
|
import pytest
|
|
import sys
|
|
import os
|
|
|
|
# Add the backend directory to the Python path for imports
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from app.core.document_handlers.ner_processor import NerProcessor
|
|
|
|
|
|
def test_id_number_masking():
    """Check that ID numbers are masked to the expected values.

    Each case is wrapped in a single ``身份证号`` entity and passed to
    ``NerProcessor._generate_masked_mapping`` together with an empty
    ``entity_groups`` linkage. The 18-character cases expect the first six
    characters to be kept and every following character replaced by ``X``;
    the short input is expected to come back unchanged.

    Raises:
        AssertionError: If any masked value differs from its expected value.
    """
    processor = NerProcessor()

    # (original ID, expected masked ID) pairs.
    test_cases = [
        ("310103198802080000", "310103XXXXXXXXXXXX"),
        ("110101199001011234", "110101XXXXXXXXXXXX"),
        ("440301199505151234", "440301XXXXXXXXXXXX"),
        ("320102198712345678", "320102XXXXXXXXXXXX"),
        ("12345", "12345"),  # Edge case: too short
    ]

    # Collected instead of failing fast so the diagnostic output below is
    # still printed for every case before the test fails.
    mismatches = []

    for original_id, expected_masked in test_cases:
        # Minimal inputs for the mapping step: one entity, no linked groups.
        entity = {'text': original_id, 'type': '身份证号'}
        unique_entities = [entity]
        linkage = {'entity_groups': []}

        mapping = processor._generate_masked_mapping(unique_entities, linkage)
        # An ID that is absent from the mapping is treated as left unmasked.
        masked = mapping.get(original_id, original_id)

        print(f"Original ID: {original_id}")
        print(f"Masked ID: {masked}")
        print(f"Expected: {expected_masked}")
        print(f"Match: {masked == expected_masked}")
        print("-" * 50)

        if masked != expected_masked:
            mismatches.append((original_id, masked, expected_masked))

    # Printing "Match: False" cannot fail a test run; assert so that a
    # regression in the masking rule is actually reported.
    assert not mismatches, (
        f"ID masking mismatches (original, masked, expected): {mismatches}"
    )
|
|
|
|
|
|
def test_social_credit_code_masking():
    """Print masking results for a set of social credit codes.

    Every sample is submitted on its own as a ``社会信用代码`` entity, with an
    empty ``entity_groups`` linkage, to
    ``NerProcessor._generate_masked_mapping``. The original value, the masked
    value, the expected value and whether they match are printed; nothing is
    asserted, so this function reports mismatches without failing.
    """
    ner = NerProcessor()

    # (code, expected masked code) pairs.
    # NOTE(review): the first sample expects 6 leading characters to be kept,
    # while the next three expect 7 -- confirm which rule is intended.
    samples = (
        ("9133021276453538XT", "913302XXXXXXXXXXXX"),
        ("91110000100000000X", "9111000XXXXXXXXXXX"),
        ("914403001922038216", "9144030XXXXXXXXXXX"),
        ("91310000132209458G", "9131000XXXXXXXXXXX"),
        ("123456", "123456"),  # Edge case: too short
    )

    for code, wanted in samples:
        # Fresh entity list and linkage per sample, as one-entity input.
        lookup = ner._generate_masked_mapping(
            [{'text': code, 'type': '社会信用代码'}],
            {'entity_groups': []},
        )
        # A code missing from the mapping is reported as unchanged.
        got = lookup.get(code, code)

        report = (
            f"Original Code: {code}",
            f"Masked Code: {got}",
            f"Expected: {wanted}",
            f"Match: {got == wanted}",
            "-" * 50,
        )
        for line in report:
            print(line)
|
|
|
|
|
|
def test_edge_cases():
    """Print how boundary-length inputs are masked under both entity types.

    Each input is masked twice through
    ``NerProcessor._generate_masked_mapping``: once labelled ``身份证号`` and
    once labelled ``社会信用代码``. Both results are printed next to the
    original text. Nothing is asserted.
    """
    ner = NerProcessor()

    def masked_as(text, entity_type):
        """Mask ``text`` as a lone entity of ``entity_type``.

        Returns ``text`` itself when the mapping has no entry for it.
        """
        lookup = ner._generate_masked_mapping(
            [{'text': text, 'type': entity_type}],
            {'entity_groups': []},
        )
        return lookup.get(text, text)

    # (input, expected) pairs.
    # NOTE(review): the second element is never compared against the results
    # below, and the 30-character input is paired with a 24-character
    # expectation -- confirm the intended values before asserting on them.
    boundary_inputs = [
        ("", ""),  # Empty string
        ("123", "123"),  # Too short for ID
        ("123456", "123456"),  # Too short for social credit code
        ("123456789012345678901234567890", "123456XXXXXXXXXXXXXXXXXX"),  # Very long ID
    ]

    for raw, _expected in boundary_inputs:
        # ID number first, then social credit code.
        as_id = masked_as(raw, '身份证号')
        as_code = masked_as(raw, '社会信用代码')

        print(f"Original: {raw}")
        print(f"ID Masked: {as_id}")
        print(f"Code Masked: {as_code}")
        print("-" * 30)
|
|
|
|
|
|
def test_mixed_entities():
    """Print the masking of a batch containing several entity types.

    An ID number, a social credit code, a person name and a company name are
    passed together, with an empty ``entity_groups`` linkage, in one call to
    ``NerProcessor._generate_masked_mapping``. One
    ``type: original -> masked`` line is printed per entity. Nothing is
    asserted.
    """
    ner = NerProcessor()

    batch = [
        {'text': '310103198802080000', 'type': '身份证号'},
        {'text': '9133021276453538XT', 'type': '社会信用代码'},
        {'text': '李强', 'type': '人名'},
        {'text': '上海盒马网络科技有限公司', 'type': '公司名称'},
    ]
    no_groups = {'entity_groups': []}

    masked_by_text = ner._generate_masked_mapping(batch, no_groups)

    print("Mixed Entities Test:")
    print("=" * 30)
    for item in batch:
        text, kind = item['text'], item['type']
        # Entities without a mapping entry are shown unchanged.
        shown = masked_by_text.get(text, text)
        print(f"{kind}: {text} -> {shown}")
|
|
|
|
def test_id_masking():
    """Assert the masked shape of one ID number and one social credit code.

    Each value is masked on its own through
    ``NerProcessor._generate_masked_mapping``. The result must start with the
    value's first six characters, end with twelve ``X`` characters and be 18
    characters long.

    Raises:
        AssertionError: If a masked value violates any of those checks. A
            value missing from the mapping is read as ``''`` and therefore
            fails the prefix check.
    """
    from app.core.document_handlers.ner_processor import NerProcessor

    ner = NerProcessor()

    # ID number masking.
    sample_id = '310103198802080000'
    id_lookup = ner._generate_masked_mapping(
        [{'text': sample_id, 'type': '身份证号'}],
        {'entity_groups': []},
    )
    masked_id = id_lookup.get(sample_id, '')

    # Social credit code masking.
    sample_code = '9133021276453538XT'
    code_lookup = ner._generate_masked_mapping(
        [{'text': sample_code, 'type': '社会信用代码'}],
        {'entity_groups': []},
    )
    masked_code = code_lookup.get(sample_code, '')

    # The ID is checked first, then the code; both use a six-character prefix.
    for masked, kept_prefix in ((masked_id, '310103'), (masked_code, '913302')):
        assert masked.startswith(kept_prefix)  # Leading characters preserved
        assert masked.endswith('XXXXXXXXXXXX')  # Remainder replaced with X
        assert len(masked) == 18  # Total length preserved

    print(f"ID masking: 310103198802080000 -> {masked_id}")
    print(f"Code masking: 9133021276453538XT -> {masked_code}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the print-based checks in order, separated by
    # a blank line.
    # NOTE(review): test_id_masking is not invoked here -- confirm whether
    # that omission is intentional.
    print("Testing ID and Social Credit Code Masking")
    print("=" * 50)

    demo_checks = (
        test_id_number_masking,
        test_social_credit_code_masking,
        test_edge_cases,
        test_mixed_entities,
    )
    for position, run_check in enumerate(demo_checks):
        if position:
            print()  # Blank line between consecutive checks, none before the first.
        run_check()
|