68 lines
2.3 KiB
Python
68 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Test script for character-by-character alignment functionality.
This script demonstrates how the alignment handles different spacing patterns
between entity text and original document text.
|
||
"""
|
||
|
||
import sys
|
||
import os
|
||
sys.path.append(os.path.join(os.path.dirname(__file__), 'backend'))
|
||
|
||
from app.core.document_handlers.ner_processor import NerProcessor
|
||
|
||
def main():
    """Run the character-alignment demo and print every result to stdout.

    Creates a ``NerProcessor``, invokes its ``test_character_alignment()``
    routine, and then calls ``apply_entity_masking_with_alignment()`` on two
    sample documents with the same entity mapping: one written without
    spaces, and one where spaces are inserted inside the entity names.
    """
    ner = NerProcessor()
    rule = "=" * 50  # horizontal separator reused by every section banner

    print("Testing Character-by-Character Alignment")
    print(rule)

    # Alignment check implemented by the processor itself.
    ner.test_character_alignment()

    print("\n" + rule)
    print("Testing Entity Masking with Alignment")
    print(rule)

    # Sample document in which every mapped entity appears verbatim.
    plain_text = "上诉人(原审原告):北京丰复久信营销科技有限公司,住所地北京市海淀区北小马厂6号1号楼华天大厦1306室。法定代表人:郭东军,执行董事、经理。委托诉讼代理人:周大海,北京市康达律师事务所律师。"

    # Entity text -> masked replacement (example NER output).
    replacements = {
        "北京丰复久信营销科技有限公司": "北京JO营销科技有限公司",
        "郭东军": "郭DJ",
        "周大海": "周DH",
        "北京市康达律师事务所": "北京市KD律师事务所",
    }

    print(f"Original document: {plain_text}")
    print(f"Entity mapping: {replacements}")

    masked_plain = ner.apply_entity_masking_with_alignment(
        plain_text, replacements
    )
    print(f"Masked document: {masked_plain}")

    print("\n" + rule)
    print("Testing with Document Containing Spaces")
    print(rule)

    # Same opening passage, but with spaces inserted inside the company and
    # person names, so the mapping keys no longer occur verbatim in the text.
    spaced_text = "上诉人(原审原告):北京 丰复久信 营销科技 有限公司,住所地北京市海淀区北小马厂6号1号楼华天大厦1306室。法定代表人:郭 东 军,执行董事、经理。"

    print(f"Spaced document: {spaced_text}")

    masked_spaced = ner.apply_entity_masking_with_alignment(
        spaced_text, replacements
    )
    print(f"Masked spaced document: {masked_spaced}")
|
||
|
||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|