From 5ddef90e8b82ceb90c7f37275edf2ecae41e8736 Mon Sep 17 00:00:00 2001
From: oliviamn <oliviamren@gmail.com>
Date: Wed, 25 Jun 2025 01:31:12 +0800
Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E5=8D=95=E7=8B=AC=E5=AF=B9?=
 =?UTF-8?q?=E5=90=8D=E5=AD=97=E8=BF=9B=E8=A1=8CNER?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/app/core/prompts/masking_prompts.py | 82 +++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/backend/app/core/prompts/masking_prompts.py b/backend/app/core/prompts/masking_prompts.py
index 2646f0a..03e9826 100644
--- a/backend/app/core/prompts/masking_prompts.py
+++ b/backend/app/core/prompts/masking_prompts.py
@@ -78,4 +78,86 @@ def get_masking_mapping_prompt(text: str) -> str:
         {{}}
     """)
     
+    return prompt.format(text=text)
+
+def get_masking_mapping_prompt_v2(text: str) -> str:
+    """
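+    Returns a prompt that asks the model for a JSON mapping from each sensitive span in the text (person names, company names, addresses, project names, case numbers, ID numbers, unified social credit codes) to its masked form.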
+    
+    Args:
+        text (str): The input text to be analyzed for masking
+        
+    Returns:
+        str: The formatted prompt that will generate a mapping dictionary
+    """
+    prompt = textwrap.dedent("""
+请根据以下脱敏规则，对给定文本进行分析并生成脱敏映射map（JSON格式）。
+
+脱敏规则说明：
+
+1.人名：保留姓，名变为"某"；同姓者按"某1"、"某2"依次编号；律师姓名、审判人员姓名不脱敏
+2.公司名：替换为大写英文字母（A公司、B公司等）；律师事务所不脱敏
+3.地址：仅保留区级以上地址，删除详细位置
+4.英文人名：保留姓名首字母，其余替换为"***"
+5.英文公司名：替换为所属行业名称的英文大写形式
+6.项目名：替换为小写英文字母（a项目、b项目等）
+7.案号：具体案号部分替换为"** *"
+8.身份证号：替换为6个"X"
+9.统一社会信用代码：替换为8个"X"
+输入文本：
+{text}
+
+输出要求：
+请生成一个JSON格式的映射map，包含以下结构：
+
+{{
+"原文1": "脱敏后1",
+"原文2": "脱敏后2",
+ ...  
+}}
+如无需要输出的映射，请输出空json，如下:
+{{}}
+注意事项：
+
+请准确识别文本中的各类敏感信息
+同一类别的多个实体，请在对应类别下全部列出
+如果文本中没有某类敏感信息，可以省略该类别
+请确保脱敏后的文本符合上述规则要求
+
+
+""")
+    return prompt.format(text=text)
+
+def get_ner_name_prompt(text: str) -> str:
+    """
+    Returns a prompt that asks the model to extract person names (Chinese and English) from the text as a JSON entity list.
+    
+    Args:
+        text (str): The input text to scan for person-name entities
+        
+    Returns:
+        str: The formatted prompt, which requests a JSON object holding an entities list of text/type items
+    """
+    prompt = textwrap.dedent("""
+你是一个专业的法律文本实体识别助手。请从以下文本中抽取出所有需要脱敏的敏感信息，并按照指定的类别进行分类。请严格按照JSON格式输出结果。
+
+实体类别包括:
+- 人名 (不包括律师、法官、书记员、检察官等公职人员)
+- 英文人名
+
+
+待处理文本:
+{text}
+
+输出格式:
+{{
+"entities": [
+    {{"text": "原始文本内容", "type": "实体类别"}},
+    ...
+  ]
+}}
+
+请严格按照JSON格式输出结果。
+    
+    """)
     return prompt.format(text=text)
\ No newline at end of file