From a16b69475ec212cc8bcd012564206a77f84b9c3f Mon Sep 17 00:00:00 2001 From: tigermren Date: Sun, 17 Aug 2025 23:33:56 +0800 Subject: [PATCH] =?UTF-8?q?refine:=20=E6=95=B4=E7=90=86=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{ => docs}/OLLAMA_CLIENT_ENHANCEMENT.md | 0 backend/{ => docs}/PDF_PROCESSOR_README.md | 0 backend/{ => docs}/REFACTORING_SUMMARY.md | 0 backend/{ => docs}/TEST_SETUP.md | 0 backend/log | 127 ------------------ backend/run_tests.py | 32 ----- .../test_enhanced_ollama_client.py | 0 7 files changed, 159 deletions(-) rename backend/{ => docs}/OLLAMA_CLIENT_ENHANCEMENT.md (100%) rename backend/{ => docs}/PDF_PROCESSOR_README.md (100%) rename backend/{ => docs}/REFACTORING_SUMMARY.md (100%) rename backend/{ => docs}/TEST_SETUP.md (100%) delete mode 100644 backend/log delete mode 100644 backend/run_tests.py rename backend/{ => tests}/test_enhanced_ollama_client.py (100%) diff --git a/backend/OLLAMA_CLIENT_ENHANCEMENT.md b/backend/docs/OLLAMA_CLIENT_ENHANCEMENT.md similarity index 100% rename from backend/OLLAMA_CLIENT_ENHANCEMENT.md rename to backend/docs/OLLAMA_CLIENT_ENHANCEMENT.md diff --git a/backend/PDF_PROCESSOR_README.md b/backend/docs/PDF_PROCESSOR_README.md similarity index 100% rename from backend/PDF_PROCESSOR_README.md rename to backend/docs/PDF_PROCESSOR_README.md diff --git a/backend/REFACTORING_SUMMARY.md b/backend/docs/REFACTORING_SUMMARY.md similarity index 100% rename from backend/REFACTORING_SUMMARY.md rename to backend/docs/REFACTORING_SUMMARY.md diff --git a/backend/TEST_SETUP.md b/backend/docs/TEST_SETUP.md similarity index 100% rename from backend/TEST_SETUP.md rename to backend/docs/TEST_SETUP.md diff --git a/backend/log b/backend/log deleted file mode 100644 index 103a34f..0000000 --- a/backend/log +++ /dev/null @@ -1,127 +0,0 @@ - [2025-07-14 14:20:19,015: INFO/ForkPoolWorker-4] Raw response from LLM: { -celery_worker-1 | "entities": [] -celery_worker-1 | } -celery_worker-1 | [2025-07-14 14:20:19,016: INFO/ForkPoolWorker-4] Parsed mapping: {'entities': []} -celery_worker-1 | [2025-07-14 14:20:19,020: INFO/ForkPoolWorker-4] Calling ollama to generate case numbers mapping for chunk (attempt 1/3): -celery_worker-1 | 你是一个专业的法律文本实体识别助手。请从以下文本中抽取出所有需要脱敏的敏感信息,并按照指定的类别进行分类。请严格按照JSON格式输出结果。 -celery_worker-1 | -celery_worker-1 | 实体类别包括: -celery_worker-1 | - 案号 -celery_worker-1 | -celery_worker-1 | 待处理文本: -celery_worker-1 | -celery_worker-1 | -celery_worker-1 | 二审案件受理费450892 元,由北京丰复久信营销科技有限公司负担(已交纳)。 -celery_worker-1 | -celery_worker-1 | 29. 本判决为终审判决。 -celery_worker-1 | -celery_worker-1 | 审 判 长 史晓霞审 判 员 邓青菁审 判 员 李 淼二〇二二年七月七日法 官 助 理 黎 铧书 记 员 郑海兴 -celery_worker-1 | -celery_worker-1 | 输出格式: -celery_worker-1 | { -celery_worker-1 | "entities": [ -celery_worker-1 | {"text": "原始文本内容", "type": "案号"}, -celery_worker-1 | ... -celery_worker-1 | ] -celery_worker-1 | } -celery_worker-1 | -celery_worker-1 | 请严格按照JSON格式输出结果。 -celery_worker-1 | -api-1 | INFO: 192.168.65.1:60045 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:34054 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:34054 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:22084 - "GET /api/v1/files/files HTTP/1.1" 200 OK -celery_worker-1 | [2025-07-14 14:20:31,279: INFO/ForkPoolWorker-4] Raw response from LLM: { -celery_worker-1 | "entities": [] -celery_worker-1 | } -celery_worker-1 | [2025-07-14 14:20:31,281: INFO/ForkPoolWorker-4] Parsed mapping: {'entities': []} -celery_worker-1 | [2025-07-14 14:20:31,287: INFO/ForkPoolWorker-4] Chunk mapping: [{'entities': []}, {'entities': [{'text': '北京丰复久信营销科技有限公司', 'type': '公司名称'}]}, {'entities': []}, {'entities': []}, {'entities': []}] -celery_worker-1 | [2025-07-14 14:20:31,288: INFO/ForkPoolWorker-4] Final chunk mappings: [{'entities': [{'text': '郭东军', 'type': '人名'}, {'text': '王欢子', 'type': '人名'}]}, {'entities': [{'text': '北京丰复久信营销科技有限公司', 'type': '公司名称'}, {'text': '丰复久信公司', 'type': '公司名称简称'}, {'text': '中研智创区块链技术有限公司', 'type': '公司名称'}, {'text': '中研智才公司', 'type': '公司名称简称'}]}, {'entities': [{'text': '北京市海淀区北小马厂6 号1 号楼华天大厦1306 室', 'type': '地址'}, {'text': '天津市津南区双港镇工业园区优谷产业园5 号楼-1505', 'type': '地址'}]}, {'entities': [{'text': '服务合同', 'type': '项目名'}]}, {'entities': [{'text': '(2022)京 03 民终 3852 号', 'type': '案号'}, {'text': '(2020)京0105 民初69754 号', 'type': '案号'}]}, {'entities': [{'text': '李圣艳', 'type': '人名'}, {'text': '闫向东', 'type': '人名'}, {'text': '李敏', 'type': '人名'}, {'text': '布兰登·斯密特', 'type': '英文人名'}]}, {'entities': [{'text': '丰复久信公司', 'type': '公司名称'}, {'text': '中研智创公司', 'type': '公司名称'}, {'text': '丰复久信', 'type': '公司名称简称'}, {'text': '中研智创', 'type': '公司名称简称'}]}, {'entities': [{'text': '上海市', 'type': '地址'}, {'text': '北京', 'type': '地址'}]}, {'entities': [{'text': '《计算机设备采购合同》', 'type': '项目名'}]}, {'entities': []}, {'entities': []}, {'entities': [{'text': '丰复久信公司', 'type': '公司名称'}, {'text': '中研智创公司', 'type': '公司名称'}]}, {'entities': []}, {'entities': [{'text': '《服务合同书》', 'type': '项目名'}]}, {'entities': []}, {'entities': []}, {'entities': [{'text': '北京丰复久信营销科技有限公司', 'type': '公司名称'}]}, {'entities': []}, {'entities': []}, {'entities': []}] -celery_worker-1 | [2025-07-14 14:20:31,288: INFO/ForkPoolWorker-4] Duplicate entity found: {'text': '丰复久信公司', 'type': '公司名称'} -celery_worker-1 | [2025-07-14 14:20:31,288: INFO/ForkPoolWorker-4] Duplicate entity found: {'text': '丰复久信公司', 'type': '公司名称'} -celery_worker-1 | [2025-07-14 14:20:31,288: INFO/ForkPoolWorker-4] Duplicate entity found: {'text': '中研智创公司', 'type': '公司名称'} -celery_worker-1 | [2025-07-14 14:20:31,288: INFO/ForkPoolWorker-4] Duplicate entity found: {'text': '北京丰复久信营销科技有限公司', 'type': '公司名称'} -celery_worker-1 | [2025-07-14 14:20:31,288: INFO/ForkPoolWorker-4] Merged 22 unique entities -celery_worker-1 | [2025-07-14 14:20:31,288: INFO/ForkPoolWorker-4] Unique entities: [{'text': '郭东军', 'type': '人名'}, {'text': '王欢子', 'type': '人名'}, {'text': '北京丰复久信营销科技有限公司', 'type': '公司名称'}, {'text': '丰复久信公司', 'type': '公司名称简称'}, {'text': '中研智创区块链技术有限公司', 'type': '公司名称'}, {'text': '中研智才公司', 'type': '公司名称简称'}, {'text': '北京市海淀区北小马厂6 号1 号楼华天大厦1306 室', 'type': '地址'}, {'text': '天津市津南区双港镇工业园区优谷产业园5 号楼-1505', 'type': '地址'}, {'text': '服务合同', 'type': '项目名'}, {'text': '(2022)京 03 民终 3852 号', 'type': '案号'}, {'text': '(2020)京0105 民初69754 号', 'type': '案号'}, {'text': '李圣艳', 'type': '人名'}, {'text': '闫向东', 'type': '人名'}, {'text': '李敏', 'type': '人名'}, {'text': '布兰登·斯密特', 'type': '英文人名'}, {'text': '中研智创公司', 'type': '公司名称'}, {'text': '丰复久信', 'type': '公司名称简称'}, {'text': '中研智创', 'type': '公司名称简称'}, {'text': '上海市', 'type': '地址'}, {'text': '北京', 'type': '地址'}, {'text': '《计算机设备采购合同》', 'type': '项目名'}, {'text': '《服务合同书》', 'type': '项目名'}] -celery_worker-1 | [2025-07-14 14:20:31,289: INFO/ForkPoolWorker-4] Calling ollama to generate entity linkage (attempt 1/3) -api-1 | INFO: 192.168.65.1:52168 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:61426 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:30702 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:48159 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:16860 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:21262 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:45564 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:32142 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:27769 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:21196 - "GET /api/v1/files/files HTTP/1.1" 200 OK -celery_worker-1 | [2025-07-14 14:21:21,436: INFO/ForkPoolWorker-4] Raw entity linkage response from LLM: { -celery_worker-1 | "entity_groups": [ -celery_worker-1 | { -celery_worker-1 | "group_id": "group_1", -celery_worker-1 | "group_type": "公司名称", -celery_worker-1 | "entities": [ -celery_worker-1 | { -celery_worker-1 | "text": "北京丰复久信营销科技有限公司", -celery_worker-1 | "type": "公司名称", -celery_worker-1 | "is_primary": true -celery_worker-1 | }, -celery_worker-1 | { -celery_worker-1 | "text": "丰复久信公司", -celery_worker-1 | "type": "公司名称简称", -celery_worker-1 | "is_primary": false -celery_worker-1 | }, -celery_worker-1 | { -celery_worker-1 | "text": "丰复久信", -celery_worker-1 | "type": "公司名称简称", -celery_worker-1 | "is_primary": false -celery_worker-1 | } -celery_worker-1 | ] -celery_worker-1 | }, -celery_worker-1 | { -celery_worker-1 | "group_id": "group_2", -celery_worker-1 | "group_type": "公司名称", -celery_worker-1 | "entities": [ -celery_worker-1 | { -celery_worker-1 | "text": "中研智创区块链技术有限公司", -celery_worker-1 | "type": "公司名称", -celery_worker-1 | "is_primary": true -celery_worker-1 | }, -celery_worker-1 | { -celery_worker-1 | "text": "中研智创公司", -celery_worker-1 | "type": "公司名称简称", -celery_worker-1 | "is_primary": false -celery_worker-1 | }, -celery_worker-1 | { -celery_worker-1 | "text": "中研智创", -celery_worker-1 | "type": "公司名称简称", -celery_worker-1 | "is_primary": false -celery_worker-1 | } -celery_worker-1 | ] -celery_worker-1 | } -celery_worker-1 | ] -celery_worker-1 | } -celery_worker-1 | [2025-07-14 14:21:21,437: INFO/ForkPoolWorker-4] Parsed entity linkage: {'entity_groups': [{'group_id': 'group_1', 'group_type': '公司名称', 'entities': [{'text': '北京丰复久信营销科技有限公司', 'type': '公司名称', 'is_primary': True}, {'text': '丰复久信公司', 'type': '公司名称简称', 'is_primary': False}, {'text': '丰复久信', 'type': '公司名称简称', 'is_primary': False}]}, {'group_id': 'group_2', 'group_type': '公司名称', 'entities': [{'text': '中研智创区块链技术有限公司', 'type': '公司名称', 'is_primary': True}, {'text': '中研智创公司', 'type': '公司名称简称', 'is_primary': False}, {'text': '中研智创', 'type': '公司名称简称', 'is_primary': False}]}]} -celery_worker-1 | [2025-07-14 14:21:21,445: INFO/ForkPoolWorker-4] Successfully created entity linkage with 2 groups -celery_worker-1 | [2025-07-14 14:21:21,445: INFO/ForkPoolWorker-4] Entity linkage: {'entity_groups': [{'group_id': 'group_1', 'group_type': '公司名称', 'entities': [{'text': '北京丰复久信营销科技有限公司', 'type': '公司名称', 'is_primary': True}, {'text': '丰复久信公司', 'type': '公司名称简称', 'is_primary': False}, {'text': '丰复久信', 'type': '公司名称简称', 'is_primary': False}]}, {'group_id': 'group_2', 'group_type': '公司名称', 'entities': [{'text': '中研智创区块链技术有限公司', 'type': '公司名称', 'is_primary': True}, {'text': '中研智创公司', 'type': '公司名称简称', 'is_primary': False}, {'text': '中研智创', 'type': '公司名称简称', 'is_primary': False}]}]} -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Generated masked mapping for 22 entities -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Combined mapping: {'郭东军': '某', '王欢子': '某甲', '北京丰复久信营销科技有限公司': '某公司', '丰复久信公司': '某公司甲', '中研智创区块链技术有限公司': '某公司乙', '中研智才公司': '某公司丙', '北京市海淀区北小马厂6 号1 号楼华天大厦1306 室': '某乙', '天津市津南区双港镇工业园区优谷产业园5 号楼-1505': '某丙', '服务合同': '某丁', '(2022)京 03 民终 3852 号': '某戊', '(2020)京0105 民初69754 号': '某己', '李圣艳': '某庚', '闫向东': '某辛', '李敏': '某壬', '布兰登·斯密特': '某癸', '中研智创公司': '某公司丁', '丰复久信': '某公司戊', '中研智创': '某公司己', '上海市': '某11', '北京': '某12', '《计算机设备采购合同》': '某13', '《服务合同书》': '某14'} -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Linked entity '北京丰复久信营销科技有限公司' to '北京丰复久信营销科技有限公司' with masked name '某公司' -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Linked entity '丰复久信公司' to '北京丰复久信营销科技有限公司' with masked name '某公司' -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Linked entity '丰复久信' to '北京丰复久信营销科技有限公司' with masked name '某公司' -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Linked entity '中研智创区块链技术有限公司' to '中研智创区块链技术有限公司' with masked name '某公司乙' -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Linked entity '中研智创公司' to '中研智创区块链技术有限公司' with masked name '某公司乙' -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Linked entity '中研智创' to '中研智创区块链技术有限公司' with masked name '某公司乙' -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Final mapping: {'郭东军': '某', '王欢子': '某甲', '北京丰复久信营销科技有限公司': '某公司', '丰复久信公司': '某公司', '中研智创区块链技术有限公司': '某公司乙', '中研智才公司': '某公司丙', '北京市海淀区北小马厂6 号1 号楼华天大厦1306 室': '某乙', '天津市津南区双港镇工业园区优谷产业园5 号楼-1505': '某丙', '服务合同': '某丁', '(2022)京 03 民终 3852 号': '某戊', '(2020)京0105 民初69754 号': '某己', '李圣艳': '某庚', '闫向东': '某辛', '李敏': '某壬', '布兰登·斯密特': '某癸', '中研智创公司': '某公司乙', '丰复久信': '某公司', '中研智创': '某公司乙', '上海市': '某11', '北京': '某12', '《计算机设备采购合同》': '某13', '《服务合同书》': '某14'} -celery_worker-1 | [2025-07-14 14:21:21,446: INFO/ForkPoolWorker-4] Successfully masked content -celery_worker-1 | [2025-07-14 14:21:21,449: INFO/ForkPoolWorker-4] Successfully saved masked content to /app/storage/processed/47522ea9-c259-4304-bfe4-1d3ed6902ede.md -celery_worker-1 | [2025-07-14 14:21:21,470: INFO/ForkPoolWorker-4] Task app.services.file_service.process_file[5cfbca4c-0f6f-4c71-a66b-b22ee2d28139] succeeded in 311.847165101s: None -api-1 | INFO: 192.168.65.1:33432 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:40073 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:29550 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:61350 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:61755 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:63726 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:43446 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:45624 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:25256 - "GET /api/v1/files/files HTTP/1.1" 200 OK -api-1 | INFO: 192.168.65.1:43464 - "GET /api/v1/files/files HTTP/1.1" 200 OK \ No newline at end of file diff --git a/backend/run_tests.py b/backend/run_tests.py deleted file mode 100644 index 0cc3155..0000000 --- a/backend/run_tests.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple test runner script to verify test discovery and execution -""" -import subprocess -import sys -import os -from pathlib import Path - -def run_tests(): - """Run pytest with proper configuration""" - # Change to backend directory - backend_dir = Path(__file__).parent - os.chdir(backend_dir) - - # Run pytest - cmd = [sys.executable, "-m", "pytest", "tests/", "-v", "--tb=short"] - - print(f"Running tests from: {backend_dir}") - print(f"Command: {' '.join(cmd)}") - print("-" * 50) - - try: - result = subprocess.run(cmd, capture_output=False, text=True) - return result.returncode - except Exception as e: - print(f"Error running tests: {e}") - return 1 - -if __name__ == "__main__": - exit_code = run_tests() - sys.exit(exit_code) diff --git a/backend/test_enhanced_ollama_client.py b/backend/tests/test_enhanced_ollama_client.py similarity index 100% rename from backend/test_enhanced_ollama_client.py rename to backend/tests/test_enhanced_ollama_client.py