知识库retrieval使用glm-4-plus,并且设置max_tokens=4095

This commit is contained in:
Tiger Ren 2024-11-20 15:52:38 +08:00
parent 2179d4e71e
commit d9caf9acce
1 changed file with 9 additions and 7 deletions

View File

@ -7,7 +7,7 @@ logger = logging.getLogger(__name__)
class ZhipuKbService:
def __init__(self):
self.model_name = "glm-4"
self.model_name = "glm-4-plus"
self.app_secret_key = "d54f764a1d67c17d857bd3983b772016.GRjowY0fyiMNurLc"
logger.info("ZhipuKbService initialized with model: %s", self.model_name)
@ -22,7 +22,7 @@ class ZhipuKbService:
prompt_template = default_prompt
try:
response = client.chat.completions.create(
model="glm-4",
model=self.model_name,
messages=[
{"role": "user", "content": message},
],
@ -36,8 +36,9 @@ class ZhipuKbService:
}
],
stream=False,
temperature=0.01,
top_p=0.1,
max_tokens=4095,
temperature=0.01, # default=0.01
top_p=0.1, #default=0.1
)
result = response.choices[0].message.content
end_time = time.time()
@ -59,7 +60,7 @@ class ZhipuKbService:
prompt_template = default_prompt
try:
response = client.chat.completions.create(
model="glm-4",
model=self.model_name,
messages=messages,
tools=[
{
@ -71,8 +72,9 @@ class ZhipuKbService:
}
],
stream=True,
temperature=0.01,
top_p=0.1,
max_tokens=4095,
temperature=0.01, # default=0.01
top_p=0.1, #default=0.1
)
for chunk in response:
yield chunk.choices[0].delta.content