fix: strip emoji from sentence text before TTS to avoid TTS errors

2025-03-13 15:42:40 +08:00 · 2025-03-13 15:42:40 +08:00 · a6900c4892
commit a6900c4892
parent e0d1d34fdc
1 changed file with 8 additions and 3 deletions
--- a/server.py
+++ b/server.py
@ -10,6 +10,7 @@ import asyncio
 import aiohttp
 import PyPDF2
 import time
+import re
 from dotenv import load_dotenv

 # 加载环境变量
@ -99,7 +100,7 @@ def generate_explanation(page_num,page_text):
            model= llm_model,
            messages=[
                {"role": "system", "content": f"你是一位幽默的教师，正在为学生讲解PDF文档内容。请提供清晰、简洁的解释，重点突出关键概念。这是你的讲解历史：\n{chat_history}, 你需要与历史保持连贯。"},
-                {"role": "user", "content": f"请讲解第{page_num}页（总页数{pdfpages}）ppt的内容：{page_text}，首先判断是否要详细或者简略，比如标题页只需要简略，示例稍微展开，记住ppt不宜讲得太长。你的输出应符合老师的风格，句子间连贯，幽默风趣。"}
+                {"role": "user", "content": f"请讲解第{page_num}页（总页数{pdfpages}）ppt的内容：{page_text}，首先判断是否要详细或者简略，比如标题页只需要简略，示例稍微展开，记住ppt不宜讲得太长不超过100字。你的输出应符合老师的风格，句子间连贯，幽默风趣。"}
            ]
        )
        logger.info(f"生成讲解耗时: {time.time()-start_time}")
@ -166,7 +167,7 @@ async def text_to_speech(text, voice="zf_xiaoxiao", speed=1.5):
        sentence_pairs = []
        i = 0
        while i < len(sentences):
-            if i + 1 < len(sentences) and len(sentences[i]) + len(sentences[i+1]) < 40:
+            if i + 1 < len(sentences) and len(sentences[i]) + len(sentences[i+1]) < 60:
                sentence_pairs.append({
                    "text": sentences[i] + " " + sentences[i+1],
                    "sentences": [sentences[i], sentences[i+1]],
@ -180,7 +181,11 @@ async def text_to_speech(text, voice="zf_xiaoxiao", speed=1.5):
                    "indices": [i]
                })
                i += 1
-
+        
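+        # Strip emoji from each pair's text: keep only CJK characters (U+4E00-U+9FA5), ASCII letters, digits and whitespace (note: this also removes punctuation)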
+        for pair in sentence_pairs:
+            pair["text"] = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s]', '', pair["text"])
+        
        # 创建异步HTTP会话
        async with aiohttp.ClientSession() as session:
            tasks = [