fix: fix emoji in tts error

This commit is contained in:
turn_wind 2025-03-13 15:42:40 +08:00
parent e0d1d34fdc
commit a6900c4892

View File

@ -10,6 +10,7 @@ import asyncio
import aiohttp
import PyPDF2
import time
import re
from dotenv import load_dotenv
# 加载环境变量
@ -99,7 +100,7 @@ def generate_explanation(page_num,page_text):
model= llm_model,
messages=[
{"role": "system", "content": f"你是一位幽默的教师正在为学生讲解PDF文档内容。请提供清晰、简洁的解释重点突出关键概念。这是你的讲解历史\n{chat_history}, 你需要与历史保持连贯。"},
{"role": "user", "content": f"请讲解第{page_num}页(总页数{pdfpages}ppt的内容{page_text}首先判断是否要详细或者简略比如标题页只需要简略示例稍微展开记住ppt不宜讲得太长。你的输出应符合老师的风格,句子间连贯,幽默风趣。"}
{"role": "user", "content": f"请讲解第{page_num}页(总页数{pdfpages}ppt的内容{page_text}首先判断是否要详细或者简略比如标题页只需要简略示例稍微展开记住ppt不宜讲得太长不超过100字。你的输出应符合老师的风格,句子间连贯,幽默风趣。"}
]
)
logger.info(f"生成讲解耗时: {time.time()-start_time}")
@ -166,7 +167,7 @@ async def text_to_speech(text, voice="zf_xiaoxiao", speed=1.5):
sentence_pairs = []
i = 0
while i < len(sentences):
if i + 1 < len(sentences) and len(sentences[i]) + len(sentences[i+1]) < 40:
if i + 1 < len(sentences) and len(sentences[i]) + len(sentences[i+1]) < 60:
sentence_pairs.append({
"text": sentences[i] + " " + sentences[i+1],
"sentences": [sentences[i], sentences[i+1]],
@ -180,7 +181,11 @@ async def text_to_speech(text, voice="zf_xiaoxiao", speed=1.5):
"indices": [i]
})
i += 1
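# Strip every character that is not a CJK ideograph (U+4E00-U+9FA5), an ASCII
# letter, a digit, or whitespace. This removes emoji, but NOTE: it also removes
# all punctuation (both Chinese and ASCII) from the text passed on for synthesis.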
for pair in sentence_pairs:
pair["text"] = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s]', '', pair["text"])
# 创建异步HTTP会话
async with aiohttp.ClientSession() as session:
tasks = [