add tts support

parent: 3e7a411132
commit: 7567f882a2

index.html (46 lines changed)
@@ -153,6 +153,25 @@
             color: #721c24;
             border: 1px solid #f5c6cb;
         }
+
+        .audio-controls {
+            margin-top: 15px;
+            display: flex;
+            align-items: center;
+            gap: 10px;
+        }
+
+        .voice-selector {
+            display: flex;
+            align-items: center;
+            gap: 10px;
+            margin-bottom: 10px;
+        }
+
+        #audio-player {
+            width: 100%;
+            margin-top: 10px;
+        }
     </style>
 </head>
 <body>
@@ -204,11 +223,35 @@
             <div class="explanation-container">
                 <div class="explanation-header">
                     <h5>AI讲解</h5>
+                    <div class="d-flex gap-2">
                         <button id="explain-btn" class="btn btn-sm btn-primary">生成讲解</button>
+                        <button id="play-btn" class="btn btn-sm btn-success" disabled>
+                            <i class="bi bi-play-fill"></i> 播放
+                        </button>
                     </div>
+                </div>
+
+                <div class="voice-selector">
+                    <label for="voice-select" class="form-label mb-0">语音:</label>
+                    <select id="voice-select" class="form-select form-select-sm">
+                        <option value="zf_xiaoxiao">小小 (女)</option>
+                        <option value="zf_xiaoni">小妮 (女)</option>
+                        <option value="zf_xiaoyi">小怡 (女)</option>
+                        <option value="zf_xiaobei">小贝 (女)</option>
+                        <option value="zm_yunxi">云熙 (男)</option>
+                        <option value="zm_yunyang">云扬 (男)</option>
+                    </select>
+
+                    <label for="speed-range" class="form-label mb-0 ms-2">语速:</label>
+                    <input type="range" class="form-range" id="speed-range" min="0.5" max="2.0" step="0.1" value="1.0" style="width: 80px;">
+                    <span id="speed-value">1.0</span>
+                </div>
+
                 <div id="explanation-text" class="p-3">
                     点击"生成讲解"按钮,AI将为您讲解当前页面的内容。
                 </div>
+
+                <audio id="audio-player" controls style="display: none;"></audio>
             </div>
         </div>
     </div>
@@ -216,8 +259,9 @@
 
     <div id="status-message"></div>
 
-    <!-- 引入Bootstrap JS -->
+    <!-- 引入Bootstrap JS 和 Icons -->
     <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.0/font/bootstrap-icons.css">
     <script type="module" src="js/main.js"></script>
 </body>
 </html>
js/main.js (286 lines changed)
@@ -13,10 +13,6 @@ class AITeacherApp {
         this.ctx = this.canvas.getContext('2d');
         this.messageTimeout = null;
 
-        // 讲解状态
-        this.explanationsGenerated = false;
-        this.explanationsGenerating = false;
-
         // Live2D控制器
         this.live2dController = null;
 
@@ -27,6 +23,12 @@ class AITeacherApp {
         this.global_setting = null;
         this.api_host = null;
 
+        // 音频相关
+        this.audioPlayer = null;
+        this.currentAudioBase64 = null;
+        this.selectedVoice = 'zf_xiaoxiao';
+        this.speechSpeed = 1.0;
+
         this.init();
     }
 
@@ -48,6 +50,12 @@ class AITeacherApp {
             console.error('初始化Live2D控制器时出错:', error);
         }
 
+        // 初始化音频播放器
+        this.audioPlayer = document.getElementById('audio-player');
+
+        // 初始化语音和语速控制
+        this.initVoiceControls();
+
         await this.loadDefaultPDF();
         this.setupEventListeners();
 
@@ -60,24 +68,22 @@ class AITeacherApp {
 
     async loadDefaultPDF() {
         try {
-            const defaultPdfPath = 'pdf/VLA4RM-仿生智能.pdf';
+            const defaultPdfPath = './public/pdf/test.pdf';
             const loadingTask = pdfjsLib.getDocument(defaultPdfPath);
             this.pdfDoc = await loadingTask.promise;
             document.getElementById('page-count').textContent = this.pdfDoc.numPages;
             this.renderPage(this.pageNum);
 
-            // 触发服务器端PDF加载和讲解生成
-            this.triggerServerPdfLoad(defaultPdfPath);
+            // 通知服务器加载PDF
+            this.notifyServerPdfLoad(defaultPdfPath);
         } catch (error) {
             console.error('加载PDF时出错:', error);
             this.showMessage('PDF加载失败: ' + error.message, true);
         }
     }
 
-    async triggerServerPdfLoad(pdfPath) {
+    async notifyServerPdfLoad(pdfPath) {
         try {
-            this.explanationsGenerating = true;
-            this.showMessage('正在生成所有页面的讲解,请稍候...', false);
             const response = await fetch(`http://${this.api_host}/api/load_pdf`, {
                 method: 'POST',
                 headers: {
|
|||||||
|
|
||||||
if (data.success) {
|
if (data.success) {
|
||||||
this.showMessage(data.message, false);
|
this.showMessage(data.message, false);
|
||||||
// 开始轮询讲解生成状态
|
|
||||||
this.pollExplanationStatus();
|
|
||||||
} else {
|
} else {
|
||||||
this.showMessage(data.message, true);
|
this.showMessage(data.message, true);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('触发服务器PDF加载时出错:', error);
|
console.error('通知服务器加载PDF时出错:', error);
|
||||||
this.showMessage('触发服务器PDF加载时出错: ' + error.message, true);
|
this.showMessage('通知服务器加载PDF时出错: ' + error.message, true);
|
||||||
this.explanationsGenerating = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async pollExplanationStatus() {
|
|
||||||
// 如果已经生成完毕或不在生成状态,停止轮询
|
|
||||||
if (!this.explanationsGenerating) return;
|
|
||||||
|
|
||||||
try {
|
|
||||||
const response = await fetch('/api/explanation_status');
|
|
||||||
if (!response.ok) {
|
|
||||||
throw new Error('服务器响应错误');
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = await response.json();
|
|
||||||
console.log('讲解状态:', data);
|
|
||||||
|
|
||||||
if (data.is_complete) {
|
|
||||||
this.explanationsGenerated = true;
|
|
||||||
this.explanationsGenerating = false;
|
|
||||||
this.showMessage(`所有 ${data.total_pages} 页的讲解已生成完毕`, false);
|
|
||||||
|
|
||||||
// 获取当前页面的讲解
|
|
||||||
this.fetchExplanationForCurrentPage();
|
|
||||||
} else {
|
|
||||||
// 更新生成进度
|
|
||||||
const progress = Math.round((data.explanations_generated / data.total_pages) * 100);
|
|
||||||
this.showMessage(`讲解生成中: ${progress}% (${data.explanations_generated}/${data.total_pages})`, false);
|
|
||||||
|
|
||||||
// 继续轮询
|
|
||||||
setTimeout(() => this.pollExplanationStatus(), 2000);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.error('轮询讲解状态时出错:', error);
|
|
||||||
this.explanationsGenerating = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -162,59 +131,16 @@ class AITeacherApp {
                     this.pageNumPending = null;
                 }
 
-                // 页面渲染完成后,获取对应的讲解
-                this.fetchExplanationForCurrentPage();
+                // 清空讲解区域和停止音频播放
+                document.getElementById('explanation-text').textContent = '点击"生成讲解"按钮获取AI讲解';
+                this.stopAudio();
+                document.getElementById('play-btn').disabled = true;
             });
         });
 
         document.getElementById('page-num').value = num;
     }
 
-    async fetchExplanationForCurrentPage() {
-        // 如果讲解尚未生成完毕,使用传统方式获取讲解
-        if (!this.explanationsGenerated && !this.explanationsGenerating) {
-            this.onExplain();
-            return;
-        }
-
-        // 如果正在生成讲解,显示等待消息
-        if (this.explanationsGenerating) {
-            document.getElementById('explanation-text').textContent = '正在生成讲解,请稍候...';
-            return;
-        }
-
-        try {
-            // 显示加载中的消息
-            document.getElementById('explanation-text').textContent = '正在获取讲解...';
-
-            // 从服务器获取预生成的讲解
-            const response = await fetch(`/api/get_explanation/${this.pageNum}`);
-
-            if (!response.ok) {
-                throw new Error('服务器响应错误');
-            }
-
-            const data = await response.json();
-
-            if (data.success) {
-                document.getElementById('explanation-text').textContent = data.explanation;
-
-                // 如果Live2D控制器已初始化,播放说话动作
-                if (this.live2dController && this.live2dController.initialized) {
-                    this.live2dController.playMotion('Talk', 0);
-                }
-            } else {
-                // 如果预生成的讲解不存在,使用传统方式获取讲解
-                this.onExplain();
-            }
-        } catch (error) {
-            console.error('获取预生成讲解时出错:', error);
-            document.getElementById('explanation-text').textContent = '获取讲解时出错: ' + error.message;
-            // 尝试使用传统方式获取讲解
-            this.onExplain();
-        }
-    }
-
     queueRenderPage(num) {
         if (this.pageRendering) {
             this.pageNumPending = num;
@@ -277,13 +203,6 @@ class AITeacherApp {
             document.getElementById('page-count').textContent = this.pdfDoc.numPages;
             this.renderPage(this.pageNum);
             this.showMessage('PDF加载成功', false);
-
-            // 重置讲解状态
-            this.explanationsGenerated = false;
-            this.explanationsGenerating = false;
-
-            // 对于上传的文件,我们暂时不触发服务器端讲解生成
-            // 因为服务器端需要访问文件,而上传的文件仅在客户端可用
         } catch (error) {
             console.error('加载PDF时出错:', error);
             this.showMessage('PDF加载失败: ' + error.message, true);
@@ -298,23 +217,25 @@ class AITeacherApp {
 
     async onExplain() {
         try {
-            // 获取当前页面的文本内容
-            const page = await this.pdfDoc.getPage(this.pageNum);
-            const textContent = await page.getTextContent();
-            const pageText = textContent.items.map(item => item.str).join(' ');
-
             // 显示加载中的消息
             document.getElementById('explanation-text').textContent = '正在生成AI讲解...';
+            document.getElementById('play-btn').disabled = true;
+            this.stopAudio();
 
-            // 发送到服务器获取AI讲解
-            const response = await fetch('/api/explain', {
+            // 获取当前选择的语音和语速
+            const voice = this.selectedVoice;
+            const speed = this.speechSpeed;
+
+            // 发送到服务器获取AI讲解和音频
+            const response = await fetch(`http://${this.api_host}/api/explain_with_audio`, {
                 method: 'POST',
                 headers: {
                     'Content-Type': 'application/json'
                 },
                 body: JSON.stringify({
-                    text: pageText,
-                    page: this.pageNum
+                    page: this.pageNum,
+                    voice: voice,
+                    speed: speed
                 })
             });
 
@@ -323,12 +244,28 @@ class AITeacherApp {
             }
 
             const data = await response.json();
 
+            if (data.success) {
                 document.getElementById('explanation-text').textContent = data.explanation;
 
+                // 如果有音频数据,启用播放按钮并自动播放
+                if (data.audio_base64) {
+                    this.currentAudioBase64 = data.audio_base64;
+                    document.getElementById('play-btn').disabled = false;
+                    this.playAudio();
+                } else if (data.tts_error) {
+                    console.error('TTS生成失败:', data.tts_error);
+                    this.showMessage('语音生成失败,但文本讲解已生成', true);
+                }
+
                 // 如果Live2D控制器已初始化,播放说话动作
                 if (this.live2dController && this.live2dController.initialized) {
                     this.live2dController.playMotion('Talk', 0);
                 }
+            } else {
+                document.getElementById('explanation-text').textContent = data.explanation || '生成讲解失败';
+                this.showMessage('生成讲解失败', true);
+            }
         } catch (error) {
             console.error('获取AI讲解时出错:', error);
             document.getElementById('explanation-text').textContent = '获取AI讲解时出错: ' + error.message;
@@ -336,6 +273,127 @@ class AITeacherApp {
         }
     }
 
+    playAudio() {
+        if (!this.currentAudioBase64) {
+            this.showMessage('没有可播放的音频', true);
+            return;
+        }
+
+        try {
+            // 将base64转换为Blob
+            const byteCharacters = atob(this.currentAudioBase64);
+            const byteNumbers = new Array(byteCharacters.length);
+            for (let i = 0; i < byteCharacters.length; i++) {
+                byteNumbers[i] = byteCharacters.charCodeAt(i);
+            }
+            const byteArray = new Uint8Array(byteNumbers);
+            const blob = new Blob([byteArray], { type: 'audio/wav' });
+
+            // 创建URL并设置到音频播放器
+            const audioUrl = URL.createObjectURL(blob);
+            this.audioPlayer.src = audioUrl;
+            this.audioPlayer.style.display = 'block';
+
+            // 播放音频
+            this.audioPlayer.play();
+
+            // 更新播放按钮状态
+            const playBtn = document.getElementById('play-btn');
+            playBtn.innerHTML = '<i class="bi bi-pause-fill"></i> 暂停';
+            playBtn.classList.remove('btn-success');
+            playBtn.classList.add('btn-warning');
+
+            // 监听音频播放结束事件
+            this.audioPlayer.onended = () => {
+                playBtn.innerHTML = '<i class="bi bi-play-fill"></i> 播放';
+                playBtn.classList.remove('btn-warning');
+                playBtn.classList.add('btn-success');
+            };
+        } catch (error) {
+            console.error('播放音频时出错:', error);
+            this.showMessage('播放音频时出错: ' + error.message, true);
+        }
+    }
+
+    stopAudio() {
+        if (this.audioPlayer) {
+            this.audioPlayer.pause();
+            this.audioPlayer.currentTime = 0;
+            this.audioPlayer.style.display = 'none';
+
+            // 更新播放按钮状态
+            const playBtn = document.getElementById('play-btn');
+            playBtn.innerHTML = '<i class="bi bi-play-fill"></i> 播放';
+            playBtn.classList.remove('btn-warning');
+            playBtn.classList.add('btn-success');
+        }
+    }
+
+    toggleAudio() {
+        if (this.audioPlayer.paused) {
+            this.audioPlayer.play();
+            document.getElementById('play-btn').innerHTML = '<i class="bi bi-pause-fill"></i> 暂停';
+            document.getElementById('play-btn').classList.remove('btn-success');
+            document.getElementById('play-btn').classList.add('btn-warning');
+        } else {
+            this.audioPlayer.pause();
+            document.getElementById('play-btn').innerHTML = '<i class="bi bi-play-fill"></i> 播放';
+            document.getElementById('play-btn').classList.remove('btn-warning');
+            document.getElementById('play-btn').classList.add('btn-success');
+        }
+    }
+
+    initVoiceControls() {
+        // 初始化语音选择器
+        const voiceSelect = document.getElementById('voice-select');
+        voiceSelect.addEventListener('change', () => {
+            this.selectedVoice = voiceSelect.value;
+        });
+
+        // 初始化语速控制
+        const speedRange = document.getElementById('speed-range');
+        const speedValue = document.getElementById('speed-value');
+
+        speedRange.addEventListener('input', () => {
+            this.speechSpeed = parseFloat(speedRange.value);
+            speedValue.textContent = this.speechSpeed.toFixed(1);
+        });
+
+        // 设置初始值
+        this.selectedVoice = voiceSelect.value;
+        this.speechSpeed = parseFloat(speedRange.value);
+        speedValue.textContent = this.speechSpeed.toFixed(1);
+    }
+
+    async loadVoices() {
+        try {
+            const response = await fetch(`http://${this.api_host}/api/voices`);
+            if (!response.ok) {
+                throw new Error('获取语音列表失败');
+            }
+
+            const data = await response.json();
+            if (data.success && data.voices && data.voices.length > 0) {
+                const voiceSelect = document.getElementById('voice-select');
+
+                // 清空现有选项
+                voiceSelect.innerHTML = '';
+
+                // 添加新选项
+                data.voices.forEach(voice => {
+                    const option = document.createElement('option');
+                    option.value = voice.id;
+                    option.textContent = `${voice.name} (${voice.gender === 'female' ? '女' : '男'})`;
+                    voiceSelect.appendChild(option);
+                });
+
+                // 更新选中的语音
+                this.selectedVoice = voiceSelect.value;
+            }
+        } catch (error) {
+            console.error('加载语音列表时出错:', error);
+        }
+    }
+
     showMessage(message, isError = false) {
         const statusMessage = document.getElementById('status-message');
@@ -363,10 +421,14 @@ class AITeacherApp {
         document.getElementById('zoom-reset').addEventListener('click', () => this.onZoomReset());
         document.getElementById('pdf-upload').addEventListener('change', (e) => this.onFileUpload(e));
         document.getElementById('explain-btn').addEventListener('click', () => this.onExplain());
+        document.getElementById('play-btn').addEventListener('click', () => this.toggleAudio());
         document.getElementById('model-select').addEventListener('change', () => {
            const modelName = document.getElementById('model-select').value;
            this.live2dController.loadModel(modelName);
         });
+
+        // 尝试加载可用的语音列表
+        this.loadVoices();
     }
 }
 
public/pdf/test.pdf (new binary file; not shown)

received_audio.wav (new binary file; not shown)
requirements.txt

@@ -30,7 +30,7 @@ networkx==3.4.2
 nibabel==5.3.2
 nipype==1.9.2
 numpy==2.2.3
-openai==1.3.0
+openai
 packaging==24.2
 pandas==2.2.3
 pathlib==1.0.1
@@ -64,3 +64,5 @@ uvicorn==0.34.0
 websockets==10.4
 Werkzeug==2.2.3
 wheel==0.45.1
+soundfile
+IPython
server.py (317 lines changed)
@@ -1,11 +1,12 @@
 import os
 import json
 import logging
-import asyncio
+import requests
+import base64
 from flask import Flask, request, jsonify, send_from_directory
 from flask_cors import CORS
 import openai
-import fitz  # PyMuPDF
+import PyPDF2
 from dotenv import load_dotenv
 
 # 加载环境变量
@@ -26,6 +27,9 @@ logger = logging.getLogger(__name__)
 openai_api_key = "sk-95ab48a1e0754ad39c13e2987f73fe37"
 openai_base_url = "https://api.deepseek.com"
 
+# TTS API地址
+TTS_BASE_URL = "http://feng-arch.cn:31006"
+
 if not openai_api_key:
     logger.warning("OpenAI API key not found. AI explanation will use fallback mode.")
 
@@ -41,31 +45,30 @@ except Exception as e:
 app = Flask(__name__, static_url_path='')
 CORS(app)
 
-# 存储PDF文档内容和生成的讲解
-pdf_content = {
-    "full_text": "",
-    "pages": [],
-    "explanations": []
-}
-
-def extract_pdf_text(pdf_path):
-    """提取PDF文档的全部文本内容和每一页的文本"""
-    try:
-        doc = fitz.open(pdf_path)
-        full_text = ""
-        pages = []
-
-        for page_num in range(len(doc)):
-            page = doc.load_page(page_num)
-            page_text = page.get_text()
-            pages.append(page_text)
-            full_text += f"\n--- 第{page_num+1}页 ---\n{page_text}"
-
-        return {
-            "success": True,
-            "full_text": full_text,
-            "pages": pages,
-            "page_count": len(doc)
-        }
+# 存储当前加载的PDF路径
+current_pdf_path = None
+
+def extract_page_text(pdf_path, page_num):
+    """提取PDF文档指定页面的文本内容"""
+    try:
+        with open(pdf_path, 'rb') as file:
+            reader = PyPDF2.PdfReader(file)
+
+            # 检查页码是否有效
+            if page_num < 1 or page_num > len(reader.pages):
+                return {
+                    "success": False,
+                    "error": f"无效的页码: {page_num},PDF共有 {len(reader.pages)} 页"
+                }
+
+            # 提取指定页面的文本
+            page = reader.pages[page_num - 1]  # 页码从1开始,但索引从0开始
+            page_text = page.extract_text()
+
+            return {
+                "success": True,
+                "page_text": page_text,
+                "page_count": len(reader.pages)
+            }
     except Exception as e:
         logger.error(f"Error extracting PDF text: {e}")
@@ -74,63 +77,18 @@ def extract_pdf_text(pdf_path):
             "error": str(e)
         }
 
-async def generate_explanations_for_all_pages(full_text, pages):
-    """为所有页面生成讲解内容"""
-    explanations = []
-    client = openai.OpenAI(api_key=openai_api_key, base_url=openai_base_url)
-
-    # 首先让LLM理解整个文档
-    try:
-        logger.info("Generating context understanding from full document...")
-        context_response = client.chat.completions.create(
-            model="deepseek-chat",
-            messages=[
-                {"role": "system", "content": "你是一位专业的教师,需要理解整个PDF文档的内容,以便后续为每一页生成讲解。"},
-                {"role": "user", "content": f"请阅读并理解以下PDF文档的全部内容,不需要回复具体内容,只需要理解:\n\n{full_text}"}
-            ]
-        )
-        context_understanding = context_response.choices[0].message.content.strip()
-        logger.info("Context understanding generated successfully")
-    except Exception as e:
-        logger.error(f"Error generating context understanding: {e}")
-        context_understanding = "无法生成文档理解,将基于单页内容生成讲解。"
-
-    # 为每一页生成讲解
-    for i, page_text in enumerate(pages):
-        try:
-            logger.info(f"Generating explanation for page {i+1}...")
-            response = client.chat.completions.create(
-                model="deepseek-chat",
-                messages=[
-                    {"role": "system", "content": f"你是一位专业的教师,正在为学生讲解PDF文档内容。你已经理解了整个文档的内容,现在需要为第{i+1}页生成简洁的讲解。请提供清晰、简洁的解释,重点突出关键概念。你的讲解应该考虑到整个文档的上下文,而不仅仅是孤立地解释当前页面。"},
-                    {"role": "user", "content": f"基于你对整个文档的理解,请为第{i+1}页生成简洁的讲解:\n\n{page_text}"}
-                ]
-            )
-            explanation = response.choices[0].message.content.strip()
-            explanations.append(explanation)
-            logger.info(f"Explanation for page {i+1} generated successfully")
-        except Exception as e:
-            logger.error(f"Error generating explanation for page {i+1}: {e}")
-            explanations.append(f"生成第{i+1}页讲解时出错: {str(e)}")
-
-    return explanations
-
-def generate_explanation(page_text, page_num=None):
+def generate_explanation(page_text):
     """为单个页面生成讲解内容"""
     if not openai_api_key:
         return "这是一个示例讲解。请设置OpenAI API密钥以获取真实的AI讲解内容。"
 
-    # 如果已经有预生成的讲解,直接返回
-    if pdf_content["explanations"] and page_num is not None and 0 <= page_num-1 < len(pdf_content["explanations"]):
-        return pdf_content["explanations"][page_num-1]
-
     try:
         client = openai.OpenAI(api_key=openai_api_key, base_url=openai_base_url)
         response = client.chat.completions.create(
             model="deepseek-chat",
             messages=[
                 {"role": "system", "content": "你是一位专业的教师,正在为学生讲解PDF文档内容。请提供清晰、简洁的解释,重点突出关键概念。"},
-                {"role": "user", "content": f"请讲解以下内容:\n\n{page_text}"}
+                {"role": "user", "content": f"请讲解以下内容:\n\n{page_text},你的输出应符合讲稿的风格,句子间连贯。"}
             ]
         )
         return response.choices[0].message.content.strip()
@@ -138,6 +96,40 @@ def generate_explanation(page_text, page_num=None):
     logger.error(f"Error generating explanation: {e}")
     return f"生成讲解时出错: {str(e)}"
 
+def text_to_speech(text, voice="zf_xiaoxiao", speed=1.0):
+    """将文本转换为语音,返回base64编码的音频数据"""
+    try:
+        url = f"{TTS_BASE_URL}/tts"
+        payload = {
+            "text": text,
+            "voice": voice,
+            "speed": speed,
+            "return_type": "base64"
+        }
+
+        response = requests.post(url, json=payload)
+
+        if response.status_code == 200:
+            data = response.json()
+            # 获取base64编码的音频
+            audio_base64 = data.get("audio_base64")
+            return {
+                "success": True,
+                "audio_base64": audio_base64
+            }
+        else:
+            logger.error(f"TTS API error: {response.status_code} - {response.text}")
+            return {
+                "success": False,
+                "error": f"TTS API error: {response.status_code}"
+            }
+    except Exception as e:
+        logger.error(f"Error in text_to_speech: {e}")
+        return {
+            "success": False,
+            "error": str(e)
+        }
+
 @app.route('/')
 def index():
     return send_from_directory('', 'index.html')
@@ -152,86 +144,153 @@ def explain():
     text = data.get('text', '')
     page_num = data.get('page', None)
 
-    explanation = generate_explanation(text, page_num)
-    return jsonify({'explanation': explanation})
+    # 如果提供了页码但没有提供文本,尝试从PDF中提取
+    if page_num and not text and current_pdf_path:
+        result = extract_page_text(current_pdf_path, page_num)
+        if result["success"]:
+            text = result["page_text"]
+        else:
+            return jsonify({
+                'success': False,
+                'explanation': f"无法提取页面文本: {result['error']}"
+            })
+
+    explanation = generate_explanation(text)
+    return jsonify({
+        'success': True,
+        'explanation': explanation
+    })
+
+@app.route('/api/tts', methods=['POST'])
+def tts():
+    data = request.json
+    text = data.get('text', '')
+    voice = data.get('voice', 'zf_xiaoxiao')
+    speed = data.get('speed', 1.0)
+
+    if not text:
+        return jsonify({
+            'success': False,
+            'error': '文本不能为空'
+        })
+
+    # 将文本转换为语音
+    result = text_to_speech(text, voice, speed)
+
+    if result["success"]:
+        return jsonify({
+            'success': True,
+            'audio_base64': result["audio_base64"]
+        })
+    else:
+        return jsonify({
+            'success': False,
+            'error': result["error"]
+        })
+
+@app.route('/api/explain_with_audio', methods=['POST'])
+def explain_with_audio():
+    data = request.json
+    text = data.get('text', '')
+    page_num = data.get('page', None)
+    voice = data.get('voice', 'zf_xiaoxiao')
+    speed = data.get('speed', 1.0)
+
+    # 如果提供了页码但没有提供文本,尝试从PDF中提取
+    if page_num and not text and current_pdf_path:
+        result = extract_page_text(current_pdf_path, page_num)
+        if result["success"]:
+            text = result["page_text"]
+        else:
+            return jsonify({
+                'success': False,
+                'explanation': f"无法提取页面文本: {result['error']}",
+                'error': result["error"]
+            })
+
+    # 生成讲解
+    explanation = generate_explanation(text)
+
+    # 将讲解转换为语音
+    tts_result = text_to_speech(explanation, voice, speed)
+
+    if tts_result["success"]:
+        return jsonify({
+            'success': True,
+            'explanation': explanation,
+            'audio_base64': tts_result["audio_base64"]
+        })
+    else:
+        return jsonify({
+            'success': True,
+            'explanation': explanation,
+            'audio_base64': None,
+            'tts_error': tts_result["error"]
+        })
 
 @app.route('/api/load_pdf', methods=['POST'])
 def load_pdf():
+    global current_pdf_path
+
     data = request.json
-    pdf_path = data.get('path', './public/pdf/VLA4RM-仿生智能.pdf')
+    pdf_path = data.get('path', './public/pdf/test.pdf')
 
-    # 提取PDF文本
-    result = extract_pdf_text(pdf_path)
-
-    if result["success"]:
-        # 更新全局PDF内容
-        pdf_content["full_text"] = result["full_text"]
-        pdf_content["pages"] = result["pages"]
-
-        # 异步生成所有页面的讲解
-        async def process_explanations():
-            explanations = await generate_explanations_for_all_pages(
-                result["full_text"],
-                result["pages"]
-            )
-            pdf_content["explanations"] = explanations
-            logger.info(f"Generated explanations for all {len(explanations)} pages")
-
-        # 启动异步任务
-        asyncio.run(process_explanations())
-
+    try:
+        # 检查PDF是否存在
+        if not os.path.exists(pdf_path):
+            return jsonify({
+                'success': False,
+                'message': f'PDF文件不存在: {pdf_path}'
+            })
+
+        # 尝试打开PDF以验证其有效性
+        with open(pdf_path, 'rb') as file:
+            reader = PyPDF2.PdfReader(file)
+            page_count = len(reader.pages)
+
+        # 更新当前PDF路径
+        current_pdf_path = pdf_path
+
         return jsonify({
             'success': True,
-            'message': '已加载PDF并开始生成讲解',
-            'page_count': result["page_count"]
+            'message': '已成功加载PDF',
+            'page_count': page_count
         })
-    else:
+    except Exception as e:
+        logger.error(f"Error loading PDF: {e}")
         return jsonify({
             'success': False,
-            'message': f'加载PDF失败: {result["error"]}'
+            'message': f'加载PDF失败: {str(e)}'
         })
 
-@app.route('/api/get_explanation/<int:page_num>', methods=['GET'])
-def get_explanation(page_num):
-    if 0 <= page_num-1 < len(pdf_content["explanations"]):
-        return jsonify({
-            'success': True,
-            'explanation': pdf_content["explanations"][page_num-1]
-        })
-    else:
-        return jsonify({
-            'success': False,
-            'message': f'页码 {page_num} 的讲解不存在'
-        })
-
-@app.route('/api/explanation_status', methods=['GET'])
-def explanation_status():
-    return jsonify({
-        'total_pages': len(pdf_content["pages"]),
-        'explanations_generated': len(pdf_content["explanations"]),
-        'is_complete': len(pdf_content["pages"]) > 0 and len(pdf_content["pages"]) == len(pdf_content["explanations"])
-    })
+@app.route('/api/voices', methods=['GET'])
+def get_voices():
+    """获取可用的TTS声音列表"""
+    voices = [
+        {"id": "zf_xiaoxiao", "name": "小小", "gender": "female", "lang": "zh"},
+        {"id": "zf_xiaoni", "name": "小妮", "gender": "female", "lang": "zh"},
+        {"id": "zf_xiaoyi", "name": "小怡", "gender": "female", "lang": "zh"},
+        {"id": "zf_xiaobei", "name": "小贝", "gender": "female", "lang": "zh"},
+        {"id": "zm_yunxi", "name": "云熙", "gender": "male", "lang": "zh"},
+        {"id": "zm_yunyang", "name": "云扬", "gender": "male", "lang": "zh"},
+        {"id": "zm_yunxia", "name": "云夏", "gender": "male", "lang": "zh"},
+        {"id": "zm_yunjian", "name": "云健", "gender": "male", "lang": "zh"},
+        {"id": "af_heart", "name": "Heart", "gender": "female", "lang": "en"},
+        {"id": "af_bella", "name": "Bella", "gender": "female", "lang": "en"},
+        {"id": "am_michael", "name": "Michael", "gender": "male", "lang": "en"},
+        {"id": "am_puck", "name": "Puck", "gender": "male", "lang": "en"}
+    ]
+
+    return jsonify({
+        'success': True,
+        'voices': voices
+    })
 
 if __name__ == '__main__':
-    # 在启动时预加载默认PDF
-    default_pdf_path = './VLA4RM-仿生智能.pdf'
+    # 设置默认PDF路径
+    default_pdf_path = './public/pdf/test.pdf'
     if os.path.exists(default_pdf_path):
-        logger.info(f"Pre-loading default PDF: {default_pdf_path}")
-        result = extract_pdf_text(default_pdf_path)
-        if result["success"]:
-            pdf_content["full_text"] = result["full_text"]
-            pdf_content["pages"] = result["pages"]
-
-            # 异步生成所有页面的讲解
-            async def process_explanations():
-                explanations = await generate_explanations_for_all_pages(
-                    result["full_text"],
-                    result["pages"]
-                )
-                pdf_content["explanations"] = explanations
-                logger.info(f"Generated explanations for all {len(explanations)} pages")
-
-            # 启动异步任务
-            asyncio.run(process_explanations())
-
+        current_pdf_path = default_pdf_path
+        logger.info(f"默认PDF已设置: {default_pdf_path}")
     app.run(host='0.0.0.0', port=port, debug=True)
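With the new /api/explain_with_audio endpoint in place, a quick way to exercise the flow outside the browser is a small client script. This is a minimal sketch, not part of the commit: the host and port (localhost:5000) are assumptions (the actual port comes from the `port` variable in server.py), while the request fields (page, voice, speed) and response fields (success, explanation, audio_base64, tts_error) follow the handler added above. It assumes a PDF has already been loaded so the server can extract the page text itself.

import base64
import requests

# Assumed local address of the Flask server; adjust to the real host/port.
API = "http://localhost:5000"

resp = requests.post(f"{API}/api/explain_with_audio", json={
    "page": 1,               # page to explain; text is extracted server-side from current_pdf_path
    "voice": "zf_xiaoxiao",  # one of the ids returned by /api/voices
    "speed": 1.0,
})
data = resp.json()

if data.get("success"):
    print(data["explanation"])
    if data.get("audio_base64"):
        # The TTS audio comes back as base64-encoded WAV; decode and save it.
        with open("explanation.wav", "wb") as f:
            f.write(base64.b64decode(data["audio_base64"]))
    elif data.get("tts_error"):
        print("TTS failed:", data["tts_error"])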
tts_test.py (new file, 118 lines)
@@ -0,0 +1,118 @@
+import requests
+import base64
+import json
+import io
+from IPython.display import Audio
+import soundfile as sf
+
+# 假设您的FastAPI服务在本地运行
+BASE_URL = "http://feng-arch.cn:31006"
+
+def tts_file_response(text, voice="af_heart", speed=1.0, lang_code="z"):
+    """
+    发送请求并直接获取音频文件
+    """
+    url = f"{BASE_URL}/tts"
+    payload = {
+        "text": text,
+        "voice": voice,
+        "speed": speed,
+        "return_type": "file"
+    }
+
+    response = requests.post(url, json=payload)
+
+    if response.status_code == 200:
+        # 保存音频文件
+        with open("received_audio.wav", "wb") as f:
+            f.write(response.content)
+        print("音频已保存为 received_audio.wav")
+
+        # 如果在Jupyter Notebook中,可以直接播放
+        return Audio(data=response.content, rate=24000)
+    else:
+        print(f"错误: {response.status_code}")
+        print(response.text)
+        return None
+
+def tts_base64_response(text, voice="af_heart", speed=1.0, lang_code="z"):
+    """
+    发送请求并获取base64编码的音频数据
+    """
+    url = f"{BASE_URL}/tts"
+    payload = {
+        "text": text,
+        "voice": voice,
+        "speed": speed,
+        "return_type": "base64"
+    }
+
+    response = requests.post(url, json=payload)
+
+    if response.status_code == 200:
+        data = response.json()
+        # 获取base64编码的音频
+        audio_base64 = data.get("audio_base64")
+
+        # 解码base64数据
+        audio_data = base64.b64decode(audio_base64)
+
+        # 保存音频文件
+        with open("received_audio.wav", "wb") as f:
+            f.write(audio_data)
+        print("音频已保存为 received_audio.wav")
+
+        # 如果在Jupyter Notebook中,可以直接播放
+        return Audio(data=audio_data, rate=24000)
+    else:
+        print(f"错误: {response.status_code}")
+        print(response.text)
+        return None
+
+def get_available_voices(lang_code="z"):
+    """
+    获取指定语言的可用声音列表
+
+    Name Traits Target Quality Training Duration Overall Grade SHA256
+    af_heart 🚺❤️ A 0ab5709b
+    af_alloy 🚺 B MM minutes C 6d877149
+    af_aoede 🚺 B H hours C+ c03bd1a4
+    af_bella 🚺🔥 A HH hours A- 8cb64e02
+    af_jessica 🚺 C MM minutes D cdfdccb8
+    af_kore 🚺 B H hours C+ 8bfbc512
+    af_nicole 🚺🎧 B HH hours B- c5561808
+    af_nova 🚺 B MM minutes C e0233676
+    af_river 🚺 C MM minutes D e149459b
+    af_sarah 🚺 B H hours C+ 49bd364e
+    af_sky 🚺 B M minutes 🤏 C- c799548a
+    am_adam 🚹 D H hours F+ ced7e284
+    am_echo 🚹 C MM minutes D 8bcfdc85
+    am_eric 🚹 C MM minutes D ada66f0e
+    am_fenrir 🚹 B H hours C+ 98e507ec
+    am_liam 🚹 C MM minutes D c8255075
+    am_michael 🚹 B H hours C+ 9a443b79
+    am_onyx 🚹 C MM minutes D e8452be1
+    am_puck 🚹 B H hours C+ dd1d8973
+    am_santa 🚹 C M minutes 🤏 D- 7f2f7582
+
+    Name Traits Target Quality Training Duration Overall Grade SHA256
+    zf_xiaobei 🚺 C MM minutes D 9b76be63
+    zf_xiaoni 🚺 C MM minutes D 95b49f16
+    zf_xiaoxiao 🚺 C MM minutes D cfaf6f2d
+    zf_xiaoyi 🚺 C MM minutes D b5235dba
+    zm_yunjian 🚹 C MM minutes D 76cbf8ba
+    zm_yunxi 🚹 C MM minutes D dbe6e1ce
+    zm_yunxia 🚹 C MM minutes D bb2b03b0
+    zm_yunyang 🚹 C MM minutes D 5238ac22
+    """
+
+
+# 示例使用
+if __name__ == "__main__":
+    text = "你能解决什么问题"
+
+    # 获取音频文件
+    audio = tts_file_response(text, voice="zf_xiaoxiao")
+
+    # 或者获取base64编码的音频
+    # audio = tts_base64_response(text)
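The soundfile package added to requirements.txt is imported in the script above but not yet used; one plausible use is a quick sanity check on the WAV the test script writes. A minimal sketch, assuming received_audio.wav exists and that the TTS output is 24 kHz, as the Audio(rate=24000) calls above suggest:

import soundfile as sf

# Read the WAV written by tts_file_response / tts_base64_response and report its basic properties.
audio, sample_rate = sf.read("received_audio.wav")
print(f"samples: {len(audio)}, sample rate: {sample_rate} Hz, "
      f"duration: {len(audio) / sample_rate:.2f} s")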