add tts support
This commit is contained in:
parent
3e7a411132
commit
7567f882a2
48
index.html
48
index.html
@ -153,6 +153,25 @@
|
||||
color: #721c24;
|
||||
border: 1px solid #f5c6cb;
|
||||
}
|
||||
|
||||
.audio-controls {
|
||||
margin-top: 15px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.voice-selector {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
#audio-player {
|
||||
width: 100%;
|
||||
margin-top: 10px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@ -204,11 +223,35 @@
|
||||
<div class="explanation-container">
|
||||
<div class="explanation-header">
|
||||
<h5>AI讲解</h5>
|
||||
<button id="explain-btn" class="btn btn-sm btn-primary">生成讲解</button>
|
||||
<div class="d-flex gap-2">
|
||||
<button id="explain-btn" class="btn btn-sm btn-primary">生成讲解</button>
|
||||
<button id="play-btn" class="btn btn-sm btn-success" disabled>
|
||||
<i class="bi bi-play-fill"></i> 播放
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="voice-selector">
|
||||
<label for="voice-select" class="form-label mb-0">语音:</label>
|
||||
<select id="voice-select" class="form-select form-select-sm">
|
||||
<option value="zf_xiaoxiao">小小 (女)</option>
|
||||
<option value="zf_xiaoni">小妮 (女)</option>
|
||||
<option value="zf_xiaoyi">小怡 (女)</option>
|
||||
<option value="zf_xiaobei">小贝 (女)</option>
|
||||
<option value="zm_yunxi">云熙 (男)</option>
|
||||
<option value="zm_yunyang">云扬 (男)</option>
|
||||
</select>
|
||||
|
||||
<label for="speed-range" class="form-label mb-0 ms-2">语速:</label>
|
||||
<input type="range" class="form-range" id="speed-range" min="0.5" max="2.0" step="0.1" value="1.0" style="width: 80px;">
|
||||
<span id="speed-value">1.0</span>
|
||||
</div>
|
||||
|
||||
<div id="explanation-text" class="p-3">
|
||||
点击"生成讲解"按钮,AI将为您讲解当前页面的内容。
|
||||
</div>
|
||||
|
||||
<audio id="audio-player" controls style="display: none;"></audio>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -216,8 +259,9 @@
|
||||
|
||||
<div id="status-message"></div>
|
||||
|
||||
<!-- 引入Bootstrap JS -->
|
||||
<!-- 引入Bootstrap JS 和 Icons -->
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.0/font/bootstrap-icons.css">
|
||||
<script type="module" src="js/main.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
296
js/main.js
296
js/main.js
@ -13,10 +13,6 @@ class AITeacherApp {
|
||||
this.ctx = this.canvas.getContext('2d');
|
||||
this.messageTimeout = null;
|
||||
|
||||
// 讲解状态
|
||||
this.explanationsGenerated = false;
|
||||
this.explanationsGenerating = false;
|
||||
|
||||
// Live2D控制器
|
||||
this.live2dController = null;
|
||||
|
||||
@ -27,6 +23,12 @@ class AITeacherApp {
|
||||
this.global_setting = null;
|
||||
this.api_host = null;
|
||||
|
||||
// 音频相关
|
||||
this.audioPlayer = null;
|
||||
this.currentAudioBase64 = null;
|
||||
this.selectedVoice = 'zf_xiaoxiao';
|
||||
this.speechSpeed = 1.0;
|
||||
|
||||
this.init();
|
||||
}
|
||||
|
||||
@ -48,6 +50,12 @@ class AITeacherApp {
|
||||
console.error('初始化Live2D控制器时出错:', error);
|
||||
}
|
||||
|
||||
// 初始化音频播放器
|
||||
this.audioPlayer = document.getElementById('audio-player');
|
||||
|
||||
// 初始化语音和语速控制
|
||||
this.initVoiceControls();
|
||||
|
||||
await this.loadDefaultPDF();
|
||||
this.setupEventListeners();
|
||||
|
||||
@ -60,24 +68,22 @@ class AITeacherApp {
|
||||
|
||||
async loadDefaultPDF() {
|
||||
try {
|
||||
const defaultPdfPath = 'pdf/VLA4RM-仿生智能.pdf';
|
||||
const defaultPdfPath = './public/pdf/test.pdf';
|
||||
const loadingTask = pdfjsLib.getDocument(defaultPdfPath);
|
||||
this.pdfDoc = await loadingTask.promise;
|
||||
document.getElementById('page-count').textContent = this.pdfDoc.numPages;
|
||||
this.renderPage(this.pageNum);
|
||||
|
||||
// 触发服务器端PDF加载和讲解生成
|
||||
this.triggerServerPdfLoad(defaultPdfPath);
|
||||
// 通知服务器加载PDF
|
||||
this.notifyServerPdfLoad(defaultPdfPath);
|
||||
} catch (error) {
|
||||
console.error('加载PDF时出错:', error);
|
||||
this.showMessage('PDF加载失败: ' + error.message, true);
|
||||
}
|
||||
}
|
||||
|
||||
async triggerServerPdfLoad(pdfPath) {
|
||||
async notifyServerPdfLoad(pdfPath) {
|
||||
try {
|
||||
this.explanationsGenerating = true;
|
||||
this.showMessage('正在生成所有页面的讲解,请稍候...', false);
|
||||
const response = await fetch(`http://${this.api_host}/api/load_pdf`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
@ -95,49 +101,12 @@ class AITeacherApp {
|
||||
|
||||
if (data.success) {
|
||||
this.showMessage(data.message, false);
|
||||
// 开始轮询讲解生成状态
|
||||
this.pollExplanationStatus();
|
||||
} else {
|
||||
this.showMessage(data.message, true);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('触发服务器PDF加载时出错:', error);
|
||||
this.showMessage('触发服务器PDF加载时出错: ' + error.message, true);
|
||||
this.explanationsGenerating = false;
|
||||
}
|
||||
}
|
||||
|
||||
async pollExplanationStatus() {
|
||||
// 如果已经生成完毕或不在生成状态,停止轮询
|
||||
if (!this.explanationsGenerating) return;
|
||||
|
||||
try {
|
||||
const response = await fetch('/api/explanation_status');
|
||||
if (!response.ok) {
|
||||
throw new Error('服务器响应错误');
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
console.log('讲解状态:', data);
|
||||
|
||||
if (data.is_complete) {
|
||||
this.explanationsGenerated = true;
|
||||
this.explanationsGenerating = false;
|
||||
this.showMessage(`所有 ${data.total_pages} 页的讲解已生成完毕`, false);
|
||||
|
||||
// 获取当前页面的讲解
|
||||
this.fetchExplanationForCurrentPage();
|
||||
} else {
|
||||
// 更新生成进度
|
||||
const progress = Math.round((data.explanations_generated / data.total_pages) * 100);
|
||||
this.showMessage(`讲解生成中: ${progress}% (${data.explanations_generated}/${data.total_pages})`, false);
|
||||
|
||||
// 继续轮询
|
||||
setTimeout(() => this.pollExplanationStatus(), 2000);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('轮询讲解状态时出错:', error);
|
||||
this.explanationsGenerating = false;
|
||||
console.error('通知服务器加载PDF时出错:', error);
|
||||
this.showMessage('通知服务器加载PDF时出错: ' + error.message, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -162,59 +131,16 @@ class AITeacherApp {
|
||||
this.pageNumPending = null;
|
||||
}
|
||||
|
||||
// 页面渲染完成后,获取对应的讲解
|
||||
this.fetchExplanationForCurrentPage();
|
||||
// 清空讲解区域和停止音频播放
|
||||
document.getElementById('explanation-text').textContent = '点击"生成讲解"按钮获取AI讲解';
|
||||
this.stopAudio();
|
||||
document.getElementById('play-btn').disabled = true;
|
||||
});
|
||||
});
|
||||
|
||||
document.getElementById('page-num').value = num;
|
||||
}
|
||||
|
||||
async fetchExplanationForCurrentPage() {
|
||||
// 如果讲解尚未生成完毕,使用传统方式获取讲解
|
||||
if (!this.explanationsGenerated && !this.explanationsGenerating) {
|
||||
this.onExplain();
|
||||
return;
|
||||
}
|
||||
|
||||
// 如果正在生成讲解,显示等待消息
|
||||
if (this.explanationsGenerating) {
|
||||
document.getElementById('explanation-text').textContent = '正在生成讲解,请稍候...';
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// 显示加载中的消息
|
||||
document.getElementById('explanation-text').textContent = '正在获取讲解...';
|
||||
|
||||
// 从服务器获取预生成的讲解
|
||||
const response = await fetch(`/api/get_explanation/${this.pageNum}`);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('服务器响应错误');
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (data.success) {
|
||||
document.getElementById('explanation-text').textContent = data.explanation;
|
||||
|
||||
// 如果Live2D控制器已初始化,播放说话动作
|
||||
if (this.live2dController && this.live2dController.initialized) {
|
||||
this.live2dController.playMotion('Talk', 0);
|
||||
}
|
||||
} else {
|
||||
// 如果预生成的讲解不存在,使用传统方式获取讲解
|
||||
this.onExplain();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('获取预生成讲解时出错:', error);
|
||||
document.getElementById('explanation-text').textContent = '获取讲解时出错: ' + error.message;
|
||||
// 尝试使用传统方式获取讲解
|
||||
this.onExplain();
|
||||
}
|
||||
}
|
||||
|
||||
queueRenderPage(num) {
|
||||
if (this.pageRendering) {
|
||||
this.pageNumPending = num;
|
||||
@ -277,13 +203,6 @@ class AITeacherApp {
|
||||
document.getElementById('page-count').textContent = this.pdfDoc.numPages;
|
||||
this.renderPage(this.pageNum);
|
||||
this.showMessage('PDF加载成功', false);
|
||||
|
||||
// 重置讲解状态
|
||||
this.explanationsGenerated = false;
|
||||
this.explanationsGenerating = false;
|
||||
|
||||
// 对于上传的文件,我们暂时不触发服务器端讲解生成
|
||||
// 因为服务器端需要访问文件,而上传的文件仅在客户端可用
|
||||
} catch (error) {
|
||||
console.error('加载PDF时出错:', error);
|
||||
this.showMessage('PDF加载失败: ' + error.message, true);
|
||||
@ -298,23 +217,25 @@ class AITeacherApp {
|
||||
|
||||
async onExplain() {
|
||||
try {
|
||||
// 获取当前页面的文本内容
|
||||
const page = await this.pdfDoc.getPage(this.pageNum);
|
||||
const textContent = await page.getTextContent();
|
||||
const pageText = textContent.items.map(item => item.str).join(' ');
|
||||
|
||||
// 显示加载中的消息
|
||||
document.getElementById('explanation-text').textContent = '正在生成AI讲解...';
|
||||
document.getElementById('play-btn').disabled = true;
|
||||
this.stopAudio();
|
||||
|
||||
// 发送到服务器获取AI讲解
|
||||
const response = await fetch('/api/explain', {
|
||||
// 获取当前选择的语音和语速
|
||||
const voice = this.selectedVoice;
|
||||
const speed = this.speechSpeed;
|
||||
|
||||
// 发送到服务器获取AI讲解和音频
|
||||
const response = await fetch(`http://${this.api_host}/api/explain_with_audio`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
text: pageText,
|
||||
page: this.pageNum
|
||||
page: this.pageNum,
|
||||
voice: voice,
|
||||
speed: speed
|
||||
})
|
||||
});
|
||||
|
||||
@ -323,11 +244,27 @@ class AITeacherApp {
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
document.getElementById('explanation-text').textContent = data.explanation;
|
||||
|
||||
// 如果Live2D控制器已初始化,播放说话动作
|
||||
if (this.live2dController && this.live2dController.initialized) {
|
||||
this.live2dController.playMotion('Talk', 0);
|
||||
if (data.success) {
|
||||
document.getElementById('explanation-text').textContent = data.explanation;
|
||||
|
||||
// 如果有音频数据,启用播放按钮并自动播放
|
||||
if (data.audio_base64) {
|
||||
this.currentAudioBase64 = data.audio_base64;
|
||||
document.getElementById('play-btn').disabled = false;
|
||||
this.playAudio();
|
||||
} else if (data.tts_error) {
|
||||
console.error('TTS生成失败:', data.tts_error);
|
||||
this.showMessage('语音生成失败,但文本讲解已生成', true);
|
||||
}
|
||||
|
||||
// 如果Live2D控制器已初始化,播放说话动作
|
||||
if (this.live2dController && this.live2dController.initialized) {
|
||||
this.live2dController.playMotion('Talk', 0);
|
||||
}
|
||||
} else {
|
||||
document.getElementById('explanation-text').textContent = data.explanation || '生成讲解失败';
|
||||
this.showMessage('生成讲解失败', true);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('获取AI讲解时出错:', error);
|
||||
@ -335,7 +272,128 @@ class AITeacherApp {
|
||||
this.showMessage('获取AI讲解时出错: ' + error.message, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
playAudio() {
|
||||
if (!this.currentAudioBase64) {
|
||||
this.showMessage('没有可播放的音频', true);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// 将base64转换为Blob
|
||||
const byteCharacters = atob(this.currentAudioBase64);
|
||||
const byteNumbers = new Array(byteCharacters.length);
|
||||
for (let i = 0; i < byteCharacters.length; i++) {
|
||||
byteNumbers[i] = byteCharacters.charCodeAt(i);
|
||||
}
|
||||
const byteArray = new Uint8Array(byteNumbers);
|
||||
const blob = new Blob([byteArray], { type: 'audio/wav' });
|
||||
|
||||
// 创建URL并设置到音频播放器
|
||||
const audioUrl = URL.createObjectURL(blob);
|
||||
this.audioPlayer.src = audioUrl;
|
||||
this.audioPlayer.style.display = 'block';
|
||||
|
||||
// 播放音频
|
||||
this.audioPlayer.play();
|
||||
|
||||
// 更新播放按钮状态
|
||||
const playBtn = document.getElementById('play-btn');
|
||||
playBtn.innerHTML = '<i class="bi bi-pause-fill"></i> 暂停';
|
||||
playBtn.classList.remove('btn-success');
|
||||
playBtn.classList.add('btn-warning');
|
||||
|
||||
// 监听音频播放结束事件
|
||||
this.audioPlayer.onended = () => {
|
||||
playBtn.innerHTML = '<i class="bi bi-play-fill"></i> 播放';
|
||||
playBtn.classList.remove('btn-warning');
|
||||
playBtn.classList.add('btn-success');
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('播放音频时出错:', error);
|
||||
this.showMessage('播放音频时出错: ' + error.message, true);
|
||||
}
|
||||
}
|
||||
|
||||
stopAudio() {
|
||||
if (this.audioPlayer) {
|
||||
this.audioPlayer.pause();
|
||||
this.audioPlayer.currentTime = 0;
|
||||
this.audioPlayer.style.display = 'none';
|
||||
|
||||
// 更新播放按钮状态
|
||||
const playBtn = document.getElementById('play-btn');
|
||||
playBtn.innerHTML = '<i class="bi bi-play-fill"></i> 播放';
|
||||
playBtn.classList.remove('btn-warning');
|
||||
playBtn.classList.add('btn-success');
|
||||
}
|
||||
}
|
||||
|
||||
toggleAudio() {
|
||||
if (this.audioPlayer.paused) {
|
||||
this.audioPlayer.play();
|
||||
document.getElementById('play-btn').innerHTML = '<i class="bi bi-pause-fill"></i> 暂停';
|
||||
document.getElementById('play-btn').classList.remove('btn-success');
|
||||
document.getElementById('play-btn').classList.add('btn-warning');
|
||||
} else {
|
||||
this.audioPlayer.pause();
|
||||
document.getElementById('play-btn').innerHTML = '<i class="bi bi-play-fill"></i> 播放';
|
||||
document.getElementById('play-btn').classList.remove('btn-warning');
|
||||
document.getElementById('play-btn').classList.add('btn-success');
|
||||
}
|
||||
}
|
||||
|
||||
initVoiceControls() {
|
||||
// 初始化语音选择器
|
||||
const voiceSelect = document.getElementById('voice-select');
|
||||
voiceSelect.addEventListener('change', () => {
|
||||
this.selectedVoice = voiceSelect.value;
|
||||
});
|
||||
|
||||
// 初始化语速控制
|
||||
const speedRange = document.getElementById('speed-range');
|
||||
const speedValue = document.getElementById('speed-value');
|
||||
|
||||
speedRange.addEventListener('input', () => {
|
||||
this.speechSpeed = parseFloat(speedRange.value);
|
||||
speedValue.textContent = this.speechSpeed.toFixed(1);
|
||||
});
|
||||
|
||||
// 设置初始值
|
||||
this.selectedVoice = voiceSelect.value;
|
||||
this.speechSpeed = parseFloat(speedRange.value);
|
||||
speedValue.textContent = this.speechSpeed.toFixed(1);
|
||||
}
|
||||
|
||||
async loadVoices() {
|
||||
try {
|
||||
const response = await fetch(`http://${this.api_host}/api/voices`);
|
||||
if (!response.ok) {
|
||||
throw new Error('获取语音列表失败');
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
if (data.success && data.voices && data.voices.length > 0) {
|
||||
const voiceSelect = document.getElementById('voice-select');
|
||||
|
||||
// 清空现有选项
|
||||
voiceSelect.innerHTML = '';
|
||||
|
||||
// 添加新选项
|
||||
data.voices.forEach(voice => {
|
||||
const option = document.createElement('option');
|
||||
option.value = voice.id;
|
||||
option.textContent = `${voice.name} (${voice.gender === 'female' ? '女' : '男'})`;
|
||||
voiceSelect.appendChild(option);
|
||||
});
|
||||
|
||||
// 更新选中的语音
|
||||
this.selectedVoice = voiceSelect.value;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('加载语音列表时出错:', error);
|
||||
}
|
||||
}
|
||||
|
||||
showMessage(message, isError = false) {
|
||||
const statusMessage = document.getElementById('status-message');
|
||||
@ -363,10 +421,14 @@ class AITeacherApp {
|
||||
document.getElementById('zoom-reset').addEventListener('click', () => this.onZoomReset());
|
||||
document.getElementById('pdf-upload').addEventListener('change', (e) => this.onFileUpload(e));
|
||||
document.getElementById('explain-btn').addEventListener('click', () => this.onExplain());
|
||||
document.getElementById('play-btn').addEventListener('click', () => this.toggleAudio());
|
||||
document.getElementById('model-select').addEventListener('change', () => {
|
||||
const modelName = document.getElementById('model-select').value;
|
||||
this.live2dController.loadModel(modelName);
|
||||
});
|
||||
|
||||
// 尝试加载可用的语音列表
|
||||
this.loadVoices();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BIN
public/pdf/test.pdf
Normal file
BIN
public/pdf/test.pdf
Normal file
Binary file not shown.
BIN
received_audio.wav
Normal file
BIN
received_audio.wav
Normal file
Binary file not shown.
@ -30,7 +30,7 @@ networkx==3.4.2
|
||||
nibabel==5.3.2
|
||||
nipype==1.9.2
|
||||
numpy==2.2.3
|
||||
openai==1.3.0
|
||||
openai
|
||||
packaging==24.2
|
||||
pandas==2.2.3
|
||||
pathlib==1.0.1
|
||||
@ -64,3 +64,5 @@ uvicorn==0.34.0
|
||||
websockets==10.4
|
||||
Werkzeug==2.2.3
|
||||
wheel==0.45.1
|
||||
soundfile
|
||||
IPython
|
||||
327
server.py
327
server.py
@ -1,11 +1,12 @@
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
import asyncio
|
||||
import requests
|
||||
import base64
|
||||
from flask import Flask, request, jsonify, send_from_directory
|
||||
from flask_cors import CORS
|
||||
import openai
|
||||
import fitz # PyMuPDF
|
||||
import PyPDF2
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# 加载环境变量
|
||||
@ -26,6 +27,9 @@ logger = logging.getLogger(__name__)
|
||||
# Read credentials and endpoints from the environment instead of committing
# them to source control. The key that used to be hard-coded here has been
# published with the repository and should be revoked and replaced.
# NOTE(review): the variable names OPENAI_API_KEY / OPENAI_BASE_URL /
# TTS_BASE_URL are chosen here; align them with the project's .env file.
openai_api_key = os.getenv("OPENAI_API_KEY", "")
openai_base_url = os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com")

# Base URL of the TTS service
TTS_BASE_URL = os.getenv("TTS_BASE_URL", "http://feng-arch.cn:31006")

if not openai_api_key:
    logger.warning("OpenAI API key not found. AI explanation will use fallback mode.")
|
||||
|
||||
@ -41,32 +45,31 @@ except Exception as e:
|
||||
app = Flask(__name__, static_url_path='')
|
||||
CORS(app)
|
||||
|
||||
# 存储PDF文档内容和生成的讲解
|
||||
pdf_content = {
|
||||
"full_text": "",
|
||||
"pages": [],
|
||||
"explanations": []
|
||||
}
|
||||
# 存储当前加载的PDF路径
|
||||
current_pdf_path = None
|
||||
|
||||
def extract_pdf_text(pdf_path):
|
||||
"""提取PDF文档的全部文本内容和每一页的文本"""
|
||||
def extract_page_text(pdf_path, page_num):
|
||||
"""提取PDF文档指定页面的文本内容"""
|
||||
try:
|
||||
doc = fitz.open(pdf_path)
|
||||
full_text = ""
|
||||
pages = []
|
||||
|
||||
for page_num in range(len(doc)):
|
||||
page = doc.load_page(page_num)
|
||||
page_text = page.get_text()
|
||||
pages.append(page_text)
|
||||
full_text += f"\n--- 第{page_num+1}页 ---\n{page_text}"
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"full_text": full_text,
|
||||
"pages": pages,
|
||||
"page_count": len(doc)
|
||||
}
|
||||
with open(pdf_path, 'rb') as file:
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
|
||||
# 检查页码是否有效
|
||||
if page_num < 1 or page_num > len(reader.pages):
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"无效的页码: {page_num},PDF共有 {len(reader.pages)} 页"
|
||||
}
|
||||
|
||||
# 提取指定页面的文本
|
||||
page = reader.pages[page_num - 1] # 页码从1开始,但索引从0开始
|
||||
page_text = page.extract_text()
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"page_text": page_text,
|
||||
"page_count": len(reader.pages)
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting PDF text: {e}")
|
||||
return {
|
||||
@ -74,63 +77,18 @@ def extract_pdf_text(pdf_path):
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
async def generate_explanations_for_all_pages(full_text, pages):
|
||||
"""为所有页面生成讲解内容"""
|
||||
explanations = []
|
||||
client = openai.OpenAI(api_key=openai_api_key, base_url=openai_base_url)
|
||||
|
||||
# 首先让LLM理解整个文档
|
||||
try:
|
||||
logger.info("Generating context understanding from full document...")
|
||||
context_response = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": "你是一位专业的教师,需要理解整个PDF文档的内容,以便后续为每一页生成讲解。"},
|
||||
{"role": "user", "content": f"请阅读并理解以下PDF文档的全部内容,不需要回复具体内容,只需要理解:\n\n{full_text}"}
|
||||
]
|
||||
)
|
||||
context_understanding = context_response.choices[0].message.content.strip()
|
||||
logger.info("Context understanding generated successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating context understanding: {e}")
|
||||
context_understanding = "无法生成文档理解,将基于单页内容生成讲解。"
|
||||
|
||||
# 为每一页生成讲解
|
||||
for i, page_text in enumerate(pages):
|
||||
try:
|
||||
logger.info(f"Generating explanation for page {i+1}...")
|
||||
response = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": f"你是一位专业的教师,正在为学生讲解PDF文档内容。你已经理解了整个文档的内容,现在需要为第{i+1}页生成简洁的讲解。请提供清晰、简洁的解释,重点突出关键概念。你的讲解应该考虑到整个文档的上下文,而不仅仅是孤立地解释当前页面。"},
|
||||
{"role": "user", "content": f"基于你对整个文档的理解,请为第{i+1}页生成简洁的讲解:\n\n{page_text}"}
|
||||
]
|
||||
)
|
||||
explanation = response.choices[0].message.content.strip()
|
||||
explanations.append(explanation)
|
||||
logger.info(f"Explanation for page {i+1} generated successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating explanation for page {i+1}: {e}")
|
||||
explanations.append(f"生成第{i+1}页讲解时出错: {str(e)}")
|
||||
|
||||
return explanations
|
||||
|
||||
def generate_explanation(page_text, page_num=None):
|
||||
def generate_explanation(page_text):
|
||||
"""为单个页面生成讲解内容"""
|
||||
if not openai_api_key:
|
||||
return "这是一个示例讲解。请设置OpenAI API密钥以获取真实的AI讲解内容。"
|
||||
|
||||
# 如果已经有预生成的讲解,直接返回
|
||||
if pdf_content["explanations"] and page_num is not None and 0 <= page_num-1 < len(pdf_content["explanations"]):
|
||||
return pdf_content["explanations"][page_num-1]
|
||||
|
||||
try:
|
||||
client = openai.OpenAI(api_key=openai_api_key, base_url=openai_base_url)
|
||||
response = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
messages=[
|
||||
{"role": "system", "content": "你是一位专业的教师,正在为学生讲解PDF文档内容。请提供清晰、简洁的解释,重点突出关键概念。"},
|
||||
{"role": "user", "content": f"请讲解以下内容:\n\n{page_text}"}
|
||||
{"role": "user", "content": f"请讲解以下内容:\n\n{page_text},你的输出应符合讲稿的风格,句子间连贯。"}
|
||||
]
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
@ -138,6 +96,40 @@ def generate_explanation(page_text, page_num=None):
|
||||
logger.error(f"Error generating explanation: {e}")
|
||||
return f"生成讲解时出错: {str(e)}"
|
||||
|
||||
def text_to_speech(text, voice="zf_xiaoxiao", speed=1.0, timeout=(10, 300)):
    """Convert text to speech through the TTS service.

    Args:
        text: Text to synthesise.
        voice: Voice identifier passed to the TTS service.
        speed: Speech rate passed to the TTS service.
        timeout: ``requests`` timeout, either a number of seconds or a
            ``(connect, read)`` tuple. Without one, a stalled TTS server
            would block the calling request handler indefinitely.

    Returns:
        dict: ``{"success": True, "audio_base64": <str>}`` on success, or
        ``{"success": False, "error": <str>}`` when the request fails, the
        service answers with a non-200 status, or the response carries no
        audio data.
    """
    try:
        response = requests.post(
            f"{TTS_BASE_URL}/tts",
            json={
                "text": text,
                "voice": voice,
                "speed": speed,
                "return_type": "base64",
            },
            timeout=timeout,
        )

        if response.status_code != 200:
            logger.error(f"TTS API error: {response.status_code} - {response.text}")
            return {
                "success": False,
                "error": f"TTS API error: {response.status_code}"
            }

        audio_base64 = response.json().get("audio_base64")
        if not audio_base64:
            # A 200 response without audio is still a failure for callers.
            logger.error("TTS API returned no audio data")
            return {
                "success": False,
                "error": "TTS API returned no audio data"
            }

        return {
            "success": True,
            "audio_base64": audio_base64
        }
    except Exception as e:
        logger.error(f"Error in text_to_speech: {e}")
        return {
            "success": False,
            "error": str(e)
        }
|
||||
|
||||
@app.route('/')
|
||||
def index():
|
||||
return send_from_directory('', 'index.html')
|
||||
@ -152,86 +144,153 @@ def explain():
|
||||
text = data.get('text', '')
|
||||
page_num = data.get('page', None)
|
||||
|
||||
explanation = generate_explanation(text, page_num)
|
||||
return jsonify({'explanation': explanation})
|
||||
# 如果提供了页码但没有提供文本,尝试从PDF中提取
|
||||
if page_num and not text and current_pdf_path:
|
||||
result = extract_page_text(current_pdf_path, page_num)
|
||||
if result["success"]:
|
||||
text = result["page_text"]
|
||||
else:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'explanation': f"无法提取页面文本: {result['error']}"
|
||||
})
|
||||
|
||||
explanation = generate_explanation(text)
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'explanation': explanation
|
||||
})
|
||||
|
||||
@app.route('/api/tts', methods=['POST'])
def tts():
    """Synthesise speech for the posted text.

    Expects a JSON object with ``text`` and optional ``voice`` and ``speed``.
    Responds with ``{"success": True, "audio_base64": ...}`` or
    ``{"success": False, "error": ...}``.
    """
    # A missing, malformed or non-object JSON body is treated as an empty
    # request so it gets the normal "text is empty" error instead of a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    text = data.get('text', '')
    voice = data.get('voice', 'zf_xiaoxiao')
    speed = data.get('speed', 1.0)

    if not text:
        return jsonify({
            'success': False,
            'error': '文本不能为空'
        })

    # Convert the text to speech
    result = text_to_speech(text, voice, speed)

    if result["success"]:
        return jsonify({
            'success': True,
            'audio_base64': result["audio_base64"]
        })

    return jsonify({
        'success': False,
        'error': result["error"]
    })
|
||||
|
||||
@app.route('/api/explain_with_audio', methods=['POST'])
def explain_with_audio():
    """Generate an explanation for a page and synthesise it to speech.

    Reads ``text``, ``page``, ``voice`` and ``speed`` from the JSON body.
    When no text is supplied but a page number and a loaded PDF are
    available, the page text is extracted from that PDF first. A TTS failure
    does not fail the request: the explanation is still returned, with
    ``audio_base64`` set to ``None`` and the reason in ``tts_error``.
    """
    payload = request.json
    text = payload.get('text', '')
    page_num = payload.get('page', None)
    voice = payload.get('voice', 'zf_xiaoxiao')
    speed = payload.get('speed', 1.0)

    # Fall back to server-side extraction when only a page number was sent.
    if page_num and not text and current_pdf_path:
        extracted = extract_page_text(current_pdf_path, page_num)
        if not extracted["success"]:
            return jsonify({
                'success': False,
                'explanation': f"无法提取页面文本: {extracted['error']}",
                'error': extracted["error"]
            })
        text = extracted["page_text"]

    # Generate the explanation, then turn it into speech.
    explanation = generate_explanation(text)
    speech = text_to_speech(explanation, voice, speed)

    body = {
        'success': True,
        'explanation': explanation
    }
    if speech["success"]:
        body['audio_base64'] = speech["audio_base64"]
    else:
        body['audio_base64'] = None
        body['tts_error'] = speech["error"]
    return jsonify(body)
|
||||
|
||||
@app.route('/api/load_pdf', methods=['POST'])
|
||||
def load_pdf():
|
||||
global current_pdf_path
|
||||
|
||||
data = request.json
|
||||
pdf_path = data.get('path', './public/pdf/VLA4RM-仿生智能.pdf')
|
||||
pdf_path = data.get('path', './public/pdf/test.pdf')
|
||||
|
||||
# 提取PDF文本
|
||||
result = extract_pdf_text(pdf_path)
|
||||
|
||||
if result["success"]:
|
||||
# 更新全局PDF内容
|
||||
pdf_content["full_text"] = result["full_text"]
|
||||
pdf_content["pages"] = result["pages"]
|
||||
try:
|
||||
# 检查PDF是否存在
|
||||
if not os.path.exists(pdf_path):
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': f'PDF文件不存在: {pdf_path}'
|
||||
})
|
||||
|
||||
# 异步生成所有页面的讲解
|
||||
async def process_explanations():
|
||||
explanations = await generate_explanations_for_all_pages(
|
||||
result["full_text"],
|
||||
result["pages"]
|
||||
)
|
||||
pdf_content["explanations"] = explanations
|
||||
logger.info(f"Generated explanations for all {len(explanations)} pages")
|
||||
# 尝试打开PDF以验证其有效性
|
||||
with open(pdf_path, 'rb') as file:
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
page_count = len(reader.pages)
|
||||
|
||||
# 启动异步任务
|
||||
asyncio.run(process_explanations())
|
||||
# 更新当前PDF路径
|
||||
current_pdf_path = pdf_path
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'message': '已加载PDF并开始生成讲解',
|
||||
'page_count': result["page_count"]
|
||||
'message': '已成功加载PDF',
|
||||
'page_count': page_count
|
||||
})
|
||||
else:
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading PDF: {e}")
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': f'加载PDF失败: {result["error"]}'
|
||||
'message': f'加载PDF失败: {str(e)}'
|
||||
})
|
||||
|
||||
@app.route('/api/get_explanation/<int:page_num>', methods=['GET'])
|
||||
def get_explanation(page_num):
|
||||
if 0 <= page_num-1 < len(pdf_content["explanations"]):
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'explanation': pdf_content["explanations"][page_num-1]
|
||||
})
|
||||
else:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': f'页码 {page_num} 的讲解不存在'
|
||||
})
|
||||
|
||||
@app.route('/api/explanation_status', methods=['GET'])
|
||||
def explanation_status():
|
||||
@app.route('/api/voices', methods=['GET'])
|
||||
def get_voices():
|
||||
"""获取可用的TTS声音列表"""
|
||||
voices = [
|
||||
{"id": "zf_xiaoxiao", "name": "小小", "gender": "female", "lang": "zh"},
|
||||
{"id": "zf_xiaoni", "name": "小妮", "gender": "female", "lang": "zh"},
|
||||
{"id": "zf_xiaoyi", "name": "小怡", "gender": "female", "lang": "zh"},
|
||||
{"id": "zf_xiaobei", "name": "小贝", "gender": "female", "lang": "zh"},
|
||||
{"id": "zm_yunxi", "name": "云熙", "gender": "male", "lang": "zh"},
|
||||
{"id": "zm_yunyang", "name": "云扬", "gender": "male", "lang": "zh"},
|
||||
{"id": "zm_yunxia", "name": "云夏", "gender": "male", "lang": "zh"},
|
||||
{"id": "zm_yunjian", "name": "云健", "gender": "male", "lang": "zh"},
|
||||
{"id": "af_heart", "name": "Heart", "gender": "female", "lang": "en"},
|
||||
{"id": "af_bella", "name": "Bella", "gender": "female", "lang": "en"},
|
||||
{"id": "am_michael", "name": "Michael", "gender": "male", "lang": "en"},
|
||||
{"id": "am_puck", "name": "Puck", "gender": "male", "lang": "en"}
|
||||
]
|
||||
|
||||
return jsonify({
|
||||
'total_pages': len(pdf_content["pages"]),
|
||||
'explanations_generated': len(pdf_content["explanations"]),
|
||||
'is_complete': len(pdf_content["pages"]) > 0 and len(pdf_content["pages"]) == len(pdf_content["explanations"])
|
||||
'success': True,
|
||||
'voices': voices
|
||||
})
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 在启动时预加载默认PDF
|
||||
default_pdf_path = './VLA4RM-仿生智能.pdf'
|
||||
# 设置默认PDF路径
|
||||
default_pdf_path = './public/pdf/test.pdf'
|
||||
if os.path.exists(default_pdf_path):
|
||||
logger.info(f"Pre-loading default PDF: {default_pdf_path}")
|
||||
result = extract_pdf_text(default_pdf_path)
|
||||
if result["success"]:
|
||||
pdf_content["full_text"] = result["full_text"]
|
||||
pdf_content["pages"] = result["pages"]
|
||||
|
||||
# 异步生成所有页面的讲解
|
||||
async def process_explanations():
|
||||
explanations = await generate_explanations_for_all_pages(
|
||||
result["full_text"],
|
||||
result["pages"]
|
||||
)
|
||||
pdf_content["explanations"] = explanations
|
||||
logger.info(f"Generated explanations for all {len(explanations)} pages")
|
||||
|
||||
# 启动异步任务
|
||||
asyncio.run(process_explanations())
|
||||
current_pdf_path = default_pdf_path
|
||||
logger.info(f"默认PDF已设置: {default_pdf_path}")
|
||||
|
||||
app.run(host='0.0.0.0', port=port, debug=True)
|
||||
118
tts_test.py
Normal file
118
tts_test.py
Normal file
@ -0,0 +1,118 @@
|
||||
import requests
|
||||
import base64
|
||||
import json
|
||||
import io
|
||||
from IPython.display import Audio
|
||||
import soundfile as sf
|
||||
|
||||
# Base URL of the FastAPI TTS service (this points at a remote deployment; change it when running the service locally)
|
||||
BASE_URL = "http://feng-arch.cn:31006"
|
||||
|
||||
def tts_file_response(text, voice="af_heart", speed=1.0, lang_code="z"):
    """Request speech for ``text`` and receive the audio as a file body.

    On HTTP 200 the response body is written to ``received_audio.wav`` and
    returned wrapped in an IPython ``Audio`` object (playable in a notebook).
    On any other status the status code and response text are printed and
    ``None`` is returned. ``lang_code`` is accepted but not sent to the
    service.
    """
    response = requests.post(
        f"{BASE_URL}/tts",
        json={
            "text": text,
            "voice": voice,
            "speed": speed,
            "return_type": "file"
        },
    )

    if response.status_code != 200:
        print(f"错误: {response.status_code}")
        print(response.text)
        return None

    # Save the audio to disk
    with open("received_audio.wav", "wb") as wav_file:
        wav_file.write(response.content)
    print("音频已保存为 received_audio.wav")

    # Playable directly when running inside a Jupyter notebook
    return Audio(data=response.content, rate=24000)
|
||||
|
||||
def tts_base64_response(text, voice="af_heart", speed=1.0, lang_code="z"):
    """Request speech for ``text`` and receive it as base64-encoded audio.

    On success the decoded audio is written to ``received_audio.wav`` and
    returned wrapped in an IPython ``Audio`` object (playable in a notebook).
    On a non-200 status, or when the response contains no ``audio_base64``
    field, the problem is printed and ``None`` is returned. ``lang_code`` is
    accepted but not sent to the service.
    """
    url = f"{BASE_URL}/tts"
    payload = {
        "text": text,
        "voice": voice,
        "speed": speed,
        "return_type": "base64"
    }

    response = requests.post(url, json=payload)

    if response.status_code != 200:
        print(f"错误: {response.status_code}")
        print(response.text)
        return None

    audio_base64 = response.json().get("audio_base64")
    if not audio_base64:
        # b64decode(None) would raise TypeError; report it like other failures.
        print(f"错误: {response.status_code}")
        print(response.text)
        return None

    # Decode the base64 payload
    audio_data = base64.b64decode(audio_base64)

    # Save the audio to disk
    with open("received_audio.wav", "wb") as f:
        f.write(audio_data)
    print("音频已保存为 received_audio.wav")

    # Playable directly when running inside a Jupyter notebook
    return Audio(data=audio_data, rate=24000)
|
||||
|
||||
def get_available_voices(lang_code="z"):
    """Return the names of the known voices for the given language code.

    Args:
        lang_code: Prefix to match voice names against. The tables below
            list English voices under ``af_``/``am_`` and Chinese voices
            under ``zf_``/``zm_``.
            NOTE(review): assumes the first letter of a voice name is its
            language code, matching the ``"z"`` default used in this file;
            confirm against the TTS service.

    Returns:
        list[str]: Voice names starting with ``lang_code``, in table order.
        Empty when no voice matches.

    Reference tables:

    Name Traits Target Quality Training Duration Overall Grade SHA256
    af_heart 🚺❤️ A 0ab5709b
    af_alloy 🚺 B MM minutes C 6d877149
    af_aoede 🚺 B H hours C+ c03bd1a4
    af_bella 🚺🔥 A HH hours A- 8cb64e02
    af_jessica 🚺 C MM minutes D cdfdccb8
    af_kore 🚺 B H hours C+ 8bfbc512
    af_nicole 🚺🎧 B HH hours B- c5561808
    af_nova 🚺 B MM minutes C e0233676
    af_river 🚺 C MM minutes D e149459b
    af_sarah 🚺 B H hours C+ 49bd364e
    af_sky 🚺 B M minutes 🤏 C- c799548a
    am_adam 🚹 D H hours F+ ced7e284
    am_echo 🚹 C MM minutes D 8bcfdc85
    am_eric 🚹 C MM minutes D ada66f0e
    am_fenrir 🚹 B H hours C+ 98e507ec
    am_liam 🚹 C MM minutes D c8255075
    am_michael 🚹 B H hours C+ 9a443b79
    am_onyx 🚹 C MM minutes D e8452be1
    am_puck 🚹 B H hours C+ dd1d8973
    am_santa 🚹 C M minutes 🤏 D- 7f2f7582

    Name Traits Target Quality Training Duration Overall Grade SHA256
    zf_xiaobei 🚺 C MM minutes D 9b76be63
    zf_xiaoni 🚺 C MM minutes D 95b49f16
    zf_xiaoxiao 🚺 C MM minutes D cfaf6f2d
    zf_xiaoyi 🚺 C MM minutes D b5235dba
    zm_yunjian 🚹 C MM minutes D 76cbf8ba
    zm_yunxi 🚹 C MM minutes D dbe6e1ce
    zm_yunxia 🚹 C MM minutes D bb2b03b0
    zm_yunyang 🚹 C MM minutes D 5238ac22
    """
    # Voice names in the same order as the reference tables above.
    known_voices = (
        "af_heart", "af_alloy", "af_aoede", "af_bella", "af_jessica",
        "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
        "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
        "am_michael", "am_onyx", "am_puck", "am_santa",
        "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
        "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
    )
    if not lang_code:
        return []
    return [name for name in known_voices if name.startswith(lang_code)]
|
||||
|
||||
|
||||
# 示例使用
|
||||
if __name__ == "__main__":
    sample_text = "你能解决什么问题"

    # Fetch the speech as an audio file response
    audio = tts_file_response(sample_text, voice="zf_xiaoxiao")

    # Alternatively, fetch it as base64-encoded audio:
    # audio = tts_base64_response(sample_text)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user