趁着MiMo-V2-TTS免费,写了一个网页端页面

2026-04-11 11:15 1阅读 0评论 SEO资讯
  • 内容介绍
  • 文章标签
  • 相关推荐
问题描述:

废话不多说,直接上图。
截屏2026-03-20 12.57.25 1608×1440 155 KB

HTML源码

<!DOCTYPE html>
<!--
  MiMo TTS single-page client.
  The page collects an API key, a voice (built-in or an uploaded WAV sample),
  an optional style and a user/assistant message pair, posts them to the MiMo
  chat-completions endpoint, and plays / downloads the returned audio.
-->
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MiMo TTS 语音合成</title>
<style>
  * { margin: 0; padding: 0; box-sizing: border-box; }

  body {
    font-family: Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
    -webkit-font-smoothing: antialiased;
    background: #fafafa;
    color: #171717;
    min-height: 100vh;
    display: flex;
    align-items: center;
    justify-content: center;
    padding: 40px 24px;
  }

  .container {
    width: 100%;
    max-width: 900px;
    padding: 32px;
    background: #ffffff;
    border-radius: 12px;
    border: 1px solid #e5e5e5;
    transition: all 0.2s ease;
  }
  .container:hover {
    border-color: #d4d4d4;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.07);
  }

  h1 {
    font-size: 2rem;
    font-weight: 700;
    letter-spacing: -0.02em;
    margin-bottom: 8px;
    color: #171717;
  }
  .subtitle {
    font-size: 0.9375rem;
    color: #737373;
    margin-bottom: 32px;
    line-height: 1.5;
  }

  .field { margin-bottom: 24px; }
  .field.hidden { display: none; }
  .row { display: flex; gap: 16px; }
  .row .field { flex: 1; }

  label {
    display: block;
    font-size: 0.75rem;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 0.05em;
    color: #737373;
    margin-bottom: 8px;
  }

  input[type="text"],
  input[type="password"],
  select,
  textarea {
    width: 100%;
    background: #ffffff;
    border: 1px solid #e5e5e5;
    border-radius: 8px;
    color: #171717;
    padding: 12px 14px;
    font-size: 0.875rem;
    font-family: inherit;
    outline: none;
    transition: border-color 0.2s ease;
  }
  textarea { resize: vertical; min-height: 100px; line-height: 1.6; }
  input:focus, select:focus, textarea:focus { border-color: #000000; }
  select option { background: #ffffff; }

  small {
    font-size: 0.75rem;
    color: #a3a3a3;
    margin-top: 6px;
    display: block;
    line-height: 1.5;
  }
  small > div + div { margin-top: 8px; }

  code {
    background: #fafafa;
    padding: 2px 6px;
    border-radius: 4px;
    font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace;
    color: #737373;
    font-size: 0.6875rem;
    border: 1px solid #e5e5e5;
  }

  .style-chips { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 8px; }
  .chip {
    padding: 6px 12px;
    background: #fafafa;
    border: 1px solid #e5e5e5;
    border-radius: 6px;
    font-size: 0.75rem;
    font-weight: 600;
    color: #737373;
    cursor: pointer;
    transition: all 0.15s ease;
    user-select: none;
    letter-spacing: 0.02em;
  }
  .chip:hover { border-color: #d4d4d4; background: #ffffff; transform: translateY(-1px); }
  .chip.active { background: #000000; border-color: #000000; color: #ffffff; }

  button {
    width: 100%;
    padding: 12px 20px;
    background: #000000;
    color: #ffffff;
    border: none;
    border-radius: 8px;
    font-size: 0.875rem;
    font-weight: 600;
    font-family: inherit;
    cursor: pointer;
    transition: all 0.15s ease;
  }
  button:hover:not(:disabled):not(.icon-btn) {
    background: #1a1a1a;
    transform: translateY(-1px);
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  }
  button:active:not(:disabled) { transform: translateY(0); }
  button:disabled { opacity: 0.5; cursor: not-allowed; transform: none; }

  .status {
    margin-top: 20px;
    padding: 12px 16px;
    background: #ffffff;
    border-radius: 8px;
    font-size: 0.875rem;
    font-weight: 500;
    display: none;
    line-height: 1.5;
  }
  .status.error { color: #ef4444; background: #fef2f2; border: 1px solid rgba(239, 68, 68, 0.2); }
  .status.success { color: #10b981; background: #f0fdf4; border: 1px solid rgba(16, 185, 129, 0.2); }
  .status.loading { color: #3b82f6; background: #eff6ff; border: 1px solid rgba(59, 130, 246, 0.2); }

  .player { margin-top: 20px; display: none; }
  .player-wrapper { display: flex; align-items: center; gap: 12px; }
  .player audio { flex: 1; border-radius: 8px; outline: none; }

  .icon-btn {
    width: 44px;
    height: 44px;
    padding: 0;
    background: #ffffff;
    border: 1px solid #e5e5e5;
    border-radius: 8px;
    display: flex;
    align-items: center;
    justify-content: center;
    cursor: pointer;
    transition: all 0.15s ease;
    flex-shrink: 0;
  }
  .icon-btn:hover { border-color: #d4d4d4; background: #fafafa; transform: translateY(-1px); }
  .icon-btn:hover svg { color: #000000; }
  .icon-btn:active { transform: translateY(0); }
  .icon-btn svg { color: #171717; transition: color 0.15s ease; }

  .examples { margin-top: 32px; padding-top: 24px; border-top: 1px solid #e5e5e5; }
  .examples h3 {
    font-size: 0.75rem;
    font-weight: 600;
    color: #737373;
    margin-bottom: 12px;
    text-transform: uppercase;
    letter-spacing: 0.05em;
  }
  .examples-grid {
    display: grid;
    gap: 16px;
    grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
  }
  .example {
    background: #fafafa;
    padding: 16px;
    border-radius: 8px;
    border: 1px solid #e5e5e5;
    font-size: 0.875rem;
    line-height: 1.6;
    color: #737373;
    transition: all 0.2s ease;
  }
  .example:hover { border-color: #d4d4d4; background: #ffffff; }
  .example strong {
    color: #171717;
    font-weight: 600;
    display: block;
    margin-bottom: 6px;
    font-size: 0.875rem;
  }
  .example code {
    background: #ffffff;
    display: block;
    padding: 8px;
    margin-top: 6px;
    border-radius: 6px;
    font-size: 0.75rem;
  }

  @keyframes spin { to { transform: rotate(360deg); } }
  .loading-spinner {
    display: inline-block;
    width: 14px;
    height: 14px;
    border: 2px solid #3b82f6;
    border-top-color: transparent;
    border-radius: 50%;
    animation: spin 0.6s linear infinite;
    margin-right: 8px;
    vertical-align: middle;
  }

  @media (max-width: 768px) {
    body { padding: 24px 16px; }
    .container { padding: 24px; }
    h1 { font-size: 1.75rem; }
    .examples-grid { grid-template-columns: 1fr; }
  }
  @media (prefers-reduced-motion: reduce) {
    * { animation-duration: 0.01ms !important; transition-duration: 0.01ms !important; }
  }
</style>
</head>
<body>
<div class="container">
  <h1>MiMo TTS</h1>
  <p class="subtitle">输入文字,选择风格,生成自然语音</p>

  <div class="field">
    <label for="apiKey">API Key</label>
    <input type="password" id="apiKey" placeholder="输入你的 MIMO_API_KEY">
  </div>

  <div class="row">
    <div class="field">
      <label for="voiceSelect">音色</label>
      <select id="voiceSelect">
        <option value="mimo_default">默认 (mimo_default)</option>
        <option value="default_zh">中文女声 (default_zh)</option>
        <option value="default_en">英文女声 (default_en)</option>
        <option value="custom">自定义音色(上传 WAV)</option>
      </select>
    </div>
    <div class="field">
      <label for="styleInput">风格(可选)</label>
      <input type="text" id="styleInput" placeholder="如:开心、东北话、语速慢">
    </div>
  </div>

  <div class="field hidden" id="voiceSampleField">
    <label for="voiceSample">上传参考音频 (WAV, 5-15s)</label>
    <input type="file" id="voiceSample" accept=".wav,audio/wav">
    <small>上传 5-15 秒的 WAV 文件作为参考音色</small>
  </div>

  <div class="field">
    <label>快捷风格</label>
    <div class="style-chips">
      <span class="chip" data-style="开心">开心</span>
      <span class="chip" data-style="悲伤">悲伤</span>
      <span class="chip" data-style="生气">生气</span>
      <span class="chip" data-style="悄悄话">悄悄话</span>
      <span class="chip" data-style="东北话">东北话</span>
      <span class="chip" data-style="粤语">粤语</span>
      <span class="chip" data-style="变快">变快</span>
      <span class="chip" data-style="变慢">变慢</span>
      <span class="chip" data-style="唱歌">唱歌</span>
      <span class="chip" data-style="像个大将军">大将军</span>
    </div>
  </div>

  <div class="field">
    <label for="userMessage">用户消息(可选)</label>
    <textarea id="userMessage" placeholder="输入用户说的话,用于调整语气...">你好,MiMo,你吃午饭了吗?</textarea>
  </div>

  <div class="field">
    <label for="assistantMessage">助手回复(必填)</label>
    <textarea id="assistantMessage" placeholder="输入助手的回复...(可使用风格标签)">是的,我吃了一个三明治。</textarea>
    <small>
      <div>整体风格:<code>&lt;style&gt;开心&lt;/style&gt;明天就是周五了!</code></div>
      <div>细粒度控制:<code>(紧张,深呼吸)呼……冷静。(语速加快)不就是一个面试吗!</code></div>
    </small>
  </div>

  <button id="btnGenerate">生成语音</button>
  <div id="status" class="status"></div>

  <div id="player" class="player">
    <div class="player-wrapper">
      <audio id="audio" controls></audio>
      <button id="btnDownloadWav" class="icon-btn" title="下载 WAV">
        <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
          <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
          <polyline points="7 10 12 15 17 10"></polyline>
          <line x1="12" y1="15" x2="12" y2="3"></line>
        </svg>
      </button>
    </div>
  </div>

  <div class="examples">
    <h3>使用示例</h3>
    <div class="examples-grid">
      <div class="example">
        <strong>整体风格控制</strong>
        在文本开头添加风格标签
        <code>&lt;style&gt;开心&lt;/style&gt;明天就是周五了,真开心!</code>
      </div>
      <div class="example">
        <strong>细粒度控制</strong>
        使用括号进行精确控制
        <code>(紧张,深呼吸)呼……冷静。(语速加快)不就是一个面试吗!</code>
      </div>
      <div class="example">
        <strong>方言示例</strong>
        支持多种方言风格
        <code>&lt;style&gt;东北话&lt;/style&gt;哎呀妈呀,这天儿也忒冷了吧!</code>
      </div>
    </div>
  </div>
</div>

<script>
'use strict';

// ==================== Constants ====================
const API_URL = 'https://api.xiaomimimo.com/v1/chat/completions';
const MODEL_NAME = 'mimo-v2-audio-tts';
const API_KEY_STORAGE_KEY = 'mimo_api_key';

// ==================== State and element references ====================
// The most recently generated audio and the object URL that backs <audio>.
let audioBlob = null;
let audioUrl = null;

const audio = document.getElementById('audio');
const apiKeyInput = document.getElementById('apiKey');
const voiceSelect = document.getElementById('voiceSelect');
const styleInput = document.getElementById('styleInput');
const userMessageInput = document.getElementById('userMessage');
const assistantMessageInput = document.getElementById('assistantMessage');
const btnGenerate = document.getElementById('btnGenerate');
const btnDownloadWav = document.getElementById('btnDownloadWav');
const statusDiv = document.getElementById('status');
const playerDiv = document.getElementById('player');
const voiceSampleField = document.getElementById('voiceSampleField');
const voiceSampleInput = document.getElementById('voiceSample');
const chips = document.querySelectorAll('.chip');

// ==================== Initialisation ====================
// Restore the API key from localStorage and persist it whenever it changes.
apiKeyInput.value = localStorage.getItem(API_KEY_STORAGE_KEY) || '';
apiKeyInput.addEventListener('change', () => {
  localStorage.setItem(API_KEY_STORAGE_KEY, apiKeyInput.value);
});

// ==================== Style-tag helpers ====================

/** Remove every `<style>…</style>` tag (and trailing whitespace) from `text`. */
function stripStyleTags(text) {
  return text.replace(/<style>.*?<\/style>\s*/g, '');
}

/** Report whether `text` already contains a `<style>…</style>` tag. */
function hasStyleTag(text) {
  return /<style>.*?<\/style>/.test(text);
}

/**
 * Prefix `text` with a `<style>` tag built from `style`.
 * A tag already present in the text wins, so the text is returned unchanged
 * when it has one or when `style` is empty.
 */
function applyStyle(text, style) {
  if (!style || hasStyleTag(text)) {
    return text;
  }
  return `<style>${style}</style>${text}`;
}

/** Deactivate every quick-style chip. */
function clearActiveChips() {
  chips.forEach((chip) => chip.classList.remove('active'));
}

// ==================== UI event listeners ====================
// Clicking a chip toggles it and mirrors the choice into both the style input
// and a leading <style> tag in the assistant message.
chips.forEach((chip) => {
  chip.addEventListener('click', () => {
    const wasActive = chip.classList.contains('active');
    const plainText = stripStyleTags(assistantMessageInput.value.trim());
    clearActiveChips();

    if (wasActive) {
      styleInput.value = '';
      assistantMessageInput.value = plainText;
      return;
    }
    chip.classList.add('active');
    styleInput.value = chip.dataset.style;
    assistantMessageInput.value = `<style>${chip.dataset.style}</style>${plainText}`;
  });
});

// Typing a style by hand deselects any chip.
styleInput.addEventListener('input', clearActiveChips);

// Keep chips and the style input in sync with tags typed into the message.
assistantMessageInput.addEventListener('input', () => {
  const text = assistantMessageInput.value;
  const previouslyActive = document.querySelector('.chip.active');
  let matchedStyle = null;

  chips.forEach((chip) => {
    const isMatch = text.includes(`<style>${chip.dataset.style}</style>`);
    chip.classList.toggle('active', isMatch);
    if (isMatch) {
      matchedStyle = chip.dataset.style;
    }
  });

  if (matchedStyle !== null) {
    styleInput.value = matchedStyle;
  } else if (previouslyActive && styleInput.value === previouslyActive.dataset.style) {
    // The chip's tag was edited out of the text: drop the mirrored style so it
    // is not re-applied on generate. A hand-typed style is left untouched.
    styleInput.value = '';
  }
});

// The sample-upload field is only relevant for the custom voice.
voiceSelect.addEventListener('change', () => {
  voiceSampleField.classList.toggle('hidden', voiceSelect.value !== 'custom');
});

// ==================== Utilities ====================

/**
 * Show a status message, or hide the status box when `message` is empty.
 * The message is inserted as text (never as HTML) because it can contain
 * raw text returned by the remote API.
 *
 * @param {string} message - Text to display.
 * @param {string} [type] - '', 'error', 'success' or 'loading' (adds a spinner).
 */
function setStatus(message, type = '') {
  statusDiv.style.display = message ? 'block' : 'none';
  statusDiv.className = type ? `status ${type}` : 'status';
  statusDiv.textContent = '';
  if (type === 'loading') {
    const spinner = document.createElement('span');
    spinner.className = 'loading-spinner';
    statusDiv.appendChild(spinner);
  }
  statusDiv.appendChild(document.createTextNode(message ?? ''));
}

/**
 * Read a File and resolve with its contents as a base64 string
 * (the part of the data URL after the comma).
 *
 * @param {File} file
 * @returns {Promise<string>}
 */
function fileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result.split(',')[1]);
    // Reject with the reader's error so callers get a usable `.message`.
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(file);
  });
}

/** Write the characters of an ASCII string into `view` starting at `offset`. */
function writeStr(view, offset, str) {
  for (let i = 0; i < str.length; i++) {
    view.setUint8(offset + i, str.charCodeAt(i));
  }
}

/**
 * Prepend a 44-byte RIFF/WAVE header (PCM format tag 1) to raw sample bytes.
 *
 * @param {Uint8Array} pcmData - Raw sample bytes.
 * @param {number} sampleRate - Samples per second.
 * @param {number} bitsPerSample - Bits per sample.
 * @param {number} numChannels - Channel count.
 * @returns {Uint8Array} Header followed by `pcmData`.
 */
function wrapPcmToWav(pcmData, sampleRate, bitsPerSample, numChannels) {
  const blockAlign = numChannels * bitsPerSample / 8;
  const byteRate = sampleRate * blockAlign;
  const dataSize = pcmData.length;
  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);

  writeStr(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true);
  writeStr(view, 8, 'WAVE');
  writeStr(view, 12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);
  writeStr(view, 36, 'data');
  view.setUint32(40, dataSize, true);

  new Uint8Array(buffer, 44).set(pcmData);
  return new Uint8Array(buffer);
}

/** Report whether `bytes` starts with the ASCII magic "RIFF". */
function isRiff(bytes) {
  return bytes[0] === 0x52 && bytes[1] === 0x49 && bytes[2] === 0x46 && bytes[3] === 0x46;
}

// ==================== Core ====================

/**
 * Build the JSON payload for the chat-completions request.
 * Throws when the custom voice is selected but no sample file was chosen.
 *
 * @param {string} voice - Selected voice id, or 'custom'.
 * @param {string} userMessage - Optional user turn ('' to omit).
 * @param {string} assistantMessage - Text to synthesise.
 * @returns {Promise<object>}
 */
async function buildPayload(voice, userMessage, assistantMessage) {
  const audioOptions = { format: 'wav' };
  if (voice === 'custom') {
    const file = voiceSampleInput.files[0];
    if (!file) {
      throw new Error('请上传参考音频文件(WAV 格式)');
    }
    audioOptions.voice_audio = { format: 'wav', data: await fileToBase64(file) };
  } else {
    audioOptions.voice = voice;
  }

  const messages = [];
  if (userMessage) {
    messages.push({ role: 'user', content: userMessage });
  }
  messages.push({ role: 'assistant', content: assistantMessage });

  return { model: MODEL_NAME, audio: audioOptions, messages };
}

/**
 * Validate the form, request speech from the API and load it into the player.
 * All failures are reported through the status box; nothing is thrown.
 */
async function generateAudio() {
  // The keyboard shortcut bypasses the disabled button, so guard re-entry here.
  if (btnGenerate.disabled) {
    return;
  }

  const apiKey = apiKeyInput.value.trim();
  const userMessage = userMessageInput.value.trim();
  // A style typed into the style input is applied here unless the text
  // already carries its own <style> tag.
  const assistantMessage = applyStyle(
    assistantMessageInput.value.trim(),
    styleInput.value.trim(),
  );
  const voice = voiceSelect.value;

  if (!apiKey) {
    setStatus('请输入 API Key', 'error');
    return;
  }
  if (!assistantMessageInput.value.trim()) {
    setStatus('请输入助手回复(这是生成语音的目标文本)', 'error');
    return;
  }

  btnGenerate.disabled = true;
  setStatus('正在生成语音...', 'loading');
  playerDiv.style.display = 'none';

  try {
    const payload = await buildPayload(voice, userMessage, assistantMessage);

    const response = await fetch(API_URL, {
      method: 'POST',
      headers: { 'api-key': apiKey, 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`API 请求失败: ${response.status} - ${errorText.slice(0, 200)}`);
    }

    const data = await response.json();
    const audioData = data.choices?.[0]?.message?.audio?.data;
    if (!audioData) {
      throw new Error('响应中没有音频数据');
    }

    // Decode the base64 audio; anything without a RIFF header is treated as
    // raw PCM and wrapped as 24 kHz / 16-bit / mono WAV.
    const raw = Uint8Array.from(atob(audioData), (ch) => ch.charCodeAt(0));
    const wavBytes = isRiff(raw) ? raw : wrapPcmToWav(raw, 24000, 16, 1);

    audioBlob = new Blob([wavBytes], { type: 'audio/wav' });
    if (audioUrl) {
      URL.revokeObjectURL(audioUrl);
    }
    audioUrl = URL.createObjectURL(audioBlob);
    audio.src = audioUrl;

    // Reveal the player only once metadata (duration) is available.
    audio.addEventListener('loadedmetadata', () => {
      const sizeKB = (wavBytes.length / 1024).toFixed(1);
      const hasDuration = Number.isFinite(audio.duration) && audio.duration > 0;
      const durationInfo = hasDuration ? ` | 时长: ${audio.duration.toFixed(1)}s` : '';
      const stats = data.usage ? ` | Token: ${data.usage.total_tokens}` : '';
      setStatus(`生成成功 — ${sizeKB} KB${durationInfo}${stats}`, 'success');
      playerDiv.style.display = 'block';
      // Autoplay is best-effort: browsers may block it, which is not an error.
      audio.play().catch(() => {});
    }, { once: true });

    // Explicitly trigger metadata loading.
    audio.load();
  } catch (error) {
    console.error('生成失败:', error);
    setStatus(`${error?.message ?? String(error)}`, 'error');
  } finally {
    btnGenerate.disabled = false;
  }
}

/** Download the most recently generated audio as a WAV file. */
function downloadWav() {
  if (!audioBlob || !audioUrl) {
    setStatus('请先生成语音', 'error');
    return;
  }
  // Reuse the object URL that backs the player; it is revoked only when a new
  // clip replaces it, so the download cannot race with a revoke.
  const link = document.createElement('a');
  link.href = audioUrl;
  link.download = `mimo_tts_${Date.now()}.wav`;
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  setStatus('WAV 文件已下载', 'success');
}

// ==================== Event binding ====================
btnGenerate.addEventListener('click', generateAudio);
btnDownloadWav.addEventListener('click', downloadWav);

// Ctrl/Cmd + Enter generates from anywhere on the page.
document.addEventListener('keydown', (e) => {
  if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
    e.preventDefault();
    generateAudio();
  }
});

// The media 'error' event carries no message; details live on audio.error.
audio.addEventListener('error', () => {
  const mediaError = audio.error;
  const detail = mediaError
    ? (mediaError.message || `code ${mediaError.code}`)
    : '未知错误';
  setStatus(`播放错误: ${detail}`, 'error');
});
</script>
</body>
</html>

网友解答:
--【壹】--:

mimo 的 tts 有没有 rpm 和并发限制呢


--【贰】--:

真巧,我也写了一个

mimotts.iloli.love

MiMo 语音合成


--【叁】--:

效果凑合


--【肆】--:

这个模型有音色克隆吗?


--【伍】--:

好快的佬,晚上部署弄到maibot里试试,这模型音色克隆效果可以吗?


--【陆】--:

感谢佬的网页,赞


--【柒】--:

厉害呀!


--【捌】--:

可以涩涩吗


--【玖】--:

soga,以为只能合成机器声音


--【拾】--:

感谢~
另外想问问,有开源的可以换歌声的TTS吗?


--【拾壹】--:

不支持。
image1920×919 69.6 KB


--【拾贰】--:

可以上传一段自己的声音来模拟


--【拾叁】--:

大佬牛皮