趁着MiMo-V2-TTS免费,写了一个网页端页面
- 内容介绍
- 文章标签
- 相关推荐
废话不多说,直接上图。
截屏2026-03-20 12.57.25(1608×1440,155 KB)
HTML源码
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MiMo TTS 语音合成</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
-webkit-font-smoothing: antialiased;
background: #fafafa;
color: #171717;
min-height: 100vh;
display: flex;
align-items: center;
justify-content: center;
padding: 40px 24px;
}
.container {
width: 100%;
max-width: 900px;
padding: 32px;
background: #ffffff;
border-radius: 12px;
border: 1px solid #e5e5e5;
transition: all 0.2s ease;
}
.container:hover {
border-color: #d4d4d4;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.07);
}
h1 {
font-size: 2rem;
font-weight: 700;
letter-spacing: -0.02em;
margin-bottom: 8px;
color: #171717;
}
.subtitle {
font-size: 0.9375rem;
color: #737373;
margin-bottom: 32px;
line-height: 1.5;
}
.field {
margin-bottom: 24px;
}
.field.hidden {
display: none;
}
.row {
display: flex;
gap: 16px;
}
.row .field {
flex: 1;
}
label {
display: block;
font-size: 0.75rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #737373;
margin-bottom: 8px;
}
input[type="text"],
input[type="password"],
select,
textarea {
width: 100%;
background: #ffffff;
border: 1px solid #e5e5e5;
border-radius: 8px;
color: #171717;
padding: 12px 14px;
font-size: 0.875rem;
font-family: inherit;
outline: none;
transition: border-color 0.2s ease;
}
textarea {
resize: vertical;
min-height: 100px;
line-height: 1.6;
}
input:focus,
select:focus,
textarea:focus {
border-color: #000000;
}
select option {
background: #ffffff;
}
small {
font-size: 0.75rem;
color: #a3a3a3;
margin-top: 6px;
display: block;
line-height: 1.5;
}
small > div + div {
margin-top: 8px;
}
code {
background: #fafafa;
padding: 2px 6px;
border-radius: 4px;
font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace;
color: #737373;
font-size: 0.6875rem;
border: 1px solid #e5e5e5;
}
.style-chips {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 8px;
}
.chip {
padding: 6px 12px;
background: #fafafa;
border: 1px solid #e5e5e5;
border-radius: 6px;
font-size: 0.75rem;
font-weight: 600;
color: #737373;
cursor: pointer;
transition: all 0.15s ease;
user-select: none;
letter-spacing: 0.02em;
}
.chip:hover {
border-color: #d4d4d4;
background: #ffffff;
transform: translateY(-1px);
}
.chip.active {
background: #000000;
border-color: #000000;
color: #ffffff;
}
button {
width: 100%;
padding: 12px 20px;
background: #000000;
color: #ffffff;
border: none;
border-radius: 8px;
font-size: 0.875rem;
font-weight: 600;
font-family: inherit;
cursor: pointer;
transition: all 0.15s ease;
}
button:hover:not(:disabled):not(.icon-btn) {
background: #1a1a1a;
transform: translateY(-1px);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
button:active:not(:disabled) {
transform: translateY(0);
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
transform: none;
}
.status {
margin-top: 20px;
padding: 12px 16px;
background: #ffffff;
border-radius: 8px;
font-size: 0.875rem;
font-weight: 500;
display: none;
line-height: 1.5;
}
.status.error {
color: #ef4444;
background: #fef2f2;
border: 1px solid rgba(239, 68, 68, 0.2);
}
.status.success {
color: #10b981;
background: #f0fdf4;
border: 1px solid rgba(16, 185, 129, 0.2);
}
.status.loading {
color: #3b82f6;
background: #eff6ff;
border: 1px solid rgba(59, 130, 246, 0.2);
}
.player {
margin-top: 20px;
display: none;
}
.player-wrapper {
display: flex;
align-items: center;
gap: 12px;
}
.player audio {
flex: 1;
border-radius: 8px;
outline: none;
}
.icon-btn {
width: 44px;
height: 44px;
padding: 0;
background: #ffffff;
border: 1px solid #e5e5e5;
border-radius: 8px;
display: flex;
align-items: center;
justify-content: center;
cursor: pointer;
transition: all 0.15s ease;
flex-shrink: 0;
}
.icon-btn:hover {
border-color: #d4d4d4;
background: #fafafa;
transform: translateY(-1px);
}
.icon-btn:hover svg {
color: #000000;
}
.icon-btn:active {
transform: translateY(0);
}
.icon-btn svg {
color: #171717;
transition: color 0.15s ease;
}
.examples {
margin-top: 32px;
padding-top: 24px;
border-top: 1px solid #e5e5e5;
}
.examples h3 {
font-size: 0.75rem;
font-weight: 600;
color: #737373;
margin-bottom: 12px;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.examples-grid {
display: grid;
gap: 16px;
grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
}
.example {
background: #fafafa;
padding: 16px;
border-radius: 8px;
border: 1px solid #e5e5e5;
font-size: 0.875rem;
line-height: 1.6;
color: #737373;
transition: all 0.2s ease;
}
.example:hover {
border-color: #d4d4d4;
background: #ffffff;
}
.example strong {
color: #171717;
font-weight: 600;
display: block;
margin-bottom: 6px;
font-size: 0.875rem;
}
.example code {
background: #ffffff;
display: block;
padding: 8px;
margin-top: 6px;
border-radius: 6px;
font-size: 0.75rem;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading-spinner {
display: inline-block;
width: 14px;
height: 14px;
border: 2px solid #3b82f6;
border-top-color: transparent;
border-radius: 50%;
animation: spin 0.6s linear infinite;
margin-right: 8px;
vertical-align: middle;
}
@media (max-width: 768px) {
body {
padding: 24px 16px;
}
.container {
padding: 24px;
}
h1 {
font-size: 1.75rem;
}
.examples-grid {
grid-template-columns: 1fr;
}
}
@media (prefers-reduced-motion: reduce) {
* {
animation-duration: 0.01ms !important;
transition-duration: 0.01ms !important;
}
}
</style>
</head>
<body>
<div class="container">
<h1>MiMo TTS</h1>
<p class="subtitle">输入文字,选择风格,生成自然语音</p>
<div class="field">
<label>API Key</label>
<input type="password" id="apiKey" placeholder="输入你的 MIMO_API_KEY">
</div>
<div class="row">
<div class="field">
<label>音色</label>
<select id="voiceSelect">
<option value="mimo_default">默认 (mimo_default)</option>
<option value="default_zh">中文女声 (default_zh)</option>
<option value="default_en">英文女声 (default_en)</option>
<option value="custom">自定义音色(上传 WAV)</option>
</select>
</div>
<div class="field">
<label>风格(可选)</label>
<input type="text" id="styleInput" placeholder="如:开心、东北话、语速慢">
</div>
</div>
<div class="field hidden" id="voiceSampleField">
<label>上传参考音频 (WAV, 5-15s)</label>
<input type="file" id="voiceSample" accept=".wav,audio/wav">
<small>上传 5-15 秒的 WAV 文件作为参考音色</small>
</div>
<div class="field">
<label>快捷风格</label>
<div class="style-chips">
<span class="chip" data-style="开心">开心</span>
<span class="chip" data-style="悲伤">悲伤</span>
<span class="chip" data-style="生气">生气</span>
<span class="chip" data-style="悄悄话">悄悄话</span>
<span class="chip" data-style="东北话">东北话</span>
<span class="chip" data-style="粤语">粤语</span>
<span class="chip" data-style="变快">变快</span>
<span class="chip" data-style="变慢">变慢</span>
<span class="chip" data-style="唱歌">唱歌</span>
<span class="chip" data-style="像个大将军">大将军</span>
</div>
</div>
<div class="field">
<label>用户消息(可选)</label>
<textarea id="userMessage" placeholder="输入用户说的话,用于调整语气...">你好,MiMo,你吃午饭了吗?</textarea>
</div>
<div class="field">
<label>助手回复(必填)</label>
<textarea id="assistantMessage" placeholder="输入助手的回复...(可使用风格标签)">是的,我吃了一个三明治。</textarea>
<small>
<!-- The style tag below is shown as literal text, so it must be entity-escaped;
     a raw style element here would be parsed as CSS and the hint would vanish. -->
<div>整体风格:<code>&lt;style&gt;开心&lt;/style&gt;明天就是周五了!</code></div>
<div>细粒度控制:<code>(紧张,深呼吸)呼……冷静。(语速加快)不就是一个面试吗!</code></div>
</small>
</div>
<button id="btnGenerate">生成语音</button>
<div id="status" class="status"></div>
<div id="player" class="player">
<div class="player-wrapper">
<audio id="audio" controls></audio>
<button id="btnDownloadWav" class="icon-btn" title="下载 WAV">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
<polyline points="7 10 12 15 17 10"></polyline>
<line x1="12" y1="15" x2="12" y2="3"></line>
</svg>
</button>
</div>
</div>
<!-- Usage examples. Style tags inside the snippets are entity-escaped so the
     browser renders them as text instead of parsing them as style elements. -->
<div class="examples">
<h3>使用示例</h3>
<div class="examples-grid">
<div class="example">
<strong>整体风格控制</strong>
在文本开头添加风格标签
<code>&lt;style&gt;开心&lt;/style&gt;明天就是周五了,真开心!</code>
</div>
<div class="example">
<strong>细粒度控制</strong>
使用括号进行精确控制
<code>(紧张,深呼吸)呼……冷静。(语速加快)不就是一个面试吗!</code>
</div>
<div class="example">
<strong>方言示例</strong>
支持多种方言风格
<code>&lt;style&gt;东北话&lt;/style&gt;哎呀妈呀,这天儿也忒冷了吧!</code>
</div>
</div>
</div>
</div>
<script>
// ==================== State and element references ====================
// Most recently generated audio and the object URL currently bound to the player.
let audioBlob = null;
let audioUrl = null;
// Shorthand for looking up the page's fixed elements by id.
const byId = (id) => document.getElementById(id);
const audio = byId('audio');
const apiKeyInput = byId('apiKey');
const voiceSelect = byId('voiceSelect');
const styleInput = byId('styleInput');
const userMessageInput = byId('userMessage');
const assistantMessageInput = byId('assistantMessage');
const btnGenerate = byId('btnGenerate');
const btnDownloadWav = byId('btnDownloadWav');
const statusDiv = byId('status');
const playerDiv = byId('player');
const voiceSampleField = byId('voiceSampleField');
const voiceSampleInput = byId('voiceSample');
// ==================== Initialisation ====================
// Restore the API key saved by a previous visit and persist every committed change.
const API_KEY_STORAGE_KEY = 'mimo_api_key';
apiKeyInput.value = localStorage.getItem(API_KEY_STORAGE_KEY) ?? '';
apiKeyInput.addEventListener('change', function persistApiKey() {
  localStorage.setItem(API_KEY_STORAGE_KEY, apiKeyInput.value);
});
// ==================== Event listeners ====================
// Remove the highlighted state from every quick-style chip.
function deactivateAllChips() {
  for (const chip of document.querySelectorAll('.chip')) {
    chip.classList.remove('active');
  }
}
// Drop every <style>…</style> tag (and the whitespace after it) from the text.
function stripStyleTags(text) {
  return text.replace(/<style>.*?<\/style>\s*/g, '');
}
// Quick-style chips: clicking toggles the chip and keeps the style input and the
// style tag at the start of the assistant message in sync with it.
document.querySelectorAll('.chip').forEach((chip) => {
  chip.addEventListener('click', () => {
    const wasActive = chip.classList.contains('active');
    const plainText = stripStyleTags(assistantMessageInput.value.trim());
    deactivateAllChips();
    if (wasActive) {
      // Second click on the active chip: clear the style everywhere.
      styleInput.value = '';
      assistantMessageInput.value = plainText;
      return;
    }
    const { style } = chip.dataset;
    chip.classList.add('active');
    styleInput.value = style;
    assistantMessageInput.value = `<style>${style}</style>${plainText}`;
  });
});
// Typing a style by hand means no chip represents the current style any more.
styleInput.addEventListener('input', deactivateAllChips);
// While the assistant message is edited, highlight the chip whose tag it contains.
assistantMessageInput.addEventListener('input', () => {
  const text = assistantMessageInput.value;
  const containsStyleTag = /<style>(.*?)<\/style>/.test(text);
  for (const chip of document.querySelectorAll('.chip')) {
    const style = chip.dataset.style;
    const matches = containsStyleTag && text.includes(`<style>${style}</style>`);
    chip.classList.toggle('active', matches);
    if (matches) {
      styleInput.value = style;
    }
  }
});
// Show the reference-audio upload field only for the custom voice option.
voiceSelect.addEventListener('change', () => {
  const usesCustomVoice = voiceSelect.value === 'custom';
  voiceSampleField.classList.toggle('hidden', !usesCustomVoice);
});
// ==================== 工具函数 ====================
/**
 * Show a status line below the generate button.
 *
 * The message is inserted as plain text: it can contain API error bodies or
 * exception text, which must never be interpreted as HTML.
 *
 * @param {string} message - Text to display; an empty string hides the box.
 * @param {string} [type] - Optional modifier class: 'error', 'success' or 'loading'.
 */
function setStatus(message, type = '') {
  statusDiv.style.display = message ? 'block' : 'none';
  statusDiv.className = type ? `status ${type}` : 'status';
  statusDiv.textContent = message;
  if (type === 'loading') {
    // The spinner is the only markup needed, so build it as a real element.
    const spinner = document.createElement('span');
    spinner.className = 'loading-spinner';
    statusDiv.prepend(spinner);
  }
}
/**
 * Read a file and return its contents as a base64 string.
 *
 * @param {Blob} file - File chosen by the user.
 * @returns {Promise<string>} Base64 payload of the file, without the
 *   `data:<mime>;base64,` prefix produced by FileReader.
 * @throws {Error} Rejects with an Error (the FileReader error attached as
 *   `cause`) so callers can always rely on `error.message`.
 */
function fileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
    reader.onload = () => resolve(reader.result.split(',')[1]);
    // The error event itself carries no message; the details live on reader.error.
    reader.onerror = () => {
      const detail = reader.error?.message ?? '未知错误';
      reject(new Error(`读取文件失败: ${detail}`, { cause: reader.error }));
    };
    reader.readAsDataURL(file);
  });
}
// ==================== 核心功能 ====================
/**
 * Prefix `text` with a `<style>…</style>` tag built from `style`.
 * Returns `text` unchanged when `style` is empty or `text` already has a style tag.
 */
function applyStyleTag(text, style) {
  if (!style || /<style>.*?<\/style>/.test(text)) {
    return text;
  }
  return `<style>${style}</style>${text}`;
}

/**
 * Decode the base64 audio from the API response into WAV bytes.
 * Data that does not start with a RIFF header is treated as raw PCM
 * (24 kHz, 16-bit, mono) and wrapped in a WAV container.
 */
function decodeToWavBytes(base64Audio) {
  const raw = Uint8Array.from(atob(base64Audio), (c) => c.charCodeAt(0));
  const hasRiffHeader =
    raw[0] === 0x52 && raw[1] === 0x49 && raw[2] === 0x46 && raw[3] === 0x46;
  return hasRiffHeader ? raw : wrapPcmToWav(raw, 24000, 16, 1);
}

/**
 * Read the form, request speech synthesis from the MiMo API and load the
 * result into the audio player.
 *
 * Validation and request failures are reported through setStatus; the function
 * itself never rejects. While a request is running the generate button is
 * disabled, and further calls are ignored.
 *
 * @returns {Promise<void>}
 */
async function generateAudio() {
  // The keyboard shortcut calls this directly, so the disabled button alone
  // does not prevent a second request from starting while one is in flight.
  if (btnGenerate.disabled) {
    return;
  }
  const apiKey = apiKeyInput.value.trim();
  const userMessage = userMessageInput.value.trim();
  const assistantText = assistantMessageInput.value.trim();
  const voice = voiceSelect.value;
  if (!apiKey) {
    setStatus('请输入 API Key', 'error');
    return;
  }
  if (!assistantText) {
    setStatus('请输入助手回复(这是生成语音的目标文本)', 'error');
    return;
  }
  // A style typed by hand is only visible in the style input; turn it into
  // the tag form so it actually reaches the API.
  const assistantMessage = applyStyleTag(assistantText, styleInput.value.trim());

  btnGenerate.disabled = true;
  setStatus('正在生成语音...', 'loading');
  playerDiv.style.display = 'none';
  try {
    // Build the request payload; only the voice description differs per mode.
    const audioOptions = { format: 'wav' };
    if (voice === 'custom') {
      const file = voiceSampleInput.files[0];
      if (!file) {
        throw new Error('请上传参考音频文件(WAV 格式)');
      }
      audioOptions.voice_audio = { format: 'wav', data: await fileToBase64(file) };
    } else {
      audioOptions.voice = voice;
    }
    const messages = [];
    if (userMessage) {
      messages.push({ role: 'user', content: userMessage });
    }
    messages.push({ role: 'assistant', content: assistantMessage });
    const payload = { model: 'mimo-v2-audio-tts', audio: audioOptions, messages };

    const response = await fetch('https://api.xiaomimimo.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`API 请求失败: ${response.status} - ${errorText.slice(0, 200)}`);
    }
    const data = await response.json();
    const audioData = data.choices?.[0]?.message?.audio?.data;
    if (!audioData) {
      throw new Error('响应中没有音频数据');
    }

    const wavBytes = decodeToWavBytes(audioData);
    audioBlob = new Blob([wavBytes], { type: 'audio/wav' });
    // Release the URL of the previous result before replacing it.
    if (audioUrl) {
      URL.revokeObjectURL(audioUrl);
    }
    audioUrl = URL.createObjectURL(audioBlob);
    audio.src = audioUrl;
    // Reveal the player only once metadata is available, so the duration can
    // be included in the status line.
    audio.addEventListener('loadedmetadata', () => {
      const sizeKB = (wavBytes.length / 1024).toFixed(1);
      const hasDuration = Number.isFinite(audio.duration) && audio.duration > 0;
      const durationInfo = hasDuration ? ` | 时长: ${audio.duration.toFixed(1)}s` : '';
      const totalTokens = data.usage?.total_tokens;
      const stats = totalTokens != null ? ` | Token: ${totalTokens}` : '';
      setStatus(`生成成功 — ${sizeKB} KB${durationInfo}${stats}`, 'success');
      playerDiv.style.display = 'block';
      // Autoplay is best-effort: the browser may block it, and the visible
      // controls still let the user start playback.
      audio.play().catch(() => {});
    }, { once: true });
    // Explicitly trigger metadata loading for the new source.
    audio.load();
  } catch (error) {
    console.error('生成失败:', error);
    setStatus(`${error.message}`, 'error');
  } finally {
    btnGenerate.disabled = false;
  }
}
/**
 * Put raw PCM samples into a WAV container by prepending a 44-byte header.
 *
 * @param {Uint8Array} pcmData - Raw PCM bytes.
 * @param {number} sampleRate - Samples per second.
 * @param {number} bitsPerSample - Bit depth of one sample.
 * @param {number} numChannels - Number of interleaved channels.
 * @returns {Uint8Array} Header followed by the unchanged PCM bytes.
 */
function wrapPcmToWav(pcmData, sampleRate, bitsPerSample, numChannels) {
  const HEADER_BYTES = 44;
  const LITTLE_ENDIAN = true;
  const payloadBytes = pcmData.length;
  const wav = new Uint8Array(HEADER_BYTES + payloadBytes);
  const header = new DataView(wav.buffer);

  // RIFF chunk descriptor; the size field excludes the 8 bytes of id + size.
  writeStr(header, 0, 'RIFF');
  header.setUint32(4, 36 + payloadBytes, LITTLE_ENDIAN);
  writeStr(header, 8, 'WAVE');

  // "fmt " sub-chunk: 16 bytes of format data, audio format 1.
  writeStr(header, 12, 'fmt ');
  header.setUint32(16, 16, LITTLE_ENDIAN);
  header.setUint16(20, 1, LITTLE_ENDIAN);
  header.setUint16(22, numChannels, LITTLE_ENDIAN);
  header.setUint32(24, sampleRate, LITTLE_ENDIAN);
  header.setUint32(28, sampleRate * numChannels * bitsPerSample / 8, LITTLE_ENDIAN);
  header.setUint16(32, numChannels * bitsPerSample / 8, LITTLE_ENDIAN);
  header.setUint16(34, bitsPerSample, LITTLE_ENDIAN);

  // "data" sub-chunk: byte count followed by the samples.
  writeStr(header, 36, 'data');
  header.setUint32(40, payloadBytes, LITTLE_ENDIAN);
  wav.set(pcmData, HEADER_BYTES);
  return wav;
}
/**
 * Write `str` into `view` starting at `offset`, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte offset of the first character.
 * @param {string} str - Text to write.
 */
function writeStr(view, offset, str) {
  str.split('').forEach((ch, index) => {
    view.setUint8(offset + index, ch.charCodeAt(0));
  });
}
/**
 * Save the most recently generated audio as a timestamped .wav file.
 * Reports an error status when nothing has been generated yet.
 */
function downloadWav() {
  if (!audioBlob) {
    setStatus('请先生成语音', 'error');
    return;
  }
  const url = URL.createObjectURL(audioBlob);
  const a = document.createElement('a');
  a.href = url;
  a.download = `mimo_tts_${Date.now()}.wav`;
  // The link is attached to the document only for the duration of the click.
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  // Revoke later rather than immediately: the browser may still be reading
  // the blob when click() returns, and an early revoke can cancel the download.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
  setStatus('WAV 文件已下载', 'success');
}
// ==================== Event bindings ====================
btnGenerate.addEventListener('click', generateAudio);
btnDownloadWav.addEventListener('click', downloadWav);
// Keyboard shortcut: Ctrl+Enter (Cmd+Enter on macOS) starts generation.
document.addEventListener('keydown', (e) => {
  if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
    e.preventDefault();
    // Respect the disabled button so the shortcut cannot start a second
    // request while one is still running.
    if (!btnGenerate.disabled) {
      generateAudio();
    }
  }
});
// Playback errors: the event object carries no message, the details are
// exposed on the element's `error` property.
audio.addEventListener('error', () => {
  const mediaError = audio.error;
  const detail = mediaError?.message
    || (mediaError ? `code ${mediaError.code}` : '未知错误');
  setStatus(`播放错误: ${detail}`, 'error');
});
</script>
</body>
</html>
--【壹】--:
mimo 的 tts 有没有 rpm 和并发限制呢
--【贰】--:
真巧,我也写了一个
MiMo 语音合成
--【叁】--:
效果凑合
--【肆】--:
这个模型有音色克隆吗?
--【伍】--:
好快的佬,晚上部署弄到maibot里试试,这模型音色克隆效果可以吗?
--【陆】--:
感谢佬的网页,赞
--【柒】--:
厉害呀!
--【捌】--:
可以涩涩吗
--【玖】--:
soga,以为只能合成机器声音
--【拾】--:
感谢~
另外想问问,有开源的可以换歌声的TTS吗?
--【拾壹】--:
不支持。
image(1920×919,69.6 KB)
--【拾贰】--:
可以上传一段自己的声音来模拟
--【拾叁】--:
大佬牛皮
废话不多说,直接上图。
截屏2026-03-20 12.57.25(1608×1440,155 KB)
HTML源码
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MiMo TTS 语音合成</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
-webkit-font-smoothing: antialiased;
background: #fafafa;
color: #171717;
min-height: 100vh;
display: flex;
align-items: center;
justify-content: center;
padding: 40px 24px;
}
.container {
width: 100%;
max-width: 900px;
padding: 32px;
background: #ffffff;
border-radius: 12px;
border: 1px solid #e5e5e5;
transition: all 0.2s ease;
}
.container:hover {
border-color: #d4d4d4;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.07);
}
h1 {
font-size: 2rem;
font-weight: 700;
letter-spacing: -0.02em;
margin-bottom: 8px;
color: #171717;
}
.subtitle {
font-size: 0.9375rem;
color: #737373;
margin-bottom: 32px;
line-height: 1.5;
}
.field {
margin-bottom: 24px;
}
.field.hidden {
display: none;
}
.row {
display: flex;
gap: 16px;
}
.row .field {
flex: 1;
}
label {
display: block;
font-size: 0.75rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #737373;
margin-bottom: 8px;
}
input[type="text"],
input[type="password"],
select,
textarea {
width: 100%;
background: #ffffff;
border: 1px solid #e5e5e5;
border-radius: 8px;
color: #171717;
padding: 12px 14px;
font-size: 0.875rem;
font-family: inherit;
outline: none;
transition: border-color 0.2s ease;
}
textarea {
resize: vertical;
min-height: 100px;
line-height: 1.6;
}
input:focus,
select:focus,
textarea:focus {
border-color: #000000;
}
select option {
background: #ffffff;
}
small {
font-size: 0.75rem;
color: #a3a3a3;
margin-top: 6px;
display: block;
line-height: 1.5;
}
small > div + div {
margin-top: 8px;
}
code {
background: #fafafa;
padding: 2px 6px;
border-radius: 4px;
font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace;
color: #737373;
font-size: 0.6875rem;
border: 1px solid #e5e5e5;
}
.style-chips {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 8px;
}
.chip {
padding: 6px 12px;
background: #fafafa;
border: 1px solid #e5e5e5;
border-radius: 6px;
font-size: 0.75rem;
font-weight: 600;
color: #737373;
cursor: pointer;
transition: all 0.15s ease;
user-select: none;
letter-spacing: 0.02em;
}
.chip:hover {
border-color: #d4d4d4;
background: #ffffff;
transform: translateY(-1px);
}
.chip.active {
background: #000000;
border-color: #000000;
color: #ffffff;
}
button {
width: 100%;
padding: 12px 20px;
background: #000000;
color: #ffffff;
border: none;
border-radius: 8px;
font-size: 0.875rem;
font-weight: 600;
font-family: inherit;
cursor: pointer;
transition: all 0.15s ease;
}
button:hover:not(:disabled):not(.icon-btn) {
background: #1a1a1a;
transform: translateY(-1px);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
button:active:not(:disabled) {
transform: translateY(0);
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
transform: none;
}
.status {
margin-top: 20px;
padding: 12px 16px;
background: #ffffff;
border-radius: 8px;
font-size: 0.875rem;
font-weight: 500;
display: none;
line-height: 1.5;
}
.status.error {
color: #ef4444;
background: #fef2f2;
border: 1px solid rgba(239, 68, 68, 0.2);
}
.status.success {
color: #10b981;
background: #f0fdf4;
border: 1px solid rgba(16, 185, 129, 0.2);
}
.status.loading {
color: #3b82f6;
background: #eff6ff;
border: 1px solid rgba(59, 130, 246, 0.2);
}
.player {
margin-top: 20px;
display: none;
}
.player-wrapper {
display: flex;
align-items: center;
gap: 12px;
}
.player audio {
flex: 1;
border-radius: 8px;
outline: none;
}
.icon-btn {
width: 44px;
height: 44px;
padding: 0;
background: #ffffff;
border: 1px solid #e5e5e5;
border-radius: 8px;
display: flex;
align-items: center;
justify-content: center;
cursor: pointer;
transition: all 0.15s ease;
flex-shrink: 0;
}
.icon-btn:hover {
border-color: #d4d4d4;
background: #fafafa;
transform: translateY(-1px);
}
.icon-btn:hover svg {
color: #000000;
}
.icon-btn:active {
transform: translateY(0);
}
.icon-btn svg {
color: #171717;
transition: color 0.15s ease;
}
.examples {
margin-top: 32px;
padding-top: 24px;
border-top: 1px solid #e5e5e5;
}
.examples h3 {
font-size: 0.75rem;
font-weight: 600;
color: #737373;
margin-bottom: 12px;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.examples-grid {
display: grid;
gap: 16px;
grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
}
.example {
background: #fafafa;
padding: 16px;
border-radius: 8px;
border: 1px solid #e5e5e5;
font-size: 0.875rem;
line-height: 1.6;
color: #737373;
transition: all 0.2s ease;
}
.example:hover {
border-color: #d4d4d4;
background: #ffffff;
}
.example strong {
color: #171717;
font-weight: 600;
display: block;
margin-bottom: 6px;
font-size: 0.875rem;
}
.example code {
background: #ffffff;
display: block;
padding: 8px;
margin-top: 6px;
border-radius: 6px;
font-size: 0.75rem;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading-spinner {
display: inline-block;
width: 14px;
height: 14px;
border: 2px solid #3b82f6;
border-top-color: transparent;
border-radius: 50%;
animation: spin 0.6s linear infinite;
margin-right: 8px;
vertical-align: middle;
}
@media (max-width: 768px) {
body {
padding: 24px 16px;
}
.container {
padding: 24px;
}
h1 {
font-size: 1.75rem;
}
.examples-grid {
grid-template-columns: 1fr;
}
}
@media (prefers-reduced-motion: reduce) {
* {
animation-duration: 0.01ms !important;
transition-duration: 0.01ms !important;
}
}
</style>
</head>
<body>
<div class="container">
<h1>MiMo TTS</h1>
<p class="subtitle">输入文字,选择风格,生成自然语音</p>
<div class="field">
<label>API Key</label>
<input type="password" id="apiKey" placeholder="输入你的 MIMO_API_KEY">
</div>
<div class="row">
<div class="field">
<label>音色</label>
<select id="voiceSelect">
<option value="mimo_default">默认 (mimo_default)</option>
<option value="default_zh">中文女声 (default_zh)</option>
<option value="default_en">英文女声 (default_en)</option>
<option value="custom">自定义音色(上传 WAV)</option>
</select>
</div>
<div class="field">
<label>风格(可选)</label>
<input type="text" id="styleInput" placeholder="如:开心、东北话、语速慢">
</div>
</div>
<div class="field hidden" id="voiceSampleField">
<label>上传参考音频 (WAV, 5-15s)</label>
<input type="file" id="voiceSample" accept=".wav,audio/wav">
<small>上传 5-15 秒的 WAV 文件作为参考音色</small>
</div>
<div class="field">
<label>快捷风格</label>
<div class="style-chips">
<span class="chip" data-style="开心">开心</span>
<span class="chip" data-style="悲伤">悲伤</span>
<span class="chip" data-style="生气">生气</span>
<span class="chip" data-style="悄悄话">悄悄话</span>
<span class="chip" data-style="东北话">东北话</span>
<span class="chip" data-style="粤语">粤语</span>
<span class="chip" data-style="变快">变快</span>
<span class="chip" data-style="变慢">变慢</span>
<span class="chip" data-style="唱歌">唱歌</span>
<span class="chip" data-style="像个大将军">大将军</span>
</div>
</div>
<div class="field">
<label>用户消息(可选)</label>
<textarea id="userMessage" placeholder="输入用户说的话,用于调整语气...">你好,MiMo,你吃午饭了吗?</textarea>
</div>
<div class="field">
<label>助手回复(必填)</label>
<textarea id="assistantMessage" placeholder="输入助手的回复...(可使用风格标签)">是的,我吃了一个三明治。</textarea>
<small>
<!-- The style tag below is shown as literal text, so it must be entity-escaped;
     a raw style element here would be parsed as CSS and the hint would vanish. -->
<div>整体风格:<code>&lt;style&gt;开心&lt;/style&gt;明天就是周五了!</code></div>
<div>细粒度控制:<code>(紧张,深呼吸)呼……冷静。(语速加快)不就是一个面试吗!</code></div>
</small>
</div>
<button id="btnGenerate">生成语音</button>
<div id="status" class="status"></div>
<div id="player" class="player">
<div class="player-wrapper">
<audio id="audio" controls></audio>
<button id="btnDownloadWav" class="icon-btn" title="下载 WAV">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
<polyline points="7 10 12 15 17 10"></polyline>
<line x1="12" y1="15" x2="12" y2="3"></line>
</svg>
</button>
</div>
</div>
<!-- Usage examples. Style tags inside the snippets are entity-escaped so the
     browser renders them as text instead of parsing them as style elements. -->
<div class="examples">
<h3>使用示例</h3>
<div class="examples-grid">
<div class="example">
<strong>整体风格控制</strong>
在文本开头添加风格标签
<code>&lt;style&gt;开心&lt;/style&gt;明天就是周五了,真开心!</code>
</div>
<div class="example">
<strong>细粒度控制</strong>
使用括号进行精确控制
<code>(紧张,深呼吸)呼……冷静。(语速加快)不就是一个面试吗!</code>
</div>
<div class="example">
<strong>方言示例</strong>
支持多种方言风格
<code>&lt;style&gt;东北话&lt;/style&gt;哎呀妈呀,这天儿也忒冷了吧!</code>
</div>
</div>
</div>
</div>
<script>
// ==================== State and element references ====================
// Most recently generated audio and the object URL currently bound to the player.
let audioBlob = null;
let audioUrl = null;
// Shorthand for looking up the page's fixed elements by id.
const byId = (id) => document.getElementById(id);
const audio = byId('audio');
const apiKeyInput = byId('apiKey');
const voiceSelect = byId('voiceSelect');
const styleInput = byId('styleInput');
const userMessageInput = byId('userMessage');
const assistantMessageInput = byId('assistantMessage');
const btnGenerate = byId('btnGenerate');
const btnDownloadWav = byId('btnDownloadWav');
const statusDiv = byId('status');
const playerDiv = byId('player');
const voiceSampleField = byId('voiceSampleField');
const voiceSampleInput = byId('voiceSample');
// ==================== Initialisation ====================
// Restore the API key saved by a previous visit and persist every committed change.
const API_KEY_STORAGE_KEY = 'mimo_api_key';
apiKeyInput.value = localStorage.getItem(API_KEY_STORAGE_KEY) ?? '';
apiKeyInput.addEventListener('change', function persistApiKey() {
  localStorage.setItem(API_KEY_STORAGE_KEY, apiKeyInput.value);
});
// ==================== Event listeners ====================
// Remove the highlighted state from every quick-style chip.
function deactivateAllChips() {
  for (const chip of document.querySelectorAll('.chip')) {
    chip.classList.remove('active');
  }
}
// Drop every <style>…</style> tag (and the whitespace after it) from the text.
function stripStyleTags(text) {
  return text.replace(/<style>.*?<\/style>\s*/g, '');
}
// Quick-style chips: clicking toggles the chip and keeps the style input and the
// style tag at the start of the assistant message in sync with it.
document.querySelectorAll('.chip').forEach((chip) => {
  chip.addEventListener('click', () => {
    const wasActive = chip.classList.contains('active');
    const plainText = stripStyleTags(assistantMessageInput.value.trim());
    deactivateAllChips();
    if (wasActive) {
      // Second click on the active chip: clear the style everywhere.
      styleInput.value = '';
      assistantMessageInput.value = plainText;
      return;
    }
    const { style } = chip.dataset;
    chip.classList.add('active');
    styleInput.value = style;
    assistantMessageInput.value = `<style>${style}</style>${plainText}`;
  });
});
// Typing a style by hand means no chip represents the current style any more.
styleInput.addEventListener('input', deactivateAllChips);
// While the assistant message is edited, highlight the chip whose tag it contains.
assistantMessageInput.addEventListener('input', () => {
  const text = assistantMessageInput.value;
  const containsStyleTag = /<style>(.*?)<\/style>/.test(text);
  for (const chip of document.querySelectorAll('.chip')) {
    const style = chip.dataset.style;
    const matches = containsStyleTag && text.includes(`<style>${style}</style>`);
    chip.classList.toggle('active', matches);
    if (matches) {
      styleInput.value = style;
    }
  }
});
// Show the reference-audio upload field only for the custom voice option.
voiceSelect.addEventListener('change', () => {
  const usesCustomVoice = voiceSelect.value === 'custom';
  voiceSampleField.classList.toggle('hidden', !usesCustomVoice);
});
// ==================== 工具函数 ====================
/**
 * Show a status line below the generate button.
 *
 * The message is inserted as plain text: it can contain API error bodies or
 * exception text, which must never be interpreted as HTML.
 *
 * @param {string} message - Text to display; an empty string hides the box.
 * @param {string} [type] - Optional modifier class: 'error', 'success' or 'loading'.
 */
function setStatus(message, type = '') {
  statusDiv.style.display = message ? 'block' : 'none';
  statusDiv.className = type ? `status ${type}` : 'status';
  statusDiv.textContent = message;
  if (type === 'loading') {
    // The spinner is the only markup needed, so build it as a real element.
    const spinner = document.createElement('span');
    spinner.className = 'loading-spinner';
    statusDiv.prepend(spinner);
  }
}
/**
 * Read a file and return its contents as a base64 string.
 *
 * @param {Blob} file - File chosen by the user.
 * @returns {Promise<string>} Base64 payload of the file, without the
 *   `data:<mime>;base64,` prefix produced by FileReader.
 * @throws {Error} Rejects with an Error (the FileReader error attached as
 *   `cause`) so callers can always rely on `error.message`.
 */
function fileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
    reader.onload = () => resolve(reader.result.split(',')[1]);
    // The error event itself carries no message; the details live on reader.error.
    reader.onerror = () => {
      const detail = reader.error?.message ?? '未知错误';
      reject(new Error(`读取文件失败: ${detail}`, { cause: reader.error }));
    };
    reader.readAsDataURL(file);
  });
}
// ==================== 核心功能 ====================
/**
 * Prefix `text` with a `<style>…</style>` tag built from `style`.
 * Returns `text` unchanged when `style` is empty or `text` already has a style tag.
 */
function applyStyleTag(text, style) {
  if (!style || /<style>.*?<\/style>/.test(text)) {
    return text;
  }
  return `<style>${style}</style>${text}`;
}

/**
 * Decode the base64 audio from the API response into WAV bytes.
 * Data that does not start with a RIFF header is treated as raw PCM
 * (24 kHz, 16-bit, mono) and wrapped in a WAV container.
 */
function decodeToWavBytes(base64Audio) {
  const raw = Uint8Array.from(atob(base64Audio), (c) => c.charCodeAt(0));
  const hasRiffHeader =
    raw[0] === 0x52 && raw[1] === 0x49 && raw[2] === 0x46 && raw[3] === 0x46;
  return hasRiffHeader ? raw : wrapPcmToWav(raw, 24000, 16, 1);
}

/**
 * Read the form, request speech synthesis from the MiMo API and load the
 * result into the audio player.
 *
 * Validation and request failures are reported through setStatus; the function
 * itself never rejects. While a request is running the generate button is
 * disabled, and further calls are ignored.
 *
 * @returns {Promise<void>}
 */
async function generateAudio() {
  // The keyboard shortcut calls this directly, so the disabled button alone
  // does not prevent a second request from starting while one is in flight.
  if (btnGenerate.disabled) {
    return;
  }
  const apiKey = apiKeyInput.value.trim();
  const userMessage = userMessageInput.value.trim();
  const assistantText = assistantMessageInput.value.trim();
  const voice = voiceSelect.value;
  if (!apiKey) {
    setStatus('请输入 API Key', 'error');
    return;
  }
  if (!assistantText) {
    setStatus('请输入助手回复(这是生成语音的目标文本)', 'error');
    return;
  }
  // A style typed by hand is only visible in the style input; turn it into
  // the tag form so it actually reaches the API.
  const assistantMessage = applyStyleTag(assistantText, styleInput.value.trim());

  btnGenerate.disabled = true;
  setStatus('正在生成语音...', 'loading');
  playerDiv.style.display = 'none';
  try {
    // Build the request payload; only the voice description differs per mode.
    const audioOptions = { format: 'wav' };
    if (voice === 'custom') {
      const file = voiceSampleInput.files[0];
      if (!file) {
        throw new Error('请上传参考音频文件(WAV 格式)');
      }
      audioOptions.voice_audio = { format: 'wav', data: await fileToBase64(file) };
    } else {
      audioOptions.voice = voice;
    }
    const messages = [];
    if (userMessage) {
      messages.push({ role: 'user', content: userMessage });
    }
    messages.push({ role: 'assistant', content: assistantMessage });
    const payload = { model: 'mimo-v2-audio-tts', audio: audioOptions, messages };

    const response = await fetch('https://api.xiaomimimo.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`API 请求失败: ${response.status} - ${errorText.slice(0, 200)}`);
    }
    const data = await response.json();
    const audioData = data.choices?.[0]?.message?.audio?.data;
    if (!audioData) {
      throw new Error('响应中没有音频数据');
    }

    const wavBytes = decodeToWavBytes(audioData);
    audioBlob = new Blob([wavBytes], { type: 'audio/wav' });
    // Release the URL of the previous result before replacing it.
    if (audioUrl) {
      URL.revokeObjectURL(audioUrl);
    }
    audioUrl = URL.createObjectURL(audioBlob);
    audio.src = audioUrl;
    // Reveal the player only once metadata is available, so the duration can
    // be included in the status line.
    audio.addEventListener('loadedmetadata', () => {
      const sizeKB = (wavBytes.length / 1024).toFixed(1);
      const hasDuration = Number.isFinite(audio.duration) && audio.duration > 0;
      const durationInfo = hasDuration ? ` | 时长: ${audio.duration.toFixed(1)}s` : '';
      const totalTokens = data.usage?.total_tokens;
      const stats = totalTokens != null ? ` | Token: ${totalTokens}` : '';
      setStatus(`生成成功 — ${sizeKB} KB${durationInfo}${stats}`, 'success');
      playerDiv.style.display = 'block';
      // Autoplay is best-effort: the browser may block it, and the visible
      // controls still let the user start playback.
      audio.play().catch(() => {});
    }, { once: true });
    // Explicitly trigger metadata loading for the new source.
    audio.load();
  } catch (error) {
    console.error('生成失败:', error);
    setStatus(`${error.message}`, 'error');
  } finally {
    btnGenerate.disabled = false;
  }
}
/**
 * Put raw PCM samples into a WAV container by prepending a 44-byte header.
 *
 * @param {Uint8Array} pcmData - Raw PCM bytes.
 * @param {number} sampleRate - Samples per second.
 * @param {number} bitsPerSample - Bit depth of one sample.
 * @param {number} numChannels - Number of interleaved channels.
 * @returns {Uint8Array} Header followed by the unchanged PCM bytes.
 */
function wrapPcmToWav(pcmData, sampleRate, bitsPerSample, numChannels) {
  const HEADER_BYTES = 44;
  const LITTLE_ENDIAN = true;
  const payloadBytes = pcmData.length;
  const wav = new Uint8Array(HEADER_BYTES + payloadBytes);
  const header = new DataView(wav.buffer);

  // RIFF chunk descriptor; the size field excludes the 8 bytes of id + size.
  writeStr(header, 0, 'RIFF');
  header.setUint32(4, 36 + payloadBytes, LITTLE_ENDIAN);
  writeStr(header, 8, 'WAVE');

  // "fmt " sub-chunk: 16 bytes of format data, audio format 1.
  writeStr(header, 12, 'fmt ');
  header.setUint32(16, 16, LITTLE_ENDIAN);
  header.setUint16(20, 1, LITTLE_ENDIAN);
  header.setUint16(22, numChannels, LITTLE_ENDIAN);
  header.setUint32(24, sampleRate, LITTLE_ENDIAN);
  header.setUint32(28, sampleRate * numChannels * bitsPerSample / 8, LITTLE_ENDIAN);
  header.setUint16(32, numChannels * bitsPerSample / 8, LITTLE_ENDIAN);
  header.setUint16(34, bitsPerSample, LITTLE_ENDIAN);

  // "data" sub-chunk: byte count followed by the samples.
  writeStr(header, 36, 'data');
  header.setUint32(40, payloadBytes, LITTLE_ENDIAN);
  wav.set(pcmData, HEADER_BYTES);
  return wav;
}
/**
 * Write `str` into `view` starting at `offset`, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte offset of the first character.
 * @param {string} str - Text to write.
 */
function writeStr(view, offset, str) {
  str.split('').forEach((ch, index) => {
    view.setUint8(offset + index, ch.charCodeAt(0));
  });
}
/**
 * Save the most recently generated audio as a timestamped .wav file.
 * Reports an error status when nothing has been generated yet.
 */
function downloadWav() {
  if (!audioBlob) {
    setStatus('请先生成语音', 'error');
    return;
  }
  const url = URL.createObjectURL(audioBlob);
  const a = document.createElement('a');
  a.href = url;
  a.download = `mimo_tts_${Date.now()}.wav`;
  // The link is attached to the document only for the duration of the click.
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  // Revoke later rather than immediately: the browser may still be reading
  // the blob when click() returns, and an early revoke can cancel the download.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
  setStatus('WAV 文件已下载', 'success');
}
// ==================== Event bindings ====================
btnGenerate.addEventListener('click', generateAudio);
btnDownloadWav.addEventListener('click', downloadWav);
// Keyboard shortcut: Ctrl+Enter (Cmd+Enter on macOS) starts generation.
document.addEventListener('keydown', (e) => {
  if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
    e.preventDefault();
    // Respect the disabled button so the shortcut cannot start a second
    // request while one is still running.
    if (!btnGenerate.disabled) {
      generateAudio();
    }
  }
});
// Playback errors: the event object carries no message, the details are
// exposed on the element's `error` property.
audio.addEventListener('error', () => {
  const mediaError = audio.error;
  const detail = mediaError?.message
    || (mediaError ? `code ${mediaError.code}` : '未知错误');
  setStatus(`播放错误: ${detail}`, 'error');
});
</script>
</body>
</html>
--【壹】--:
mimo 的 tts 有没有 rpm 和并发限制呢
--【贰】--:
真巧,我也写了一个
MiMo 语音合成
--【叁】--:
效果凑合
--【肆】--:
这个模型有音色克隆吗?
--【伍】--:
好快的佬,晚上部署弄到maibot里试试,这模型音色克隆效果可以吗?
--【陆】--:
感谢佬的网页,赞
--【柒】--:
厉害呀!
--【捌】--:
可以涩涩吗
--【玖】--:
soga,以为只能合成机器声音
--【拾】--:
感谢~
另外想问问,有开源的可以换歌声的TTS吗?
--【拾壹】--:
不支持。
image(1920×919,69.6 KB)
--【拾贰】--:
可以上传一段自己的声音来模拟
--【拾叁】--:
大佬牛皮

