# Longest / shortest clip statistics.
# Collect the value get_sound_len reports for every entry of sound_files,
# then print the largest and the smallest (labelled as seconds).
sounds_len = [get_sound_len(sound) for sound in sound_files]
print("音频最大长度:", max(sounds_len), "秒")
print("音频最小长度:", min(sounds_len), "秒")
# IPython shell escape: copy one sample clip into the home directory.
# NOTE(review): the code below opens 'hungry_0.wav' by relative path, which
# presumably relies on the working directory being ~ -- confirm.
!cp train/hungry/hungry_0.wav ~/
# Install pydub quietly (-q); AudioSegment is imported from it further down.
!pip install pydub -q
# 音频信息查看
import math
import soundfile as sf
import numpy as np
import librosa
# Load the sample clip. soundfile returns a 1-D array for mono audio and a
# 2-D (frames, channels) array for multi-channel audio.
data, samplerate = sf.read('hungry_0.wav')
# The channel count is the size of the second axis. Counting the axes
# (len(data.shape)) would report 2 for every multi-channel file.
channels = 1 if data.ndim == 1 else data.shape[1]
# Duration in seconds = number of frames / frames per second.
length_s = len(data) / float(samplerate)
# 16 kHz; the same value the padding step passes to set_frame_rate.
# Not referenced anywhere in the visible code.
format_rate = 16000
print(f"channels: {channels}")
print(f"length_s: {length_s}")
print(f"samplerate: {samplerate}")
# 统一到34s
from pydub import AudioSegment
audio = AudioSegment.from_wav('hungry_0.wav')
print(str(audio.duration_seconds))
# A zero-length clip can never reach the target by repetition; fail fast
# instead of looping forever.
if audio.duration_seconds <= 0:
    raise ValueError("hungry_0.wav contains no audio; cannot pad it to 34 s")
# Repeat the clip until it covers at least 34 s (34000 ms). `i` is always the
# number of copies currently held in `padded`, mirroring convert_sound_len.
i = 1
padded = audio * i
while padded.duration_seconds * 1000 < 34000:
    i = i + 1
    padded = audio * i
# Keep the first 34000 ms, convert to a 16 kHz frame rate and write a new file.
padded[0:34000].set_frame_rate(16000).export('padded-file.wav', format='wav')
import math
import soundfile as sf
import numpy as np
import librosa
# Re-read the padded file to confirm its channel count, length and rate.
# soundfile returns a 1-D array for mono audio and a 2-D (frames, channels)
# array for multi-channel audio.
data, samplerate = sf.read('padded-file.wav')
# The channel count is the size of the second axis. Counting the axes
# (len(data.shape)) would report 2 for every multi-channel file.
channels = 1 if data.ndim == 1 else data.shape[1]
# Duration in seconds = number of frames / frames per second.
length_s = len(data) / float(samplerate)
# 16 kHz; the same value the padding step passes to set_frame_rate.
# Not referenced anywhere in the visible code.
format_rate = 16000
print(f"channels: {channels}")
print(f"length_s: {length_s}")
print(f"samplerate: {samplerate}")
# 定义函数,如未达到最大长度,则重复填充,最终从超过34s的音频中截取
from pydub import AudioSegment
def convert_sound_len(filename, target_ms=34000, frame_rate=16000):
    """Pad or trim a WAV file to a fixed length, overwriting it in place.

    The clip is repeated end-to-end until it is at least ``target_ms`` long,
    then cut to the first ``target_ms`` milliseconds, converted to
    ``frame_rate`` and exported back to ``filename``.

    Args:
        filename: Path of the WAV file to read and then overwrite.
        target_ms: Desired length in milliseconds (integer). Defaults to
            34000, i.e. 34 s.
        frame_rate: Frame rate in Hz of the exported file. Defaults to 16000.

    Raises:
        ValueError: If the file contains no audio. Repeating an empty clip
            can never reach the target length (the original loop would
            spin forever).
    """
    import math  # local import keeps this definition self-contained

    audio = AudioSegment.from_wav(filename)
    clip_ms = audio.duration_seconds * 1000
    if clip_ms <= 0:
        raise ValueError(f"cannot pad empty audio file: {filename!r}")

    # Work out the repetition count directly instead of rebuilding the whole
    # segment once per extra copy.
    repeats = max(1, math.ceil(target_ms / clip_ms))
    padded = audio * repeats
    # Float rounding in the division could leave the result marginally short;
    # top up so the slice below always has enough audio.
    while padded.duration_seconds * 1000 < target_ms:
        repeats += 1
        padded = audio * repeats

    # pydub slices by milliseconds: keep exactly the first target_ms.
    padded[0:target_ms].set_frame_rate(frame_rate).export(filename, format='wav')
# 统一所有音频到定长
# Apply convert_sound_len to every path in sound_files, one after another.
for wav_path in sound_files:
    convert_sound_len(wav_path)
3.自定义数据集
import os
from paddlespeech.audio.datasets.dataset import AudioClassificationDataset
class CustomDataset(AudioClassificationDataset):
    """Audio classification dataset built from per-class folders of WAV files.

    ``train_data_dir`` is expected to contain one sub-directory per entry of
    ``label_list``; every ``.wav`` file inside a sub-directory becomes one
    sample whose integer label is that entry's index in ``label_list``.
    """

    # List all the class labels; a label's index in this list is its class id.
    label_list = [
        'awake',
        'diaper',
        'hug',
        'hungry',
        'sleepy',
        'uncomfortable',
    ]
    # Root folder holding one sub-directory per label.
    train_data_dir = './train/'

    def __init__(self, **kwargs):
        """Scan the training folder and initialise the base dataset.

        Args:
            **kwargs: Forwarded unchanged to ``AudioClassificationDataset``.
        """
        files, labels = self._get_data()
        # feat_type='raw' is passed through to the base class.
        # NOTE(review): presumably selects un-featurised waveforms -- confirm
        # against the paddlespeech documentation.
        super(CustomDataset, self).__init__(
            files=files, labels=labels, feat_type='raw', **kwargs)

    def _get_data(self):
        """Collect WAV file paths and their integer labels.

        Returns:
            A ``(files, labels)`` pair of equal-length lists: ``files[k]`` is
            the path of a WAV file and ``labels[k]`` the index of its class
            in ``label_list``.

        Raises:
            FileNotFoundError: If a class sub-directory does not exist
                (raised by ``os.listdir``).
        """
        files = []
        labels = []
        for label_id, label_name in enumerate(self.label_list):
            single_class_path = os.path.join(self.train_data_dir, label_name)
            # os.listdir order is arbitrary; sort so the sample order is
            # reproducible between runs and machines.
            for sound in sorted(os.listdir(single_class_path)):
                # Match the extension only. A substring test on 'wav' would
                # also pick up names such as 'x.wav.bak' or 'wave.txt'.
                if sound.endswith('.wav'):
                    files.append(os.path.join(single_class_path, sound))
                    labels.append(label_id)
        return files, labels
# Longest / shortest clip statistics.
# Collect the value get_sound_len reports for every entry of sound_files,
# then print the largest and the smallest (labelled as seconds).
sounds_len = [get_sound_len(sound) for sound in sound_files]
print("音频最大长度:", max(sounds_len), "秒")
print("音频最小长度:", min(sounds_len), "秒")
# IPython shell escape: copy one sample clip into the home directory.
# NOTE(review): the code below opens 'hungry_0.wav' by relative path, which
# presumably relies on the working directory being ~ -- confirm.
!cp train/hungry/hungry_0.wav ~/
# Install pydub quietly (-q); AudioSegment is imported from it further down.
!pip install pydub -q
# 音频信息查看
import math
import soundfile as sf
import numpy as np
import librosa
# Load the sample clip. soundfile returns a 1-D array for mono audio and a
# 2-D (frames, channels) array for multi-channel audio.
data, samplerate = sf.read('hungry_0.wav')
# The channel count is the size of the second axis. Counting the axes
# (len(data.shape)) would report 2 for every multi-channel file.
channels = 1 if data.ndim == 1 else data.shape[1]
# Duration in seconds = number of frames / frames per second.
length_s = len(data) / float(samplerate)
# 16 kHz; the same value the padding step passes to set_frame_rate.
# Not referenced anywhere in the visible code.
format_rate = 16000
print(f"channels: {channels}")
print(f"length_s: {length_s}")
print(f"samplerate: {samplerate}")
# 统一到34s
from pydub import AudioSegment
audio = AudioSegment.from_wav('hungry_0.wav')
print(str(audio.duration_seconds))
# A zero-length clip can never reach the target by repetition; fail fast
# instead of looping forever.
if audio.duration_seconds <= 0:
    raise ValueError("hungry_0.wav contains no audio; cannot pad it to 34 s")
# Repeat the clip until it covers at least 34 s (34000 ms). `i` is always the
# number of copies currently held in `padded`, mirroring convert_sound_len.
i = 1
padded = audio * i
while padded.duration_seconds * 1000 < 34000:
    i = i + 1
    padded = audio * i
# Keep the first 34000 ms, convert to a 16 kHz frame rate and write a new file.
padded[0:34000].set_frame_rate(16000).export('padded-file.wav', format='wav')
import math
import soundfile as sf
import numpy as np
import librosa
# Re-read the padded file to confirm its channel count, length and rate.
# soundfile returns a 1-D array for mono audio and a 2-D (frames, channels)
# array for multi-channel audio.
data, samplerate = sf.read('padded-file.wav')
# The channel count is the size of the second axis. Counting the axes
# (len(data.shape)) would report 2 for every multi-channel file.
channels = 1 if data.ndim == 1 else data.shape[1]
# Duration in seconds = number of frames / frames per second.
length_s = len(data) / float(samplerate)
# 16 kHz; the same value the padding step passes to set_frame_rate.
# Not referenced anywhere in the visible code.
format_rate = 16000
print(f"channels: {channels}")
print(f"length_s: {length_s}")
print(f"samplerate: {samplerate}")
# 定义函数,如未达到最大长度,则重复填充,最终从超过34s的音频中截取
from pydub import AudioSegment
def convert_sound_len(filename, target_ms=34000, frame_rate=16000):
    """Pad or trim a WAV file to a fixed length, overwriting it in place.

    The clip is repeated end-to-end until it is at least ``target_ms`` long,
    then cut to the first ``target_ms`` milliseconds, converted to
    ``frame_rate`` and exported back to ``filename``.

    Args:
        filename: Path of the WAV file to read and then overwrite.
        target_ms: Desired length in milliseconds (integer). Defaults to
            34000, i.e. 34 s.
        frame_rate: Frame rate in Hz of the exported file. Defaults to 16000.

    Raises:
        ValueError: If the file contains no audio. Repeating an empty clip
            can never reach the target length (the original loop would
            spin forever).
    """
    import math  # local import keeps this definition self-contained

    audio = AudioSegment.from_wav(filename)
    clip_ms = audio.duration_seconds * 1000
    if clip_ms <= 0:
        raise ValueError(f"cannot pad empty audio file: {filename!r}")

    # Work out the repetition count directly instead of rebuilding the whole
    # segment once per extra copy.
    repeats = max(1, math.ceil(target_ms / clip_ms))
    padded = audio * repeats
    # Float rounding in the division could leave the result marginally short;
    # top up so the slice below always has enough audio.
    while padded.duration_seconds * 1000 < target_ms:
        repeats += 1
        padded = audio * repeats

    # pydub slices by milliseconds: keep exactly the first target_ms.
    padded[0:target_ms].set_frame_rate(frame_rate).export(filename, format='wav')
# 统一所有音频到定长
# Apply convert_sound_len to every path in sound_files, one after another.
for wav_path in sound_files:
    convert_sound_len(wav_path)
3.自定义数据集
import os
from paddlespeech.audio.datasets.dataset import AudioClassificationDataset
class CustomDataset(AudioClassificationDataset):
    """Audio classification dataset built from per-class folders of WAV files.

    ``train_data_dir`` is expected to contain one sub-directory per entry of
    ``label_list``; every ``.wav`` file inside a sub-directory becomes one
    sample whose integer label is that entry's index in ``label_list``.
    """

    # List all the class labels; a label's index in this list is its class id.
    label_list = [
        'awake',
        'diaper',
        'hug',
        'hungry',
        'sleepy',
        'uncomfortable',
    ]
    # Root folder holding one sub-directory per label.
    train_data_dir = './train/'

    def __init__(self, **kwargs):
        """Scan the training folder and initialise the base dataset.

        Args:
            **kwargs: Forwarded unchanged to ``AudioClassificationDataset``.
        """
        files, labels = self._get_data()
        # feat_type='raw' is passed through to the base class.
        # NOTE(review): presumably selects un-featurised waveforms -- confirm
        # against the paddlespeech documentation.
        super(CustomDataset, self).__init__(
            files=files, labels=labels, feat_type='raw', **kwargs)

    def _get_data(self):
        """Collect WAV file paths and their integer labels.

        Returns:
            A ``(files, labels)`` pair of equal-length lists: ``files[k]`` is
            the path of a WAV file and ``labels[k]`` the index of its class
            in ``label_list``.

        Raises:
            FileNotFoundError: If a class sub-directory does not exist
                (raised by ``os.listdir``).
        """
        files = []
        labels = []
        for label_id, label_name in enumerate(self.label_list):
            single_class_path = os.path.join(self.train_data_dir, label_name)
            # os.listdir order is arbitrary; sort so the sample order is
            # reproducible between runs and machines.
            for sound in sorted(os.listdir(single_class_path)):
                # Match the extension only. A substring test on 'wav' would
                # also pick up names such as 'x.wav.bak' or 'wave.txt'.
                if sound.endswith('.wav'):
                    files.append(os.path.join(single_class_path, sound))
                    labels.append(label_id)
        return files, labels