sox
# 查看音频信息
soxi test.wav
sox test.wav -n stat
# 切割音频
sox in.wav out.wav trim 0 10
# 改变音量
sox -v 0.5 input.wav output.wav # 将音量缩放为原来的 0.5 倍(即音量减半;系数大于 1 才是放大)
# 以 sox foo.wav -n stat -v 命令返回的数字作为放大倍数,将最大化 foo.wav 的音量而不至于出现削波
sox foo.wav -n stat -v 2> vc
sox -v `cat vc` foo.wav foo-maxed.wav
sox --norm=-1 <inputfile> <outputfile> # 将音频峰值电平归一化到 -1 dB(峰值归一化,而非感知响度归一化)
librosa
import librosa
import librosa.display
# load wavfile
waveform, sample_rate = librosa.load(filepath)
# MFCCs
MFCCs = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=24)
torchaudio
import torch
import torchaudio
# load wavefile
waveform, sample_rate = torchaudio.load(filepath)
# MFCCs
MFCCc = torchaudio.transforms.MFCC()(waveform)
# Specgram
Specgram = torchaudio.transforms.Spectrogram()(waveform)
Kaldi
Reference
[3] [简书] SOX常用命令