微调 Whisper 语音大模型 - qidao123.com技术社区-IT企服评测·应用市场

复制代码

def _get_list_data(self, idx):
    """Load one dataset entry and return its audio, sample rate and labels.

    The entry is fetched through ``self.dataset_reader`` when
    ``self.data_list_path`` ends with ``.header``; otherwise it is taken
    directly from ``self.data_list``.  The audio is read in full, or only
    between ``start_time`` and ``end_time`` when the entry's ``"audio"``
    mapping provides them.  It is then transposed, optionally down-mixed to
    mono, optionally augmented, and resampled to ``self.sample_rate`` if its
    rate differs.

    Args:
        idx: Index into ``self.data_list``.

    Returns:
        A tuple ``(sample, sample_rate, transcript, language)``.
        ``sample_rate`` is the rate of the returned ``sample``, i.e.
        ``self.sample_rate`` whenever resampling took place.  ``transcript``
        is the entry's ``"sentences"`` value when ``self.timestamps`` is
        truthy, else its ``"sentence"`` value.  ``language`` is ``None``
        when the entry has no ``"language"`` key.
    """
    if self.data_list_path.endswith(".header"):
        data_list = self.dataset_reader.get_data(self.data_list[idx])
    else:
        data_list = self.data_list[idx]

    # Split the entry into the audio location and its labels.
    audio_info = data_list["audio"]
    audio_file = audio_info['path']
    transcript = data_list["sentences"] if self.timestamps else data_list["sentence"]
    language = data_list["language"] if 'language' in data_list else None

    if 'start_time' not in audio_info:
        sample, sample_rate = soundfile.read(audio_file, dtype='float32')
    else:
        start_time, end_time = audio_info["start_time"], audio_info["end_time"]
        # Read only the requested segment of the file.
        sample, sample_rate = self.slice_from_file(audio_file, start=start_time, end=end_time)

    # Applied to both read paths.
    # NOTE(review): presumably turns (frames, channels) into
    # (channels, frames) for librosa.to_mono - confirm against the readers.
    sample = sample.T

    # Down-mix to a single channel.
    if self.mono:
        sample = librosa.to_mono(sample)

    # Data augmentation (may change the sample rate as well).
    if self.augment_configs:
        sample, sample_rate = self.augment(sample, sample_rate)

    # Resample to the target rate.
    if self.sample_rate != sample_rate:
        sample = self.resample(sample, orig_sr=sample_rate, target_sr=self.sample_rate)
        # The audio now runs at the target rate; report that rate instead of
        # the stale pre-resample value so the returned pair stays consistent.
        sample_rate = self.sample_rate

    return sample, sample_rate, transcript, language

复制代码

class SavePeftModelCallback(TrainerCallback):
    """Trainer callback that mirrors the best checkpoint into a ``-best`` folder."""

    def on_save(self,
                args: TrainingArguments,
                state: TrainerState,
                control: TrainerControl,
                **kwargs):
        """Copy the current best checkpoint to ``<output_dir>/<prefix>-best``.

        Only acts when ``args.local_rank`` is ``0`` or ``-1``.  The copy is
        skipped when ``state.best_model_checkpoint`` is ``None`` or its
        directory no longer exists on disk; the summary line is printed
        either way.  Always returns ``control`` unchanged.
        """
        # Every other rank leaves the filesystem alone.
        if args.local_rank not in (0, -1):
            return control

        best_dir = os.path.join(args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-best")
        best_src = state.best_model_checkpoint

        # The best checkpoint may be unset, or may already have been removed
        # because only the most recent checkpoints are kept on disk.
        if best_src is not None and os.path.exists(best_src):
            # Replace any previous copy so the folder holds exactly one model.
            if os.path.exists(best_dir):
                shutil.rmtree(best_dir)
            shutil.copytree(best_src, best_dir)

        print(f"效果最好的检查点为：{state.best_model_checkpoint}，评估结果为：{state.best_metric}")
        return control

复制代码