This repository was archived by the owner on Nov 11, 2023. It is now read-only.

Commit 163e91c

diff updata
1 parent 7dbb0ba commit 163e91c

File tree

1 file changed: +4 -6 lines changed

inference/infer_tool.py

Lines changed: 4 additions & 6 deletions
@@ -174,10 +174,8 @@ def load_model(self):
 
 
 
-    def get_unit_f0(self, in_path, tran, cluster_infer_ratio, speaker, f0_filter ,f0_predictor,cr_threshold=0.05):
+    def get_unit_f0(self, wav, tran, cluster_infer_ratio, speaker, f0_filter ,f0_predictor,cr_threshold=0.05):
 
-        wav, sr = librosa.load(in_path, sr=self.target_sample)
-
         f0_predictor_object = utils.get_f0_predictor(f0_predictor,hop_length=self.hop_size,sampling_rate=self.target_sample,device=self.dev,threshold=cr_threshold)
 
         f0, uv = f0_predictor_object.compute_f0_uv(wav)
@@ -219,7 +217,8 @@ def infer(self, speaker, tran, raw_path,
         if len(self.spk2id.__dict__) >= speaker:
             speaker_id = speaker
         sid = torch.LongTensor([int(speaker_id)]).to(self.dev).unsqueeze(0)
-        c, f0, uv = self.get_unit_f0(raw_path, tran, cluster_infer_ratio, speaker, f0_filter,f0_predictor,cr_threshold=cr_threshold)
+        wav, sr = librosa.load(raw_path, sr=self.target_sample)
+        c, f0, uv = self.get_unit_f0(wav, tran, cluster_infer_ratio, speaker, f0_filter,f0_predictor,cr_threshold=cr_threshold)
         if "half" in self.net_g_path and torch.cuda.is_available():
             c = c.half()
         with torch.no_grad():
@@ -245,9 +244,8 @@ def infer(self, speaker, tran, raw_path,
                                              k_step=k_step)
                 audio = self.vocoder.infer(audio_mel, f0).squeeze()
             else:
-                wav, sr = librosa.load(raw_path, sr=self.target_sample)
                 wav = torch.FloatTensor(wav).to(self.dev)
-                vol = self.volume_extractor.extract(wav[None,:])[None,:,None]
+                vol = self.volume_extractor.extract(wav[None,:])[None,:,None].to(self.dev)
                 c = c.transpose(-1,-2)
                 f0 = f0[:,:,None]
                 audio_mel = self.diffusion_model(
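In short: the librosa.load call is hoisted out of get_unit_f0 into infer, so the input audio is decoded once (the diffusion else branch previously reloaded raw_path a second time) and the already-loaded waveform is passed down as an array; the extracted volume tensor also gains an explicit .to(self.dev) so it lands on the model's device. Below is a minimal standalone sketch of the same two patterns, assuming nothing beyond librosa and torch. TARGET_SAMPLE, extract_volume, and the stub bodies are hypothetical stand-ins for illustration; the real logic lives on the Svc class in inference/infer_tool.py.

import librosa
import torch

TARGET_SAMPLE = 44100  # stand-in for self.target_sample

def get_unit_f0(wav):
    # After this commit the function receives an already-loaded waveform
    # (a numpy array) rather than a file path, so decoding happens once.
    return wav  # placeholder for the real content/f0/uv extraction

def extract_volume(wav):
    # Hypothetical stand-in for self.volume_extractor.extract: a crude
    # per-sample magnitude "volume" that stays on the input's device.
    return wav.abs()

def infer(raw_path, device):
    # Change 1: load the audio once here and pass the array down.
    wav, sr = librosa.load(raw_path, sr=TARGET_SAMPLE)
    units = get_unit_f0(wav)

    # Change 2: move the volume tensor explicitly onto the model's device
    # (the diff's .to(self.dev)), so later ops in the diffusion branch see
    # every tensor on the same device.
    wav_t = torch.FloatTensor(wav).to(device)
    vol = extract_volume(wav_t[None, :])[None, :, None].to(device)
    return units, vol

Under these assumptions the net effect is one decode of the input per inference call instead of two, plus a guaranteed device placement for vol.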
