Spaces:
Runtime error
Runtime error
Update dataloader/dataloader.py
Browse files
- dataloader/dataloader.py +7 -5
dataloader/dataloader.py
CHANGED
|
@@ -36,7 +36,7 @@ def audioread(path, sampling_rate):
|
|
| 36 |
data, fs = sf.read(path)
|
| 37 |
|
| 38 |
# Normalize the audio data.
|
| 39 |
-
data = audio_norm(data)
|
| 40 |
|
| 41 |
# Resample the audio if the sample rate is different from the target sampling rate.
|
| 42 |
if fs != sampling_rate:
|
|
@@ -47,7 +47,7 @@ def audioread(path, sampling_rate):
|
|
| 47 |
data = data[:, 0]
|
| 48 |
|
| 49 |
# Return the processed audio data.
|
| 50 |
-
return data
|
| 51 |
|
| 52 |
def audio_norm(x):
|
| 53 |
"""
|
|
@@ -87,7 +87,7 @@ def audio_norm(x):
|
|
| 87 |
x = x * scalarx
|
| 88 |
|
| 89 |
# Return the doubly normalized audio signal.
|
| 90 |
-
return x
|
| 91 |
|
| 92 |
class DataReader(object):
|
| 93 |
"""
|
|
@@ -155,13 +155,15 @@ class DataReader(object):
|
|
| 155 |
utt_id = path.split('/')[-1]
|
| 156 |
|
| 157 |
# Read and normalize the audio data, converting it to float32 for processing.
|
| 158 |
-
data = audioread(path, self.sampling_rate).astype(np.float32)
|
|
|
|
|
|
|
| 159 |
|
| 160 |
# Reshape the data to ensure it's in the format [1, data_length].
|
| 161 |
inputs = np.reshape(data, [1, data.shape[0]])
|
| 162 |
|
| 163 |
# Return the reshaped audio data, utterance ID, and the length of the original data.
|
| 164 |
-
return inputs, utt_id, data.shape[0]
|
| 165 |
|
| 166 |
class Wave_Processor(object):
|
| 167 |
"""
|
|
|
|
| 36 |
data, fs = sf.read(path)
|
| 37 |
|
| 38 |
# Normalize the audio data.
|
| 39 |
+
data, scalar = audio_norm(data)
|
| 40 |
|
| 41 |
# Resample the audio if the sample rate is different from the target sampling rate.
|
| 42 |
if fs != sampling_rate:
|
|
|
|
| 47 |
data = data[:, 0]
|
| 48 |
|
| 49 |
# Return the processed audio data.
|
| 50 |
+
return data, scalar
|
| 51 |
|
| 52 |
def audio_norm(x):
|
| 53 |
"""
|
|
|
|
| 87 |
x = x * scalarx
|
| 88 |
|
| 89 |
# Return the doubly normalized audio signal.
|
| 90 |
+
return x, 1/(scalar * scalarx + EPS)
|
| 91 |
|
| 92 |
class DataReader(object):
|
| 93 |
"""
|
|
|
|
| 155 |
utt_id = path.split('/')[-1]
|
| 156 |
|
| 157 |
# Read and normalize the audio data, converting it to float32 for processing.
|
| 158 |
+
#data = audioread(path, self.sampling_rate).astype(np.float32)
|
| 159 |
+
data, scalar = audioread(path, self.sampling_rate)
|
| 160 |
+
data = data.astype(np.float32)
|
| 161 |
|
| 162 |
# Reshape the data to ensure it's in the format [1, data_length].
|
| 163 |
inputs = np.reshape(data, [1, data.shape[0]])
|
| 164 |
|
| 165 |
# Return the reshaped audio data, utterance ID, and the length of the original data.
|
| 166 |
+
return inputs, utt_id, data.shape[0], scalar
|
| 167 |
|
| 168 |
class Wave_Processor(object):
|
| 169 |
"""
|