Spaces:
Runtime error
Runtime error
renhang
committed on
Commit
·
a1ddd2f
1
Parent(s):
deaa9a6
update
Browse files
app.py
CHANGED
|
@@ -95,6 +95,11 @@ def load_example(example_idx, examples):
|
|
| 95 |
# Load examples at startup
|
| 96 |
examples = load_examples()
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
# Gradio interface
|
| 99 |
with gr.Blocks() as demo:
|
| 100 |
gr.Markdown("# Jamify: Music Generation from Lyrics and Style")
|
|
@@ -116,12 +121,12 @@ with gr.Blocks() as demo:
|
|
| 116 |
label="Lyrics",
|
| 117 |
lines=10,
|
| 118 |
placeholder="Enter lyrics in format: word[start:end] word[start:end]...\nExample: It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16] way[6.16:6.48]...",
|
| 119 |
-
value=
|
| 120 |
)
|
| 121 |
-
duration_slider = gr.Slider(minimum=5, maximum=230, value=
|
| 122 |
|
| 123 |
with gr.Tab("Style from Audio"):
|
| 124 |
-
reference_audio = gr.File(label="Reference Audio (.mp3, .wav)", type="filepath")
|
| 125 |
with gr.Tab("Style from Text"):
|
| 126 |
style_prompt = gr.Textbox(label="Style Prompt", lines=3, placeholder="e.g., A high-energy electronic dance track with a strong bassline and euphoric synths.")
|
| 127 |
|
|
|
|
| 95 |
# Load examples at startup
|
| 96 |
examples = load_examples()
|
| 97 |
|
| 98 |
+
# Get default values from first example
|
| 99 |
+
default_audio = examples[0]['audio_path'] if examples else None
|
| 100 |
+
default_lyrics = examples[0]['lyrics_text'] if examples else ""
|
| 101 |
+
default_duration = examples[0]['duration'] if examples else 120
|
| 102 |
+
|
| 103 |
# Gradio interface
|
| 104 |
with gr.Blocks() as demo:
|
| 105 |
gr.Markdown("# Jamify: Music Generation from Lyrics and Style")
|
|
|
|
| 121 |
label="Lyrics",
|
| 122 |
lines=10,
|
| 123 |
placeholder="Enter lyrics in format: word[start:end] word[start:end]...\nExample: It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16] way[6.16:6.48]...",
|
| 124 |
+
value=default_lyrics
|
| 125 |
)
|
| 126 |
+
duration_slider = gr.Slider(minimum=5, maximum=230, value=default_duration, step=30, label="Duration (seconds)")
|
| 127 |
|
| 128 |
with gr.Tab("Style from Audio"):
|
| 129 |
+
reference_audio = gr.File(label="Reference Audio (.mp3, .wav)", type="filepath", value=default_audio)
|
| 130 |
with gr.Tab("Style from Text"):
|
| 131 |
style_prompt = gr.Textbox(label="Style Prompt", lines=3, placeholder="e.g., A high-energy electronic dance track with a strong bassline and euphoric synths.")
|
| 132 |
|
utils.py
CHANGED
|
@@ -95,7 +95,10 @@ def words_to_text(words: list[dict]) -> str:
|
|
| 95 |
word_text = word.get('word', '')
|
| 96 |
start = word.get('start', 0.0)
|
| 97 |
end = word.get('end', 0.0)
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
return " ".join(text_parts)
|
| 101 |
|
|
@@ -104,30 +107,53 @@ def json_to_text(json_data: dict) -> str:
|
|
| 104 |
"""
|
| 105 |
Convert JSON lyrics data to text format for display.
|
| 106 |
Only uses the 'word' layer from the JSON structure.
|
|
|
|
| 107 |
|
| 108 |
Args:
|
| 109 |
json_data: Dictionary with 'word' key containing list of word objects
|
| 110 |
|
| 111 |
Returns:
|
| 112 |
-
String
|
| 113 |
"""
|
| 114 |
if not isinstance(json_data, dict) or 'word' not in json_data:
|
| 115 |
return ""
|
| 116 |
|
| 117 |
words = json_data['word']
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
def text_to_json(text: str) -> dict:
|
| 122 |
"""
|
| 123 |
Convert text format to JSON structure expected by the model.
|
| 124 |
Creates the 'word' layer that the model needs.
|
|
|
|
| 125 |
|
| 126 |
Args:
|
| 127 |
-
text: String in format "word[start:end] word[start:end]..."
|
| 128 |
|
| 129 |
Returns:
|
| 130 |
Dictionary with 'word' key containing list of word objects
|
| 131 |
"""
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
return {"word": words}
|
|
|
|
| 95 |
word_text = word.get('word', '')
|
| 96 |
start = word.get('start', 0.0)
|
| 97 |
end = word.get('end', 0.0)
|
| 98 |
+
# Format timestamps to max 2 decimal places
|
| 99 |
+
start_str = f"{start:.2f}".rstrip('0').rstrip('.')
|
| 100 |
+
end_str = f"{end:.2f}".rstrip('0').rstrip('.')
|
| 101 |
+
text_parts.append(f"{word_text}[{start_str}:{end_str}]")
|
| 102 |
|
| 103 |
return " ".join(text_parts)
|
| 104 |
|
|
|
|
| 107 |
"""
|
| 108 |
Convert JSON lyrics data to text format for display.
|
| 109 |
Only uses the 'word' layer from the JSON structure.
|
| 110 |
+
Groups words into sentences/lines for better readability.
|
| 111 |
|
| 112 |
Args:
|
| 113 |
json_data: Dictionary with 'word' key containing list of word objects
|
| 114 |
|
| 115 |
Returns:
|
| 116 |
+
String with words grouped into lines: "word[start:end] word[start:end]...\nword[start:end]..."
|
| 117 |
"""
|
| 118 |
if not isinstance(json_data, dict) or 'word' not in json_data:
|
| 119 |
return ""
|
| 120 |
|
| 121 |
words = json_data['word']
|
| 122 |
+
|
| 123 |
+
# Group words into segments using the existing regroup_words function
|
| 124 |
+
segments = regroup_words(words, max_len=15.0, gap=0.50)
|
| 125 |
+
|
| 126 |
+
# Convert each segment to text format
|
| 127 |
+
segment_lines = []
|
| 128 |
+
for seg in segments:
|
| 129 |
+
# Extract words for this segment based on time range
|
| 130 |
+
seg_words = []
|
| 131 |
+
for word in words:
|
| 132 |
+
if seg['start'] <= word['start'] < seg['end'] or (
|
| 133 |
+
word['start'] <= seg['start'] < word['end']
|
| 134 |
+
):
|
| 135 |
+
seg_words.append(word)
|
| 136 |
+
|
| 137 |
+
if seg_words:
|
| 138 |
+
segment_text = words_to_text(seg_words)
|
| 139 |
+
segment_lines.append(segment_text)
|
| 140 |
+
|
| 141 |
+
return '\n'.join(segment_lines)
|
| 142 |
|
| 143 |
|
| 144 |
def text_to_json(text: str) -> dict:
|
| 145 |
"""
|
| 146 |
Convert text format to JSON structure expected by the model.
|
| 147 |
Creates the 'word' layer that the model needs.
|
| 148 |
+
Handles multi-line input by joining lines.
|
| 149 |
|
| 150 |
Args:
|
| 151 |
+
text: String in format "word[start:end] word[start:end]..." (can be multi-line)
|
| 152 |
|
| 153 |
Returns:
|
| 154 |
Dictionary with 'word' key containing list of word objects
|
| 155 |
"""
|
| 156 |
+
# Join multiple lines into single line for parsing
|
| 157 |
+
single_line_text = ' '.join(line.strip() for line in text.split('\n') if line.strip())
|
| 158 |
+
words = text_to_words(single_line_text)
|
| 159 |
return {"word": words}
|