Skip to content

Commit 1f259e2

Browse files
committed
translate mp4 into wav added
1 parent 907c891 commit 1f259e2

1 file changed

Lines changed: 38 additions & 0 deletions

File tree

PythonRpcServer/transcribe.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,43 @@
11
import os
22
import subprocess
33
import json
4+
from time import perf_counter
5+
from ffmpy import FFmpeg
6+
import utils
47

58
# Path to the Whisper executable inside the container
69
WHISPER_EXECUTABLE = os.environ.get('WHISPER_EXE','whisper') # Executable 'main' is assumed to be in the same directory as this script
710
MODEL = os.environ.get('WHISPER_MODEL','models/ggml-base.en.bin')
811

12+
def convert_video_to_wav(input_filepath, offset=None):
    """
    Converts a video file to WAV format using ffmpy.

    The audio track is written as mono, 16 kHz, signed 16-bit little-endian
    PCM (see the ffmpeg output options below) to a temporary path obtained
    from utils.getTmpFile().

    Args:
        input_filepath: Path of the media file handed to ffmpeg as its input.
        offset: Value passed to ffmpeg's input-side ``-ss`` option.
            None is treated as 0.0.

    Returns:
        A tuple ``(output_filepath, ext)`` where ``ext`` is always '.wav'.

    Raises:
        Exception: Any error raised while preparing or running the
            conversion is logged and re-raised unchanged.
    """
    # Tracked outside the try block so the error path knows whether an
    # output path was already allocated and may need to be cleaned up.
    output_filepath = None
    try:
        start_time = perf_counter()
        if offset is None:
            offset = 0.0

        nthreads = utils.getMaxThreads()

        print(f"Converting video '{input_filepath}' to WAV with offset {offset} using {nthreads} thread(s).")
        output_filepath = utils.getTmpFile()
        ext = '.wav'

        ff = FFmpeg(
            global_options=f"-hide_banner -loglevel error -nostats -threads {nthreads}",
            inputs={input_filepath: f'-ss {offset}'},
            # '-f wav' forces the container format explicitly; the output
            # path is not relied upon to carry a '.wav' suffix.
            outputs={output_filepath: '-c:a pcm_s16le -ac 1 -y -ar 16000 -f wav'}
        )
        print(f"Starting conversion. Audio output will be saved in {output_filepath}")
        ff.run()
        end_time = perf_counter()
        print(f"Conversion complete. Duration: {int(end_time - start_time)} seconds")
        return output_filepath, ext
    except Exception as e:
        print("Exception during conversion:" + str(e))
        # The caller never receives the path on failure, so a partial or
        # empty output file would otherwise be leaked on disk.
        if output_filepath is not None and os.path.exists(output_filepath):
            try:
                os.remove(output_filepath)
            except OSError as cleanup_error:
                # Best effort only: never mask the original conversion error.
                print("Could not remove partial output " + str(output_filepath) + ": " + str(cleanup_error))
        # Bare raise re-raises the active exception with its traceback intact.
        raise
40+
941
def transcribe_audio(media_filepath):
1042

1143
if media_filepath == 'TEST-transcribe_example_result':
@@ -18,6 +50,11 @@ def transcribe_audio(media_filepath):
1850
if not os.path.exists(media_filepath):
1951
raise FileNotFoundError(f"Media file not found: {media_filepath}")
2052

53+
# convert video to wav if needed
54+
if not media_filepath.endswith('.wav'):
55+
media_filepath, _ = convert_video_to_wav(media_filepath)
56+
57+
2158
# Path to the output JSON file that Whisper will generate
2259
json_output_path = f"{media_filepath}.json"
2360
if os.path.exists(json_output_path):
@@ -41,6 +78,7 @@ def transcribe_audio(media_filepath):
4178
raise Exception(f"Whisper failed with error:\n{result.stderr.decode('utf-8')}")
4279

4380
# Check if the output JSON file was generated
81+
print(f"Checking for JSON output at: {json_output_path}")
4482
if not os.path.exists(json_output_path):
4583
raise FileNotFoundError(f"Expected JSON output file not found: {json_output_path}")
4684

0 commit comments

Comments
 (0)