11import os
22import subprocess
33import json
4+ from time import perf_counter
5+ from ffmpy import FFmpeg
6+ import utils
47
# Whisper executable to invoke inside the container. The WHISPER_EXE
# environment variable overrides it; when unset, the name 'whisper' is used.
WHISPER_EXECUTABLE = os.getenv('WHISPER_EXE', 'whisper')
# Model file handed to Whisper. The WHISPER_MODEL environment variable
# overrides it; when unset, 'models/ggml-base.en.bin' is used.
MODEL = os.getenv('WHISPER_MODEL', 'models/ggml-base.en.bin')
811
def convert_video_to_wav(input_filepath, offset=None):
    """
    Convert a media file to a WAV file using ffmpy.

    The output is requested from FFmpeg as mono (``-ac 1``), 16 kHz
    (``-ar 16000``), signed 16-bit PCM (``-c:a pcm_s16le``) in a WAV
    container, and is written to the path returned by ``utils.getTmpFile()``.

    Args:
        input_filepath: Path of the media file passed to FFmpeg as input.
        offset: Value forwarded to FFmpeg's ``-ss`` input option. ``None``
            is treated as ``0.0``.

    Returns:
        A ``(output_filepath, ext)`` tuple: the temporary output path and
        the string ``'.wav'``.

    Raises:
        Exception: Any error raised while preparing or running the
            conversion is printed and then re-raised.
    """
    try:
        # Timing starts before any setup work so the reported duration
        # covers the whole conversion, not just the FFmpeg run.
        started_at = perf_counter()
        offset = 0.0 if offset is None else offset

        nthreads = utils.getMaxThreads()
        print(f"Converting video '{input_filepath} ' to WAV with offset {offset} using {nthreads} thread(s).")

        wav_suffix = '.wav'
        output_filepath = utils.getTmpFile()

        # "-f wav" forces the container format explicitly; "-y" lets FFmpeg
        # overwrite the output path without prompting.
        audio_options = '-c:a pcm_s16le -ac 1 -y -ar 16000 -f wav'
        converter = FFmpeg(
            global_options=f"-hide_banner -loglevel error -nostats -threads {nthreads} ",
            inputs={input_filepath: f'-ss {offset} '},
            outputs={output_filepath: audio_options},
        )

        print(f"Starting conversion. Audio output will be saved in {output_filepath} ")
        converter.run()

        elapsed_seconds = int(perf_counter() - started_at)
        print(f"Conversion complete. Duration: {elapsed_seconds} seconds")
        return output_filepath, wav_suffix
    except Exception as exc:
        # Report the failure on stdout, then hand the same exception back
        # to the caller.
        print(f"Exception during conversion:{exc!s}")
        raise exc
40+
941def transcribe_audio (media_filepath ):
1042
1143 if media_filepath == 'TEST-transcribe_example_result' :
@@ -18,6 +50,11 @@ def transcribe_audio(media_filepath):
1850 if not os .path .exists (media_filepath ):
1951 raise FileNotFoundError (f"Media file not found: { media_filepath } " )
2052
53+ # convert video to wav if needed
54+ if not media_filepath .endswith ('.wav' ):
55+ media_filepath , _ = convert_video_to_wav (media_filepath )
56+
57+
2158 # Path to the output JSON file that Whisper will generate
2259 json_output_path = f"{ media_filepath } .json"
2360 if os .path .exists (json_output_path ):
@@ -41,6 +78,7 @@ def transcribe_audio(media_filepath):
4178 raise Exception (f"Whisper failed with error:\n { result .stderr .decode ('utf-8' )} " )
4279
4380 # Check if the output JSON file was generated
81+ print (f"Checking for JSON output at: { json_output_path } " )
4482 if not os .path .exists (json_output_path ):
4583 raise FileNotFoundError (f"Expected JSON output file not found: { json_output_path } " )
4684
0 commit comments