Add conversion of mp4 video to wav before transcription

tyler232 · tyler232 · commit 1f259e2aafb9 · 2024-10-08T20:25:25.000-05:00
diff --git a/PythonRpcServer/transcribe.py b/PythonRpcServer/transcribe.py
@@ -1,11 +1,43 @@
 import os
 import subprocess
 import json
+from time import perf_counter 
+from ffmpy import FFmpeg
+import utils
 
 # Path to the Whisper executable inside the container
 WHISPER_EXECUTABLE = os.environ.get('WHISPER_EXE','whisper')  # Executable 'main' is assumed to be in the same directory as this script
 MODEL = os.environ.get('WHISPER_MODEL','models/ggml-base.en.bin')
 
+def convert_video_to_wav(input_filepath, offset=None):
+    """
+    Converts a video file to WAV format using ffmpy.
+    """
+    try:
+        start_time = perf_counter()
+        if offset is None:
+            offset = 0.0
+
+        nthreads = utils.getMaxThreads()
+        
+        print(f"Converting video '{input_filepath}' to WAV with offset {offset} using {nthreads} thread(s).")
+        output_filepath = utils.getTmpFile()
+        ext = '.wav'
+        
+        ff = FFmpeg(
+            global_options=f"-hide_banner -loglevel error -nostats -threads {nthreads}",
+            inputs={input_filepath: f'-ss {offset}'},
+            outputs={output_filepath: '-c:a pcm_s16le -ac 1 -y -ar 16000 -f wav'}
+        )
+        print(f"Starting conversion. Audio output will be saved in {output_filepath}")
+        ff.run()
+        end_time = perf_counter()
+        print(f"Conversion complete. Duration: {int(end_time - start_time)} seconds")
+        return output_filepath, ext
+    except Exception as e:
+        print("Exception during conversion:" + str(e))
+        raise e
+
 def transcribe_audio(media_filepath):
 
     if media_filepath == 'TEST-transcribe_example_result':
@@ -18,6 +50,11 @@ def transcribe_audio(media_filepath):
     if not os.path.exists(media_filepath):
         raise FileNotFoundError(f"Media file not found: {media_filepath}")
 
+    # convert video to wav if needed
+    if not media_filepath.endswith('.wav'):
+        media_filepath, _ = convert_video_to_wav(media_filepath)
+
+
     # Path to the output JSON file that Whisper will generate
     json_output_path = f"{media_filepath}.json"
     if os.path.exists(json_output_path):
@@ -41,6 +78,7 @@ def transcribe_audio(media_filepath):
         raise Exception(f"Whisper failed with error:\n{result.stderr.decode('utf-8')}")
 
     # Check if the output JSON file was generated
+    print(f"Checking for JSON output at: {json_output_path}")
     if not os.path.exists(json_output_path):
         raise FileNotFoundError(f"Expected JSON output file not found: {json_output_path}")