whisper transcribing function added

tyler232 · tyler232 · commit d545d52a27d5 · 2024-09-25T14:48:21.000-05:00
diff --git a/PythonRpcServer/randomvoice_16kHz.json b/PythonRpcServer/randomvoice_16kHz.json
@@ -0,0 +1 @@
+{"text": " Hello? Hello? Hello?", "segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 3.0, "text": " Hello? Hello? Hello?", "tokens": [50363, 18435, 30, 18435, 30, 18435, 30, 50513], "temperature": 0.0, "avg_logprob": -0.636968559688992, "compression_ratio": 1.1764705882352942, "no_speech_prob": 0.22877301275730133}], "language": "en"}
diff --git a/PythonRpcServer/randomvoice_16kHz.wav b/PythonRpcServer/randomvoice_16kHz.wav
diff --git a/PythonRpcServer/server.py b/PythonRpcServer/server.py
@@ -41,6 +41,9 @@ def LogWorker(logId, worker):
 
 
 class PythonServerServicer(ct_pb2_grpc.PythonServerServicer):
+    # Transcribe it into a JSON string built from the transcribed text
+    # Make it return a JSON string
+    # Change name to TranscribeRPC
     def CaptionRPC(self, request, context):
         #See CaptionRequest
         print( f"CaptionRPC({request.logId};{request.refId};{request.filePath};{request.phraseHints};{request.courseHints};{request.outputLanguages})")
diff --git a/PythonRpcServer/transcribe.py b/PythonRpcServer/transcribe.py
@@ -0,0 +1,57 @@
+import subprocess
+import os
+import json
+import re
+
+def transcribe_audio_with_whisper(audio_file_path):
+    """Transcribe an audio file with the ``whisper`` CLI and return its captions.
+
+    Runs ``whisper <file> --model base.en --output_format json`` and parses the
+    ``[start --> end] text`` lines that the CLI prints to stdout.
+
+    Returns ``{"en": [...]}`` where each entry has ``starttime``, ``endtime``
+    (both as printed by whisper) and ``caption``; returns ``None`` if whisper
+    fails. Raises ``FileNotFoundError`` if ``audio_file_path`` does not exist.
+    """
+    if not os.path.exists(audio_file_path):
+        raise FileNotFoundError(f"Audio file {audio_file_path} does not exist.")
+
+    command = ["whisper", audio_file_path, "--model", "base.en", "--output_format", "json"]
+    # whisper prints one segment per line and adds an hours field past one hour,
+    # so match line by line and accept "MM:SS.mmm" as well as "HH:MM:SS.mmm".
+    timestamp = r'((?:\d+:)?\d+:\d+\.\d+)'
+    segment_line = re.compile(r'\[' + timestamp + r'\s+-->\s+' + timestamp + r'\]\s+(.*)')
+
+    try:
+        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)
+
+        print("Whisper Output:")
+        print(result.stdout)
+
+        formatted_data = {"en": []}
+        for line in result.stdout.splitlines():
+            match = segment_line.search(line)
+            if match:
+                formatted_data["en"].append({
+                    "starttime": match.group(1),
+                    "endtime": match.group(2),
+                    "caption": match.group(3).strip()
+                })
+        return formatted_data
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error during transcription: {e.stderr}")
+        return None
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return None
+
+if __name__ == "__main__":
+    # Manual smoke test: transcribe the sample recording and dump the result.
+    sample_path = "randomvoice_16kHz.wav"
+    captions = transcribe_audio_with_whisper(sample_path)
+
+    if not captions:
+        print("Transcription failed.")
+    else:
+        print(json.dumps(captions, indent=4))
diff --git a/randomvoice_16kHz.json b/randomvoice_16kHz.json
@@ -0,0 +1 @@
+{"text": " Hello? Hello? Hello?", "segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 3.0, "text": " Hello? Hello? Hello?", "tokens": [50363, 18435, 30, 18435, 30, 18435, 30, 50513], "temperature": 0.0, "avg_logprob": -0.636968559688992, "compression_ratio": 1.1764705882352942, "no_speech_prob": 0.22877301275730133}], "language": "en"}
diff --git a/whisper.cpp b/whisper.cpp
@@ -0,0 +1 @@
+Subproject commit 5236f0278420ab776d1787c4330678d80219b4b6

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+{"text": " Hello? Hello? Hello?", "segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 3.0, "text": " Hello? Hello? Hello?", "tokens": [50363, 18435, 30, 18435, 30, 18435, 30, 50513], "temperature": 0.0, "avg_logprob": -0.636968559688992, "compression_ratio": 1.1764705882352942, "no_speech_prob": 0.22877301275730133}], "language": "en"}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Subproject commit 5236f0278420ab776d1787c4330678d80219b4b6`