Skip to content

Commit d545d52

Browse files
committed
whisper transcribing function added
1 parent 79953d4 commit d545d52

6 files changed

Lines changed: 63 additions & 0 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"text": " Hello? Hello? Hello?", "segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 3.0, "text": " Hello? Hello? Hello?", "tokens": [50363, 18435, 30, 18435, 30, 18435, 30, 50513], "temperature": 0.0, "avg_logprob": -0.636968559688992, "compression_ratio": 1.1764705882352942, "no_speech_prob": 0.22877301275730133}], "language": "en"}
113 KB
Binary file not shown.

PythonRpcServer/server.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ def LogWorker(logId, worker):
4141

4242

4343
class PythonServerServicer(ct_pb2_grpc.PythonServerServicer):
44+
# Transcribe the audio and build a JSON string from the transcribed text
45+
# Make it return a JSON string
46+
# change name to TranscribeRPC
4447
def CaptionRPC(self, request, context):
4548
#See CaptionRequest
4649
print( f"CaptionRPC({request.logId};{request.refId};{request.filePath};{request.phraseHints};{request.courseHints};{request.outputLanguages})")

PythonRpcServer/transcribe.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import subprocess
2+
import os
3+
import json
4+
import re
5+
6+
# Matches one transcript line printed by the whisper CLI, e.g.
# "[00:00.000 --> 00:03.000]  Hello?". The optional leading "HH:" group
# accepts timestamps that carry an hours field.
_WHISPER_SEGMENT_RE = re.compile(
    r'\[((?:\d+:)?\d+:\d+\.\d+)\s+-->\s+((?:\d+:)?\d+:\d+\.\d+)\]\s+(.*)'
)


def _parse_whisper_segments(stdout_text):
    """Return the caption entries found in whisper's console output."""
    captions = []
    # Scan line by line: transcript lines follow each other separated by a
    # single newline, so matching once per blank-line-delimited chunk would
    # keep only the first segment of every chunk.
    for line in stdout_text.splitlines():
        match = _WHISPER_SEGMENT_RE.search(line)
        if match is None:
            continue
        start_time, end_time, text = match.groups()
        captions.append({
            "starttime": start_time,
            "endtime": end_time,
            "caption": text.strip(),
        })
    return captions


def transcribe_audio_with_whisper(audio_file_path):
    """Transcribe an audio file by running the ``whisper`` command-line tool.

    The tool is invoked with the ``base.en`` model and its console output is
    parsed for lines of the form ``[start --> end] text``.

    Args:
        audio_file_path: Path to the audio file to transcribe.

    Returns:
        A dict ``{"en": [...]}`` whose list holds one
        ``{"starttime", "endtime", "caption"}`` entry per transcript line
        (timestamps are kept as the strings whisper printed), or ``None``
        when the whisper process could not be run or exited with an error.

    Raises:
        FileNotFoundError: If ``audio_file_path`` does not exist.
    """
    if not os.path.exists(audio_file_path):
        raise FileNotFoundError(f"Audio file {audio_file_path} does not exist.")

    # NOTE(review): "--output_format json" presumably also makes whisper write
    # a .json file next to the working directory; this function only reads
    # the console transcript -- confirm which output is the intended source.
    command = [
        "whisper",
        audio_file_path,
        "--model", "base.en",
        "--output_format", "json",
    ]

    try:
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # whisper started but exited with a non-zero status.
        print(f"Error during transcription: {e.stderr}")
        return None
    except Exception as e:
        # Best-effort boundary: e.g. the whisper executable is not installed.
        print(f"An unexpected error occurred: {e}")
        return None

    print("Whisper Output:")
    print(result.stdout)

    return {"en": _parse_whisper_segments(result.stdout)}
48+
49+
if __name__ == "__main__":
    # Manual smoke test: transcribe one audio file and print the captions.
    import sys

    # Take the audio path from the command line when given; otherwise fall
    # back to the default sample file name.
    audio_file = sys.argv[1] if len(sys.argv) > 1 else "randomvoice_16kHz.wav"

    transcription = transcribe_audio_with_whisper(audio_file)

    if transcription:
        print(json.dumps(transcription, indent=4))
    else:
        print("Transcription failed.")

randomvoice_16kHz.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"text": " Hello? Hello? Hello?", "segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 3.0, "text": " Hello? Hello? Hello?", "tokens": [50363, 18435, 30, 18435, 30, 18435, 30, 50513], "temperature": 0.0, "avg_logprob": -0.636968559688992, "compression_ratio": 1.1764705882352942, "no_speech_prob": 0.22877301275730133}], "language": "en"}

whisper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 5236f0278420ab776d1787c4330678d80219b4b6

0 commit comments

Comments
 (0)