1+ using Microsoft . EntityFrameworkCore ;
2+ using Microsoft . Extensions . Logging ;
3+ using System ;
4+ using System . Collections . Generic ;
5+ using System . Diagnostics . CodeAnalysis ;
6+ using System . Linq ;
7+ using System . Threading . Tasks ;
8+ using Grpc . Core ;
9+ using Newtonsoft . Json . Linq ;
10+
11+
12+ using ClassTranscribeDatabase ;
13+ using ClassTranscribeDatabase . Models ;
14+ using ClassTranscribeDatabase . Services ;
15+
16+ using static ClassTranscribeDatabase . CommonUtils ;
17+
18+ #pragma warning disable CA2007
19+ // https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2007
20+ // We are okay awaiting on a task in the same thread
21+
22+ namespace TaskEngine . Tasks
23+ {
/// <summary>
/// This task produces the transcriptions for a Video item by sending the video file path
/// (plus any phrase hints) to the caption RPC service and storing the returned cues as
/// <see cref="Caption"/> rows attached to a <see cref="Transcription"/>.
/// </summary>
[SuppressMessage("Microsoft.Performance", "CA1812:MarkMembersAsStatic")] // This class is never directly instantiated
class LocalTranscriptionTask : RabbitMQTask<string>
{
    private readonly CaptionQueries _captionQueries;
    private readonly RpcClient _rpcClient;

    /// <summary>
    /// Creates the task and binds it to the <c>TaskType.TranscribeVideo</c> queue.
    /// </summary>
    /// <param name="rabbitMQ">Queue connection handed to the base task.</param>
    /// <param name="rpcClient">Client used to reach the caption RPC service.</param>
    /// <param name="logger">Logger handed to the base task.</param>
    /// <param name="captionQueries">Caption query helper (stored; not used by <see cref="OnConsume"/>).</param>
    public LocalTranscriptionTask(RabbitMQConnection rabbitMQ,
        RpcClient rpcClient,
        // GenerateVTTFileTask generateVTTFileTask,
        ILogger<LocalTranscriptionTask> logger, CaptionQueries captionQueries)
        : base(rabbitMQ, TaskType.TranscribeVideo, logger)
    {
        _rpcClient = rpcClient;
        _captionQueries = captionQueries;
    }

    /// <summary>
    /// Transcribes one video: skips it when it is already complete or has no media in a playlist,
    /// otherwise calls the caption RPC and saves the returned captions per language.
    /// </summary>
    /// <param name="videoId">Id of the video to transcribe.</param>
    /// <param name="taskParameters">Task options (currently not consulted; see TODO below).</param>
    /// <param name="cleanup">Active-task registry used by <c>RegisterTask</c>.</param>
    /// <remarks>
    /// An <see cref="RpcException"/> from the caption service is logged and the method returns
    /// without rethrowing. Any other exception raised while calling the service or saving the
    /// captions bumps <c>TranscribingAttempts</c> by 1000, is logged, and is rethrown.
    /// </remarks>
    protected override async Task OnConsume(string videoId, TaskParameters taskParameters, ClientActiveTasks cleanup)
    {
        RegisterTask(cleanup, videoId); // may throw AlreadyInProgress exception

        const string SOURCEINTERNALREF = "ClassTranscribe/Local"; // Do not change me; this is a key inside the database
                                                                  // to indicate the source of the captions was this code

        var log = GetLogger();

        using (var context = CTDbContext.CreateDbContext())
        {
            // TODO: taskParameters.Force should wipe all captions and reset the Transcription Status

            // ! Note the 'Include' ; we don't build the whole tree of related Entities
            Video video = await context.Videos.Include(v => v.Video1).Where(v => v.Id == videoId).FirstAsync();

            if (video.TranscriptionStatus == Video.TranscriptionStatusMessages.NOERROR)
            {
                log.LogInformation($"{videoId}:Skipping Transcribing of- already complete");
                return;
            }

            // Only an existence check is needed, so let the database answer it instead of loading entities.
            bool referencedByPlaylist = await context.Medias.AnyAsync(m => m.VideoId == videoId && m.Playlist != null);
            if (!referencedByPlaylist)
            {
                log.LogInformation($"{videoId}:Skipping Transcribing - no media / playlist cares about this video");
                return;
            }

            bool hasPhraseHints = video.HasPhraseHints();
            log.LogInformation($"{videoId}: Has new Phrase Hints: {hasPhraseHints}");

            string phraseHints;
            if (hasPhraseHints)
            {
                var data = await context.TextData.FindAsync(video.PhraseHintsDataId);
                // FindAsync yields null when the referenced row no longer exists; fall back to no hints.
                phraseHints = data?.Text ?? "";
            }
            else
            {
                // deprecated
                phraseHints = video.PhraseHints ?? "";
            }

            log.LogInformation($"{videoId}:Using Phrase Hints length = {phraseHints.Length}");

            // GetKey can throw if the video.Id is currently being transcribed
            // However registerTask should have already detected that
            var key = TaskEngineGlobals.KeyProvider.GetKey(video.Id);

            try
            {
                video.TranscribingAttempts += 10;
                await context.SaveChangesAsync();
            }
            catch (Exception)
            {
                // The key is normally released by the finally block around the RPC call below;
                // if we never get that far it must be handed back here.
                TaskEngineGlobals.KeyProvider.ReleaseKey(key, video.Id);
                throw;
            }

            log.LogInformation($"{videoId}: Updated TranscribingAttempts = {video.TranscribingAttempts}");

            try
            {
                var jsonString = "";
                try
                {
                    log.LogInformation($"{videoId}: Calling RecognitionWithVideoStreamAsync");

                    // Built inside the try so that a failure here (e.g. video.Video1 is null)
                    // still reaches the finally block that releases the key.
                    var request = new CTGrpc.CaptionRequest
                    {
                        LogId = videoId,
                        FilePath = video.Video1.VMPath,
                        PhraseHints = phraseHints,
                        CourseHints = "",
                        OutputLanguages = "en"
                    };

                    jsonString = (await _rpcClient.PythonServerClient.CaptionRPCAsync(request)).Json;
                }
                catch (RpcException e)
                {
                    // Log every RPC failure (not only InvalidArgument) so that none is silently dropped.
                    log.LogError(e, $"CaptionRPC=({videoId}):{e.Message}");
                    return;
                }
                finally
                {
                    log.LogInformation($"{videoId} Caption - rpc complete");
                    TaskEngineGlobals.KeyProvider.ReleaseKey(key, video.Id);
                }

                JArray captionsByLanguage = JArray.Parse(jsonString);

                foreach (var captionsInLanguage in captionsByLanguage)
                {
                    // Value<string> returns the raw string; ToString(Formatting.None) would return
                    // the JSON-encoded form, i.e. with the surrounding double quotes.
                    var theLanguage = captionsInLanguage.Value<string>("Lang");
                    var theCaptions = ParseCaptions(captionsInLanguage["Captions"]);

                    if (theCaptions.Count == 0)
                    {
                        continue;
                    }

                    var existing = await context.Transcriptions.SingleOrDefaultAsync(tr =>
                        tr.VideoId == video.Id
                        && tr.SourceInternalRef == SOURCEINTERNALREF
                        && tr.Language == theLanguage
                        && tr.TranscriptionType == TranscriptionType.Caption);
                    log.LogInformation($"Find Existing Transcriptions null={existing == null}");

                    // Did we get the default or an existing Transcription entity?
                    if (existing == null)
                    {
                        var created = new Transcription()
                        {
                            TranscriptionType = TranscriptionType.Caption,
                            Captions = theCaptions,
                            Language = theLanguage,
                            VideoId = video.Id,
                            Label = $"{theLanguage} (ClassTranscribe)",
                            SourceInternalRef = SOURCEINTERNALREF,
                            SourceLabel = "ClassTranscribe (Local" + (phraseHints.Length > 0 ? " with phrase hints)" : ")")
                        };
                        context.Add(created);
                    }
                    else
                    {
                        // NOTE(review): the query above does not Include the Captions navigation, and the
                        // new cue indices restart at 0 - confirm that appending here is the intended behaviour.
                        existing.Captions.AddRange(theCaptions);
                    }
                }

                // Use the same constant that the "already complete" check above compares against.
                video.TranscriptionStatus = Video.TranscriptionStatusMessages.NOERROR;
                // video.JsonMetadata["LastSuccessfulTime"] = result.LastSuccessTime.ToString();

                // GetLogger().LogInformation($"{videoId}: Saving captions Code={result.ErrorCode}. LastSuccessTime={result.LastSuccessTime}");
                await context.SaveChangesAsync();
            }
            catch (Exception ex)
            {
                log.LogError(ex, $"{videoId}: Transcription Exception:{ex.StackTrace}");
                // Record the failed attempt before letting the exception propagate.
                video.TranscribingAttempts += 1000;
                await context.SaveChangesAsync();
                throw;
            }
        }
    }

    /// <summary>
    /// Converts the JSON cue list of one language into <see cref="Caption"/> entities,
    /// numbering them from 0 in the order they appear.
    /// </summary>
    /// <param name="cuesJson">Token whose children each carry "start", "end" and "text" values.</param>
    /// <returns>The parsed captions; empty when <paramref name="cuesJson"/> has no children.</returns>
    private static List<Caption> ParseCaptions(JToken cuesJson)
    {
        // Timestamps are machine-generated, so parse them independently of the host culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var captions = new List<Caption>();
        int cueCount = 0;

        foreach (var jsonCue in cuesJson)
        {
            captions.Add(new Caption()
            {
                Index = cueCount++,
                Begin = TimeSpan.Parse(jsonCue.Value<string>("start"), invariant),
                End = TimeSpan.Parse(jsonCue.Value<string>("end"), invariant),
                Text = jsonCue.Value<string>("text")
            });
        }

        return captions;
    }
}
185+ }
0 commit comments