Amazon Transcribe Streaming Service Speech to text for .NET SDK

Question

I am not able to find any references for transcribe streaming service (from speech to text ) in AWS .NET SDK .

Is it available in .NET SDK Amazon Transcribe Streaming Service ? Any references would be helpful

score 4 · Answer 1 · answered Apr 01 '21 at 13:28

This is how I do it:

using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Threading.Tasks;
using Amazon;
using Amazon.S3;
using Amazon.S3.Model;
using Amazon.TranscribeService;
using Amazon.TranscribeService.Model;
using Newtonsoft.Json;
using QuickScreenHelper;

namespace CognitiveFace.Speech
{
    public class AwsSpeech : IDisposable
    {
        public AwsSpeech(RegionEndpoint regionEndpoint = null)
        {
            RegionEndpoint = regionEndpoint ?? RegionEndpoint.APNortheast1;
            //todo add region endpoint for AWS Face
            S3Client = new AmazonS3Client(RegionEndpoint);
            TranscribeClient = new AmazonTranscribeServiceClient(RegionEndpoint);
        }

        private RegionEndpoint RegionEndpoint { get; }

        private AmazonTranscribeServiceClient TranscribeClient { get; }

        private AmazonS3Client S3Client { get; }

        public void Dispose()
        {
            //TODO remember to call
            S3Client.Dispose();
            TranscribeClient.Dispose();
            //TODO dispose for faceClient
            //todo dispose for gcp speech and azure speech
        }

        public async Task TranscribeInputFile(string fileName, string targetLanguageCode = "ja-JP")
        {
            var bucketName = "transcribe-" + Guid.NewGuid();
            var putBucketResponse = await CreateBucket(bucketName);
            if (putBucketResponse.HttpStatusCode == HttpStatusCode.OK)
            {
                var uploadInputFileToS3 = await UploadInputFileToS3(fileName, bucketName);
                if (uploadInputFileToS3.HttpStatusCode == HttpStatusCode.OK)
                {
                    var startTranscriptionJobResponse =
                        await TranscribeInputFile(fileName, bucketName, targetLanguageCode);
                    //todo
                    //todo delete bucket
                }
                else
                {
                    Logger.WriteLine($"Fail to transcribe {fileName} because cannot upload {fileName} to {bucketName}",
                        uploadInputFileToS3);
                }
            }
            else
            {
                Logger.WriteLine($"Fail to transcribe {fileName} because cannot create bucket {bucketName}",
                    putBucketResponse);
            }
        }

        private async Task<TranscriptionJobResult> TranscribeInputFile(string fileName, string bucketName,
            string targetLanguageCode)
        {
            var objectName = Path.GetFileName(fileName);

            var media = new Media()
            {
                MediaFileUri = $"https://s3.{RegionEndpoint.SystemName}.amazonaws.com/{bucketName}/{objectName}"
            };

            var transcriptionJobName = $"transcribe-job-{bucketName}";
            var transcriptionJobRequest = new StartTranscriptionJobRequest()
            {
                LanguageCode = targetLanguageCode,
                Media = media,
                MediaFormat = MediaFormat.Wav,
                TranscriptionJobName = transcriptionJobName,
                OutputBucketName = bucketName
            };

            var startTranscriptionJobResponse =
                await TranscribeClient.StartTranscriptionJobAsync(transcriptionJobRequest);
            if (startTranscriptionJobResponse.HttpStatusCode == HttpStatusCode.OK)
            {
                return await WaitForTranscriptionJob(startTranscriptionJobResponse.TranscriptionJob, bucketName);
            }
            else
            {
                //todo
                throw new NotImplementedException();
            }
        }

        private async Task<TranscriptionJobResult> WaitForTranscriptionJob(TranscriptionJob transcriptionJob,
            string bucketName, int delayTime = 16000)
        {
            var transcriptionJobTranscriptionJobStatus = transcriptionJob.TranscriptionJobStatus;
            Logger.WriteLine($"transcriptionJobTranscriptionJobStatus={transcriptionJobTranscriptionJobStatus}");
            if (transcriptionJobTranscriptionJobStatus ==
                TranscriptionJobStatus.COMPLETED)
            {
                var keyName = $"{transcriptionJob.TranscriptionJobName}.json";
                Logger.WriteLine($"Downloading {keyName}");
                var result = await GetFileFromS3(keyName, bucketName);
                return JsonConvert.DeserializeObject<TranscriptionJobResult>(result);
                /*using var stringReader = new StringReader(result);
                using var jsonTextReader = new JsonTextReader(stringReader);*/
            }
            else if (transcriptionJobTranscriptionJobStatus == TranscriptionJobStatus.FAILED)
            {
                //TODO
                throw new NotImplementedException();
            }
            else
            {
                await Task.Delay(delayTime);
                var getTranscriptionJobResponse = await TranscribeClient.GetTranscriptionJobAsync(
                    new GetTranscriptionJobRequest()
                    {
                        TranscriptionJobName = transcriptionJob.TranscriptionJobName
                    });
                return await WaitForTranscriptionJob(getTranscriptionJobResponse.TranscriptionJob, bucketName,
                    delayTime * 2);
            }
        }

        public async Task<PutBucketResponse> CreateBucket(string bucketName)
        {
            var putBucketRequest = new PutBucketRequest()
            {
                BucketName = bucketName,
            };

            return await S3Client.PutBucketAsync(putBucketRequest);
        }

        public async Task<PutObjectResponse> UploadInputFileToS3(string fileName, string bucketName)
        {
            var objectName = Path.GetFileName(fileName);

            var putObjectRequest = new PutObjectRequest
            {
                BucketName = bucketName,
                Key = objectName,
                ContentType = "audio/wav",
                FilePath = fileName
            };

            return await S3Client.PutObjectAsync(putObjectRequest);
        }

        public async Task<string> GetFileFromS3(string keyName, string bucketName)
        {
            var request = new GetObjectRequest()
            {
                BucketName = bucketName,
                Key = keyName
            };
            using var response = await S3Client.GetObjectAsync(request);
            using var responseStream = response.ResponseStream;
            using var reader = new StreamReader(responseStream);
            /*string title = response.Metadata["x-amz-meta-title"]; // Assume you have "title" as medata added to the object.
            string contentType = response.Headers["Content-Type"];
            Console.WriteLine("Object metadata, Title: {0}", title);
            Console.WriteLine("Content type: {0}", contentType);*/

            return await reader.ReadToEndAsync(); // Now you process the response body.
        }
    }

    //todo move
    public class TranscriptionJobResult
    {
        public string jobName { get; set; }
        public string accountId { get; set; }
        public string status { get; set; }
        public TranscriptionResult results { get; set; }
    }

    public class TranscriptionResult
    {
        public List<Transcript> transcripts { get; set; }
        public List<TranscriptItem> items { get; set; }
    }

    public class Transcript
    {
        public string transcript { get; set; }
    }

    public class TranscriptItem
    {
        public string start_time { get; set; }
        public string end_time { get; set; }
        public List<AlternativeTranscription> alternatives { get; set; }
        public string type { get; set; }
    }

    public class AlternativeTranscription
    {
        public string confidence { get; set; }
        public string content { get; set; }
    }
}

This solution is for the offline transcription service not the streaming/realtime service. — Ben, Feb 14 '22 at 18:29

Xinton · Answer 2 · 2019-04-01T12:42:25.890

Probably not, for the .NET i found nothing. I was searching for the JavaScritpt SDK too and i found an GitHub issue where it was replied that it is not yet supported.

Based on this official blog post, the real time transcription feature are very recent, this may explain the lack of SDKs (20 NOV 2018)

I only found a example using the Java SDK: Example Java Application using AWS SDK creating streaming transcriptions via AWS Transcribe

UPDATE: I got in touch and they gave me this answer:

Right now only the Java and Ruby SDKs support streaming transcription. If you want to use the .NET Framework or JavaScript you’ll need to write your own client. If you decide to do that, you might find the documentation on this page useful: https://docs.aws.amazon.com/transcribe/latest/dg/streaming-format.html

Amazon Transcribe Streaming Service Speech to text for .NET SDK

2 Answers2

Linked