
I am running into problems when running my code. It is passing over/ignoring the create_bucket() and upload_blob() functions and returning a NotFound: 404 No such object: gcspeechstorage/output.wav error. I have isolated the working parts of the code, and I am left with those two problematic functions. I should also note that, in a previous question I asked, I was instructed to test transcribe_gcs, and it did indeed work, but ONLY when I manually uploaded the file. My problem, then, is creating the bucket and then uploading the file. Thank you for the help.

Expected results: creates a bucket, uploads the WAV file to the GCS bucket, retrieves it for transcription, then analyzes the sentiment.

Actual results: records audio, then crashes giving me the aforementioned 404 error.

Code in full (to give you a better picture):

import pyaudio
import wave
import pprint
import argparse
import datetime
import io
import json
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from google.cloud import storage
import sys
from oauth2client.service_account import ServiceAccountCredentials

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:/Users/Dave/Desktop/mizu/Project Mizu-7e2ecd8c5804.json'

bucket_name = "gcspeechstorage"
source_file_name = "C:/Users/Dave/Desktop/mizu/output.wav"
destination_blob_name = "output.wav"
gcs_uri = "gs://gcspeechstorage/output.wav"

def create_bucket(bucket_name):
    """Creates a new bucket."""
    storage_client = storage.Client()
    bucket = storage_client.create_bucket(bucket_name)
    print('Bucket {} created'.format(bucket.name))

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))

# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding= 'LINEAR16',
        sample_rate_hertz=44100,
        language_code='en-US')

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))

        transcribedSpeechFile = open('speechToAnalyze.txt', 'a+')  # this is where a text file is made with the transcribed speech

        transcribedSpeechFile.write(format(result.alternatives[0].transcript))

        transcribedSpeechFile.close()

        print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END speech_transcribe_async_gcs]


if __name__ == '__main__':
    transcribe_gcs(gcs_uri)


audio_rec = open('speechToAnalyze.txt', 'r')

sid = SentimentIntensityAnalyzer()
for sentence in audio_rec:
    ss = sid.polarity_scores(sentence)
    for k in ss:
        print('{0}: {1}, '.format(k, ss[k]), end='')
    print()

ERROR MESSAGE:

C:\Users\Dave\AppData\Local\Programs\Python\Python37\python.exe C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py
* recording
* done recording
Traceback (most recent call last):
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 57, in error_remapped_callable
    return callable_(*args, **kwargs)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 565, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\grpc\_channel.py", line 467, in _end_unary_response_blocking
    raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
    status = StatusCode.NOT_FOUND
    details = "No such object: gcspeechstorage/output.wav"
    debug_error_string = "{"created":"@1562878577.907000000","description":"Error received from peer ipv6:[2607:f8b0:4000:806::200a]:443","file":"src/core/lib/surface/call.cc","file_line":1052,"grpc_message":"No such object: gcspeechstorage/output.wav","grpc_status":5}"
>

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 112, in <module>
    transcribe_gcs(gcs_uri)
  File "C:/Users/Dave/Desktop/mizu/FrankensteinedFile.py", line 90, in transcribe_gcs
    operation = client.long_running_recognize(config, audio)
  File "C:\Users\Dave\AppData\Local\Programs\Python\Python37\lib\site-packages\google\cloud\speech_v1\gapic\speech_client.py", line 326, in long_running_recognize
    request, retry=retry, timeout=timeout, metadata=metadata
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\gapic_v1\method.py", line 143, in __call__
    return wrapped_func(*args, **kwargs)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 273, in retry_wrapped_func
    on_error=on_error,
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\retry.py", line 182, in retry_target
    return target()
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\timeout.py", line 214, in func_with_timeout
    return func(*args, **kwargs)
  File "C:\Users\Dave\AppData\Roaming\Python\Python37\site-packages\google\api_core\grpc_helpers.py", line 59, in error_remapped_callable
    six.raise_from(exceptions.from_grpc_error(exc), exc)
  File "<string>", line 3, in raise_from
google.api_core.exceptions.NotFound: 404 No such object: gcspeechstorage/output.wav
  • You say "I am running into problems when compiling my code". It sounds to me though like you are having problems when RUNNING your code, not compiling it. Can you please confirm this for clarity? – CryptoFool Jul 11 '19 at 21:10
  • Yes sir. I have confirmed in the edit that it is "running" my code. Sorry about that. – diet coke Jul 11 '19 at 21:12
  • You say "passing over/ignoring `create_bucket` and `upload_blob` functions in Google Cloud Storage with 404 Error when running code". So which is it. If you're getting a 404 error from those calls, then they are not being ignored. - Can you please provide the full error message you are getting, including the full stack trace? It's unclear to me from your description just where the error is coming from. – CryptoFool Jul 11 '19 at 21:14
  • Added error message. Thanks. – diet coke Jul 11 '19 at 21:16
  • You're sure that the resource "gs://gcspeechstorage/output.wav" exists and has the proper permissions to be read by your client process? - since it's called "output.wav", I'm wondering if you are expecting this file to be created rather than be read. It seems that this file needs to exist in advance for your code to function. – CryptoFool Jul 11 '19 at 21:23
  • Yes. It exists in my folder and I have given it full permission. It gets created for sure. – diet coke Jul 11 '19 at 21:25
  • Hmm...strange. I'm an AWS guy, so I don't know the specifics of Google, but your code looks ok to me. I'm out of ideas. Sorry. I hope someone comes along who can help you where I failed. Best of luck. – CryptoFool Jul 11 '19 at 21:27
  • Thank you steve! I appreciate the help. I have another question opened that a SWE from Google was working on, but I haven't heard from him. I think I'll just keep playing around with it until someone can help or maybe I have a brilliant revelation while drinking coffee. – diet coke Jul 11 '19 at 21:28
  • The error you are getting is straightforward: it cannot find the object because either the URI provided is wrong or the object doesn't actually exist in your bucket. I see that you defined the "upload" function, but it is never called (in the code you shared). I work at Google Platform Support, so I was able to confirm that there's currently no object "output.wav" in your bucket. Have you verified that the object exists before calling the "transcribe_gcs" function? – Mayeru Jul 12 '19 at 09:22

1 Answer


It is not passing over or ignoring your functions: create_bucket() and upload_blob() are defined but never called anywhere in your script. The only thing the if __name__ == '__main__': block does is call transcribe_gcs(gcs_uri), so when you run python FrankensteinedFile.py the audio gets recorded locally but is never uploaded to the bucket, and the Speech API quite reasonably reports that gs://gcspeechstorage/output.wav does not exist.
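
To see why nothing happens without an explicit call, here is a minimal sketch (the file and function names are just for illustration, not from your code): defining a function only creates it; the guarded block is what actually runs when the file is executed directly.

def upload():
    # never runs unless upload() is called somewhere
    print("uploading")

if __name__ == '__main__':
    # this block runs only when the file is executed directly,
    # e.g. `python example.py`; a function that is never called
    # here (or at module level) never executes
    upload()

In your script, create_bucket and upload_blob are defined exactly like upload above, but no line ever calls them.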

Check this link to understand how the if __name__ == '__main__': block works. Here is your code restructured so that it actually calls everything you have written:

import pyaudio
import wave
import pprint
import argparse
import datetime
import io
import json
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from google.cloud import storage
import sys
from oauth2client.service_account import ServiceAccountCredentials


def record_audio(chunk, format, channels, rate, record_seconds, wave_output_filename):
    p = pyaudio.PyAudio()

    stream = p.open(format=format,
                    channels=channels,
                    rate=rate,
                    input=True,
                    frames_per_buffer=chunk)

    print("* recording")

    frames = []

    for i in range(0, int(rate / chunk * record_seconds)):
        data = stream.read(chunk)
        frames.append(data)

    print("* done recording")

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(wave_output_filename, 'wb')
    wf.setnchannels(channels)
    wf.setsampwidth(p.get_sample_size(format))
    wf.setframerate(rate)
    wf.writeframes(b''.join(frames))
    wf.close()

def create_bucket(bucket_name):
    """Creates a new bucket."""
    storage_client = storage.Client()
    bucket = storage_client.create_bucket(bucket_name)
    print('Bucket {} created'.format(bucket.name))

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))

# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding= 'LINEAR16',
        sample_rate_hertz=44100,
        language_code='en-US')

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))

        transcribedSpeechFile = open('speechToAnalyze.txt', 'a+')  # this is where a text file is made with the transcribed speech

        transcribedSpeechFile.write(format(result.alternatives[0].transcript))

        transcribedSpeechFile.close()

        print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END speech_transcribe_async_gcs]

def analyze_text():
    audio_rec = open('speechToAnalyze.txt', 'r')

    sid = SentimentIntensityAnalyzer()
    for sentence in audio_rec:
        ss = sid.polarity_scores(sentence)
        for k in ss:
            print('{0}: {1}, '.format(k, ss[k]), end='')
        print()

if __name__ == '__main__':
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:/Users/Dave/Desktop/mizu/Project Mizu-7e2ecd8c5804.json'

    bucket_name = "gcspeechstorage"
    source_file_name = "C:/Users/Dave/Desktop/mizu/output.wav"
    destination_blob_name = "output.wav"
    gcs_uri = "gs://gcspeechstorage/output.wav"

    chunk = 1024
    format = pyaudio.paInt16
    channels = 1
    rate = 44100
    record_seconds = 10
    wave_output_filename = "output.wav"

    record_audio(chunk, format, channels, rate, record_seconds, wave_output_filename)
    create_bucket(bucket_name)
    upload_blob(bucket_name, source_file_name, destination_blob_name)
    transcribe_gcs(gcs_uri)
    analyze_text()
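
As a side note, following up on the comment above about verifying the object: if you want the script to fail early with a clearer message, you could check that the blob really landed in the bucket before calling transcribe_gcs, and guard create_bucket against the bucket already existing. The sketch below uses the same google-cloud-storage client you already import; the helper names create_bucket_if_missing and blob_exists are mine, not part of your code.

from google.api_core.exceptions import Conflict

def create_bucket_if_missing(bucket_name):
    """Creates the bucket, or reuses it if it already exists."""
    storage_client = storage.Client()
    try:
        bucket = storage_client.create_bucket(bucket_name)
        print('Bucket {} created'.format(bucket.name))
    except Conflict:
        print('Bucket {} already exists, reusing it'.format(bucket_name))

def blob_exists(bucket_name, blob_name):
    """Returns True if the object is present in the bucket."""
    storage_client = storage.Client()
    return storage_client.get_bucket(bucket_name).blob(blob_name).exists()

Then, right after upload_blob(...) and before transcribe_gcs(gcs_uri), something like:

if not blob_exists(bucket_name, destination_blob_name):
    sys.exit('Upload failed: {} is not in bucket {}'.format(
        destination_blob_name, bucket_name))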