This page collects typical usage examples of the Python method google.cloud.speech.types.RecognitionConfig. If you have been wondering how types.RecognitionConfig is used in practice, the curated code samples below may help; you can also explore the containing module, google.cloud.speech.types, for related usage.
The 15 code examples of types.RecognitionConfig shown below are sorted by popularity by default. Upvote the examples you find useful: your feedback helps the system recommend better Python samples. Note that these samples target the pre-2.0 google-cloud-speech client library, in which the enums and types modules (and positional recognize(config, audio) calls) were still available.
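For readers on google-cloud-speech >= 2.0, where enums and types were folded into the top-level speech package and recognize takes keyword arguments, a minimal sketch of the equivalent configuration used throughout these samples might look like the following (the audio file name is illustrative):

import io

from google.cloud import speech

client = speech.SpeechClient()
with io.open('audio.raw', 'rb') as audio_file:  # illustrative file name
    audio = speech.RecognitionAudio(content=audio_file.read())
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code='en-US')
response = client.recognize(config=config, audio=audio)
for result in response.results:
    print(result.alternatives[0].transcript)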
Example 1: gspeech_client
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def gspeech_client(self):
    """Creates the Google Speech API client, configures it, and sends/gets
    audio/text data for parsing.
    """
    language_code = 'en-US'
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)
    # Build the request stream lazily with a generator expression, as in the
    # Google Speech Python docs.
    requests = (types.StreamingRecognizeRequest(audio_content=content)
                for content in self._generator())
    responses = client.streaming_recognize(streaming_config, requests)
    self._listen_print_loop(responses)
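This method assumes a self._generator() that yields raw audio chunks. A minimal sketch of such a generator, assuming the class fills a thread-safe queue.Queue from an audio callback (the _buff attribute name is hypothetical):

def _generator(self):
    # Yield LINEAR16 audio chunks until a None sentinel signals end of stream.
    # self._buff is assumed to be a queue.Queue fed by an audio callback.
    while True:
        chunk = self._buff.get()
        if chunk is None:
            return
        yield chunk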
Example 2: transcribe_data_sync
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe_data_sync(speech_data, model='default', language_code='en-US'):
    # Supported models: 'video', 'phone_call', 'command_and_search', 'default'.
    if not gcloud_imported:
        _log.error("Cannot find google.cloud package!")
        return None
    client = speech.SpeechClient()
    audio = types.RecognitionAudio(content=speech_data)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code=language_code or 'en-US',
        model=model,
    )
    response = client.recognize(config, audio)
    # Each result covers a consecutive portion of the audio; this helper
    # expects at most one result and returns its most likely transcript.
    assert len(response.results) <= 1
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        # print(u'Transcript: {}'.format(result.alternatives[0].transcript))
        return result.alternatives[0].transcript
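A possible call site, assuming a 16 kHz mono WAV file (the file name is hypothetical); wave.readframes returns exactly the raw LINEAR16 PCM bytes the config expects:

import wave

with wave.open('speech.wav', 'rb') as wav:  # hypothetical 16 kHz mono file
    pcm_bytes = wav.readframes(wav.getnframes())
print(transcribe_data_sync(pcm_bytes, model='command_and_search'))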
Example 3: transcribe_gcs
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe_gcs(gcs_uri):
    """Transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()
    # [START speech_python_migration_config_gcs]
    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US')
    # [END speech_python_migration_config_gcs]
    response = client.recognize(config, audio)
    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))
# [END speech_transcribe_sync_gcs]
Example 4: transcribe_gcs
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()
    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US')
    operation = client.long_running_recognize(config, audio)
    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)
    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))
        print('Confidence: {}'.format(result.alternatives[0].confidence))
# [END speech_transcribe_async_gcs]
Example 5: main
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        listen_print_loop(responses)
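This sample assumes RATE and CHUNK constants and a MicrophoneStream context manager, as in Google's official microphone-streaming sample. A condensed sketch of that helper built on PyAudio follows; treat it as an approximation of the original sample, not the exact class used here:

import queue

import pyaudio

RATE = 16000
CHUNK = int(RATE / 10)  # 100 ms of audio per chunk

class MicrophoneStream:
    """Opens a PyAudio input stream and yields audio chunks via generator()."""
    def __init__(self, rate, chunk):
        self._rate, self._chunk = rate, chunk
        self._buff = queue.Queue()

    def __enter__(self):
        self._audio = pyaudio.PyAudio()
        self._stream = self._audio.open(
            format=pyaudio.paInt16, channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            stream_callback=self._fill_buffer)
        return self

    def __exit__(self, *args):
        self._stream.stop_stream()
        self._stream.close()
        self._buff.put(None)  # signal generator() to terminate
        self._audio.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status):
        # Runs on PyAudio's callback thread; hand the chunk to the queue.
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        while True:
            chunk = self._buff.get()
            if chunk is None:
                return
            yield chunk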
Example 6: transcribe
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe(self, path):
    # Cache transcripts next to the audio so repeated runs skip the API call.
    cache_path = path.replace('.wav', '.ggl')
    if os.path.exists(cache_path):
        with open(cache_path) as f:
            return f.read()
    with open(path, 'rb') as f:
        content = f.read()
    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')
    response = self._client.recognize(config, audio)
    res = ' '.join(result.alternatives[0].transcript for result in response.results)
    # Strip punctuation before caching.
    res = res.translate(str.maketrans('', '', string.punctuation))
    with open(cache_path, 'w') as f:
        f.write(res)
    return res
Example 7: do_recognition
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def do_recognition(stream: bytes) -> Iterable:
    client = speech.SpeechClient()
    audio = types.RecognitionAudio(content=stream)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.OGG_OPUS,
        sample_rate_hertz=16000,
        language_code='ru-RU',
    )
    recognition = client.long_running_recognize(config, audio).result(timeout=90)
    return [result.alternatives[0].transcript for result in recognition.results]
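A possible call site, assuming an Opus-in-Ogg recording at 16 kHz (the file name is hypothetical):

with open('voice_message.ogg', 'rb') as f:  # hypothetical voice recording
    transcripts = do_recognition(f.read())
print('\n'.join(transcripts))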
Example 8: transcribe_gcs
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe_gcs(mp4_file):
    """Asynchronously transcribes audio extracted from the given mp4 file."""
    audio_file_path = process_video(mp4_file)  # create the audio file
    if not audio_file_path:
        return
    bucket_name = 'test-dictation'  # your gcloud bucket name
    print(mp4_file)
    audio_file_name = os.path.basename(audio_file_path) + '.ogg'
    print(audio_file_name)
    upload_to_gcloud(bucket_name, source_file_name=audio_file_path + '.ogg',
                     destination_blob_name=audio_file_name)
    client = speech.SpeechClient()
    audio = types.RecognitionAudio(
        uri="gs://" + bucket_name + "/" + audio_file_name)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.OGG_OPUS,
        language_code='en-US',
        sample_rate_hertz=16000,
        enable_word_time_offsets=True
    )
    operation = client.long_running_recognize(config, audio)
    print('Waiting for results...')
    result = operation.result()  # block until the long-running operation completes
    results = result.results
    # Output a raw text file of the transcription.
    with open(audio_file_path + '.txt', 'w') as raw_text_file:
        for result in results:
            for alternative in result.alternatives:
                raw_text_file.write(alternative.transcript + '\n')
    # Output an .srt formatted version of the transcription.
    format_transcript(results, audio_file_path)
Example 9: onStart
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def onStart(self):
    super().onStart()
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(Path(self.Commons.rootDir(), 'credentials/googlecredentials.json'))
    self._client = SpeechClient()
    # noinspection PyUnresolvedReferences
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=self.ConfigManager.getAliceConfigByName('micSampleRate'),
        language_code=self.LanguageManager.activeLanguageAndCountryCode
    )
    self._streamingConfig = types.StreamingRecognitionConfig(config=config, interim_results=True)
Example 10: get_transcripts
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def get_transcripts(audio_data):
    client = speech.SpeechClient()
    audio = types.RecognitionAudio(content=audio_data)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US'
    )
    response = client.recognize(config, audio)
    return [result.alternatives[0].transcript for result in response.results]
Example 11: transcribe_file
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe_file(speech_file):
    """Transcribe the given audio file."""
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    import io
    client = speech.SpeechClient()
    # [START speech_python_migration_sync_request]
    # [START speech_python_migration_config]
    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')
    # [END speech_python_migration_config]
    # [START speech_python_migration_sync_response]
    response = client.recognize(config, audio)
    # [END speech_python_migration_sync_request]
    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))
    # [END speech_python_migration_sync_response]
# [END speech_transcribe_sync]
# [START speech_transcribe_sync_gcs]
Example 12: transcribe_file
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe_file(speech_file):
    """Transcribe the given audio file asynchronously."""
    import io  # needed for io.open below
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()
    # [START speech_python_migration_async_request]
    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')
    # [START speech_python_migration_async_response]
    operation = client.long_running_recognize(config, audio)
    # [END speech_python_migration_async_request]
    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)
    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u'Transcript: {}'.format(result.alternatives[0].transcript))
        print('Confidence: {}'.format(result.alternatives[0].confidence))
    # [END speech_python_migration_async_response]
# [END speech_transcribe_async]
# [START speech_transcribe_async_gcs]
Example 13: transcribe_file_with_word_time_offsets
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def transcribe_file_with_word_time_offsets(speech_file):
    """Transcribe the given audio file synchronously and output the word time
    offsets."""
    import io  # needed for io.open below
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()
    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=True)
    response = client.recognize(config, audio)
    for result in response.results:
        alternative = result.alternatives[0]
        print(u'Transcript: {}'.format(alternative.transcript))
        for word_info in alternative.words:
            word = word_info.word
            start_time = word_info.start_time
            end_time = word_info.end_time
            # start_time/end_time are protobuf Durations; convert to seconds.
            print('Word: {}, start_time: {}, end_time: {}'.format(
                word,
                start_time.seconds + start_time.nanos * 1e-9,
                end_time.seconds + end_time.nanos * 1e-9))
# [START speech_transcribe_async_word_time_offsets_gcs]
Example 14: run_quickstart
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def run_quickstart():
    # [START speech_quickstart]
    import io
    import os
    # Imports the Google Cloud client library
    # [START speech_python_migration_imports]
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    # [END speech_python_migration_imports]
    # Instantiates a client
    # [START speech_python_migration_client]
    client = speech.SpeechClient()
    # [END speech_python_migration_client]
    # The name of the audio file to transcribe
    file_name = os.path.join(
        os.path.dirname(__file__),
        'resources',
        'audio.raw')
    # Loads the audio into memory
    with io.open(file_name, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')
    # Detects speech in the audio file
    response = client.recognize(config, audio)
    for result in response.results:
        print('Transcript: {}'.format(result.alternatives[0].transcript))
    # [END speech_quickstart]
Example 15: cloud_speech_transcribe
# Required imports: from google.cloud.speech import types [as alias]
# Or: from google.cloud.speech.types import RecognitionConfig [as alias]
def cloud_speech_transcribe(self, speech_file, language):
    client = speech.SpeechClient()
    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code=language)
    response = client.recognize(config, audio)
    transcribedtext = ''  # avoid a NameError when there are no results
    for result in response.results:
        transcribedtext = u'{}'.format(result.alternatives[0].transcript)
    return transcribedtext