<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

75<h1><a href="speech_v1p1beta1.html">Cloud Speech-to-Text API</a> . <a href="speech_v1p1beta1.speech.html">speech</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
Dan O'Mearadd494642020-05-01 07:42:23 -070078 <code><a href="#longrunningrecognize">longrunningrecognize(body=None, x__xgafv=None)</a></code></p>
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070079<p class="firstline">Performs asynchronous speech recognition: receive results via the</p>
80<p class="toc_element">
Dan O'Mearadd494642020-05-01 07:42:23 -070081 <code><a href="#recognize">recognize(body=None, x__xgafv=None)</a></code></p>
Bu Sun Kim715bd7f2019-06-14 16:50:42 -070082<p class="firstline">Performs synchronous speech recognition: receive results after all audio</p>
83<h3>Method Details</h3>
<div class="method">
    <code class="details" id="longrunningrecognize">longrunningrecognize(body=None, x__xgafv=None)</code>
  <pre>Performs asynchronous speech recognition: receive results via the
google.longrunning.Operations interface. Returns either an
`Operation.error` or an `Operation.response`, which contains
a `LongRunningRecognizeResponse` message.
For more information on asynchronous speech recognition, see the
[how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).

Args:
  body: object, The request body.
    The object takes the form of:

{ # The top-level message sent by the client for the `LongRunningRecognize`
    # method.
  &quot;config&quot;: { # Required. Provides information to the recognizer that specifies how to
      # process the request.
    &quot;metadata&quot;: { # Description of audio data to be recognized. # Metadata regarding this request.
      &quot;originalMediaType&quot;: &quot;A String&quot;, # The original media the speech was recorded on.
      &quot;obfuscatedId&quot;: &quot;A String&quot;, # Obfuscated (privacy-protected) ID of the user, to identify the number of
          # unique users using the service.
      &quot;recordingDeviceType&quot;: &quot;A String&quot;, # The type of device the speech was recorded with.
      &quot;interactionType&quot;: &quot;A String&quot;, # The use case most closely describing the audio content to be recognized.
      &quot;recordingDeviceName&quot;: &quot;A String&quot;, # The device used to make the recording. Examples: &#x27;Nexus 5X&#x27;,
          # &#x27;Polycom SoundStation IP 6000&#x27;, &#x27;POTS&#x27;, &#x27;VoIP&#x27;, or
          # &#x27;Cardioid Microphone&#x27;.
      &quot;originalMimeType&quot;: &quot;A String&quot;, # MIME type of the original audio file. For example `audio/m4a`,
          # `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
          # A list of possible audio MIME types is maintained at
          # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
      &quot;audioTopic&quot;: &quot;A String&quot;, # Description of the content. E.g. &quot;Recordings of federal supreme court
          # hearings from 2012&quot;.
      &quot;industryNaicsCodeOfAudio&quot;: 42, # The industry vertical to which this speech recognition request most
          # closely applies. This is most indicative of the topics contained
          # in the audio. Use the 6-digit NAICS code to identify the industry
          # vertical - see https://www.naics.com/search/.
      &quot;microphoneDistance&quot;: &quot;A String&quot;, # The audio type that most closely describes the audio being recognized.
    },
    &quot;sampleRateHertz&quot;: 42, # Sample rate in Hertz of the audio data sent in all
        # `RecognitionAudio` messages. Valid values are: 8000-48000.
        # 16000 is optimal. For best results, set the sampling rate of the audio
        # source to 16000 Hz. If that&#x27;s not possible, use the native sample rate of
        # the audio source (instead of re-sampling).
        # This field is optional for FLAC and WAV audio files, but is
        # required for all other audio formats. For details, see AudioEncoding.
    &quot;enableSeparateRecognitionPerChannel&quot;: True or False, # This needs to be set to `true` explicitly, with `audio_channel_count` &gt; 1,
        # to get each channel recognized separately. The recognition result will
        # contain a `channel_tag` field to state which channel that result belongs
        # to. If this is not true, we will only recognize the first channel. The
        # request is billed cumulatively for all channels recognized:
        # `audio_channel_count` multiplied by the length of the audio.
    &quot;enableAutomaticPunctuation&quot;: True or False, # If &#x27;true&#x27;, adds punctuation to recognition result hypotheses.
        # This feature is only available in select languages. Setting this for
        # requests in other languages has no effect at all.
        # The default &#x27;false&#x27; value does not add punctuation to result hypotheses.
    &quot;adaptation&quot;: { # Speech adaptation configuration. # Speech adaptation configuration improves the accuracy of speech
        # recognition. When speech adaptation is set, it supersedes the
        # `speech_contexts` field. For more information, see the [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength)
        # documentation.
      &quot;phraseSets&quot;: [ # A collection of phrase sets. To specify the hints inline, leave the
          # phrase set&#x27;s `name` blank and fill in the rest of its fields. Any
          # phrase set can use any custom class.
        { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
            # in the results.
          &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
              # phrase will be recognized over other similar-sounding phrases. The higher
              # the boost, the higher the chance of false positive recognition as well.
              # Negative boost values would correspond to anti-biasing. Anti-biasing is not
              # enabled, so negative boost will simply be ignored. Though `boost` can
              # accept a wide range of positive values, most use cases are best served with
              # values between 0 (exclusive) and 20. We recommend using a binary search
              # approach to finding the optimal value for your use case. Speech recognition
              # will skip PhraseSets with a boost value of 0.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the phrase set.
          &quot;phrases&quot;: [ # A list of words and phrases.
            { # A phrase containing words and phrase &quot;hints&quot; so that
                # the speech recognition is more likely to recognize them. This can be used
                # to improve the accuracy for specific words and phrases, for example, if
                # specific commands are typically spoken by the user. This can also be used
                # to add additional words to the vocabulary of the recognizer. See
                # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
                #
                # List items can also include pre-built or custom classes containing groups
                # of words that represent common concepts that occur in natural language. For
                # example, rather than providing a phrase hint for every month of the
                # year (e.g. &quot;i was born in january&quot;, &quot;i was born in february&quot;, ...), using the
                # pre-built `$MONTH` class improves the likelihood of correctly transcribing
                # audio that includes months (e.g. &quot;i was born in $month&quot;).
                # To refer to pre-built classes, use the class&#x27;s symbol prepended with `$`,
                # e.g. `$MONTH`. To refer to custom classes that were defined inline in the
                # request, set the class&#x27;s `custom_class_id` to a string unique to all class
                # resources and inline classes. Then use the class&#x27;s id wrapped in `${...}`,
                # e.g. &quot;${my-months}&quot;. To refer to custom class resources, use the class&#x27;s
                # id wrapped in `${}` (e.g. `${my-months}`).
              &quot;value&quot;: &quot;A String&quot;, # The phrase itself.
              &quot;boost&quot;: 3.14, # Hint Boost. Overrides the boost set at the phrase set level.
                  # A positive value will increase the probability that a specific phrase will
                  # be recognized over other similar-sounding phrases. The higher the boost,
                  # the higher the chance of false positive recognition as well. Negative
                  # boost values would correspond to anti-biasing. Anti-biasing is not
                  # enabled, so negative boost will simply be ignored. Though `boost` can
                  # accept a wide range of positive values, most use cases are best served
                  # with values between 0 and 20. We recommend using a binary search approach
                  # to finding the optimal value for your use case. Speech recognition
                  # will skip PhraseSets with a boost value of 0.
            },
          ],
        },
      ],
      &quot;customClasses&quot;: [ # A collection of custom classes. To specify the classes inline, leave the
          # class&#x27;s `name` blank and fill in the rest of its fields, giving it a unique
          # `custom_class_id`. Refer to the inline defined class in phrase hints by its
          # `custom_class_id`.
        { # A set of words or phrases that represents a common concept likely to appear
            # in your audio, for example a list of passenger ship names. CustomClass items
            # can be substituted into placeholders that you set in PhraseSet phrases.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the custom class.
          &quot;customClassId&quot;: &quot;A String&quot;, # If this custom class is a resource, the custom_class_id is the resource id
              # of the CustomClass. Case sensitive.
          &quot;items&quot;: [ # A collection of class items.
            { # An item of the class.
              &quot;value&quot;: &quot;A String&quot;, # The class item&#x27;s value.
            },
          ],
        },
      ],
    },
    &quot;maxAlternatives&quot;: 42, # Maximum number of recognition hypotheses to be returned.
        # Specifically, the maximum number of `SpeechRecognitionAlternative` messages
        # within each `SpeechRecognitionResult`.
        # The server may return fewer than `max_alternatives`.
        # Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
        # one. If omitted, will return a maximum of one.
    &quot;diarizationSpeakerCount&quot;: 42, # If set, specifies the estimated number of speakers in the conversation.
        # Defaults to &#x27;2&#x27;. Ignored unless enable_speaker_diarization is set to true.
        # Note: Use diarization_config instead.
    &quot;encoding&quot;: &quot;A String&quot;, # Encoding of audio data sent in all `RecognitionAudio` messages.
        # This field is optional for `FLAC` and `WAV` audio files and required
        # for all other audio formats. For details, see AudioEncoding.
    &quot;speechContexts&quot;: [ # Array of SpeechContext.
        # A means to provide context to assist the speech recognition. For more
        # information, see
        # [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
      { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
          # in the results.
        &quot;phrases&quot;: [ # A list of strings containing word and phrase &quot;hints&quot; so that
            # the speech recognition is more likely to recognize them. This can be used
            # to improve the accuracy for specific words and phrases, for example, if
            # specific commands are typically spoken by the user. This can also be used
            # to add additional words to the vocabulary of the recognizer. See
            # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
            #
            # List items can also be set to classes for groups of words that represent
            # common concepts that occur in natural language. For example, rather than
            # providing phrase hints for every month of the year, using the $MONTH class
            # improves the likelihood of correctly transcribing audio that includes
            # months.
          &quot;A String&quot;,
        ],
        &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
            # phrase will be recognized over other similar-sounding phrases. The higher
            # the boost, the higher the chance of false positive recognition as well.
            # Negative boost values would correspond to anti-biasing. Anti-biasing is not
            # enabled, so negative boost will simply be ignored. Though `boost` can
            # accept a wide range of positive values, most use cases are best served with
            # values between 0 and 20. We recommend using a binary search approach to
            # finding the optimal value for your use case.
      },
    ],
    &quot;enableWordConfidence&quot;: True or False, # If `true`, the top result includes a list of words and the
        # confidence for those words. If `false`, no word-level confidence
        # information is returned. The default is `false`.
    &quot;model&quot;: &quot;A String&quot;, # Which model to select for the given request. Select the model
        # best suited to your domain to get best results. If a model is not
        # explicitly specified, then we auto-select a model based on the parameters
        # in the RecognitionConfig.
        # &lt;table&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;b&gt;Model&lt;/b&gt;&lt;/td&gt;
        # &lt;td&gt;&lt;b&gt;Description&lt;/b&gt;&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;command_and_search&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for short queries such as voice commands or voice search.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;phone_call&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from a phone call (typically
        # recorded at an 8 kHz sampling rate).&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;video&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from video or includes multiple
        # speakers. Ideally the audio is recorded at a 16 kHz or greater
        # sampling rate. This is a premium model that costs more than the
        # standard rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;default&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that is not one of the specific audio models.
        # For example, long-form audio. Ideally the audio is high-fidelity,
        # recorded at a 16 kHz or greater sampling rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;/table&gt;
    &quot;audioChannelCount&quot;: 42, # The number of channels in the input audio data.
        # ONLY set this for MULTI-CHANNEL recognition.
        # Valid values for LINEAR16 and FLAC are `1`-`8`.
        # Valid values for OGG_OPUS are &#x27;1&#x27;-&#x27;254&#x27;.
        # The only valid value for MULAW, AMR, AMR_WB, and SPEEX_WITH_HEADER_BYTE is `1`.
        # If `0` or omitted, defaults to one channel (mono).
        # Note: We only recognize the first channel by default.
        # To perform independent recognition on each channel, set
        # `enable_separate_recognition_per_channel` to &#x27;true&#x27;.
    &quot;enableWordTimeOffsets&quot;: True or False, # If `true`, the top result includes a list of words and
        # the start and end time offsets (timestamps) for those words. If
        # `false`, no word-level time offset information is returned. The default is
        # `false`.
    &quot;alternativeLanguageCodes&quot;: [ # A list of up to 3 additional
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
        # listing possible alternative languages of the supplied audio.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes. If alternative languages are
        # listed, the recognition result will contain recognition in the most likely
        # language detected, including the main language_code. The recognition result
        # will include the language tag of the language detected in the audio. Note:
        # This feature is only supported for Voice Command and Voice Search use cases,
        # and performance may vary for other use cases (e.g., phone call
        # transcription).
      &quot;A String&quot;,
    ],
    &quot;diarizationConfig&quot;: { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional
        # parameters to make diarization better suited for your application.
        # Note: When this is enabled, we send all the words from the beginning of the
        # audio for the top alternative in every consecutive STREAMING response.
        # This is done in order to improve our speaker tags as our models learn to
        # identify the speakers in the conversation over time.
        # For non-streaming requests, the diarization results will be provided only
        # in the top alternative of the FINAL SpeechRecognitionResult.
      &quot;minSpeakerCount&quot;: 42, # Minimum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 2.
      &quot;maxSpeakerCount&quot;: 42, # Maximum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 6.
      &quot;speakerTag&quot;: 42, # Output only. Unused.
      &quot;enableSpeakerDiarization&quot;: True or False, # If &#x27;true&#x27;, enables speaker detection for each recognized word in
          # the top alternative of the recognition result, using a speaker_tag provided
          # in the WordInfo.
    },
    &quot;languageCode&quot;: &quot;A String&quot;, # Required. The language of the supplied audio as a
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
        # Example: &quot;en-US&quot;.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes.
    &quot;profanityFilter&quot;: True or False, # If set to `true`, the server will attempt to filter out
        # profanities, replacing all but the initial character in each filtered word
        # with asterisks, e.g. &quot;f***&quot;. If set to `false` or omitted, profanities
        # won&#x27;t be filtered out.
    &quot;enableSpeakerDiarization&quot;: True or False, # If &#x27;true&#x27;, enables speaker detection for each recognized word in
        # the top alternative of the recognition result, using a speaker_tag provided
        # in the WordInfo.
        # Note: Use diarization_config instead.
    &quot;useEnhanced&quot;: True or False, # Set to true to use an enhanced model for speech recognition.
        # If `use_enhanced` is set to true and the `model` field is not set, then
        # an appropriate enhanced model is chosen if an enhanced model exists for
        # the audio.
        #
        # If `use_enhanced` is true and an enhanced version of the specified model
        # does not exist, then the speech is recognized using the standard version
        # of the specified model.
  },
  &quot;audio&quot;: { # Contains audio data in the encoding specified in the `RecognitionConfig`. # Required. The audio data to be recognized.
      # Either `content` or `uri` must be supplied. Supplying both or neither
      # returns google.rpc.Code.INVALID_ARGUMENT. See
      # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
    &quot;content&quot;: &quot;A String&quot;, # The audio data bytes encoded as specified in
        # `RecognitionConfig`. Note: as with all bytes fields, protocol buffers use a
        # pure binary representation, whereas JSON representations use base64.
    &quot;uri&quot;: &quot;A String&quot;, # URI that points to a file that contains audio data bytes as specified in
        # `RecognitionConfig`. The file must not be compressed (for example, gzip).
        # Currently, only Google Cloud Storage URIs are
        # supported, which must be specified in the following format:
        # `gs://bucket_name/object_name` (other URI formats return
        # google.rpc.Code.INVALID_ARGUMENT). For more information, see
        # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
  },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # This resource represents a long-running operation that is the result of a
        # network API call.
      &quot;name&quot;: &quot;A String&quot;, # The server-assigned name, which is only unique within the same service that
          # originally returns it. If you use the default HTTP mapping, the
          # `name` should be a resource name ending with `operations/{unique_id}`.
      &quot;error&quot;: { # The `Status` type defines a logical error model that is suitable for # The error result of the operation in case of failure or cancellation.
          # different programming environments, including REST APIs and RPC APIs. It is
          # used by [gRPC](https://github.com/grpc). Each `Status` message contains
          # three pieces of data: error code, error message, and error details.
          #
          # You can find out more about this error model and how to work with it in the
          # [API Design Guide](https://cloud.google.com/apis/design/errors).
        &quot;code&quot;: 42, # The status code, which should be an enum value of google.rpc.Code.
        &quot;message&quot;: &quot;A String&quot;, # A developer-facing error message, which should be in English. Any
            # user-facing error message should be localized and sent in the
            # google.rpc.Status.details field, or localized by the client.
        &quot;details&quot;: [ # A list of messages that carry the error details. There is a common set of
            # message types for APIs to use.
          {
            &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
          },
        ],
      },
      &quot;metadata&quot;: { # Service-specific metadata associated with the operation. It typically
          # contains progress information and common metadata such as create time.
          # Some services might not provide such metadata. Any method that returns a
          # long-running operation should document the metadata type, if any.
        &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
      },
      &quot;done&quot;: True or False, # If the value is `false`, it means the operation is still in progress.
          # If `true`, the operation is completed, and either `error` or `response` is
          # available.
      &quot;response&quot;: { # The normal response of the operation in case of success. If the original
          # method returns no data on success, such as `Delete`, the response is
          # `google.protobuf.Empty`. If the original method is standard
          # `Get`/`Create`/`Update`, the response should be the resource. For other
          # methods, the response should have the type `XxxResponse`, where `Xxx`
          # is the original method name. For example, if the original method name
          # is `TakeSnapshot()`, the inferred response type is
          # `TakeSnapshotResponse`.
        &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
      },
    }</pre>
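  <p>Example (editor's sketch, not part of the generated reference): a minimal
asynchronous request using the google-api-python-client package with application
default credentials. The bucket path gs://my-bucket/audio.flac and the 5-second
polling interval are illustrative assumptions, not values from this document.</p>
  <pre>
# Minimal sketch: start a long-running recognition job for audio stored in
# Cloud Storage, then poll the returned Operation until it completes.
import time

from googleapiclient.discovery import build

service = build('speech', 'v1p1beta1')

request_body = {
    'config': {
        'encoding': 'FLAC',
        'sampleRateHertz': 16000,
        'languageCode': 'en-US',
        'enableAutomaticPunctuation': True,
    },
    'audio': {
        'uri': 'gs://my-bucket/audio.flac',  # placeholder object, not a real file
    },
}

operation = service.speech().longrunningrecognize(body=request_body).execute()

# `done` is absent or False while the job is still in progress.
while not operation.get('done'):
    time.sleep(5)
    operation = service.operations().get(name=operation['name']).execute()

if 'error' in operation:
    raise RuntimeError(operation['error'])
for result in operation['response']['results']:
    print(result['alternatives'][0]['transcript'])
</pre>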
</div>

<div class="method">
    <code class="details" id="recognize">recognize(body=None, x__xgafv=None)</code>
  <pre>Performs synchronous speech recognition: receive results after all audio
has been sent and processed.

Args:
  body: object, The request body.
    The object takes the form of:

{ # The top-level message sent by the client for the `Recognize` method.
  &quot;config&quot;: { # Required. Provides information to the recognizer that specifies how to
      # process the request.
    &quot;metadata&quot;: { # Description of audio data to be recognized. # Metadata regarding this request.
      &quot;originalMediaType&quot;: &quot;A String&quot;, # The original media the speech was recorded on.
      &quot;obfuscatedId&quot;: &quot;A String&quot;, # Obfuscated (privacy-protected) ID of the user, to identify the number of
          # unique users using the service.
      &quot;recordingDeviceType&quot;: &quot;A String&quot;, # The type of device the speech was recorded with.
      &quot;interactionType&quot;: &quot;A String&quot;, # The use case most closely describing the audio content to be recognized.
      &quot;recordingDeviceName&quot;: &quot;A String&quot;, # The device used to make the recording. Examples: &#x27;Nexus 5X&#x27;,
          # &#x27;Polycom SoundStation IP 6000&#x27;, &#x27;POTS&#x27;, &#x27;VoIP&#x27;, or
          # &#x27;Cardioid Microphone&#x27;.
      &quot;originalMimeType&quot;: &quot;A String&quot;, # MIME type of the original audio file. For example `audio/m4a`,
          # `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
          # A list of possible audio MIME types is maintained at
          # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
      &quot;audioTopic&quot;: &quot;A String&quot;, # Description of the content. E.g. &quot;Recordings of federal supreme court
          # hearings from 2012&quot;.
      &quot;industryNaicsCodeOfAudio&quot;: 42, # The industry vertical to which this speech recognition request most
          # closely applies. This is most indicative of the topics contained
          # in the audio. Use the 6-digit NAICS code to identify the industry
          # vertical - see https://www.naics.com/search/.
      &quot;microphoneDistance&quot;: &quot;A String&quot;, # The audio type that most closely describes the audio being recognized.
    },
    &quot;sampleRateHertz&quot;: 42, # Sample rate in Hertz of the audio data sent in all
        # `RecognitionAudio` messages. Valid values are: 8000-48000.
        # 16000 is optimal. For best results, set the sampling rate of the audio
        # source to 16000 Hz. If that&#x27;s not possible, use the native sample rate of
        # the audio source (instead of re-sampling).
        # This field is optional for FLAC and WAV audio files, but is
        # required for all other audio formats. For details, see AudioEncoding.
    &quot;enableSeparateRecognitionPerChannel&quot;: True or False, # This needs to be set to `true` explicitly, with `audio_channel_count` &gt; 1,
        # to get each channel recognized separately. The recognition result will
        # contain a `channel_tag` field to state which channel that result belongs
        # to. If this is not true, we will only recognize the first channel. The
        # request is billed cumulatively for all channels recognized:
        # `audio_channel_count` multiplied by the length of the audio.
    &quot;enableAutomaticPunctuation&quot;: True or False, # If &#x27;true&#x27;, adds punctuation to recognition result hypotheses.
        # This feature is only available in select languages. Setting this for
        # requests in other languages has no effect at all.
        # The default &#x27;false&#x27; value does not add punctuation to result hypotheses.
    &quot;adaptation&quot;: { # Speech adaptation configuration. # Speech adaptation configuration improves the accuracy of speech
        # recognition. When speech adaptation is set, it supersedes the
        # `speech_contexts` field. For more information, see the [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength)
        # documentation.
      &quot;phraseSets&quot;: [ # A collection of phrase sets. To specify the hints inline, leave the
          # phrase set&#x27;s `name` blank and fill in the rest of its fields. Any
          # phrase set can use any custom class.
        { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
            # in the results.
          &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
              # phrase will be recognized over other similar-sounding phrases. The higher
              # the boost, the higher the chance of false positive recognition as well.
              # Negative boost values would correspond to anti-biasing. Anti-biasing is not
              # enabled, so negative boost will simply be ignored. Though `boost` can
              # accept a wide range of positive values, most use cases are best served with
              # values between 0 (exclusive) and 20. We recommend using a binary search
              # approach to finding the optimal value for your use case. Speech recognition
              # will skip PhraseSets with a boost value of 0.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the phrase set.
          &quot;phrases&quot;: [ # A list of words and phrases.
            { # A phrase containing words and phrase &quot;hints&quot; so that
                # the speech recognition is more likely to recognize them. This can be used
                # to improve the accuracy for specific words and phrases, for example, if
                # specific commands are typically spoken by the user. This can also be used
                # to add additional words to the vocabulary of the recognizer. See
                # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
                #
                # List items can also include pre-built or custom classes containing groups
                # of words that represent common concepts that occur in natural language. For
                # example, rather than providing a phrase hint for every month of the
                # year (e.g. &quot;i was born in january&quot;, &quot;i was born in february&quot;, ...), using the
                # pre-built `$MONTH` class improves the likelihood of correctly transcribing
                # audio that includes months (e.g. &quot;i was born in $month&quot;).
                # To refer to pre-built classes, use the class&#x27;s symbol prepended with `$`,
                # e.g. `$MONTH`. To refer to custom classes that were defined inline in the
                # request, set the class&#x27;s `custom_class_id` to a string unique to all class
                # resources and inline classes. Then use the class&#x27;s id wrapped in `${...}`,
                # e.g. &quot;${my-months}&quot;. To refer to custom class resources, use the class&#x27;s
                # id wrapped in `${}` (e.g. `${my-months}`).
              &quot;value&quot;: &quot;A String&quot;, # The phrase itself.
              &quot;boost&quot;: 3.14, # Hint Boost. Overrides the boost set at the phrase set level.
                  # A positive value will increase the probability that a specific phrase will
                  # be recognized over other similar-sounding phrases. The higher the boost,
                  # the higher the chance of false positive recognition as well. Negative
                  # boost values would correspond to anti-biasing. Anti-biasing is not
                  # enabled, so negative boost will simply be ignored. Though `boost` can
                  # accept a wide range of positive values, most use cases are best served
                  # with values between 0 and 20. We recommend using a binary search approach
                  # to finding the optimal value for your use case. Speech recognition
                  # will skip PhraseSets with a boost value of 0.
            },
          ],
        },
      ],
      &quot;customClasses&quot;: [ # A collection of custom classes. To specify the classes inline, leave the
          # class&#x27;s `name` blank and fill in the rest of its fields, giving it a unique
          # `custom_class_id`. Refer to the inline defined class in phrase hints by its
          # `custom_class_id`.
        { # A set of words or phrases that represents a common concept likely to appear
            # in your audio, for example a list of passenger ship names. CustomClass items
            # can be substituted into placeholders that you set in PhraseSet phrases.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the custom class.
          &quot;customClassId&quot;: &quot;A String&quot;, # If this custom class is a resource, the custom_class_id is the resource id
              # of the CustomClass. Case sensitive.
          &quot;items&quot;: [ # A collection of class items.
            { # An item of the class.
              &quot;value&quot;: &quot;A String&quot;, # The class item&#x27;s value.
            },
          ],
        },
      ],
    },
    &quot;maxAlternatives&quot;: 42, # Maximum number of recognition hypotheses to be returned.
        # Specifically, the maximum number of `SpeechRecognitionAlternative` messages
        # within each `SpeechRecognitionResult`.
        # The server may return fewer than `max_alternatives`.
        # Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
        # one. If omitted, will return a maximum of one.
    &quot;diarizationSpeakerCount&quot;: 42, # If set, specifies the estimated number of speakers in the conversation.
        # Defaults to &#x27;2&#x27;. Ignored unless enable_speaker_diarization is set to true.
        # Note: Use diarization_config instead.
    &quot;encoding&quot;: &quot;A String&quot;, # Encoding of audio data sent in all `RecognitionAudio` messages.
        # This field is optional for `FLAC` and `WAV` audio files and required
        # for all other audio formats. For details, see AudioEncoding.
    &quot;speechContexts&quot;: [ # Array of SpeechContext.
        # A means to provide context to assist the speech recognition. For more
        # information, see
        # [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
      { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
          # in the results.
        &quot;phrases&quot;: [ # A list of strings containing word and phrase &quot;hints&quot; so that
            # the speech recognition is more likely to recognize them. This can be used
            # to improve the accuracy for specific words and phrases, for example, if
            # specific commands are typically spoken by the user. This can also be used
            # to add additional words to the vocabulary of the recognizer. See
            # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
            #
            # List items can also be set to classes for groups of words that represent
            # common concepts that occur in natural language. For example, rather than
            # providing phrase hints for every month of the year, using the $MONTH class
            # improves the likelihood of correctly transcribing audio that includes
            # months.
          &quot;A String&quot;,
        ],
        &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
            # phrase will be recognized over other similar-sounding phrases. The higher
            # the boost, the higher the chance of false positive recognition as well.
            # Negative boost values would correspond to anti-biasing. Anti-biasing is not
            # enabled, so negative boost will simply be ignored. Though `boost` can
            # accept a wide range of positive values, most use cases are best served with
            # values between 0 and 20. We recommend using a binary search approach to
            # finding the optimal value for your use case.
      },
    ],
    &quot;enableWordConfidence&quot;: True or False, # If `true`, the top result includes a list of words and the
        # confidence for those words. If `false`, no word-level confidence
        # information is returned. The default is `false`.
    &quot;model&quot;: &quot;A String&quot;, # Which model to select for the given request. Select the model
        # best suited to your domain to get best results. If a model is not
        # explicitly specified, then we auto-select a model based on the parameters
        # in the RecognitionConfig.
        # &lt;table&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;b&gt;Model&lt;/b&gt;&lt;/td&gt;
        # &lt;td&gt;&lt;b&gt;Description&lt;/b&gt;&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;command_and_search&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for short queries such as voice commands or voice search.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;phone_call&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from a phone call (typically
        # recorded at an 8 kHz sampling rate).&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;video&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from video or includes multiple
        # speakers. Ideally the audio is recorded at a 16 kHz or greater
        # sampling rate. This is a premium model that costs more than the
        # standard rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;default&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that is not one of the specific audio models.
        # For example, long-form audio. Ideally the audio is high-fidelity,
        # recorded at a 16 kHz or greater sampling rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;/table&gt;
    &quot;audioChannelCount&quot;: 42, # The number of channels in the input audio data.
        # ONLY set this for MULTI-CHANNEL recognition.
        # Valid values for LINEAR16 and FLAC are `1`-`8`.
        # Valid values for OGG_OPUS are &#x27;1&#x27;-&#x27;254&#x27;.
        # The only valid value for MULAW, AMR, AMR_WB, and SPEEX_WITH_HEADER_BYTE is `1`.
        # If `0` or omitted, defaults to one channel (mono).
        # Note: We only recognize the first channel by default.
        # To perform independent recognition on each channel, set
        # `enable_separate_recognition_per_channel` to &#x27;true&#x27;.
    &quot;enableWordTimeOffsets&quot;: True or False, # If `true`, the top result includes a list of words and
        # the start and end time offsets (timestamps) for those words. If
        # `false`, no word-level time offset information is returned. The default is
        # `false`.
    &quot;alternativeLanguageCodes&quot;: [ # A list of up to 3 additional
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
        # listing possible alternative languages of the supplied audio.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes. If alternative languages are
        # listed, the recognition result will contain recognition in the most likely
        # language detected, including the main language_code. The recognition result
        # will include the language tag of the language detected in the audio. Note:
        # This feature is only supported for Voice Command and Voice Search use cases,
        # and performance may vary for other use cases (e.g., phone call
        # transcription).
      &quot;A String&quot;,
    ],
    &quot;diarizationConfig&quot;: { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional
        # parameters to make diarization better suited for your application.
        # Note: When this is enabled, we send all the words from the beginning of the
        # audio for the top alternative in every consecutive STREAMING response.
        # This is done in order to improve our speaker tags as our models learn to
        # identify the speakers in the conversation over time.
        # For non-streaming requests, the diarization results will be provided only
        # in the top alternative of the FINAL SpeechRecognitionResult.
      &quot;minSpeakerCount&quot;: 42, # Minimum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 2.
      &quot;maxSpeakerCount&quot;: 42, # Maximum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 6.
      &quot;speakerTag&quot;: 42, # Output only. Unused.
      &quot;enableSpeakerDiarization&quot;: True or False, # If &#x27;true&#x27;, enables speaker detection for each recognized word in
          # the top alternative of the recognition result, using a speaker_tag provided
          # in the WordInfo.
    },
    &quot;languageCode&quot;: &quot;A String&quot;, # Required. The language of the supplied audio as a
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
        # Example: &quot;en-US&quot;.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes.
    &quot;profanityFilter&quot;: True or False, # If set to `true`, the server will attempt to filter out
        # profanities, replacing all but the initial character in each filtered word
        # with asterisks, e.g. &quot;f***&quot;. If set to `false` or omitted, profanities
        # won&#x27;t be filtered out.
    &quot;enableSpeakerDiarization&quot;: True or False, # If &#x27;true&#x27;, enables speaker detection for each recognized word in
        # the top alternative of the recognition result, using a speaker_tag provided
        # in the WordInfo.
        # Note: Use diarization_config instead.
    &quot;useEnhanced&quot;: True or False, # Set to true to use an enhanced model for speech recognition.
        # If `use_enhanced` is set to true and the `model` field is not set, then
        # an appropriate enhanced model is chosen if an enhanced model exists for
        # the audio.
        #
        # If `use_enhanced` is true and an enhanced version of the specified model
        # does not exist, then the speech is recognized using the standard version
        # of the specified model.
  },
  &quot;audio&quot;: { # Contains audio data in the encoding specified in the `RecognitionConfig`. # Required. The audio data to be recognized.
      # Either `content` or `uri` must be supplied. Supplying both or neither
      # returns google.rpc.Code.INVALID_ARGUMENT. See
      # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
    &quot;content&quot;: &quot;A String&quot;, # The audio data bytes encoded as specified in
        # `RecognitionConfig`. Note: as with all bytes fields, protocol buffers use a
        # pure binary representation, whereas JSON representations use base64.
    &quot;uri&quot;: &quot;A String&quot;, # URI that points to a file that contains audio data bytes as specified in
        # `RecognitionConfig`. The file must not be compressed (for example, gzip).
        # Currently, only Google Cloud Storage URIs are
        # supported, which must be specified in the following format:
        # `gs://bucket_name/object_name` (other URI formats return
        # google.rpc.Code.INVALID_ARGUMENT). For more information, see
        # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
  },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # The only message returned to the client by the `Recognize` method. It
        # contains the result as zero or more sequential `SpeechRecognitionResult`
        # messages.
      &quot;results&quot;: [ # Sequential list of transcription results corresponding to
          # sequential portions of audio.
        { # A speech recognition result corresponding to a portion of the audio.
          &quot;channelTag&quot;: 42, # For multi-channel audio, this is the channel number corresponding to the
              # recognized result for the audio from that channel.
              # For audio_channel_count = N, its output values can range from &#x27;1&#x27; to &#x27;N&#x27;.
          &quot;languageCode&quot;: &quot;A String&quot;, # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
              # of the language in this result. This language code was detected to have
              # the most likelihood of being spoken in the audio.
          &quot;alternatives&quot;: [ # May contain one or more recognition hypotheses (up to the
              # maximum specified in `max_alternatives`).
              # These alternatives are ordered in terms of accuracy, with the top (first)
              # alternative being the most probable, as ranked by the recognizer.
            { # Alternative hypotheses (a.k.a. n-best list).
              &quot;confidence&quot;: 3.14, # The confidence estimate between 0.0 and 1.0. A higher number
                  # indicates an estimated greater likelihood that the recognized words are
                  # correct. This field is set only for the top alternative of a non-streaming
                  # result or of a streaming result where `is_final=true`.
                  # This field is not guaranteed to be accurate and users should not rely on it
                  # to be always provided.
                  # The default of 0.0 is a sentinel value indicating `confidence` was not set.
              &quot;transcript&quot;: &quot;A String&quot;, # Transcript text representing the words that the user spoke.
              &quot;words&quot;: [ # A list of word-specific information for each recognized word.
                  # Note: When `enable_speaker_diarization` is true, you will see all the words
                  # from the beginning of the audio.
                { # Word-specific information for recognized words.
                  &quot;speakerTag&quot;: 42, # Output only. A distinct integer value is assigned for every speaker within
                      # the audio. This field specifies which one of those speakers was detected to
                      # have spoken this word. Values range from &#x27;1&#x27; to diarization_speaker_count.
                      # speaker_tag is set if enable_speaker_diarization = &#x27;true&#x27; and only in the
                      # top alternative.
                  &quot;endTime&quot;: &quot;A String&quot;, # Time offset relative to the beginning of the audio,
                      # and corresponding to the end of the spoken word.
                      # This field is only set if `enable_word_time_offsets=true` and only
                      # in the top hypothesis.
                      # This is an experimental feature and the accuracy of the time offset can
                      # vary.
                  &quot;confidence&quot;: 3.14, # The confidence estimate between 0.0 and 1.0. A higher number
                      # indicates an estimated greater likelihood that the recognized words are
                      # correct. This field is set only for the top alternative of a non-streaming
                      # result or of a streaming result where `is_final=true`.
                      # This field is not guaranteed to be accurate and users should not rely on it
                      # to be always provided.
                      # The default of 0.0 is a sentinel value indicating `confidence` was not set.
                  &quot;startTime&quot;: &quot;A String&quot;, # Time offset relative to the beginning of the audio,
                      # and corresponding to the start of the spoken word.
                      # This field is only set if `enable_word_time_offsets=true` and only
                      # in the top hypothesis.
                      # This is an experimental feature and the accuracy of the time offset can
                      # vary.
                  &quot;word&quot;: &quot;A String&quot;, # The word corresponding to this set of information.
                },
              ],
            },
          ],
        },
      ],
    }</pre>
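  <p>Example (editor's sketch, not part of the generated reference): a minimal
synchronous request with inline audio, using the google-api-python-client package
with application default credentials. The local file command.wav and the phrase
hint are illustrative assumptions; encoding and sample rate are omitted because
they are optional for WAV files, as noted above.</p>
  <pre>
# Minimal sketch: recognize a short local WAV file in one call.
# In JSON requests, the `content` bytes field must be base64-encoded.
import base64

from googleapiclient.discovery import build

service = build('speech', 'v1p1beta1')

with open('command.wav', 'rb') as f:  # placeholder path
    audio_bytes = f.read()

request_body = {
    'config': {
        'languageCode': 'en-US',
        # Bias recognition toward expected phrases (see `speechContexts` above).
        'speechContexts': [{'phrases': ['turn on the lights']}],
    },
    'audio': {
        'content': base64.b64encode(audio_bytes).decode('utf-8'),
    },
}

response = service.speech().recognize(body=request_body).execute()
for result in response.get('results', []):
    print(result['alternatives'][0]['transcript'])
</pre>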
</div>

</body></html>