<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="speech_v1p1beta1.html">Cloud Speech-to-Text API</a> . <a href="speech_v1p1beta1.speech.html">speech</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="#longrunningrecognize">longrunningrecognize(body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Performs asynchronous speech recognition: receive results via the google.longrunning.Operations interface.</p>
<p class="toc_element">
  <code><a href="#recognize">recognize(body=None, x__xgafv=None)</a></code></p>
<p class="firstline">Performs synchronous speech recognition: receive results after all audio has been sent and processed.</p>
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="longrunningrecognize">longrunningrecognize(body=None, x__xgafv=None)</code>
  <pre>Performs asynchronous speech recognition: receive results via the
google.longrunning.Operations interface. Returns either an
`Operation.error` or an `Operation.response`, which contains
a `LongRunningRecognizeResponse` message.
For more information on asynchronous speech recognition, see the
[how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).

Args:
  body: object, The request body.
    The object takes the form of:

{ # The top-level message sent by the client for the `LongRunningRecognize`
    # method.
  &quot;audio&quot;: { # Contains audio data in the encoding specified in the `RecognitionConfig`. # Required. The audio data to be recognized.
      # Either `content` or `uri` must be supplied. Supplying both or neither
      # returns google.rpc.Code.INVALID_ARGUMENT. See
      # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
    &quot;content&quot;: &quot;A String&quot;, # The audio data bytes encoded as specified in
        # `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
        # pure binary representation, whereas JSON representations use base64.
    &quot;uri&quot;: &quot;A String&quot;, # URI that points to a file that contains audio data bytes as specified in
        # `RecognitionConfig`. The file must not be compressed (for example, gzip).
        # Currently, only Google Cloud Storage URIs are
        # supported, which must be specified in the following format:
        # `gs://bucket_name/object_name` (other URI formats return
        # google.rpc.Code.INVALID_ARGUMENT). For more information, see
        # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
  },
  &quot;config&quot;: { # Provides information to the recognizer that specifies how to process the request. # Required. Provides information to the recognizer that specifies how to process the request.
    &quot;encoding&quot;: &quot;A String&quot;, # Encoding of audio data sent in all `RecognitionAudio` messages.
        # This field is optional for `FLAC` and `WAV` audio files and required
        # for all other audio formats. For details, see AudioEncoding.
    &quot;audioChannelCount&quot;: 42, # The number of channels in the input audio data.
        # ONLY set this for MULTI-CHANNEL recognition.
        # Valid values for LINEAR16 and FLAC are `1`-`8`.
        # Valid values for OGG_OPUS are `1`-`254`.
        # The only valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is `1`.
        # If `0` or omitted, defaults to one channel (mono).
        # Note: We only recognize the first channel by default.
        # To perform independent recognition on each channel, set
        # `enable_separate_recognition_per_channel` to `true`.
    &quot;languageCode&quot;: &quot;A String&quot;, # Required. The language of the supplied audio as a
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
        # Example: &quot;en-US&quot;.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes.
    &quot;metadata&quot;: { # Description of audio data to be recognized. # Metadata regarding this request.
      &quot;audioTopic&quot;: &quot;A String&quot;, # Description of the content. E.g. &quot;Recordings of federal supreme court
          # hearings from 2012&quot;.
      &quot;originalMediaType&quot;: &quot;A String&quot;, # The original media the speech was recorded on.
      &quot;interactionType&quot;: &quot;A String&quot;, # The use case most closely describing the audio content to be recognized.
      &quot;recordingDeviceName&quot;: &quot;A String&quot;, # The device used to make the recording. Examples: &#x27;Nexus 5X&#x27;,
          # &#x27;Polycom SoundStation IP 6000&#x27;, &#x27;POTS&#x27;, &#x27;VoIP&#x27;, or
          # &#x27;Cardioid Microphone&#x27;.
      &quot;microphoneDistance&quot;: &quot;A String&quot;, # The audio type that most closely describes the audio being recognized.
      &quot;industryNaicsCodeOfAudio&quot;: 42, # The industry vertical to which this speech recognition request most
          # closely applies. This is most indicative of the topics contained
          # in the audio. Use the 6-digit NAICS code to identify the industry
          # vertical - see https://www.naics.com/search/.
      &quot;recordingDeviceType&quot;: &quot;A String&quot;, # The type of device the speech was recorded with.
      &quot;originalMimeType&quot;: &quot;A String&quot;, # MIME type of the original audio file. For example `audio/m4a`,
          # `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
          # A list of possible audio MIME types is maintained at
          # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
      &quot;obfuscatedId&quot;: &quot;A String&quot;, # Obfuscated (privacy-protected) ID of the user, used to identify the number of
          # unique users using the service.
    },
    &quot;maxAlternatives&quot;: 42, # Maximum number of recognition hypotheses to be returned.
        # Specifically, the maximum number of `SpeechRecognitionAlternative` messages
        # within each `SpeechRecognitionResult`.
        # The server may return fewer than `max_alternatives`.
        # Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
        # one. If omitted, it will return a maximum of one.
    &quot;profanityFilter&quot;: True or False, # If set to `true`, the server will attempt to filter out
        # profanities, replacing all but the initial character in each filtered word
        # with asterisks, e.g. &quot;f***&quot;. If set to `false` or omitted, profanities
        # won&#x27;t be filtered out.
    &quot;sampleRateHertz&quot;: 42, # Sample rate in Hertz of the audio data sent in all
        # `RecognitionAudio` messages. Valid values are: 8000-48000.
        # 16000 is optimal. For best results, set the sampling rate of the audio
        # source to 16000 Hz. If that&#x27;s not possible, use the native sample rate of
        # the audio source (instead of re-sampling).
        # This field is optional for FLAC and WAV audio files, but is
        # required for all other audio formats. For details, see AudioEncoding.
    &quot;diarizationSpeakerCount&quot;: 42, # If set, specifies the estimated number of speakers in the conversation.
        # Defaults to `2`. Ignored unless enable_speaker_diarization is set to true.
        # Note: Use diarization_config instead.
    &quot;enableWordConfidence&quot;: True or False, # If `true`, the top result includes a list of words and the
        # confidence for those words. If `false`, no word-level confidence
        # information is returned. The default is `false`.
    &quot;speechContexts&quot;: [ # Array of SpeechContext.
        # A means to provide context to assist the speech recognition. For more
        # information, see
        # [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
      { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
          # in the results.
        &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
            # phrase will be recognized over other similar-sounding phrases. The higher
            # the boost, the higher the chance of false positive recognition as well.
            # Negative boost values would correspond to anti-biasing. Anti-biasing is not
            # enabled, so negative boost will simply be ignored. Though `boost` can
            # accept a wide range of positive values, most use cases are best served with
            # values between 0 and 20. We recommend using a binary search approach to
            # finding the optimal value for your use case.
        &quot;phrases&quot;: [ # A list of strings containing word and phrase &quot;hints&quot; so that
            # the speech recognition is more likely to recognize them. This can be used
            # to improve the accuracy for specific words and phrases, for example, if
            # specific commands are typically spoken by the user. This can also be used
            # to add additional words to the vocabulary of the recognizer. See
            # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
            #
            # List items can also be set to classes for groups of words that represent
            # common concepts that occur in natural language. For example, rather than
            # providing phrase hints for every month of the year, using the $MONTH class
            # improves the likelihood of correctly transcribing audio that includes
            # months.
          &quot;A String&quot;,
        ],
      },
    ],
    &quot;enableSpeakerDiarization&quot;: True or False, # If `true`, enables speaker detection for each recognized word in
        # the top alternative of the recognition result, using a speaker_tag provided
        # in the WordInfo.
        # Note: Use diarization_config instead.
    &quot;enableAutomaticPunctuation&quot;: True or False, # If `true`, adds punctuation to recognition result hypotheses.
        # This feature is only available in select languages. Setting this for
        # requests in other languages has no effect at all.
        # The default `false` value does not add punctuation to result hypotheses.
    &quot;enableWordTimeOffsets&quot;: True or False, # If `true`, the top result includes a list of words and
        # the start and end time offsets (timestamps) for those words. If
        # `false`, no word-level time offset information is returned. The default is
        # `false`.
    &quot;diarizationConfig&quot;: { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional
        # parameters to make diarization better suited for your application.
        # Note: When this is enabled, we send all the words from the beginning of the
        # audio for the top alternative in every consecutive STREAMING response.
        # This is done in order to improve our speaker tags as our models learn to
        # identify the speakers in the conversation over time.
        # For non-streaming requests, the diarization results will be provided only
        # in the top alternative of the FINAL SpeechRecognitionResult.
      &quot;maxSpeakerCount&quot;: 42, # Maximum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 6.
      &quot;speakerTag&quot;: 42, # Output only. Unused.
      &quot;minSpeakerCount&quot;: 42, # Minimum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 2.
      &quot;enableSpeakerDiarization&quot;: True or False, # If `true`, enables speaker detection for each recognized word in
          # the top alternative of the recognition result, using a speaker_tag provided
          # in the WordInfo.
    },
    &quot;adaptation&quot;: { # Speech adaptation configuration. # Speech adaptation configuration improves the accuracy of speech
        # recognition. When speech adaptation is set it supersedes the
        # `speech_contexts` field. For more information, see the [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength)
        # documentation.
      &quot;phraseSets&quot;: [ # A collection of phrase sets. To specify the hints inline, leave the
          # phrase set&#x27;s `name` blank and fill in the rest of its fields. Any
          # phrase set can use any custom class.
        { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
            # in the results.
          &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
              # phrase will be recognized over other similar-sounding phrases. The higher
              # the boost, the higher the chance of false positive recognition as well.
              # Negative boost values would correspond to anti-biasing. Anti-biasing is not
              # enabled, so negative boost will simply be ignored. Though `boost` can
              # accept a wide range of positive values, most use cases are best served with
              # values between 0 (exclusive) and 20. We recommend using a binary search
              # approach to finding the optimal value for your use case. Speech recognition
              # will skip PhraseSets with a boost value of 0.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the phrase set.
          &quot;phrases&quot;: [ # A list of words and phrases.
            { # A phrase containing words and phrase &quot;hints&quot; so that
                # the speech recognition is more likely to recognize them. This can be used
                # to improve the accuracy for specific words and phrases, for example, if
                # specific commands are typically spoken by the user. This can also be used
                # to add additional words to the vocabulary of the recognizer. See
                # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
                #
                # List items can also include pre-built or custom classes containing groups
                # of words that represent common concepts that occur in natural language. For
                # example, rather than providing a phrase hint for every month of the
                # year (e.g. &quot;i was born in january&quot;, &quot;i was born in february&quot;, ...), using the
                # pre-built `$MONTH` class improves the likelihood of correctly transcribing
                # audio that includes months (e.g. &quot;i was born in $month&quot;).
                # To refer to pre-built classes, use the class&#x27;s symbol prepended with `$`,
                # e.g. `$MONTH`. To refer to custom classes that were defined inline in the
                # request, set the class&#x27;s `custom_class_id` to a string unique to all class
                # resources and inline classes. Then use the class&#x27;s id wrapped in `${...}`,
                # e.g. &quot;${my-months}&quot;. To refer to custom class resources, use the class&#x27;s
                # id wrapped in `${}` (e.g. `${my-months}`).
              &quot;boost&quot;: 3.14, # Hint Boost. Overrides the boost set at the phrase set level.
                  # A positive value will increase the probability that a specific phrase will
                  # be recognized over other similar-sounding phrases. The higher the boost,
                  # the higher the chance of false positive recognition as well. Negative
                  # boost values would correspond to anti-biasing. Anti-biasing is not
                  # enabled, so negative boost will simply be ignored. Though `boost` can
                  # accept a wide range of positive values, most use cases are best served
                  # with values between 0 and 20. We recommend using a binary search approach
                  # to finding the optimal value for your use case. Speech recognition
                  # will skip PhraseSets with a boost value of 0.
              &quot;value&quot;: &quot;A String&quot;, # The phrase itself.
            },
          ],
        },
      ],
      &quot;customClasses&quot;: [ # A collection of custom classes. To specify the classes inline, leave the
          # class&#x27;s `name` blank and fill in the rest of its fields, giving it a unique
          # `custom_class_id`. Refer to the inline defined class in phrase hints by its
          # `custom_class_id`.
        { # A set of words or phrases that represents a common concept likely to appear
            # in your audio, for example a list of passenger ship names. CustomClass items
            # can be substituted into placeholders that you set in PhraseSet phrases.
          &quot;items&quot;: [ # A collection of class items.
            { # An item of the class.
              &quot;value&quot;: &quot;A String&quot;, # The class item&#x27;s value.
            },
          ],
          &quot;customClassId&quot;: &quot;A String&quot;, # If this custom class is a resource, the custom_class_id is the resource id
              # of the CustomClass. Case sensitive.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the custom class.
        },
      ],
    },
    &quot;model&quot;: &quot;A String&quot;, # Which model to select for the given request. Select the model
        # best suited to your domain to get best results. If a model is not
        # explicitly specified, then we auto-select a model based on the parameters
        # in the RecognitionConfig.
        # &lt;table&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;b&gt;Model&lt;/b&gt;&lt;/td&gt;
        # &lt;td&gt;&lt;b&gt;Description&lt;/b&gt;&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;command_and_search&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for short queries such as voice commands or voice search.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;phone_call&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from a phone call (typically
        # recorded at an 8 kHz sampling rate).&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;video&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from video or includes multiple
        # speakers. Ideally the audio is recorded at a 16 kHz or greater
        # sampling rate. This is a premium model that costs more than the
        # standard rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;default&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that is not one of the specific audio models.
        # For example, long-form audio. Ideally the audio is high-fidelity,
        # recorded at a 16 kHz or greater sampling rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;/table&gt;
    &quot;useEnhanced&quot;: True or False, # Set to true to use an enhanced model for speech recognition.
        # If `use_enhanced` is set to true and the `model` field is not set, then
        # an appropriate enhanced model is chosen if an enhanced model exists for
        # the audio.
        #
        # If `use_enhanced` is true and an enhanced version of the specified model
        # does not exist, then the speech is recognized using the standard version
        # of the specified model.
    &quot;alternativeLanguageCodes&quot;: [ # A list of up to 3 additional
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
        # listing possible alternative languages of the supplied audio.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes. If alternative languages are
        # listed, the recognition result will contain recognition in the most likely
        # language detected, including the main language_code. The recognition result
        # will include the language tag of the language detected in the audio. Note:
        # This feature is only supported for Voice Command and Voice Search use cases,
        # and performance may vary for other use cases (e.g., phone call
        # transcription).
      &quot;A String&quot;,
    ],
    &quot;enableSeparateRecognitionPerChannel&quot;: True or False, # This needs to be set to `true` explicitly and `audio_channel_count` &gt; 1
        # to get each channel recognized separately. The recognition result will
        # contain a `channel_tag` field to state which channel that result belongs
        # to. If this is not true, we will only recognize the first channel. The
        # request is billed cumulatively for all channels recognized:
        # `audio_channel_count` multiplied by the length of the audio.
  },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # This resource represents a long-running operation that is the result of a
        # network API call.
      &quot;done&quot;: True or False, # If the value is `false`, it means the operation is still in progress.
          # If `true`, the operation is completed, and either `error` or `response` is
          # available.
      &quot;metadata&quot;: { # Service-specific metadata associated with the operation. It typically
          # contains progress information and common metadata such as create time.
          # Some services might not provide such metadata. Any method that returns a
          # long-running operation should document the metadata type, if any.
        &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
      },
      &quot;error&quot;: { # The `Status` type defines a logical error model that is suitable for # The error result of the operation in case of failure or cancellation.
          # different programming environments, including REST APIs and RPC APIs. It is
          # used by [gRPC](https://github.com/grpc). Each `Status` message contains
          # three pieces of data: error code, error message, and error details.
          #
          # You can find out more about this error model and how to work with it in the
          # [API Design Guide](https://cloud.google.com/apis/design/errors).
        &quot;code&quot;: 42, # The status code, which should be an enum value of google.rpc.Code.
        &quot;message&quot;: &quot;A String&quot;, # A developer-facing error message, which should be in English. Any
            # user-facing error message should be localized and sent in the
            # google.rpc.Status.details field, or localized by the client.
        &quot;details&quot;: [ # A list of messages that carry the error details. There is a common set of
            # message types for APIs to use.
          {
            &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
          },
        ],
      },
      &quot;name&quot;: &quot;A String&quot;, # The server-assigned name, which is only unique within the same service that
          # originally returns it. If you use the default HTTP mapping, the
          # `name` should be a resource name ending with `operations/{unique_id}`.
      &quot;response&quot;: { # The normal response of the operation in case of success. If the original
          # method returns no data on success, such as `Delete`, the response is
          # `google.protobuf.Empty`. If the original method is standard
          # `Get`/`Create`/`Update`, the response should be the resource. For other
          # methods, the response should have the type `XxxResponse`, where `Xxx`
          # is the original method name. For example, if the original method name
          # is `TakeSnapshot()`, the inferred response type is
          # `TakeSnapshotResponse`.
        &quot;a_key&quot;: &quot;&quot;, # Properties of the object. Contains field @type with type URL.
      },
    }</pre>
</div>
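<p>As an illustrative sketch only (not part of the generated reference): a minimal way to call this method with the google-api-python-client library. The service name <code>speech</code> and version <code>v1p1beta1</code> come from this page; the Cloud Storage URI and the polling interval below are placeholder assumptions.</p>
<pre>
# Sketch: asynchronous recognition, assuming application default
# credentials are configured and that gs://my-bucket/audio.flac
# (a placeholder) points to a real FLAC file.
import time

from googleapiclient import discovery

service = discovery.build('speech', 'v1p1beta1')

body = {
    'config': {
        'languageCode': 'en-US',        # required
        'enableWordTimeOffsets': True,  # word-level timestamps
    },
    'audio': {
        'uri': 'gs://my-bucket/audio.flac',  # placeholder URI
    },
}

# Start the long-running operation, then poll it through the service's
# operations collection until `done` is true.
operation = service.speech().longrunningrecognize(body=body).execute()
while not operation.get('done'):
    time.sleep(5)  # placeholder polling interval
    operation = service.operations().get(name=operation['name']).execute()

# The finished operation carries either `error` or `response`
# (a LongRunningRecognizeResponse).
if 'error' in operation:
    raise RuntimeError(operation['error'])
for result in operation['response'].get('results', []):
    print(result['alternatives'][0]['transcript'])
</pre>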

<div class="method">
    <code class="details" id="recognize">recognize(body=None, x__xgafv=None)</code>
  <pre>Performs synchronous speech recognition: receive results after all audio
has been sent and processed.

Args:
  body: object, The request body.
    The object takes the form of:

{ # The top-level message sent by the client for the `Recognize` method.
  &quot;audio&quot;: { # Contains audio data in the encoding specified in the `RecognitionConfig`. # Required. The audio data to be recognized.
      # Either `content` or `uri` must be supplied. Supplying both or neither
      # returns google.rpc.Code.INVALID_ARGUMENT. See
      # [content limits](https://cloud.google.com/speech-to-text/quotas#content).
    &quot;content&quot;: &quot;A String&quot;, # The audio data bytes encoded as specified in
        # `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
        # pure binary representation, whereas JSON representations use base64.
    &quot;uri&quot;: &quot;A String&quot;, # URI that points to a file that contains audio data bytes as specified in
        # `RecognitionConfig`. The file must not be compressed (for example, gzip).
        # Currently, only Google Cloud Storage URIs are
        # supported, which must be specified in the following format:
        # `gs://bucket_name/object_name` (other URI formats return
        # google.rpc.Code.INVALID_ARGUMENT). For more information, see
        # [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
  },
  &quot;config&quot;: { # Provides information to the recognizer that specifies how to process the request. # Required. Provides information to the recognizer that specifies how to process the request.
    &quot;encoding&quot;: &quot;A String&quot;, # Encoding of audio data sent in all `RecognitionAudio` messages.
        # This field is optional for `FLAC` and `WAV` audio files and required
        # for all other audio formats. For details, see AudioEncoding.
    &quot;audioChannelCount&quot;: 42, # The number of channels in the input audio data.
        # ONLY set this for MULTI-CHANNEL recognition.
        # Valid values for LINEAR16 and FLAC are `1`-`8`.
        # Valid values for OGG_OPUS are `1`-`254`.
        # The only valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is `1`.
        # If `0` or omitted, defaults to one channel (mono).
        # Note: We only recognize the first channel by default.
        # To perform independent recognition on each channel, set
        # `enable_separate_recognition_per_channel` to `true`.
    &quot;languageCode&quot;: &quot;A String&quot;, # Required. The language of the supplied audio as a
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
        # Example: &quot;en-US&quot;.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes.
    &quot;metadata&quot;: { # Description of audio data to be recognized. # Metadata regarding this request.
      &quot;audioTopic&quot;: &quot;A String&quot;, # Description of the content. E.g. &quot;Recordings of federal supreme court
          # hearings from 2012&quot;.
      &quot;originalMediaType&quot;: &quot;A String&quot;, # The original media the speech was recorded on.
      &quot;interactionType&quot;: &quot;A String&quot;, # The use case most closely describing the audio content to be recognized.
      &quot;recordingDeviceName&quot;: &quot;A String&quot;, # The device used to make the recording. Examples: &#x27;Nexus 5X&#x27;,
          # &#x27;Polycom SoundStation IP 6000&#x27;, &#x27;POTS&#x27;, &#x27;VoIP&#x27;, or
          # &#x27;Cardioid Microphone&#x27;.
      &quot;microphoneDistance&quot;: &quot;A String&quot;, # The audio type that most closely describes the audio being recognized.
      &quot;industryNaicsCodeOfAudio&quot;: 42, # The industry vertical to which this speech recognition request most
          # closely applies. This is most indicative of the topics contained
          # in the audio. Use the 6-digit NAICS code to identify the industry
          # vertical - see https://www.naics.com/search/.
      &quot;recordingDeviceType&quot;: &quot;A String&quot;, # The type of device the speech was recorded with.
      &quot;originalMimeType&quot;: &quot;A String&quot;, # MIME type of the original audio file. For example `audio/m4a`,
          # `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
          # A list of possible audio MIME types is maintained at
          # http://www.iana.org/assignments/media-types/media-types.xhtml#audio
      &quot;obfuscatedId&quot;: &quot;A String&quot;, # Obfuscated (privacy-protected) ID of the user, used to identify the number of
          # unique users using the service.
    },
    &quot;maxAlternatives&quot;: 42, # Maximum number of recognition hypotheses to be returned.
        # Specifically, the maximum number of `SpeechRecognitionAlternative` messages
        # within each `SpeechRecognitionResult`.
        # The server may return fewer than `max_alternatives`.
        # Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
        # one. If omitted, it will return a maximum of one.
    &quot;profanityFilter&quot;: True or False, # If set to `true`, the server will attempt to filter out
        # profanities, replacing all but the initial character in each filtered word
        # with asterisks, e.g. &quot;f***&quot;. If set to `false` or omitted, profanities
        # won&#x27;t be filtered out.
    &quot;sampleRateHertz&quot;: 42, # Sample rate in Hertz of the audio data sent in all
        # `RecognitionAudio` messages. Valid values are: 8000-48000.
        # 16000 is optimal. For best results, set the sampling rate of the audio
        # source to 16000 Hz. If that&#x27;s not possible, use the native sample rate of
        # the audio source (instead of re-sampling).
        # This field is optional for FLAC and WAV audio files, but is
        # required for all other audio formats. For details, see AudioEncoding.
    &quot;diarizationSpeakerCount&quot;: 42, # If set, specifies the estimated number of speakers in the conversation.
        # Defaults to `2`. Ignored unless enable_speaker_diarization is set to true.
        # Note: Use diarization_config instead.
    &quot;enableWordConfidence&quot;: True or False, # If `true`, the top result includes a list of words and the
        # confidence for those words. If `false`, no word-level confidence
        # information is returned. The default is `false`.
    &quot;speechContexts&quot;: [ # Array of SpeechContext.
        # A means to provide context to assist the speech recognition. For more
        # information, see
        # [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
      { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
          # in the results.
        &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
            # phrase will be recognized over other similar-sounding phrases. The higher
            # the boost, the higher the chance of false positive recognition as well.
            # Negative boost values would correspond to anti-biasing. Anti-biasing is not
            # enabled, so negative boost will simply be ignored. Though `boost` can
            # accept a wide range of positive values, most use cases are best served with
            # values between 0 and 20. We recommend using a binary search approach to
            # finding the optimal value for your use case.
        &quot;phrases&quot;: [ # A list of strings containing word and phrase &quot;hints&quot; so that
            # the speech recognition is more likely to recognize them. This can be used
            # to improve the accuracy for specific words and phrases, for example, if
            # specific commands are typically spoken by the user. This can also be used
            # to add additional words to the vocabulary of the recognizer. See
            # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
            #
            # List items can also be set to classes for groups of words that represent
            # common concepts that occur in natural language. For example, rather than
            # providing phrase hints for every month of the year, using the $MONTH class
            # improves the likelihood of correctly transcribing audio that includes
            # months.
          &quot;A String&quot;,
        ],
      },
    ],
    &quot;enableSpeakerDiarization&quot;: True or False, # If `true`, enables speaker detection for each recognized word in
        # the top alternative of the recognition result, using a speaker_tag provided
        # in the WordInfo.
        # Note: Use diarization_config instead.
    &quot;enableAutomaticPunctuation&quot;: True or False, # If `true`, adds punctuation to recognition result hypotheses.
        # This feature is only available in select languages. Setting this for
        # requests in other languages has no effect at all.
        # The default `false` value does not add punctuation to result hypotheses.
    &quot;enableWordTimeOffsets&quot;: True or False, # If `true`, the top result includes a list of words and
        # the start and end time offsets (timestamps) for those words. If
        # `false`, no word-level time offset information is returned. The default is
        # `false`.
    &quot;diarizationConfig&quot;: { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional
        # parameters to make diarization better suited for your application.
        # Note: When this is enabled, we send all the words from the beginning of the
        # audio for the top alternative in every consecutive STREAMING response.
        # This is done in order to improve our speaker tags as our models learn to
        # identify the speakers in the conversation over time.
        # For non-streaming requests, the diarization results will be provided only
        # in the top alternative of the FINAL SpeechRecognitionResult.
      &quot;maxSpeakerCount&quot;: 42, # Maximum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 6.
      &quot;speakerTag&quot;: 42, # Output only. Unused.
      &quot;minSpeakerCount&quot;: 42, # Minimum number of speakers in the conversation. This range gives you more
          # flexibility by allowing the system to automatically determine the correct
          # number of speakers. If not set, the default value is 2.
      &quot;enableSpeakerDiarization&quot;: True or False, # If `true`, enables speaker detection for each recognized word in
          # the top alternative of the recognition result, using a speaker_tag provided
          # in the WordInfo.
    },
    &quot;adaptation&quot;: { # Speech adaptation configuration. # Speech adaptation configuration improves the accuracy of speech
        # recognition. When speech adaptation is set it supersedes the
        # `speech_contexts` field. For more information, see the [speech
        # adaptation](https://cloud.google.com/speech-to-text/docs/context-strength)
        # documentation.
      &quot;phraseSets&quot;: [ # A collection of phrase sets. To specify the hints inline, leave the
          # phrase set&#x27;s `name` blank and fill in the rest of its fields. Any
          # phrase set can use any custom class.
        { # Provides &quot;hints&quot; to the speech recognizer to favor specific words and phrases
            # in the results.
          &quot;boost&quot;: 3.14, # Hint Boost. A positive value will increase the probability that a specific
              # phrase will be recognized over other similar-sounding phrases. The higher
              # the boost, the higher the chance of false positive recognition as well.
              # Negative boost values would correspond to anti-biasing. Anti-biasing is not
              # enabled, so negative boost will simply be ignored. Though `boost` can
              # accept a wide range of positive values, most use cases are best served with
              # values between 0 (exclusive) and 20. We recommend using a binary search
              # approach to finding the optimal value for your use case. Speech recognition
              # will skip PhraseSets with a boost value of 0.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the phrase set.
          &quot;phrases&quot;: [ # A list of words and phrases.
            { # A phrase containing words and phrase &quot;hints&quot; so that
                # the speech recognition is more likely to recognize them. This can be used
                # to improve the accuracy for specific words and phrases, for example, if
                # specific commands are typically spoken by the user. This can also be used
                # to add additional words to the vocabulary of the recognizer. See
                # [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
                #
                # List items can also include pre-built or custom classes containing groups
                # of words that represent common concepts that occur in natural language. For
                # example, rather than providing a phrase hint for every month of the
                # year (e.g. &quot;i was born in january&quot;, &quot;i was born in february&quot;, ...), using the
                # pre-built `$MONTH` class improves the likelihood of correctly transcribing
                # audio that includes months (e.g. &quot;i was born in $month&quot;).
                # To refer to pre-built classes, use the class&#x27;s symbol prepended with `$`,
                # e.g. `$MONTH`. To refer to custom classes that were defined inline in the
                # request, set the class&#x27;s `custom_class_id` to a string unique to all class
                # resources and inline classes. Then use the class&#x27;s id wrapped in `${...}`,
                # e.g. &quot;${my-months}&quot;. To refer to custom class resources, use the class&#x27;s
                # id wrapped in `${}` (e.g. `${my-months}`).
              &quot;boost&quot;: 3.14, # Hint Boost. Overrides the boost set at the phrase set level.
                  # A positive value will increase the probability that a specific phrase will
                  # be recognized over other similar-sounding phrases. The higher the boost,
                  # the higher the chance of false positive recognition as well. Negative
                  # boost values would correspond to anti-biasing. Anti-biasing is not
                  # enabled, so negative boost will simply be ignored. Though `boost` can
                  # accept a wide range of positive values, most use cases are best served
                  # with values between 0 and 20. We recommend using a binary search approach
                  # to finding the optimal value for your use case. Speech recognition
                  # will skip PhraseSets with a boost value of 0.
              &quot;value&quot;: &quot;A String&quot;, # The phrase itself.
            },
          ],
        },
      ],
      &quot;customClasses&quot;: [ # A collection of custom classes. To specify the classes inline, leave the
          # class&#x27;s `name` blank and fill in the rest of its fields, giving it a unique
          # `custom_class_id`. Refer to the inline defined class in phrase hints by its
          # `custom_class_id`.
        { # A set of words or phrases that represents a common concept likely to appear
            # in your audio, for example a list of passenger ship names. CustomClass items
            # can be substituted into placeholders that you set in PhraseSet phrases.
          &quot;items&quot;: [ # A collection of class items.
            { # An item of the class.
              &quot;value&quot;: &quot;A String&quot;, # The class item&#x27;s value.
            },
          ],
          &quot;customClassId&quot;: &quot;A String&quot;, # If this custom class is a resource, the custom_class_id is the resource id
              # of the CustomClass. Case sensitive.
          &quot;name&quot;: &quot;A String&quot;, # The resource name of the custom class.
        },
      ],
    },
    &quot;model&quot;: &quot;A String&quot;, # Which model to select for the given request. Select the model
        # best suited to your domain to get best results. If a model is not
        # explicitly specified, then we auto-select a model based on the parameters
        # in the RecognitionConfig.
        # &lt;table&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;b&gt;Model&lt;/b&gt;&lt;/td&gt;
        # &lt;td&gt;&lt;b&gt;Description&lt;/b&gt;&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;command_and_search&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for short queries such as voice commands or voice search.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;phone_call&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from a phone call (typically
        # recorded at an 8 kHz sampling rate).&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;video&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that originated from video or includes multiple
        # speakers. Ideally the audio is recorded at a 16 kHz or greater
        # sampling rate. This is a premium model that costs more than the
        # standard rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;tr&gt;
        # &lt;td&gt;&lt;code&gt;default&lt;/code&gt;&lt;/td&gt;
        # &lt;td&gt;Best for audio that is not one of the specific audio models.
        # For example, long-form audio. Ideally the audio is high-fidelity,
        # recorded at a 16 kHz or greater sampling rate.&lt;/td&gt;
        # &lt;/tr&gt;
        # &lt;/table&gt;
    &quot;useEnhanced&quot;: True or False, # Set to true to use an enhanced model for speech recognition.
        # If `use_enhanced` is set to true and the `model` field is not set, then
        # an appropriate enhanced model is chosen if an enhanced model exists for
        # the audio.
        #
        # If `use_enhanced` is true and an enhanced version of the specified model
        # does not exist, then the speech is recognized using the standard version
        # of the specified model.
    &quot;alternativeLanguageCodes&quot;: [ # A list of up to 3 additional
        # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
        # listing possible alternative languages of the supplied audio.
        # See [Language
        # Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
        # of the currently supported language codes. If alternative languages are
        # listed, the recognition result will contain recognition in the most likely
        # language detected, including the main language_code. The recognition result
        # will include the language tag of the language detected in the audio. Note:
        # This feature is only supported for Voice Command and Voice Search use cases,
        # and performance may vary for other use cases (e.g., phone call
        # transcription).
      &quot;A String&quot;,
    ],
    &quot;enableSeparateRecognitionPerChannel&quot;: True or False, # This needs to be set to `true` explicitly and `audio_channel_count` &gt; 1
        # to get each channel recognized separately. The recognition result will
        # contain a `channel_tag` field to state which channel that result belongs
        # to. If this is not true, we will only recognize the first channel. The
        # request is billed cumulatively for all channels recognized:
        # `audio_channel_count` multiplied by the length of the audio.
  },
}

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # The only message returned to the client by the `Recognize` method. It
        # contains the result as zero or more sequential `SpeechRecognitionResult`
        # messages.
      &quot;results&quot;: [ # Sequential list of transcription results corresponding to
          # sequential portions of audio.
        { # A speech recognition result corresponding to a portion of the audio.
          &quot;alternatives&quot;: [ # May contain one or more recognition hypotheses (up to the
              # maximum specified in `max_alternatives`).
              # These alternatives are ordered in terms of accuracy, with the top (first)
              # alternative being the most probable, as ranked by the recognizer.
            { # Alternative hypotheses (a.k.a. n-best list).
              &quot;transcript&quot;: &quot;A String&quot;, # Transcript text representing the words that the user spoke.
              &quot;words&quot;: [ # A list of word-specific information for each recognized word.
                  # Note: When `enable_speaker_diarization` is true, you will see all the words
                  # from the beginning of the audio.
                { # Word-specific information for recognized words.
                  &quot;startTime&quot;: &quot;A String&quot;, # Time offset relative to the beginning of the audio,
                      # and corresponding to the start of the spoken word.
                      # This field is only set if `enable_word_time_offsets=true` and only
                      # in the top hypothesis.
                      # This is an experimental feature and the accuracy of the time offset can
                      # vary.
                  &quot;speakerTag&quot;: 42, # Output only. A distinct integer value is assigned for every speaker within
                      # the audio. This field specifies which one of those speakers was detected to
                      # have spoken this word. Values range from `1` to diarization_speaker_count.
                      # speaker_tag is set only if enable_speaker_diarization = `true`, and only in
                      # the top alternative.
                  &quot;word&quot;: &quot;A String&quot;, # The word corresponding to this set of information.
                  &quot;endTime&quot;: &quot;A String&quot;, # Time offset relative to the beginning of the audio,
                      # and corresponding to the end of the spoken word.
                      # This field is only set if `enable_word_time_offsets=true` and only
                      # in the top hypothesis.
                      # This is an experimental feature and the accuracy of the time offset can
                      # vary.
                  &quot;confidence&quot;: 3.14, # The confidence estimate between 0.0 and 1.0. A higher number
                      # indicates an estimated greater likelihood that the recognized words are
                      # correct. This field is set only for the top alternative of a non-streaming
                      # result or of a streaming result where `is_final=true`.
                      # This field is not guaranteed to be accurate and users should not rely on it
                      # to be always provided.
                      # The default of 0.0 is a sentinel value indicating `confidence` was not set.
                },
              ],
              &quot;confidence&quot;: 3.14, # The confidence estimate between 0.0 and 1.0. A higher number
                  # indicates an estimated greater likelihood that the recognized words are
                  # correct. This field is set only for the top alternative of a non-streaming
                  # result or of a streaming result where `is_final=true`.
                  # This field is not guaranteed to be accurate and users should not rely on it
                  # to be always provided.
                  # The default of 0.0 is a sentinel value indicating `confidence` was not set.
            },
          ],
          &quot;channelTag&quot;: 42, # For multi-channel audio, this is the channel number corresponding to the
              # recognized result for the audio from that channel.
              # For audio_channel_count = N, its output values can range from `1` to `N`.
          &quot;languageCode&quot;: &quot;A String&quot;, # Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
              # of the language in this result. This language code was detected as the most
              # likely language being spoken in the audio.
        },
      ],
    }</pre>
</div>
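<p>Again as an illustrative sketch only: a minimal synchronous call with the google-api-python-client library. The local file path, language, and phrase hints below are placeholder assumptions; <code>content</code> must be base64-encoded because JSON requests encode bytes fields as base64, as noted above.</p>
<pre>
# Sketch: synchronous recognition of a short local file, assuming
# application default credentials are configured. audio.wav is a
# placeholder path.
import base64

from googleapiclient import discovery

service = discovery.build('speech', 'v1p1beta1')

with open('audio.wav', 'rb') as f:  # placeholder path
    audio_bytes = f.read()

body = {
    'config': {
        'languageCode': 'en-US',  # required
        'maxAlternatives': 2,
        # Optional context hints, per the speechContexts field above.
        'speechContexts': [{'phrases': ['Polycom SoundStation']}],
    },
    'audio': {
        'content': base64.b64encode(audio_bytes).decode('utf-8'),
    },
}

# The response contains zero or more sequential SpeechRecognitionResults.
response = service.speech().recognize(body=body).execute()
for result in response.get('results', []):
    for alternative in result['alternatives']:
        print(alternative.get('confidence'), alternative['transcript'])
</pre>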

</body></html>