Blame - docs/dyn/speech_v1.speech.html - platform/external/python/google-api-python-client

"config": { # Provides information to the recognizer that specifies how to process the # *Required* Provides information to the recognizer that specifies how to

115

# process the request.

116

# request.

117

"languageCode": "A String", # *Required* The language of the supplied audio as a

118

# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.

119

# Example: "en-US".

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

120

# See [Language Support](/speech-to-text/docs/languages)

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

121

# for a list of the currently supported language codes.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

122

"audioChannelCount": 42, # *Optional* The number of channels in the input audio data.

123

# ONLY set this for MULTI-CHANNEL recognition.

124

# Valid values for LINEAR16 and FLAC are `1`-`8`.

125

# Valid values for OGG_OPUS are '1'-'254'.

126

# Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.

127

# If `0` or omitted, defaults to one channel (mono).

128

# Note: We only recognize the first channel by default.

129

# To perform independent recognition on each channel set

130

# `enable_separate_recognition_per_channel` to 'true'.

131

"encoding": "A String", # Encoding of audio data sent in all `RecognitionAudio` messages.

132

# This field is optional for `FLAC` and `WAV` audio files and required

133

# for all other audio formats. For details, see AudioEncoding.

134

"enableAutomaticPunctuation": True or False, # *Optional* If 'true', adds punctuation to recognition result hypotheses.

135

# This feature is only available in select languages. Setting this for

136

# requests in other languages has no effect at all.

137

# The default 'false' value does not add punctuation to result hypotheses.

138

# Note: This is currently offered as an experimental service, complimentary

139

# to all users. In the future this may be exclusively available as a

140

# premium feature.

141

"enableSeparateRecognitionPerChannel": True or False, # This needs to be set to `true` explicitly and `audio_channel_count` > 1

142

# to get each channel recognized separately. The recognition result will

143

# contain a `channel_tag` field to state which channel that result belongs

144

# to. If this is not true, we will only recognize the first channel. The

145

# request is billed cumulatively for all channels recognized:

146

# `audio_channel_count` multiplied by the length of the audio.

147

"enableWordTimeOffsets": True or False, # *Optional* If `true`, the top result includes a list of words and

148

# the start and end time offsets (timestamps) for those words. If

149

# `false`, no word-level time offset information is returned. The default is

150

# `false`.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

151

"maxAlternatives": 42, # *Optional* Maximum number of recognition hypotheses to be returned.

152

# Specifically, the maximum number of `SpeechRecognitionAlternative` messages

153

# within each `SpeechRecognitionResult`.

154

# The server may return fewer than `max_alternatives`.

155

# Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of

156

# one. If omitted, will return a maximum of one.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

157

"useEnhanced": True or False, # *Optional* Set to true to use an enhanced model for speech recognition.

158

# If `use_enhanced` is set to true and the `model` field is not set, then

159

# an appropriate enhanced model is chosen if:

160

# 1. project is eligible for requesting enhanced models

161

# 2. an enhanced model exists for the audio

162

#

163

# If `use_enhanced` is true and an enhanced version of the specified model

164

# does not exist, then the speech is recognized using the standard version

165

# of the specified model.

166

#

167

# Enhanced speech models require that you opt-in to data logging using

168

# instructions in the

169

# [documentation](/speech-to-text/docs/enable-data-logging). If you set

170

# `use_enhanced` to true and you have not enabled audio logging, then you

171

# will receive an error.

172

"sampleRateHertz": 42, # Sample rate in Hertz of the audio data sent in all

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

173

# `RecognitionAudio` messages. Valid values are: 8000-48000.

174

# 16000 is optimal. For best results, set the sampling rate of the audio

175

# source to 16000 Hz. If that's not possible, use the native sample rate of

176

# the audio source (instead of re-sampling).

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

177

# This field is optional for FLAC and WAV audio files, but is

178

# required for all other audio formats. For details, see AudioEncoding.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

179

"profanityFilter": True or False, # *Optional* If set to `true`, the server will attempt to filter out

180

# profanities, replacing all but the initial character in each filtered word

181

# with asterisks, e.g. "f***". If set to `false` or omitted, profanities

182

# won't be filtered out.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

183

"model": "A String", # *Optional* Which model to select for the given request. Select the model

184

# best suited to your domain to get best results. If a model is not

185

# explicitly specified, then we auto-select a model based on the parameters

186

# in the RecognitionConfig.

187

# <table>

188

# <tr>

189

# <td><b>Model</b></td>

190

# <td><b>Description</b></td>

191

# </tr>

192

# <tr>

193

# <td><code>command_and_search</code></td>

194

# <td>Best for short queries such as voice commands or voice search.</td>

195

# </tr>

196

# <tr>

197

# <td><code>phone_call</code></td>

198

# <td>Best for audio that originated from a phone call (typically

199

# recorded at an 8khz sampling rate).</td>

200

# </tr>

201

# <tr>

202

# <td><code>video</code></td>

203

# <td>Best for audio that originated from video or includes multiple

204

# speakers. Ideally the audio is recorded at a 16khz or greater

205

# sampling rate. This is a premium model that costs more than the

206

# standard rate.</td>

207

# </tr>

208

# <tr>

209

# <td><code>default</code></td>

210

# <td>Best for audio that is not one of the specific audio models.

211

# For example, long-form audio. Ideally the audio is high-fidelity,

212

# recorded at a 16khz or greater sampling rate.</td>

213

# </tr>

214

# </table>

215

"speechContexts": [ # *Optional* array of SpeechContext.

216

# A means to provide context to assist the speech recognition. For more

217

# information, see [Phrase Hints](/speech-to-text/docs/basics#phrase-hints).

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

218

{ # Provides "hints" to the speech recognizer to favor specific words and phrases

219

# in the results.

220

"phrases": [ # *Optional* A list of strings containing words and phrases "hints" so that

221

# the speech recognition is more likely to recognize them. This can be used

222

# to improve the accuracy for specific words and phrases, for example, if

223

# specific commands are typically spoken by the user. This can also be used

224

# to add additional words to the vocabulary of the recognizer. See

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

225

# [usage limits](/speech-to-text/quotas#content).

226

#

227

# List items can also be set to classes for groups of words that represent

228

# common concepts that occur in natural language. For example, rather than

229

# providing phrase hints for every month of the year, using the $MONTH class

230

# improves the likelihood of correctly transcribing audio that includes

231

# months.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

"A String",

],

},

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

236

"metadata": { # Description of audio data to be recognized. # *Optional* Metadata regarding this request.

237

"recordingDeviceType": "A String", # The type of device the speech was recorded with.

238

"originalMediaType": "A String", # The original media the speech was recorded on.

239

"microphoneDistance": "A String", # The audio type that most closely describes the audio being recognized.

240

"obfuscatedId": "A String", # Obfuscated (privacy-protected) ID of the user, to identify number of

241

# unique users using the service.

242

"originalMimeType": "A String", # Mime type of the original audio file. For example `audio/m4a`,

243

# `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.

244

# A list of possible audio mime types is maintained at

245

# http://www.iana.org/assignments/media-types/media-types.xhtml#audio

246

"industryNaicsCodeOfAudio": 42, # The industry vertical to which this speech recognition request most

247

# closely applies. This is most indicative of the topics contained

248

# in the audio. Use the 6-digit NAICS code to identify the industry

249

# vertical - see https://www.naics.com/search/.

250

"audioTopic": "A String", # Description of the content. Eg. "Recordings of federal supreme court

251

# hearings from 2012".

252

"recordingDeviceName": "A String", # The device used to make the recording. Examples 'Nexus 5X' or

253

# 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or

254

# 'Cardioid Microphone'.

255

"interactionType": "A String", # The use case most closely describing the audio content to be recognized.

256

},

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

},

}

x__xgafv: string, V1 error format.

Allowed values

1 - v1 error format

2 - v2 error format

Returns:

An object of the form:

267

268

{ # This resource represents a long-running operation that is the result of a

269

# network API call.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

270

"error": { # The `Status` type defines a logical error model that is suitable for # The error result of the operation in case of failure or cancellation.

271

# different programming environments, including REST APIs and RPC APIs. It is

272

# used by [gRPC](https://github.com/grpc). Each `Status` message contains

273

# three pieces of data: error code, error message, and error details.

274

#

275

# You can find out more about this error model and how to work with it in the

276

# [API Design Guide](https://cloud.google.com/apis/design/errors).

277

"message": "A String", # A developer-facing error message, which should be in English. Any

278

# user-facing error message should be localized and sent in the

279

# google.rpc.Status.details field, or localized by the client.

280

"code": 42, # The status code, which should be an enum value of google.rpc.Code.

281

"details": [ # A list of messages that carry the error details. There is a common set of

282

# message types for APIs to use.

283

{

284

"a_key": "", # Properties of the object. Contains field @type with type URL.

285

},

286

],

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

287

},

288

"done": True or False, # If the value is `false`, it means the operation is still in progress.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

289

# If `true`, the operation is completed, and either `error` or `response` is

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

290

# available.

291

"response": { # The normal response of the operation in case of success. If the original

292

# method returns no data on success, such as `Delete`, the response is

293

# `google.protobuf.Empty`. If the original method is standard

294

# `Get`/`Create`/`Update`, the response should be the resource. For other

295

# methods, the response should have the type `XxxResponse`, where `Xxx`

296

# is the original method name. For example, if the original method name

297

# is `TakeSnapshot()`, the inferred response type is

298

# `TakeSnapshotResponse`.

299

"a_key": "", # Properties of the object. Contains field @type with type URL.

300

},

301

"name": "A String", # The server-assigned name, which is only unique within the same service that

302

# originally returns it. If you use the default HTTP mapping, the

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

303

# `name` should be a resource name ending with `operations/{unique_id}`.

304

"metadata": { # Service-specific metadata associated with the operation. It typically

305

# contains progress information and common metadata such as create time.

306

# Some services might not provide such metadata. Any method that returns a

307

# long-running operation should document the metadata type, if any.

308

"a_key": "", # Properties of the object. Contains field @type with type URL.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

},

}</pre>

</div>

<code class="details" id="recognize">recognize(body, x__xgafv=None)</code>

315

<pre>Performs synchronous speech recognition: receive results after all audio

316

has been sent and processed.

317

318

Args:

319

body: object, The request body. (required)

320

The object takes the form of:

321

322

{ # The top-level message sent by the client for the `Recognize` method.

323

"audio": { # Contains audio data in the encoding specified in the `RecognitionConfig`. # *Required* The audio data to be recognized.

324

# Either `content` or `uri` must be supplied. Supplying both or neither

325

# returns google.rpc.Code.INVALID_ARGUMENT. See

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

326

# [content limits](/speech-to-text/quotas#content).

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

327

"content": "A String", # The audio data bytes encoded as specified in

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

328

# `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

329

# pure binary representation, whereas JSON representations use base64.

330

"uri": "A String", # URI that points to a file that contains audio data bytes as specified in

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

331

# `RecognitionConfig`. The file must not be compressed (for example, gzip).

332

# Currently, only Google Cloud Storage URIs are

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

333

# supported, which must be specified in the following format:

334

# `gs://bucket_name/object_name` (other URI formats return

335

# google.rpc.Code.INVALID_ARGUMENT). For more information, see

336

# [Request URIs](https://cloud.google.com/storage/docs/reference-uris).

337

},

338

"config": { # Provides information to the recognizer that specifies how to process the # *Required* Provides information to the recognizer that specifies how to

339

# process the request.

340

# request.

341

"languageCode": "A String", # *Required* The language of the supplied audio as a

342

# [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.

343

# Example: "en-US".

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

344

# See [Language Support](/speech-to-text/docs/languages)

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

345

# for a list of the currently supported language codes.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

346

"audioChannelCount": 42, # *Optional* The number of channels in the input audio data.

347

# ONLY set this for MULTI-CHANNEL recognition.

348

# Valid values for LINEAR16 and FLAC are `1`-`8`.

349

# Valid values for OGG_OPUS are '1'-'254'.

350

# Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.

351

# If `0` or omitted, defaults to one channel (mono).

352

# Note: We only recognize the first channel by default.

353

# To perform independent recognition on each channel set

354

# `enable_separate_recognition_per_channel` to 'true'.

355

"encoding": "A String", # Encoding of audio data sent in all `RecognitionAudio` messages.

356

# This field is optional for `FLAC` and `WAV` audio files and required

357

# for all other audio formats. For details, see AudioEncoding.

358

"enableAutomaticPunctuation": True or False, # *Optional* If 'true', adds punctuation to recognition result hypotheses.

359

# This feature is only available in select languages. Setting this for

360

# requests in other languages has no effect at all.

361

# The default 'false' value does not add punctuation to result hypotheses.

362

# Note: This is currently offered as an experimental service, complimentary

363

# to all users. In the future this may be exclusively available as a

364

# premium feature.

365

"enableSeparateRecognitionPerChannel": True or False, # This needs to be set to `true` explicitly and `audio_channel_count` > 1

366

# to get each channel recognized separately. The recognition result will

367

# contain a `channel_tag` field to state which channel that result belongs

368

# to. If this is not true, we will only recognize the first channel. The

369

# request is billed cumulatively for all channels recognized:

370

# `audio_channel_count` multiplied by the length of the audio.

371

"enableWordTimeOffsets": True or False, # *Optional* If `true`, the top result includes a list of words and

372

# the start and end time offsets (timestamps) for those words. If

373

# `false`, no word-level time offset information is returned. The default is

374

# `false`.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

375

"maxAlternatives": 42, # *Optional* Maximum number of recognition hypotheses to be returned.

376

# Specifically, the maximum number of `SpeechRecognitionAlternative` messages

377

# within each `SpeechRecognitionResult`.

378

# The server may return fewer than `max_alternatives`.

379

# Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of

380

# one. If omitted, will return a maximum of one.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

381

"useEnhanced": True or False, # *Optional* Set to true to use an enhanced model for speech recognition.

382

# If `use_enhanced` is set to true and the `model` field is not set, then

383

# an appropriate enhanced model is chosen if:

384

# 1. project is eligible for requesting enhanced models

385

# 2. an enhanced model exists for the audio

386

#

387

# If `use_enhanced` is true and an enhanced version of the specified model

388

# does not exist, then the speech is recognized using the standard version

389

# of the specified model.

390

#

391

# Enhanced speech models require that you opt-in to data logging using

392

# instructions in the

393

# [documentation](/speech-to-text/docs/enable-data-logging). If you set

394

# `use_enhanced` to true and you have not enabled audio logging, then you

395

# will receive an error.

396

"sampleRateHertz": 42, # Sample rate in Hertz of the audio data sent in all

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

397

# `RecognitionAudio` messages. Valid values are: 8000-48000.

398

# 16000 is optimal. For best results, set the sampling rate of the audio

399

# source to 16000 Hz. If that's not possible, use the native sample rate of

400

# the audio source (instead of re-sampling).

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

401

# This field is optional for FLAC and WAV audio files, but is

402

# required for all other audio formats. For details, see AudioEncoding.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

403

"profanityFilter": True or False, # *Optional* If set to `true`, the server will attempt to filter out

404

# profanities, replacing all but the initial character in each filtered word

405

# with asterisks, e.g. "f***". If set to `false` or omitted, profanities

406

# won't be filtered out.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

407

"model": "A String", # *Optional* Which model to select for the given request. Select the model

408

# best suited to your domain to get best results. If a model is not

409

# explicitly specified, then we auto-select a model based on the parameters

410

# in the RecognitionConfig.

411

# <table>

412

# <tr>

413

# <td><b>Model</b></td>

414

# <td><b>Description</b></td>

415

# </tr>

416

# <tr>

417

# <td><code>command_and_search</code></td>

418

# <td>Best for short queries such as voice commands or voice search.</td>

419

# </tr>

420

# <tr>

421

# <td><code>phone_call</code></td>

422

# <td>Best for audio that originated from a phone call (typically

423

# recorded at an 8khz sampling rate).</td>

424

# </tr>

425

# <tr>

426

# <td><code>video</code></td>

427

# <td>Best for audio that originated from video or includes multiple

428

# speakers. Ideally the audio is recorded at a 16khz or greater

429

# sampling rate. This is a premium model that costs more than the

430

# standard rate.</td>

431

# </tr>

432

# <tr>

433

# <td><code>default</code></td>

434

# <td>Best for audio that is not one of the specific audio models.

435

# For example, long-form audio. Ideally the audio is high-fidelity,

436

# recorded at a 16khz or greater sampling rate.</td>

437

# </tr>

438

# </table>

439

"speechContexts": [ # *Optional* array of SpeechContext.

440

# A means to provide context to assist the speech recognition. For more

441

# information, see [Phrase Hints](/speech-to-text/docs/basics#phrase-hints).

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

442

{ # Provides "hints" to the speech recognizer to favor specific words and phrases

443

# in the results.

444

"phrases": [ # *Optional* A list of strings containing words and phrases "hints" so that

445

# the speech recognition is more likely to recognize them. This can be used

446

# to improve the accuracy for specific words and phrases, for example, if

447

# specific commands are typically spoken by the user. This can also be used

448

# to add additional words to the vocabulary of the recognizer. See

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

449

# [usage limits](/speech-to-text/quotas#content).

450

#

451

# List items can also be set to classes for groups of words that represent

452

# common concepts that occur in natural language. For example, rather than

453

# providing phrase hints for every month of the year, using the $MONTH class

454

# improves the likelihood of correctly transcribing audio that includes

455

# months.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

"A String",

],

},

],

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

460

"metadata": { # Description of audio data to be recognized. # *Optional* Metadata regarding this request.

461

"recordingDeviceType": "A String", # The type of device the speech was recorded with.

462

"originalMediaType": "A String", # The original media the speech was recorded on.

463

"microphoneDistance": "A String", # The audio type that most closely describes the audio being recognized.

464

"obfuscatedId": "A String", # Obfuscated (privacy-protected) ID of the user, to identify number of

465

# unique users using the service.

466

"originalMimeType": "A String", # Mime type of the original audio file. For example `audio/m4a`,

467

# `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.

468

# A list of possible audio mime types is maintained at

469

# http://www.iana.org/assignments/media-types/media-types.xhtml#audio

470

"industryNaicsCodeOfAudio": 42, # The industry vertical to which this speech recognition request most

471

# closely applies. This is most indicative of the topics contained

472

# in the audio. Use the 6-digit NAICS code to identify the industry

473

# vertical - see https://www.naics.com/search/.

474

"audioTopic": "A String", # Description of the content. Eg. "Recordings of federal supreme court

475

# hearings from 2012".

476

"recordingDeviceName": "A String", # The device used to make the recording. Examples 'Nexus 5X' or

477

# 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or

478

# 'Cardioid Microphone'.

479

"interactionType": "A String", # The use case most closely describing the audio content to be recognized.

480

},

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

},

}

x__xgafv: string, V1 error format.

Allowed values

1 - v1 error format

2 - v2 error format

Returns:

An object of the form:

491

492

{ # The only message returned to the client by the `Recognize` method. It

493

# contains the result as zero or more sequential `SpeechRecognitionResult`

494

# messages.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

495

"results": [ # Output only. Sequential list of transcription results corresponding to

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

496

# sequential portions of audio.

497

{ # A speech recognition result corresponding to a portion of the audio.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

498

"channelTag": 42, # For multi-channel audio, this is the channel number corresponding to the

499

# recognized result for the audio from that channel.

500

# For audio_channel_count = N, its output values can range from '1' to 'N'.

501

"alternatives": [ # Output only. May contain one or more recognition hypotheses (up to the

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

502

# maximum specified in `max_alternatives`).

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

503

# These alternatives are ordered in terms of accuracy, with the top (first)

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

504

# alternative being the most probable, as ranked by the recognizer.

505

{ # Alternative hypotheses (a.k.a. n-best list).

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

506

"confidence": 3.14, # Output only. The confidence estimate between 0.0 and 1.0. A higher number

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

507

# indicates an estimated greater likelihood that the recognized words are

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

508

# correct. This field is set only for the top alternative of a non-streaming

509

# result, or of a streaming result where `is_final=true`.

510

# This field is not guaranteed to be accurate and users should not rely on it

511

# to be always provided.

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

512

# The default of 0.0 is a sentinel value indicating `confidence` was not set.

Bu Sun Kim

715bd7f

2019-06-14 16:50:42 -0700

[diff] [blame]

513

"transcript": "A String", # Output only. Transcript text representing the words that the user spoke.

514

"words": [ # Output only. A list of word-specific information for each recognized word.

515

# Note: When `enable_speaker_diarization` is true, you will see all the words

516

# from the beginning of the audio.

517

{ # Word-specific information for recognized words.

518

"endTime": "A String", # Output only. Time offset relative to the beginning of the audio,

519

# and corresponding to the end of the spoken word.

520

# This field is only set if `enable_word_time_offsets=true` and only

521

# in the top hypothesis.

522

# This is an experimental feature and the accuracy of the time offset can

523

# vary.

524

"word": "A String", # Output only. The word corresponding to this set of information.

525

"startTime": "A String", # Output only. Time offset relative to the beginning of the audio,

526

# and corresponding to the start of the spoken word.

527

# This field is only set if `enable_word_time_offsets=true` and only

528

# in the top hypothesis.

529

# This is an experimental feature and the accuracy of the time offset can

530

# vary.

531

},

532

],

Sai Cheemalapati

4ba8c23

2017-06-06 18:46:08 -0400

[diff] [blame]

},

],

},

],

}</pre>

</div>

</body></html>