blob: b09a9b38a7603568adca438fded1974e5c3e6d0e [file] [log] [blame]
// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
8
9#include <cstdio>
Tom Finegane64bf752016-03-18 09:32:52 -070010#include <cstdlib>
Matthew Heaney4a514132012-08-30 15:16:06 -070011#include <cstring>
12#include <map>
13#include <memory>
14#include <string>
Tom Fineganbaba8b12016-03-09 14:12:21 -080015#include <utility>
16
Tom Finegan504e0f22016-03-21 11:20:48 -070017#include "mkvparser/mkvparser.h"
18#include "mkvparser/mkvreader.h"
Tom Finegan5f1065e2016-03-17 15:09:46 -070019#include "webvtt/webvttparser.h"
Matthew Heaney4a514132012-08-30 15:16:06 -070020
21using std::string;
22
Tom Finegane64bf752016-03-18 09:32:52 -070023namespace libwebm {
Matthew Heaney4a514132012-08-30 15:16:06 -070024namespace vttdemux {
25
26typedef long long mkvtime_t; // NOLINT
Vignesh Venkatasubramanian7b245012014-04-29 00:35:56 -070027typedef long long mkvpos_t; // NOLINT
Tom Finegan1e1872b2016-02-17 11:22:21 -080028typedef std::auto_ptr<mkvparser::Segment> segment_ptr_t;
Matthew Heaney4a514132012-08-30 15:16:06 -070029
30// WebVTT metadata tracks have a type (encoded in the CodecID for the track).
31// We use |type| to synthesize a filename for the out-of-band WebVTT |file|.
32struct MetadataInfo {
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -070033 enum Type { kSubtitles, kCaptions, kDescriptions, kMetadata, kChapters } type;
Matthew Heaney4a514132012-08-30 15:16:06 -070034 FILE* file;
35};
36
37// We use a map, indexed by track number, to collect information about
38// each track in the input file.
39typedef std::map<long, MetadataInfo> metadata_map_t; // NOLINT
40
Matthew Heaneyc26db032012-10-26 15:06:28 -070041// The distinguished key value we use to store the chapters
42// information in the metadata map.
43enum { kChaptersKey = 0 };
44
Matthew Heaney4a514132012-08-30 15:16:06 -070045// The data from the original WebVTT Cue is stored as a WebM block.
46// The FrameParser is used to parse the lines of text out from the
47// block, in order to reconstruct the original WebVTT Cue.
48class FrameParser : public libwebvtt::LineReader {
49 public:
50 // Bind the FrameParser instance to a WebM block.
51 explicit FrameParser(const mkvparser::BlockGroup* block_group);
52 virtual ~FrameParser();
53
54 // The Webm block (group) to which this instance is bound. We
55 // treat the payload of the block as a stream of characters.
56 const mkvparser::BlockGroup* const block_group_;
57
58 protected:
59 // Read the next character from the character stream (the payload
60 // of the WebM block). We increment the stream pointer |pos_| as
61 // each character from the stream is consumed.
62 virtual int GetChar(char* c);
63
64 // End-of-line handling requires that we put a character back into
65 // the stream. Here we need only decrement the stream pointer |pos_|
66 // to unconsume the character.
67 virtual void UngetChar(char c);
68
69 // The current position in the character stream (the payload of the block).
70 mkvpos_t pos_;
71
72 // The position of the end of the character stream. When the current
73 // position |pos_| equals the end position |pos_end_|, the entire
74 // stream (block payload) has been consumed and end-of-stream is indicated.
75 mkvpos_t pos_end_;
76
77 private:
78 // Disable copy ctor and copy assign
79 FrameParser(const FrameParser&);
80 FrameParser& operator=(const FrameParser&);
81};
82
Matthew Heaneyc26db032012-10-26 15:06:28 -070083// The data from the original WebVTT Cue is stored as an MKV Chapters
84// Atom element (the cue payload is stored as a Display sub-element).
85// The ChapterAtomParser is used to parse the lines of text out from
86// the String sub-element of the Display element (though it would be
87// admittedly odd if there were more than one line).
88class ChapterAtomParser : public libwebvtt::LineReader {
89 public:
90 explicit ChapterAtomParser(const mkvparser::Chapters::Display* display);
91 virtual ~ChapterAtomParser();
92
93 const mkvparser::Chapters::Display* const display_;
94
95 protected:
96 // Read the next character from the character stream (the title
97 // member of the atom's display). We increment the stream pointer
98 // |str_| as each character from the stream is consumed.
99 virtual int GetChar(char* c);
100
101 // End-of-line handling requires that we put a character back into
102 // the stream. Here we need only decrement the stream pointer |str_|
103 // to unconsume the character.
104 virtual void UngetChar(char c);
105
106 // The current position in the character stream (the title of the
107 // atom's display).
108 const char* str_;
109
110 // The position of the end of the character stream. When the current
111 // position |str_| equals the end position |str_end_|, the entire
112 // stream (title of the display) has been consumed and end-of-stream
113 // is indicated.
114 const char* str_end_;
115
116 private:
117 ChapterAtomParser(const ChapterAtomParser&);
118 ChapterAtomParser& operator=(const ChapterAtomParser&);
119};
120
Matthew Heaney4a514132012-08-30 15:16:06 -0700121// Parse the EBML header of the WebM input file, to determine whether we
122// actually have a WebM file. Returns false if this is not a WebM file.
123bool ParseHeader(mkvparser::IMkvReader* reader, mkvpos_t* pos);
124
125// Parse the Segment of the input file and load all of its clusters.
126// Returns false if there was an error parsing the file.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700127bool ParseSegment(mkvparser::IMkvReader* reader, mkvpos_t pos,
128 segment_ptr_t* segment);
Matthew Heaney4a514132012-08-30 15:16:06 -0700129
Matthew Heaneyc26db032012-10-26 15:06:28 -0700130// If |segment| has a Chapters element (in which case, there will be a
131// corresponding entry in |metadata_map|), convert the MKV chapters to
132// WebVTT chapter cues and write them to the output file. Returns
133// false on error.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700134bool WriteChaptersFile(const metadata_map_t& metadata_map,
135 const mkvparser::Segment* segment);
Matthew Heaneyc26db032012-10-26 15:06:28 -0700136
137// Convert an MKV Chapters Atom to a WebVTT cue and write it to the
138// output |file|. Returns false on error.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700139bool WriteChaptersCue(FILE* file, const mkvparser::Chapters* chapters,
140 const mkvparser::Chapters::Atom* atom,
141 const mkvparser::Chapters::Display* display);
Matthew Heaneyc26db032012-10-26 15:06:28 -0700142
Matthew Heaney28222b42012-11-13 12:44:06 -0800143// Write the Cue Identifier line of the WebVTT cue, if it's present.
144// Returns false on error.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700145bool WriteChaptersCueIdentifier(FILE* file,
146 const mkvparser::Chapters::Atom* atom);
Matthew Heaney28222b42012-11-13 12:44:06 -0800147
Matthew Heaneyc26db032012-10-26 15:06:28 -0700148// Use the timecodes from the chapters |atom| to write just the
149// timings line of the WebVTT cue. Returns false on error.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700150bool WriteChaptersCueTimings(FILE* file, const mkvparser::Chapters* chapters,
151 const mkvparser::Chapters::Atom* atom);
Matthew Heaneyc26db032012-10-26 15:06:28 -0700152
153// Parse the String sub-element of the |display| and write the payload
154// of the WebVTT cue. Returns false on error.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700155bool WriteChaptersCuePayload(FILE* file,
156 const mkvparser::Chapters::Display* display);
Matthew Heaneyc26db032012-10-26 15:06:28 -0700157
158// Iterate over the tracks of the input file (and any chapters
159// element) and cache information about each metadata track.
Matthew Heaney4a514132012-08-30 15:16:06 -0700160void BuildMap(const mkvparser::Segment* segment, metadata_map_t* metadata_map);
161
162// For each track listed in the cache, synthesize its output filename
163// and open a file handle that designates the out-of-band file.
164// Returns false if we were unable to open an output file for a track.
165bool OpenFiles(metadata_map_t* metadata_map, const char* filename);
166
167// Close the file handle for each track in the cache.
168void CloseFiles(metadata_map_t* metadata_map);
169
170// Iterate over the clusters of the input file, and write a WebVTT cue
171// for each metadata block. Returns false if processing of a cluster
172// failed.
173bool WriteFiles(const metadata_map_t& m, mkvparser::Segment* s);
174
175// Write the WebVTT header for each track in the cache. We do this
176// immediately before writing the actual WebVTT cues. Returns false
177// if the write failed.
178bool InitializeFiles(const metadata_map_t& metadata_map);
179
180// Iterate over the blocks of the |cluster|, writing a WebVTT cue to
181// its associated output file for each block of metadata. Returns
182// false if processing a block failed, or there was a parse error.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700183bool ProcessCluster(const metadata_map_t& metadata_map,
184 const mkvparser::Cluster* cluster);
Matthew Heaney4a514132012-08-30 15:16:06 -0700185
186// Look up this track number in the cache, and if found (meaning this
187// is a metadata track), write a WebVTT cue to the associated output
188// file. Returns false if writing the WebVTT cue failed.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700189bool ProcessBlockEntry(const metadata_map_t& metadata_map,
190 const mkvparser::BlockEntry* block_entry);
Matthew Heaney4a514132012-08-30 15:16:06 -0700191
192// Parse the lines of text from the |block_group| to reconstruct the
193// original WebVTT cue, and write it to the associated output |file|.
194// Returns false if there was an error writing to the output file.
195bool WriteCue(FILE* file, const mkvparser::BlockGroup* block_group);
196
197// Consume a line of text from the character stream, and if the line
198// is not empty write the cue identifier to the associated output
199// file. Returns false if there was an error writing to the file.
200bool WriteCueIdentifier(FILE* f, FrameParser* parser);
201
202// Consume a line of text from the character stream (which holds any
203// cue settings) and write the cue timings line for this cue to the
204// associated output file. Returns false if there was an error
205// writing to the file.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700206bool WriteCueTimings(FILE* f, FrameParser* parser);
Matthew Heaney4a514132012-08-30 15:16:06 -0700207
208// Write the timestamp (representating either the start time or stop
209// time of the cue) to the output file. Returns false if there was an
210// error writing to the file.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700211bool WriteCueTime(FILE* f, mkvtime_t time_ns);
Matthew Heaney4a514132012-08-30 15:16:06 -0700212
213// Consume the remaining lines of text from the character stream
214// (these lines are the actual payload of the WebVTT cue), and write
215// them to the associated output file. Returns false if there was an
216// error writing to the file.
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700217bool WriteCuePayload(FILE* f, FrameParser* parser);
Matthew Heaney4a514132012-08-30 15:16:06 -0700218} // namespace vttdemux
219
Matthew Heaney4a514132012-08-30 15:16:06 -0700220namespace vttdemux {
221
222FrameParser::FrameParser(const mkvparser::BlockGroup* block_group)
223 : block_group_(block_group) {
224 const mkvparser::Block* const block = block_group->GetBlock();
225 const mkvparser::Block::Frame& f = block->GetFrame(0);
226
227 // The beginning and end of the character stream corresponds to the
228 // position of this block's frame within the WebM input file.
229
230 pos_ = f.pos;
231 pos_end_ = f.pos + f.len;
232}
233
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700234FrameParser::~FrameParser() {}
Matthew Heaney4a514132012-08-30 15:16:06 -0700235
236int FrameParser::GetChar(char* c) {
237 if (pos_ >= pos_end_) // end-of-stream
Vignesh Venkatasubramanian7b245012014-04-29 00:35:56 -0700238 return 1; // per the semantics of libwebvtt::Reader::GetChar
Matthew Heaney4a514132012-08-30 15:16:06 -0700239
240 const mkvparser::Cluster* const cluster = block_group_->GetCluster();
241 const mkvparser::Segment* const segment = cluster->m_pSegment;
242 mkvparser::IMkvReader* const reader = segment->m_pReader;
243
244 unsigned char* const buf = reinterpret_cast<unsigned char*>(c);
245 const int result = reader->Read(pos_, 1, buf);
246
247 if (result < 0) // error
248 return -1;
249
250 ++pos_; // consume this character in the stream
251 return 0;
252}
253
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700254void FrameParser::UngetChar(char /* c */) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700255 // All we need to do here is decrement the position in the stream.
256 // The next time GetChar is called the same character will be
257 // re-read from the input file.
258 --pos_;
259}
260
Matthew Heaneyc26db032012-10-26 15:06:28 -0700261ChapterAtomParser::ChapterAtomParser(
262 const mkvparser::Chapters::Display* display)
263 : display_(display) {
264 str_ = display->GetString();
265 const size_t len = strlen(str_);
266 str_end_ = str_ + len;
267}
268
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700269ChapterAtomParser::~ChapterAtomParser() {}
Matthew Heaneyc26db032012-10-26 15:06:28 -0700270
271int ChapterAtomParser::GetChar(char* c) {
272 if (str_ >= str_end_) // end-of-stream
Vignesh Venkatasubramanian7b245012014-04-29 00:35:56 -0700273 return 1; // per the semantics of libwebvtt::Reader::GetChar
Matthew Heaneyc26db032012-10-26 15:06:28 -0700274
275 *c = *str_++; // consume this character in the stream
276 return 0;
277}
278
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700279void ChapterAtomParser::UngetChar(char /* c */) {
Matthew Heaneyc26db032012-10-26 15:06:28 -0700280 // All we need to do here is decrement the position in the stream.
281 // The next time GetChar is called the same character will be
282 // re-read from the input file.
283 --str_;
284}
285
Matthew Heaney4a514132012-08-30 15:16:06 -0700286} // namespace vttdemux
287
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700288bool vttdemux::ParseHeader(mkvparser::IMkvReader* reader, mkvpos_t* pos) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700289 mkvparser::EBMLHeader h;
290 const mkvpos_t status = h.Parse(reader, *pos);
291
292 if (status) {
293 printf("error parsing EBML header\n");
294 return false;
295 }
296
Tom Finegan714f3c42015-09-04 10:18:20 -0700297 if (h.m_docType == NULL || strcmp(h.m_docType, "webm") != 0) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700298 printf("bad doctype\n");
299 return false;
300 }
301
302 return true; // success
303}
304
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700305bool vttdemux::ParseSegment(mkvparser::IMkvReader* reader, mkvpos_t pos,
306 segment_ptr_t* segment_ptr) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700307 // We first create the segment object.
308
309 mkvparser::Segment* p;
310 const mkvpos_t create = mkvparser::Segment::CreateInstance(reader, pos, p);
311
312 if (create) {
313 printf("error parsing segment element\n");
314 return false;
315 }
316
317 segment_ptr->reset(p);
318
319 // Now parse all of the segment's sub-elements, in toto.
320
321 const long status = p->Load(); // NOLINT
322
323 if (status < 0) {
324 printf("error loading segment\n");
325 return false;
326 }
327
328 return true;
329}
330
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700331void vttdemux::BuildMap(const mkvparser::Segment* segment,
332 metadata_map_t* map_ptr) {
Matthew Heaneyc26db032012-10-26 15:06:28 -0700333 metadata_map_t& m = *map_ptr;
334 m.clear();
335
336 if (segment->GetChapters()) {
337 MetadataInfo info;
338 info.file = NULL;
339 info.type = MetadataInfo::kChapters;
340
341 m[kChaptersKey] = info;
342 }
343
Matthew Heaney4a514132012-08-30 15:16:06 -0700344 const mkvparser::Tracks* const tt = segment->GetTracks();
345 if (tt == NULL)
346 return;
347
348 const long tc = tt->GetTracksCount(); // NOLINT
349 if (tc <= 0)
350 return;
351
Matthew Heaney4a514132012-08-30 15:16:06 -0700352 // Iterate over the tracks in the intput file. We determine whether
353 // a track holds metadata by inspecting its CodecID.
354
355 for (long idx = 0; idx < tc; ++idx) { // NOLINT
356 const mkvparser::Track* const t = tt->GetTrackByIndex(idx);
357
358 if (t == NULL) // weird
359 continue;
360
Matthew Heaneyc26db032012-10-26 15:06:28 -0700361 const long tn = t->GetNumber(); // NOLINT
362
363 if (tn <= 0) // weird
364 continue;
365
Matthew Heaney4a514132012-08-30 15:16:06 -0700366 const char* const codec_id = t->GetCodecId();
367
368 if (codec_id == NULL) // weird
369 continue;
370
371 MetadataInfo info;
372 info.file = NULL;
373
374 if (strcmp(codec_id, "D_WEBVTT/SUBTITLES") == 0) {
375 info.type = MetadataInfo::kSubtitles;
376 } else if (strcmp(codec_id, "D_WEBVTT/CAPTIONS") == 0) {
377 info.type = MetadataInfo::kCaptions;
378 } else if (strcmp(codec_id, "D_WEBVTT/DESCRIPTIONS") == 0) {
379 info.type = MetadataInfo::kDescriptions;
380 } else if (strcmp(codec_id, "D_WEBVTT/METADATA") == 0) {
381 info.type = MetadataInfo::kMetadata;
382 } else {
383 continue;
384 }
385
Matthew Heaney4a514132012-08-30 15:16:06 -0700386 m[tn] = info; // create an entry in the cache for this track
387 }
388}
389
390bool vttdemux::OpenFiles(metadata_map_t* metadata_map, const char* filename) {
391 if (metadata_map == NULL || metadata_map->empty())
392 return false;
393
394 if (filename == NULL)
395 return false;
396
397 // Find the position of the filename extension. We synthesize the
398 // output filename from the directory path and basename of the input
399 // filename.
400
401 const char* const ext = strrchr(filename, '.');
402
403 if (ext == NULL) // TODO(matthewjheaney): liberalize?
404 return false;
405
406 // Remember whether a track of this type has already been seen (the
407 // map key) by keeping a count (the map item). We quality the
408 // output filename with the track number if there is more than one
409 // track having a given type.
410
411 std::map<MetadataInfo::Type, int> exists;
412
413 typedef metadata_map_t::iterator iter_t;
414
415 metadata_map_t& m = *metadata_map;
416 const iter_t ii = m.begin();
417 const iter_t j = m.end();
418
419 // Make a first pass over the cache to determine whether there is
420 // more than one track corresponding to a given metadata type.
421
422 iter_t i = ii;
423 while (i != j) {
424 const metadata_map_t::value_type& v = *i++;
425 const MetadataInfo& info = v.second;
426 const MetadataInfo::Type type = info.type;
427 ++exists[type];
428 }
429
430 // Make a second pass over the cache, synthesizing the filename of
431 // each output file (from the input file basename, the input track
432 // metadata type, and its track number if necessary), and then
433 // opening a WebVTT output file having that filename.
434
435 i = ii;
436 while (i != j) {
437 metadata_map_t::value_type& v = *i++;
438 MetadataInfo& info = v.second;
439 const MetadataInfo::Type type = info.type;
440
441 // Start with the basename of the input file.
442
443 string name(filename, ext);
444
445 // Next append the metadata kind.
446
447 switch (type) {
448 case MetadataInfo::kSubtitles:
449 name += "_SUBTITLES";
450 break;
451
452 case MetadataInfo::kCaptions:
453 name += "_CAPTIONS";
454 break;
455
456 case MetadataInfo::kDescriptions:
457 name += "_DESCRIPTIONS";
458 break;
459
460 case MetadataInfo::kMetadata:
461 name += "_METADATA";
462 break;
463
Matthew Heaneyc26db032012-10-26 15:06:28 -0700464 case MetadataInfo::kChapters:
465 name += "_CHAPTERS";
466 break;
467
Matthew Heaney4a514132012-08-30 15:16:06 -0700468 default:
469 return false;
470 }
471
472 // If there is more than one metadata track having a given type
473 // (the WebVTT-in-WebM spec doesn't preclude this), then qualify
474 // the output filename with the input track number.
475
476 if (exists[type] > 1) {
477 enum { kLen = 33 };
478 char str[kLen]; // max 126 tracks, so only 4 chars really needed
Matthew Heaney17cf7cc2014-02-28 12:33:58 -0800479#ifndef _MSC_VER
Matthew Heaney4a514132012-08-30 15:16:06 -0700480 snprintf(str, kLen, "%ld", v.first); // track number
Matthew Heaney17cf7cc2014-02-28 12:33:58 -0800481#else
482 _snprintf_s(str, sizeof(str), kLen, "%ld", v.first); // track number
483#endif
Matthew Heaney4a514132012-08-30 15:16:06 -0700484 name += str;
485 }
486
487 // Finally append the output filename extension.
488
489 name += ".vtt";
490
491 // We have synthesized the full output filename, so attempt to
492 // open the WebVTT output file.
493
494 info.file = fopen(name.c_str(), "wb");
Matthew Heaney17cf7cc2014-02-28 12:33:58 -0800495 const bool success = (info.file != NULL);
Matthew Heaney4a514132012-08-30 15:16:06 -0700496
Matthew Heaney17cf7cc2014-02-28 12:33:58 -0800497 if (!success) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700498 printf("unable to open output file %s\n", name.c_str());
499 return false;
500 }
501 }
502
503 return true;
504}
505
506void vttdemux::CloseFiles(metadata_map_t* metadata_map) {
507 if (metadata_map == NULL)
508 return;
509
510 metadata_map_t& m = *metadata_map;
511
512 typedef metadata_map_t::iterator iter_t;
513
514 iter_t i = m.begin();
515 const iter_t j = m.end();
516
517 // Gracefully close each output file, to ensure all output gets
518 // propertly flushed.
519
520 while (i != j) {
521 metadata_map_t::value_type& v = *i++;
522 MetadataInfo& info = v.second;
523
524 fclose(info.file);
525 info.file = NULL;
526 }
527}
528
529bool vttdemux::WriteFiles(const metadata_map_t& m, mkvparser::Segment* s) {
530 // First write the WebVTT header.
531
532 InitializeFiles(m);
533
Matthew Heaneyc26db032012-10-26 15:06:28 -0700534 if (!WriteChaptersFile(m, s))
535 return false;
536
Matthew Heaney4a514132012-08-30 15:16:06 -0700537 // Now iterate over the clusters, writing the WebVTT cue as we parse
538 // each metadata block.
539
540 const mkvparser::Cluster* cluster = s->GetFirst();
541
542 while (cluster != NULL && !cluster->EOS()) {
543 if (!ProcessCluster(m, cluster))
544 return false;
545
546 cluster = s->GetNext(cluster);
547 }
548
549 return true;
550}
551
552bool vttdemux::InitializeFiles(const metadata_map_t& m) {
553 // Write the WebVTT header for each output file in the cache.
554
555 typedef metadata_map_t::const_iterator iter_t;
556 iter_t i = m.begin();
557 const iter_t j = m.end();
558
559 while (i != j) {
560 const metadata_map_t::value_type& v = *i++;
561 const MetadataInfo& info = v.second;
562 FILE* const f = info.file;
563
564 if (fputs("WEBVTT\n", f) < 0) {
565 printf("unable to initialize output file\n");
566 return false;
567 }
568 }
569
570 return true;
571}
572
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700573bool vttdemux::WriteChaptersFile(const metadata_map_t& m,
574 const mkvparser::Segment* s) {
Matthew Heaneyc26db032012-10-26 15:06:28 -0700575 const metadata_map_t::const_iterator info_iter = m.find(kChaptersKey);
576 if (info_iter == m.end()) // no chapters, so nothing to do
577 return true;
578
579 const mkvparser::Chapters* const chapters = s->GetChapters();
580 if (chapters == NULL) // weird
581 return true;
582
583 const MetadataInfo& info = info_iter->second;
584 FILE* const file = info.file;
585
586 const int edition_count = chapters->GetEditionCount();
587
588 if (edition_count <= 0) // weird
Vignesh Venkatasubramanian7b245012014-04-29 00:35:56 -0700589 return true; // nothing to do
Matthew Heaneyc26db032012-10-26 15:06:28 -0700590
591 if (edition_count > 1) {
592 // TODO(matthewjheaney): figure what to do here
593 printf("more than one chapter edition detected\n");
594 return false;
595 }
596
597 const mkvparser::Chapters::Edition* const edition = chapters->GetEdition(0);
598
599 const int atom_count = edition->GetAtomCount();
600
601 for (int idx = 0; idx < atom_count; ++idx) {
602 const mkvparser::Chapters::Atom* const atom = edition->GetAtom(idx);
603 const int display_count = atom->GetDisplayCount();
604
605 if (display_count <= 0)
606 continue;
607
608 if (display_count > 1) {
609 // TODO(matthewjheaney): handle case of multiple languages
610 printf("more than 1 display in atom detected\n");
611 return false;
612 }
613
614 const mkvparser::Chapters::Display* const display = atom->GetDisplay(0);
615
616 if (const char* language = display->GetLanguage()) {
617 if (strcmp(language, "eng") != 0) {
618 // TODO(matthewjheaney): handle case of multiple languages.
619
620 // We must create a separate webvtt file for each language.
621 // This isn't a simple problem (which is why we defer it for
622 // now), because there's nothing in the header that tells us
623 // what languages we have as cues. We must parse the displays
624 // of each atom to determine that.
625
626 // One solution is to make two passes over the input data.
627 // First parse the displays, creating an in-memory cache of
628 // all the chapter cues, sorted according to their language.
629 // After we have read all of the chapter atoms from the input
630 // file, we can then write separate output files for each
631 // language.
632
633 printf("only English-language chapter cues are supported\n");
634 return false;
635 }
636 }
637
638 if (!WriteChaptersCue(file, chapters, atom, display))
639 return false;
640 }
641
642 return true;
643}
644
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700645bool vttdemux::WriteChaptersCue(FILE* f, const mkvparser::Chapters* chapters,
646 const mkvparser::Chapters::Atom* atom,
647 const mkvparser::Chapters::Display* display) {
Matthew Heaneyc26db032012-10-26 15:06:28 -0700648 // We start a new cue by writing a cue separator (an empty line)
649 // into the stream.
650
651 if (fputc('\n', f) < 0)
652 return false;
653
654 // A WebVTT Cue comprises 3 things: a cue identifier, followed by
655 // the cue timings, followed by the payload of the cue. We write
656 // each part of the cue in sequence.
657
Matthew Heaney28222b42012-11-13 12:44:06 -0800658 if (!WriteChaptersCueIdentifier(f, atom))
659 return false;
Matthew Heaneyc26db032012-10-26 15:06:28 -0700660
661 if (!WriteChaptersCueTimings(f, chapters, atom))
662 return false;
663
664 if (!WriteChaptersCuePayload(f, display))
665 return false;
666
667 return true;
668}
669
Matthew Heaney28222b42012-11-13 12:44:06 -0800670bool vttdemux::WriteChaptersCueIdentifier(
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700671 FILE* f, const mkvparser::Chapters::Atom* atom) {
Matthew Heaney28222b42012-11-13 12:44:06 -0800672 const char* const identifier = atom->GetStringUID();
673
674 if (identifier == NULL)
675 return true; // nothing else to do
676
677 if (fprintf(f, "%s\n", identifier) < 0)
678 return false;
679
680 return true;
681}
682
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700683bool vttdemux::WriteChaptersCueTimings(FILE* f,
684 const mkvparser::Chapters* chapters,
685 const mkvparser::Chapters::Atom* atom) {
Matthew Heaneyc26db032012-10-26 15:06:28 -0700686 const mkvtime_t start_ns = atom->GetStartTime(chapters);
687
688 if (start_ns < 0)
689 return false;
690
691 const mkvtime_t stop_ns = atom->GetStopTime(chapters);
692
693 if (stop_ns < 0)
694 return false;
695
696 if (!WriteCueTime(f, start_ns))
697 return false;
698
699 if (fputs(" --> ", f) < 0)
700 return false;
701
702 if (!WriteCueTime(f, stop_ns))
703 return false;
704
705 if (fputc('\n', f) < 0)
706 return false;
707
708 return true;
709}
710
711bool vttdemux::WriteChaptersCuePayload(
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700712 FILE* f, const mkvparser::Chapters::Display* display) {
Matthew Heaneyc26db032012-10-26 15:06:28 -0700713 // Bind a Chapter parser object to the display, which allows us to
714 // extract each line of text from the title-part of the display.
715 ChapterAtomParser parser(display);
716
717 int count = 0; // count of lines of payload text written to output file
718 for (string line;;) {
719 const int e = parser.GetLine(&line);
720
721 if (e < 0) // error (only -- we allow EOS here)
722 return false;
723
724 if (line.empty()) // TODO(matthewjheaney): retain this check?
725 break;
726
727 if (fprintf(f, "%s\n", line.c_str()) < 0)
728 return false;
729
730 ++count;
731 }
732
733 if (count <= 0) // WebVTT cue requires non-empty payload
734 return false;
735
736 return true;
737}
738
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700739bool vttdemux::ProcessCluster(const metadata_map_t& m,
740 const mkvparser::Cluster* c) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700741 // Visit the blocks in this cluster, writing a WebVTT cue for each
742 // metadata block.
743
744 const mkvparser::BlockEntry* block_entry;
745
746 long result = c->GetFirst(block_entry); // NOLINT
Vignesh Venkatasubramanian7b245012014-04-29 00:35:56 -0700747 if (result < 0) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700748 printf("bad cluster (unable to get first block)\n");
749 return false;
750 }
751
752 while (block_entry != NULL && !block_entry->EOS()) {
753 if (!ProcessBlockEntry(m, block_entry))
754 return false;
755
756 result = c->GetNext(block_entry, block_entry);
757 if (result < 0) { // error
758 printf("bad cluster (unable to get next block)\n");
759 return false;
760 }
761 }
762
763 return true;
764}
765
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700766bool vttdemux::ProcessBlockEntry(const metadata_map_t& m,
767 const mkvparser::BlockEntry* block_entry) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700768 // If the track number for this block is in the cache, then we have
769 // a metadata block, so write the WebVTT cue to the output file.
770
771 const mkvparser::Block* const block = block_entry->GetBlock();
772 const long long tn = block->GetTrackNumber(); // NOLINT
773
774 typedef metadata_map_t::const_iterator iter_t;
Matthew Heaney17cf7cc2014-02-28 12:33:58 -0800775 const iter_t i = m.find(static_cast<metadata_map_t::key_type>(tn));
Matthew Heaney4a514132012-08-30 15:16:06 -0700776
777 if (i == m.end()) // not a metadata track
Vignesh Venkatasubramanian7b245012014-04-29 00:35:56 -0700778 return true; // nothing else to do
Matthew Heaney4a514132012-08-30 15:16:06 -0700779
780 if (block_entry->GetKind() != mkvparser::BlockEntry::kBlockGroup)
781 return false; // weird
782
783 typedef mkvparser::BlockGroup BG;
784 const BG* const block_group = static_cast<const BG*>(block_entry);
785
786 const MetadataInfo& info = i->second;
787 FILE* const f = info.file;
788
789 return WriteCue(f, block_group);
790}
791
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700792bool vttdemux::WriteCue(FILE* f, const mkvparser::BlockGroup* block_group) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700793 // Bind a FrameParser object to the block, which allows us to
794 // extract each line of text from the payload of the block.
795 FrameParser parser(block_group);
796
797 // We start a new cue by writing a cue separator (an empty line)
798 // into the stream.
799
800 if (fputc('\n', f) < 0)
801 return false;
802
803 // A WebVTT Cue comprises 3 things: a cue identifier, followed by
804 // the cue timings, followed by the payload of the cue. We write
805 // each part of the cue in sequence.
806
807 if (!WriteCueIdentifier(f, &parser))
808 return false;
809
810 if (!WriteCueTimings(f, &parser))
811 return false;
812
813 if (!WriteCuePayload(f, &parser))
814 return false;
815
816 return true;
817}
818
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700819bool vttdemux::WriteCueIdentifier(FILE* f, FrameParser* parser) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700820 string line;
821 int e = parser->GetLine(&line);
822
823 if (e) // error or EOS
824 return false;
825
826 // If the cue identifier line is empty, this means that the original
827 // WebVTT cue did not have a cue identifier, so we don't bother
828 // writing an extra line terminator to the output file (though doing
829 // so would be harmless).
830
831 if (!line.empty()) {
832 if (fputs(line.c_str(), f) < 0)
833 return false;
834
835 if (fputc('\n', f) < 0)
836 return false;
837 }
838
839 return true;
840}
841
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700842bool vttdemux::WriteCueTimings(FILE* f, FrameParser* parser) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700843 const mkvparser::BlockGroup* const block_group = parser->block_group_;
844 const mkvparser::Cluster* const cluster = block_group->GetCluster();
845 const mkvparser::Block* const block = block_group->GetBlock();
846
847 // A WebVTT Cue "timings" line comprises two parts: the start and
848 // stop time for this cue, followed by the (optional) cue settings,
849 // such as orientation of the rendered text or its size. Only the
850 // settings part of the cue timings line is stored in the WebM
851 // block. We reconstruct the start and stop times of the WebVTT cue
852 // from the timestamp and duration of the WebM block.
853
854 const mkvtime_t start_ns = block->GetTime(cluster);
855
856 if (!WriteCueTime(f, start_ns))
857 return false;
858
859 if (fputs(" --> ", f) < 0)
860 return false;
861
862 const mkvtime_t duration_timecode = block_group->GetDurationTimeCode();
863
864 if (duration_timecode < 0)
865 return false;
866
867 const mkvparser::Segment* const segment = cluster->m_pSegment;
868 const mkvparser::SegmentInfo* const info = segment->GetInfo();
869
870 if (info == NULL)
871 return false;
872
873 const mkvtime_t timecode_scale = info->GetTimeCodeScale();
874
875 if (timecode_scale <= 0)
876 return false;
877
878 const mkvtime_t duration_ns = duration_timecode * timecode_scale;
879 const mkvtime_t stop_ns = start_ns + duration_ns;
880
881 if (!WriteCueTime(f, stop_ns))
882 return false;
883
884 string line;
885 int e = parser->GetLine(&line);
886
887 if (e) // error or EOS
888 return false;
889
890 if (!line.empty()) {
891 if (fputc(' ', f) < 0)
892 return false;
893
894 if (fputs(line.c_str(), f) < 0)
895 return false;
896 }
897
898 if (fputc('\n', f) < 0)
899 return false;
900
901 return true;
902}
903
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700904bool vttdemux::WriteCueTime(FILE* f, mkvtime_t time_ns) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700905 mkvtime_t ms = time_ns / 1000000; // WebVTT time has millisecond resolution
906
907 mkvtime_t sec = ms / 1000;
908 ms -= sec * 1000;
909
910 mkvtime_t min = sec / 60;
911 sec -= 60 * min;
912
913 mkvtime_t hr = min / 60;
914 min -= 60 * hr;
915
916 if (hr > 0) {
917 if (fprintf(f, "%02lld:", hr) < 0)
918 return false;
919 }
920
921 if (fprintf(f, "%02lld:%02lld.%03lld", min, sec, ms) < 0)
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700922 return false;
Matthew Heaney4a514132012-08-30 15:16:06 -0700923
924 return true;
925}
926
Vignesh Venkatasubramaniane3485c92014-04-14 12:14:06 -0700927bool vttdemux::WriteCuePayload(FILE* f, FrameParser* parser) {
Matthew Heaney4a514132012-08-30 15:16:06 -0700928 int count = 0; // count of lines of payload text written to output file
929 for (string line;;) {
930 const int e = parser->GetLine(&line);
931
932 if (e < 0) // error (only -- we allow EOS here)
933 return false;
934
935 if (line.empty()) // TODO(matthewjheaney): retain this check?
936 break;
937
938 if (fprintf(f, "%s\n", line.c_str()) < 0)
939 return false;
940
941 ++count;
942 }
943
944 if (count <= 0) // WebVTT cue requires non-empty payload
945 return false;
946
947 return true;
948}
Tom Finegane64bf752016-03-18 09:32:52 -0700949
950} // namespace libwebm
951
952int main(int argc, const char* argv[]) {
953 if (argc != 2) {
954 printf("usage: vttdemux <webmfile>\n");
955 return EXIT_SUCCESS;
956 }
957
958 const char* const filename = argv[1];
Tom Finegancbe5c402016-03-21 12:16:30 -0700959 mkvparser::MkvReader reader;
Tom Finegane64bf752016-03-18 09:32:52 -0700960
961 int e = reader.Open(filename);
962
963 if (e) { // error
964 printf("unable to open file\n");
965 return EXIT_FAILURE;
966 }
967
968 libwebm::vttdemux::mkvpos_t pos;
969
970 if (!libwebm::vttdemux::ParseHeader(&reader, &pos))
971 return EXIT_FAILURE;
972
973 libwebm::vttdemux::segment_ptr_t segment_ptr;
974
975 if (!libwebm::vttdemux::ParseSegment(&reader, pos, &segment_ptr))
976 return EXIT_FAILURE;
977
978 libwebm::vttdemux::metadata_map_t metadata_map;
979
980 BuildMap(segment_ptr.get(), &metadata_map);
981
982 if (metadata_map.empty()) {
983 printf("no WebVTT metadata found\n");
984 return EXIT_FAILURE;
985 }
986
987 if (!OpenFiles(&metadata_map, filename)) {
988 CloseFiles(&metadata_map); // nothing to flush, so not strictly necessary
989 return EXIT_FAILURE;
990 }
991
992 if (!WriteFiles(metadata_map, segment_ptr.get())) {
993 CloseFiles(&metadata_map); // might as well flush what we do have
994 return EXIT_FAILURE;
995 }
996
997 CloseFiles(&metadata_map);
998
999 return EXIT_SUCCESS;
Tom Finegan5f1065e2016-03-17 15:09:46 -07001000}