Source: lib/text/mp4_vtt_parser.js

  1. /**
  2. * @license
  3. * Copyright 2016 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. goog.provide('shaka.text.Mp4VttParser');
  18. goog.require('goog.asserts');
  19. goog.require('shaka.log');
  20. goog.require('shaka.text.Cue');
  21. goog.require('shaka.text.TextEngine');
  22. goog.require('shaka.text.VttTextParser');
  23. goog.require('shaka.util.DataViewReader');
  24. goog.require('shaka.util.Error');
  25. goog.require('shaka.util.Functional');
  26. goog.require('shaka.util.Mp4Parser');
  27. goog.require('shaka.util.StringUtils');
  28. goog.require('shaka.util.TextParser');
  29. /**
  30. * @struct
  31. * @constructor
  32. * @implements {shaka.extern.TextParser}
  33. */
  34. shaka.text.Mp4VttParser = function() {
  35. /**
  36. * The current time scale used by the VTT parser.
  37. *
  38. * @type {?number}
  39. * @private
  40. */
  41. this.timescale_ = null;
  42. };
  43. /** @override */
  44. shaka.text.Mp4VttParser.prototype.parseInit = function(data) {
  45. const Mp4Parser = shaka.util.Mp4Parser;
  46. let sawWVTT = false;
  47. new Mp4Parser()
  48. .box('moov', Mp4Parser.children)
  49. .box('trak', Mp4Parser.children)
  50. .box('mdia', Mp4Parser.children)
  51. .fullBox('mdhd', function(box) {
  52. goog.asserts.assert(
  53. box.version == 0 || box.version == 1,
  54. 'MDHD version can only be 0 or 1');
  55. if (box.version == 0) {
  56. box.reader.skip(4); // Skip "creation_time".
  57. box.reader.skip(4); // Skip "modification_time".
  58. this.timescale_ = box.reader.readUint32();
  59. box.reader.skip(4); // Skip "duration".
  60. } else {
  61. box.reader.skip(8); // Skip "creation_time".
  62. box.reader.skip(8); // Skip "modification_time".
  63. this.timescale_ = box.reader.readUint32();
  64. box.reader.skip(8); // Skip "duration".
  65. }
  66. box.reader.skip(4); // Skip "pad", "language", and "pre-defined".
  67. }.bind(this))
  68. .box('minf', Mp4Parser.children)
  69. .box('stbl', Mp4Parser.children)
  70. .fullBox('stsd', Mp4Parser.sampleDescription)
  71. .box('wvtt', function(box) {
  72. // A valid vtt init segment, though we have no actual subtitles yet.
  73. sawWVTT = true;
  74. }).parse(data);
  75. if (!this.timescale_) {
  76. // Missing timescale for VTT content. It should be located in the MDHD.
  77. throw new shaka.util.Error(
  78. shaka.util.Error.Severity.CRITICAL,
  79. shaka.util.Error.Category.TEXT,
  80. shaka.util.Error.Code.INVALID_MP4_VTT);
  81. }
  82. if (!sawWVTT) {
  83. // A WVTT box should have been seen (a valid vtt init segment with no
  84. // actual subtitles).
  85. throw new shaka.util.Error(
  86. shaka.util.Error.Severity.CRITICAL,
  87. shaka.util.Error.Category.TEXT,
  88. shaka.util.Error.Code.INVALID_MP4_VTT);
  89. }
  90. };
  91. /** @override */
  92. shaka.text.Mp4VttParser.prototype.parseMedia = function(data, time) {
  93. if (!this.timescale_) {
  94. // Missing timescale for VTT content. We should have seen the init segment.
  95. shaka.log.error('No init segment for MP4+VTT!');
  96. throw new shaka.util.Error(
  97. shaka.util.Error.Severity.CRITICAL,
  98. shaka.util.Error.Category.TEXT,
  99. shaka.util.Error.Code.INVALID_MP4_VTT);
  100. }
  101. const Mp4VttParser = shaka.text.Mp4VttParser;
  102. const Mp4Parser = shaka.util.Mp4Parser;
  103. let baseTime = 0;
  104. /** @type {!Array.<shaka.text.Mp4VttParser.TimeSegment>} */
  105. let presentations = [];
  106. /** @type {Uint8Array} */
  107. let rawPayload;
  108. /** @type {!Array.<shaka.text.Cue>} */
  109. let cues = [];
  110. let sawTFDT = false;
  111. let sawTRUN = false;
  112. let sawMDAT = false;
  113. let defaultDuration = null;
  114. new Mp4Parser()
  115. .box('moof', Mp4Parser.children)
  116. .box('traf', Mp4Parser.children)
  117. .fullBox('tfdt', function(box) {
  118. sawTFDT = true;
  119. goog.asserts.assert(
  120. box.version == 0 || box.version == 1,
  121. 'TFDT version can only be 0 or 1');
  122. baseTime = (box.version == 0) ?
  123. box.reader.readUint32() :
  124. box.reader.readUint64();
  125. })
  126. .fullBox('tfhd', function(box) {
  127. goog.asserts.assert(
  128. box.flags != null,
  129. 'A TFHD box should have a valid flags value');
  130. defaultDuration = Mp4VttParser.parseTFHD_(
  131. box.flags, box.reader);
  132. })
  133. .fullBox('trun', function(box) {
  134. sawTRUN = true;
  135. goog.asserts.assert(
  136. box.version != null,
  137. 'A TRUN box should have a valid version value');
  138. goog.asserts.assert(
  139. box.flags != null,
  140. 'A TRUN box should have a valid flags value');
  141. presentations = Mp4VttParser.parseTRUN_(
  142. box.version, box.flags, box.reader);
  143. })
  144. .box('mdat', Mp4Parser.allData(function(data) {
  145. goog.asserts.assert(!sawMDAT,
  146. 'VTT cues in mp4 with multiple MDAT are not currently supported!');
  147. sawMDAT = true;
  148. rawPayload = data;
  149. })).parse(data);
  150. if (!sawMDAT && !sawTFDT && !sawTRUN) {
  151. // A required box is missing.
  152. throw new shaka.util.Error(
  153. shaka.util.Error.Severity.CRITICAL,
  154. shaka.util.Error.Category.TEXT,
  155. shaka.util.Error.Code.INVALID_MP4_VTT);
  156. }
  157. let currentTime = baseTime;
  158. let dataView = new DataView(
  159. rawPayload.buffer, rawPayload.byteOffset, rawPayload.byteLength);
  160. /** @type {!shaka.util.DataViewReader} */
  161. let reader = new shaka.util.DataViewReader(
  162. dataView, shaka.util.DataViewReader.Endianness.BIG_ENDIAN);
  163. presentations.forEach((presentation) => {
  164. // If one presentation corresponds to multiple payloads, it is assumed
  165. // that all of those payloads have the same start time and duration.
  166. let duration = presentation.duration || defaultDuration;
  167. let startTime = presentation.timeOffset ?
  168. baseTime + presentation.timeOffset :
  169. currentTime;
  170. currentTime = startTime + (duration || 0);
  171. // Read samples until it adds up to the given size.
  172. let totalSize = 0;
  173. do {
  174. // Read the payload size.
  175. let payloadSize = reader.readUint32();
  176. totalSize += payloadSize;
  177. // Skip the type.
  178. let payloadType = reader.readUint32();
  179. let payloadName = shaka.util.Mp4Parser.typeToString(payloadType);
  180. // Read the data payload.
  181. /** @type {Uint8Array} */
  182. let payload = null;
  183. if (payloadName == 'vttc') {
  184. if (payloadSize > 8) {
  185. payload = reader.readBytes(payloadSize - 8);
  186. }
  187. } else if (payloadName == 'vtte') {
  188. // It's a vtte, which is a vtt cue that is empty. Ignore any data that
  189. // does exist.
  190. reader.skip(payloadSize - 8);
  191. } else {
  192. shaka.log.error('Unknown box ' + payloadName + '! Skipping!');
  193. reader.skip(payloadSize - 8);
  194. }
  195. if (duration) {
  196. if (payload) {
  197. cues.push(shaka.text.Mp4VttParser.parseVTTC_(
  198. payload,
  199. time.periodStart + startTime / this.timescale_,
  200. time.periodStart + currentTime / this.timescale_));
  201. }
  202. } else {
  203. shaka.log.error('WVTT sample duration unknown, and no default found!');
  204. }
  205. goog.asserts.assert(
  206. !presentation.sampleSize || totalSize <= presentation.sampleSize,
  207. 'The samples do not fit evenly into the sample sizes given in the ' +
  208. 'TRUN box!');
  209. // If no sampleSize was specified, it's assumed that this presentation
  210. // corresponds to only a single cue.
  211. } while (presentation.sampleSize && (totalSize < presentation.sampleSize));
  212. });
  213. goog.asserts.assert(
  214. !reader.hasMoreData(),
  215. 'MDAT which contain VTT cues and non-VTT data are not currently ' +
  216. 'supported!');
  217. return /** @type {!Array.<!shaka.extern.Cue>} */ (
  218. cues.filter(shaka.util.Functional.isNotNull));
  219. };
  220. /**
  221. * @typedef {{
  222. * duration: ?number,
  223. * sampleSize: ?number,
  224. * timeOffset: ?number
  225. * }}
  226. *
  227. * @property {?number} duration
  228. * The length of the segment in timescale units.
  229. * @property {?number} sampleSize
  230. * The size of the segment in bytes.
  231. * @property {?number} timeOffset
  232. * The time since the start of the segment in timescale units. Time
  233. * offset is based on the start of the segment. If this value is
  234. * missing, the accumulated durations preceding this time segment will
  235. * be used to create the start time.
  236. */
  237. shaka.text.Mp4VttParser.TimeSegment;
  238. /**
  239. * @param {number} flags
  240. * @param {!shaka.util.DataViewReader} reader
  241. * @return {?number} The default_sample_duration field, if present.
  242. * @private
  243. */
  244. shaka.text.Mp4VttParser.parseTFHD_ = function(flags, reader) {
  245. // Skip "track_ID".
  246. reader.skip(4);
  247. // Skip "base_data_offset" if present.
  248. if (flags & 0x000001) { reader.skip(8); }
  249. // Skip "sample_description_index" if present.
  250. if (flags & 0x000002) { reader.skip(4); }
  251. // Read and return "default_sample_duration" if present.
  252. if (flags & 0x000008) { return reader.readUint32(); }
  253. // There is no "default_sample_duration".
  254. return null;
  255. };
  256. /**
  257. * @param {number} version
  258. * @param {number} flags
  259. * @param {!shaka.util.DataViewReader} reader
  260. * @return {!Array.<shaka.text.Mp4VttParser.TimeSegment>}
  261. * @private
  262. */
  263. shaka.text.Mp4VttParser.parseTRUN_ = function(version, flags, reader) {
  264. let sampleCount = reader.readUint32();
  265. // Skip "data_offset" if present.
  266. if (flags & 0x000001) { reader.skip(4); }
  267. // Skip "first_sample_flags" if present.
  268. if (flags & 0x000004) { reader.skip(4); }
  269. let samples = [];
  270. for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) {
  271. /** @type {shaka.text.Mp4VttParser.TimeSegment} */
  272. let sample = {
  273. duration: null,
  274. sampleSize: null,
  275. timeOffset: null,
  276. };
  277. // Read "sample duration" if present.
  278. if (flags & 0x000100) { sample.duration = reader.readUint32(); }
  279. // Read "sample_size" if present.
  280. if (flags & 0x000200) { sample.sampleSize = reader.readUint32(); }
  281. // Skip "sample_flags" if present.
  282. if (flags & 0x000400) { reader.skip(4); }
  283. // Read "sample_time_offset" if present.
  284. if (flags & 0x000800) {
  285. sample.timeOffset = version == 0 ?
  286. reader.readUint32() :
  287. reader.readInt32();
  288. }
  289. samples.push(sample);
  290. }
  291. return samples;
  292. };
  293. /**
  294. * Parses a vttc box into a cue.
  295. *
  296. * @param {!Uint8Array} data
  297. * @param {number} startTime
  298. * @param {number} endTime
  299. * @return {shaka.text.Cue}
  300. * @private
  301. */
  302. shaka.text.Mp4VttParser.parseVTTC_ = function(data, startTime, endTime) {
  303. let payload;
  304. let id;
  305. let settings;
  306. new shaka.util.Mp4Parser()
  307. .box('payl', shaka.util.Mp4Parser.allData(function(data) {
  308. payload = shaka.util.StringUtils.fromUTF8(data);
  309. }))
  310. .box('iden', shaka.util.Mp4Parser.allData(function(data) {
  311. id = shaka.util.StringUtils.fromUTF8(data);
  312. }))
  313. .box('sttg', shaka.util.Mp4Parser.allData(function(data) {
  314. settings = shaka.util.StringUtils.fromUTF8(data);
  315. }))
  316. .parse(data);
  317. if (payload) {
  318. return shaka.text.Mp4VttParser.assembleCue_(payload,
  319. id,
  320. settings,
  321. startTime,
  322. endTime);
  323. } else {
  324. return null;
  325. }
  326. };
  327. /**
  328. * Take the individual components that make a cue and create a vttc cue.
  329. *
  330. * @param {string} payload
  331. * @param {?string} id
  332. * @param {?string} settings
  333. * @param {number} startTime
  334. * @param {number} endTime
  335. * @return {!shaka.text.Cue}
  336. * @private
  337. */
  338. shaka.text.Mp4VttParser.assembleCue_ = function(payload,
  339. id,
  340. settings,
  341. startTime,
  342. endTime) {
  343. let cue = new shaka.text.Cue(
  344. startTime,
  345. endTime,
  346. payload);
  347. if (id) {
  348. cue.id = id;
  349. }
  350. if (settings) {
  351. let parser = new shaka.util.TextParser(settings);
  352. let word = parser.readWord();
  353. while (word) {
  354. // TODO: Check WebVTTConfigurationBox for region info.
  355. if (!shaka.text.VttTextParser.parseCueSetting(cue, word,
  356. /* VTTRegions */ [])) {
  357. shaka.log.warning('VTT parser encountered an invalid VTT setting: ',
  358. word,
  359. ' The setting will be ignored.');
  360. }
  361. parser.skipWhitespace();
  362. word = parser.readWord();
  363. }
  364. }
  365. return cue;
  366. };
  367. shaka.text.TextEngine.registerParser(
  368. 'application/mp4; codecs="wvtt"',
  369. shaka.text.Mp4VttParser);