webvtt-parser.ts 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. import { utf8ArrayToStr } from '@svta/common-media-library/utils/utf8ArrayToStr';
  2. import { hash } from './hash';
  3. import { toMpegTsClockFromTimescale } from './timescale-conversion';
  4. import { VTTParser } from './vttparser';
  5. import { normalizePts } from '../remux/mp4-remuxer';
  6. import type { RationalTimestamp } from './timescale-conversion';
  7. import type { VTTCCs } from '../types/vtt';
  // Matches each line-break form (CRLF, LFCR, lone LF, lone CR); the global flag lets `replace` rewrite every occurrence.
  const LINEBREAKS = /\r\n|\n\r|\n|\r/g;
  9. // String.prototype.startsWith is not supported in IE11
  /**
   * Minimal stand-in for `String.prototype.startsWith`.
   *
   * @param inputString - String to inspect.
   * @param searchString - Text expected to begin at `position`.
   * @param position - Index in `inputString` where the comparison starts (default 0).
   *   Negative or NaN values are treated as 0, as the native method does.
   * @returns `true` when `searchString` occurs in `inputString` exactly at `position`.
   */
  const startsWith = function (
    inputString: string,
    searchString: string,
    position: number = 0,
  ): boolean {
    // `slice` reads a negative index as an offset from the END of the string,
    // which would compare against the wrong window, so clamp it first.
    const start = position > 0 ? position : 0;
    return (
      inputString.slice(start, start + searchString.length) === searchString
    );
  };
  /**
   * Converts a timestamp of the form `mm:ss.ttt` or `hh:mm:ss.ttt` into milliseconds.
   *
   * Fields are read by their position from the end of the string: the last
   * three characters are milliseconds, preceded by two-character seconds and
   * minutes fields. When the string is longer than nine characters, everything
   * before the first `:` is read as hours.
   *
   * @param timeString - Timestamp text, e.g. the `LOCAL:` value of an X-TIMESTAMP-MAP header.
   * @returns The timestamp expressed in milliseconds.
   * @throws Error when a field is missing or is not made up solely of decimal digits.
   */
  const cueString2millis = function (timeString: string): number {
    // Surrounding whitespace would shift the fixed-position fields below.
    const value = timeString.trim();
    // A bare `parseInt` accepts partial matches ("1a" -> 1) and hex prefixes
    // ("0x1F" -> 31); require plain decimal digits and parse with radix 10.
    const toInt = (field: string): number =>
      /^\d+$/.test(field) ? parseInt(field, 10) : NaN;

    const millis = toInt(value.slice(-3));
    const secs = toInt(value.slice(-6, -4));
    const mins = toInt(value.slice(-9, -7));
    const hours =
      value.length > 9 ? toInt(value.substring(0, value.indexOf(':'))) : 0;

    if ([millis, secs, mins, hours].some((field) => !Number.isFinite(field))) {
      throw Error(`Malformed X-TIMESTAMP-MAP: Local:${timeString}`);
    }

    return millis + 1000 * secs + 60 * 1000 * mins + 60 * 60 * 1000 * hours;
  };
  40. // Create a unique hash id for a cue based on start/end times and text.
  41. // This helps timeline-controller to avoid showing repeated captions.
  /**
   * Builds a cue identifier by hashing the start time, the end time and the
   * text separately, then combining the three results in that order.
   *
   * @param startTime - Cue start time; hashed via its string form.
   * @param endTime - Cue end time; hashed via its string form.
   * @param text - Cue text.
   * @returns The three hash results combined with `+` (start, end, text).
   */
  export function generateCueId(
    startTime: number,
    endTime: number,
    text: string,
  ) {
    const startHash = hash(startTime.toString());
    const endHash = hash(endTime.toString());
    const textHash = hash(text);
    return startHash + endHash + textHash;
  }
  /**
   * Recomputes the running offsets stored on `vttCCs`, starting from the entry at key `cc`.
   *
   * - If that entry has no predecessor, or its predecessor is no longer flagged
   *   `new` while the entry itself still is, both `ccOffset` and
   *   `presentationOffset` are set to the entry's `start`, its `new` flag is
   *   cleared, and the function returns.
   * - Otherwise the `prevCC` chain is walked backwards for as long as the
   *   predecessor is flagged `new`, adding each `start` difference to
   *   `ccOffset`; afterwards `presentationOffset` is set to `presentationTime`.
   *
   * @param vttCCs - Shared bookkeeping object; mutated in place.
   * @param cc - Key of the entry in `vttCCs` to start from.
   * @param presentationTime - Value stored in `vttCCs.presentationOffset` on the chain-walking path.
   */
  const calculateOffset = function (
    vttCCs: VTTCCs,
    cc: number,
    presentationTime: number,
  ): void {
    let currCC = vttCCs[cc];
    let prevCC = vttCCs[currCC.prevCC];

    // This is the first discontinuity or cues have been processed since the last discontinuity
    // Offset = current discontinuity time
    if (!prevCC || (!prevCC.new && currCC.new)) {
      vttCCs.ccOffset = vttCCs.presentationOffset = currCC.start;
      currCC.new = false;
      return;
    }

    // There have been discontinuities since cues were last parsed.
    // Offset = time elapsed
    while (prevCC?.new) {
      vttCCs.ccOffset += currCC.start - prevCC.start;
      currCC.new = false;
      currCC = prevCC;
      prevCC = vttCCs[currCC.prevCC];
    }

    vttCCs.presentationOffset = presentationTime;
  };
  /**
   * Parses a WebVTT payload, shifts each cue's timing, and hands the result to a callback.
   *
   * The bytes are decoded to text, line breaks are normalised, and the lines
   * are fed to a `VTTParser` one at a time. An `X-TIMESTAMP-MAP=` header line,
   * if present before the first blank line, is consumed here (not passed to the
   * parser) and its `LOCAL` / `MPEGTS` values are used when offsetting cues.
   * Cues are clamped to start no earlier than 0 and are dropped when they do
   * not end after 0.
   *
   * @param vttByteArray - Raw bytes of the WebVTT text.
   * @param initPTS - Rational timestamp converted with `toMpegTsClockFromTimescale`
   *   and subtracted from the header's MPEGTS value. If it is missing while that
   *   difference is non-zero, the error 'Missing initPTS for VTT MPEGTS' is recorded.
   * @param vttCCs - Offset bookkeeping object; read and mutated while cues are processed.
   * @param cc - Key of the current entry in `vttCCs`.
   * @param timeOffset - Multiplied by 90000 and passed as the second argument of
   *   `normalizePts` (presumably the reference time for normalisation — confirm against that helper).
   * @param callBack - Called from the parser's `onflush` hook with the collected cues
   *   when no error was recorded.
   * @param errorCallBack - Called from the parser's `onflush` hook with the most
   *   recently recorded error instead of `callBack`.
   */
  export function parseWebVTT(
    vttByteArray: ArrayBuffer,
    initPTS: RationalTimestamp | undefined,
    vttCCs: VTTCCs,
    cc: number,
    timeOffset: number,
    callBack: (cues: VTTCue[]) => void,
    errorCallBack: (error: Error) => void,
  ): void {
    const parser = new VTTParser();
    // Convert byteArray into string, replacing any somewhat exotic linefeeds with "\n", then split on that character.
    const vttLines = utf8ArrayToStr(new Uint8Array(vttByteArray))
      .trim()
      .replace(LINEBREAKS, '\n')
      .split('\n');
    const cues: VTTCue[] = [];
    const init90kHz = initPTS
      ? toMpegTsClockFromTimescale(initPTS.baseTime, initPTS.timescale)
      : 0;
    let cueTime = '00:00.000';
    let timestampMapMPEGTS = 0;
    let timestampMapLOCAL = 0;
    // Stays undefined until a parsing problem is recorded.
    let parsingError: Error | undefined;
    let inHeader = true;

    parser.oncue = function (cue: VTTCue) {
      // Adjust cue timing; clamp cues to start no earlier than - and drop cues that don't end after - 0 on timeline.
      const currCC = vttCCs[cc];
      let cueOffset = vttCCs.ccOffset;

      // Calculate subtitle PTS offset (90000 ticks per second)
      const webVttMpegTsMapOffset = (timestampMapMPEGTS - init90kHz) / 90000;

      // Update offsets for new discontinuities
      if (currCC?.new) {
        // NOTE(review): timestampMapLOCAL is initialised to 0 and only ever
        // assigned numbers, so this check is always true and the
        // calculateOffset branch is currently unreachable. Left unchanged
        // because altering it would change cue timing.
        if (timestampMapLOCAL !== undefined) {
          // When local time is provided, offset = discontinuity start time - local time
          cueOffset = vttCCs.ccOffset = currCC.start;
        } else {
          calculateOffset(vttCCs, cc, webVttMpegTsMapOffset);
        }
      }

      if (webVttMpegTsMapOffset) {
        if (!initPTS) {
          parsingError = new Error('Missing initPTS for VTT MPEGTS');
          return;
        }
        // If we have MPEGTS, offset = presentation time + discontinuity offset
        cueOffset = webVttMpegTsMapOffset - vttCCs.presentationOffset;
      }

      const duration = cue.endTime - cue.startTime;
      const startTime =
        normalizePts(
          (cue.startTime + cueOffset - timestampMapLOCAL) * 90000,
          timeOffset * 90000,
        ) / 90000;
      cue.startTime = Math.max(startTime, 0);
      cue.endTime = Math.max(startTime + duration, 0);

      // Trim trailing webvtt block whitespaces
      const text = cue.text.trim();

      // Fix encoding of special characters
      cue.text = decodeURIComponent(encodeURIComponent(text));

      // If the cue was not assigned an id from the VTT file (line above the content), create one.
      if (!cue.id) {
        cue.id = generateCueId(cue.startTime, cue.endTime, text);
      }

      if (cue.endTime > 0) {
        cues.push(cue);
      }
    };

    parser.onparsingerror = function (error: Error) {
      parsingError = error;
    };

    parser.onflush = function () {
      if (parsingError) {
        errorCallBack(parsingError);
        return;
      }
      callBack(cues);
    };

    // Go through contents line by line.
    vttLines.forEach((line) => {
      if (inHeader) {
        // Look for X-TIMESTAMP-MAP in header.
        if (startsWith(line, 'X-TIMESTAMP-MAP=')) {
          // Once found, no more are allowed anyway, so stop searching.
          inHeader = false;
          // Extract LOCAL and MPEGTS (16 === 'X-TIMESTAMP-MAP='.length).
          line
            .slice(16)
            .split(',')
            .forEach((timestamp) => {
              if (startsWith(timestamp, 'LOCAL:')) {
                cueTime = timestamp.slice(6);
              } else if (startsWith(timestamp, 'MPEGTS:')) {
                // Explicit radix: the value is always read as decimal.
                timestampMapMPEGTS = parseInt(timestamp.slice(7), 10);
              }
            });
          try {
            // Convert cue time to seconds
            timestampMapLOCAL = cueString2millis(cueTime) / 1000;
          } catch (error) {
            // Catch variables are `unknown` under strict settings; narrow before storing.
            parsingError =
              error instanceof Error ? error : new Error(String(error));
          }
          // Return without parsing X-TIMESTAMP-MAP line.
          return;
        } else if (line === '') {
          inHeader = false;
        }
      }
      // Parse line by default.
      parser.parse(line + '\n');
    });

    parser.flush();
  }