1use super::srt::{SRTLine, SRT};
7use super::ssa::{SSAEvent, SSAInfo, SSAStyle, SSA};
8use crate::error;
9use crate::util::{Alignment, Color};
10use regex::Regex;
11use serde::Deserialize;
12use serde::Serialize;
13use std::collections::HashMap;
14use std::fmt::Display;
15use time::Time;
16
/// One `STYLE` block of a WebVTT file, i.e. a single `::cue` rule.
#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
pub struct VTTStyle {
    /// Text found between the parentheses of `::cue(...)`; `None` for a bare `::cue` rule.
    pub selector: Option<String>,
    /// Declarations of the rule, keyed by property name (for example `color`).
    pub entries: HashMap<String, String>,
}
23
/// A single cue (timed piece of text) of a WebVTT file.
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
pub struct VTTLine {
    /// Optional cue identifier: the line preceding the timing line, if there is one.
    pub identifier: Option<String>,
    /// Time at which the cue starts.
    pub start: Time,
    /// Time at which the cue ends.
    pub end: Time,
    /// Cue settings that follow the end timestamp. A `name:value` setting is stored as
    /// `name -> Some(value)`, a setting without `:` as `name -> None`.
    pub settings: HashMap<String, Option<String>>,
    /// Cue payload; multiple payload lines are joined with `\n`.
    pub text: String,
}
33
34impl Default for VTTLine {
35 fn default() -> Self {
36 Self {
37 identifier: None,
38 start: Time::from_hms(0, 0, 0).unwrap(),
39 end: Time::from_hms(0, 0, 0).unwrap(),
40 settings: Default::default(),
41 text: "".to_string(),
42 }
43 }
44}
45
/// A `REGION` definition block of a WebVTT file.
#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
pub struct VTTRegion {
    /// Value of the `id` setting.
    pub id: Option<String>,
    /// Value of the `width` setting, as a percentage (the trailing `%` is not stored).
    pub width: Option<f32>,
    /// Value of the `lines` setting.
    pub lines: Option<u32>,
    /// Value of the `regionanchor` setting: two comma separated percentages.
    pub region_anchor: Option<(f32, f32)>,
    /// Value of the `viewportanchor` setting: two comma separated percentages.
    pub viewport_anchor: Option<(f32, f32)>,
    /// `true` when the block contains `scroll:up`.
    pub scroll: bool,
}
// `Eq` cannot be derived because of the `f32` fields, so it is asserted by hand (the `VTT`
// struct in this file derives `Eq` and contains regions).
// NOTE(review): a NaN percentage would make a region compare unequal to itself.
impl Eq for VTTRegion {}
56
/// In-memory representation of a WebVTT subtitle file.
#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
pub struct VTT {
    /// All `REGION` blocks, in file order.
    pub regions: Vec<VTTRegion>,
    /// All `STYLE` blocks, in file order.
    pub styles: Vec<VTTStyle>,
    /// All cues, in file order.
    pub lines: Vec<VTTLine>,
}
64
65impl VTT {
66 pub fn parse<S: AsRef<str>>(content: S) -> Result<VTT, VTTError> {
68 let mut line_num = 0;
69
70 let mut regions = vec![];
71 let mut styles = vec![];
72 let mut lines = vec![];
73
74 let mut blocks = vec![vec![]];
75 for line in content.as_ref().lines() {
76 if line.trim().is_empty() {
77 if !blocks.last().unwrap().is_empty() {
78 blocks.push(vec![])
79 }
80 } else {
81 blocks.last_mut().unwrap().push(line)
82 }
83 }
84 if blocks.last().is_some_and(|b| b.is_empty()) {
85 blocks.remove(blocks.len() - 1);
86 }
87
88 parse::parse_start(blocks.remove(0).into_iter())
89 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?;
90
91 line_num += 1;
92 for mut block in blocks {
93 line_num += 1;
94
95 let block_len = block.len();
96 let (first_word, _) = block[0].split_once(' ').unwrap_or((block[0], ""));
97
98 match first_word {
99 "NOTE" => (),
102 "REGION" => {
103 block.remove(0);
104 line_num += 1;
105 regions.push(
106 parse::parse_region_block(block.into_iter())
107 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?,
108 )
109 }
110 "STYLE" => {
111 block[0] = &block[0][5..];
112 styles.push(
113 parse::parse_style_block(block.join("\n").trim())
114 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?,
115 );
116 }
117 _ => lines.push(
118 parse::parse_cue_block(block.into_iter())
119 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?,
120 ),
121 }
122
123 line_num += block_len
124 }
125
126 Ok(VTT {
127 regions,
128 styles,
129 lines,
130 })
131 }
132
133 pub fn to_ssa(&self) -> SSA {
135 let speaker_regex: Regex = Regex::new(r"(?m)^<v.*?\s(?P<speaker>.*?)>").unwrap();
136 let xml_replace_regex: Regex = Regex::new(r"(?m)<.*?>").unwrap();
137
138 let mut default_style = SSAStyle {
139 name: "Default".to_string(),
140 fontname: "Arial".to_string(),
141 fontsize: 20.0,
142 primary_color: None,
143 secondary_color: None,
144 outline_color: None,
145 back_color: None,
146 alignment: Alignment::BottomCenter,
147 ..Default::default()
148 };
149 for style in &self.styles {
150 if style.selector.is_some() {
152 continue;
153 }
154 if let Some(color) = style.entries.get("color") {
156 if let Ok(primary_color) = Color::from_vtt(color) {
157 default_style.primary_color = Some(primary_color)
158 }
159 }
160 if let Some(background_color) = style.entries.get("background-color") {
162 if let Ok(back_color) = Color::from_vtt(background_color) {
163 default_style.back_color = Some(back_color)
164 }
165 }
166 if let Some(font_size) = style.entries.get("font-size") {
169 let font_size = font_size.trim_end_matches("px");
170 if let Ok(font_size) = font_size.parse() {
171 default_style.fontsize = font_size
172 }
173 }
174 if style
176 .entries
177 .get("font-style")
178 .is_some_and(|fs| fs == "italic")
179 {
180 default_style.italic = true;
181 }
182 if style
184 .entries
185 .get("font-weight")
186 .is_some_and(|fw| fw.starts_with("bold"))
187 {
188 default_style.bold = true;
189 }
190 if let Some(text_decoration) = style.entries.get("text-decoration") {
192 if text_decoration.contains("underline") {
193 default_style.underline = true
194 }
195 if text_decoration.contains("line-through") {
196 default_style.strikeout = true
197 }
198 }
199 if let Some(letter_spacing) = style.entries.get("letter-spacing") {
202 let letter_spacing = letter_spacing.trim_end_matches("px");
203 if let Ok(letter_spacing) = letter_spacing.parse() {
204 default_style.spacing = letter_spacing
205 }
206 }
207 }
208
209 let mut events = vec![];
210 for line in &self.lines {
211 let mut captures = speaker_regex.captures_iter(&line.text);
212 let first_capture = captures.next();
213 let second_capture = captures.next();
214
215 let (mut text, speaker) = if first_capture.is_some() && second_capture.is_some() {
216 (speaker_regex.replace_all(&line.text, "").to_string(), None)
217 } else if let Some(c) = first_capture {
218 let text = line.text[c.get(0).unwrap().end()..].to_string();
219 let speaker = c.name("speaker").unwrap().as_str().to_string();
220 (text, Some(speaker))
221 } else {
222 (line.text.clone(), None)
223 };
224
225 text = text
226 .replace("<b>", "{\\b1}")
227 .replace("</b>", "{\\b0}")
228 .replace("<i>", "{\\i1}")
229 .replace("</i>", "{\\i0}")
230 .replace("<s>", "{\\s1}")
231 .replace("</s>", "{\\s0}")
232 .replace("<u>", "{\\u1}")
233 .replace("</u>", "{\\u0}");
234 text = xml_replace_regex.replace_all(&text, "").to_string();
235
236 events.push(SSAEvent {
237 start: line.start,
238 end: line.end,
239 style: "Default".to_string(),
240 name: speaker.unwrap_or_default(),
241 text: text.replace("\r\n", "\\N").replace('\n', "\\N"),
242 ..Default::default()
243 })
244 }
245
246 SSA {
247 info: SSAInfo {
248 ..Default::default()
249 },
250 styles: vec![default_style],
251 events,
252 fonts: vec![],
253 graphics: vec![],
254 }
255 }
256 pub fn to_srt(&self) -> SRT {
258 let speaker_regex: Regex = Regex::new(r"(?m)^<v.*?>").unwrap();
259
260 let mut lines = vec![];
261
262 for (i, line) in self.lines.iter().enumerate() {
263 let text = speaker_regex
264 .replace_all(line.text.as_str(), "")
265 .to_string();
266
267 lines.push(SRTLine {
268 sequence_number: i as u32 + 1,
269 start: line.start,
270 end: line.end,
271 text: text.replace('\n', "\r\n"),
272 })
273 }
274
275 SRT { lines }
276 }
277}
278
279impl Display for VTT {
280 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
281 let mut blocks = vec![];
282
283 blocks.push(vec!["WEBVTT".to_string()]);
284
285 for style in &self.styles {
286 let mut block = vec![];
287 block.push("STYLE".to_string());
288 if let Some(selector) = &style.selector {
289 block.push(format!("::cue({selector}) {{"))
290 } else {
291 block.push("::cue {".to_string())
292 }
293 for (id, value) in &style.entries {
294 block.push(format!("{id}: {value}"))
295 }
296 block.push("}".to_string());
297
298 blocks.push(block)
299 }
300
301 for line in &self.lines {
302 let mut block = vec![];
303 if let Some(identifier) = &line.identifier {
304 block.push(identifier.clone())
305 }
306
307 if !line.settings.is_empty() {
308 block.push(format!(
309 "{} --> {} {}",
310 line.start.format(parse::TIME_FORMAT).unwrap(),
311 line.end.format(parse::TIME_FORMAT).unwrap(),
312 line.settings
313 .iter()
314 .map(|(k, v)| v.as_ref().map_or(k.clone(), |v| format!("{k}: {v}")))
315 .collect::<Vec<String>>()
316 .join(" ")
317 ))
318 } else {
319 block.push(format!(
320 "{} --> {}",
321 line.start.format(parse::TIME_FORMAT).unwrap(),
322 line.end.format(parse::TIME_FORMAT).unwrap()
323 ))
324 }
325 block.push(line.text.clone());
326
327 blocks.push(block)
328 }
329
330 write!(
331 f,
332 "{}",
333 blocks
334 .into_iter()
335 .map(|b| b.join("\n"))
336 .collect::<Vec<String>>()
337 .join("\n\n")
338 )
339 }
340}
341
342mod parse {
343 use super::*;
344 use time::format_description::BorrowedFormatItem;
345 use time::macros::format_description;
346
/// Error produced by the block parsers of this module.
pub(super) struct Error {
    /// 1-based line number, relative to the lines handed to the parser that failed.
    pub(super) line: usize,
    /// What went wrong.
    pub(super) kind: VTTErrorKind,
}
351
/// Layout of a full timestamp (`hours:minutes:seconds.milliseconds`). It is used to parse
/// cue timings in this module and to format them when a `VTT` is displayed.
pub(super) const TIME_FORMAT: &[BorrowedFormatItem] =
    format_description!("[hour]:[minute]:[second].[subsecond digits:3]");
354
/// Result alias of this module; the error type is always the module-local [`Error`].
type Result<T> = std::result::Result<T, Error>;
356
357 pub(super) fn parse_start<'a, I: Iterator<Item = &'a str>>(mut block_lines: I) -> Result<()> {
358 let line = block_lines.next().unwrap();
359 if !line.starts_with("WEBVTT") {
360 return Err(Error {
361 line: 1,
362 kind: VTTErrorKind::InvalidFormat,
363 });
364 }
365 Ok(())
366 }
367 pub(super) fn parse_region_block<'a, I: Iterator<Item = &'a str>>(
368 block_lines: I,
369 ) -> Result<VTTRegion> {
370 let mut region = VTTRegion {
371 id: None,
372 width: None,
373 lines: None,
374 region_anchor: None,
375 viewport_anchor: None,
376 scroll: false,
377 };
378
379 for (i, line) in block_lines.enumerate() {
380 let (name, value) = line.split_once(':').ok_or(Error {
381 line: 1 + i,
382 kind: VTTErrorKind::Parse("delimiter ':' missing".to_string()),
383 })?;
384
385 match name {
386 "id" => region.id = Some(value.to_string()),
387 "width" => {
388 region.width = Some(parse_percentage(value).ok_or(Error {
389 line: 1 + i,
390 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
391 })?)
392 }
393 "lines" => {
394 region.lines = Some(value.parse::<u32>().map_err(|e| Error {
395 line: 1 + i,
396 kind: VTTErrorKind::Parse(e.to_string()),
397 })?)
398 }
399 "regionanchor" => {
400 let Some((a, b)) = value.split_once(',') else {
401 return Err(Error {
402 line: 1 + i,
403 kind: VTTErrorKind::Parse("delimiter ',' missing".to_string()),
404 });
405 };
406 region.region_anchor = Some((
407 parse_percentage(a).ok_or(Error {
408 line: 1 + i,
409 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
410 })?,
411 parse_percentage(b).ok_or(Error {
412 line: 1 + i,
413 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
414 })?,
415 ))
416 }
417 "viewportanchor" => {
418 let Some((a, b)) = value.split_once(',') else {
419 return Err(Error {
420 line: 1 + i,
421 kind: VTTErrorKind::Parse("delimiter ',' missing".to_string()),
422 });
423 };
424 region.viewport_anchor = Some((
425 parse_percentage(a).ok_or(Error {
426 line: 1 + i,
427 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
428 })?,
429 parse_percentage(b).ok_or(Error {
430 line: 1 + i,
431 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
432 })?,
433 ))
434 }
435 "scroll" => {
436 region.scroll = if value == "up" {
437 true
438 } else {
439 return Err(Error {
440 line: 1 + i,
441 kind: VTTErrorKind::Parse("only allowed value is 'up'".to_string()),
442 });
443 }
444 }
445 _ => continue,
446 }
447 }
448
449 Ok(region)
450 }
451 pub(super) fn parse_style_block(block: &str) -> Result<VTTStyle> {
452 let mut selector = None;
453 let mut entries = HashMap::new();
454
455 let Some(mut block) = block.strip_prefix("::cue") else {
457 return Err(Error {
458 line: 1,
459 kind: VTTErrorKind::Parse("missing '::cue' prefix".to_string()),
460 });
461 };
462
463 if block.ends_with('}') {
465 block = &block[..block.len() - 1]
466 } else {
467 return Err(Error {
468 line: block.split('\n').count(),
469 kind: VTTErrorKind::Parse("missing '}' suffix".to_string()),
470 });
471 }
472
473 block = block.trim_start();
475 if block.starts_with('(') {
476 let Some(closing_idx) = block.find(|c| c == ')') else {
477 return Err(Error {
478 line: 1,
479 kind: VTTErrorKind::Parse("selector isn't closed".to_string()),
480 });
481 };
482 selector = Some(block[1..closing_idx].to_string());
483 block = &block[closing_idx + 1..]
484 }
485
486 let Some(mut block) = block.trim_start().strip_prefix('{') else {
488 return Err(Error {
489 line: 1,
490 kind: VTTErrorKind::Parse("missing '{'".to_string()),
491 });
492 };
493
494 let mut line_num = 0;
495 if block.starts_with('\n') {
497 line_num += 1;
498 block = &block[1..];
499 }
500
501 for line in block.split('\n') {
502 line_num += 1;
503
504 for item in line.split(';') {
505 if item.is_empty() {
506 continue;
507 }
508
509 let Some((name, value)) = item.split_once(':') else {
510 return Err(Error {
511 line: 1 + line_num,
512 kind: VTTErrorKind::Parse("delimiter ':' missing".to_string()),
513 });
514 };
515 entries.insert(name.trim().to_string(), value.trim().to_string());
516 }
517 }
518
519 Ok(VTTStyle { selector, entries })
520 }
521 pub(super) fn parse_cue_block<'a, I: Iterator<Item = &'a str>>(
522 mut block_lines: I,
523 ) -> Result<VTTLine> {
524 let mut identifier = None;
525 let mut settings = HashMap::new();
526
527 let mut timing_line = block_lines.next().unwrap();
530 if !timing_line.contains("-->") {
532 identifier = Some(timing_line.to_string());
533 timing_line = block_lines.next().ok_or(Error {
534 line: 2,
535 kind: VTTErrorKind::Parse("missing subtitle timing".to_string()),
536 })?;
537 }
538
539 let (start_str, mut end_str) = timing_line.split_once("-->").ok_or(Error {
542 line: 1 + identifier.is_some() as usize,
543 kind: VTTErrorKind::Parse("missing '-->'".to_string()),
544 })?;
545 let start = if start_str.chars().filter(|c| *c == ':').count() < 2 {
549 let start_str = format!("00:{}", start_str.trim());
550 Time::parse(&start_str, TIME_FORMAT).map_err(|e| Error {
551 line: 1 + identifier.is_some() as usize,
552 kind: VTTErrorKind::Parse(e.to_string()),
553 })?
554 } else {
555 Time::parse(start_str.trim(), TIME_FORMAT).map_err(|e| Error {
556 line: 1 + identifier.is_some() as usize,
557 kind: VTTErrorKind::Parse(e.to_string()),
558 })?
559 };
560 if end_str.trim().contains(' ') {
563 let settings_str;
564 (end_str, settings_str) = end_str.trim().split_once(' ').unwrap();
565
566 for setting in settings_str.split(' ') {
567 if let Some((id, value)) = setting.split_once(':') {
568 settings.insert(id.to_string(), Some(value.to_string()));
569 } else {
570 settings.insert(setting.to_string(), None);
571 }
572 }
573 }
574 let end = if end_str.chars().filter(|c| *c == ':').count() < 2 {
578 let end_str = format!("00:{}", end_str.trim());
579 Time::parse(&end_str, TIME_FORMAT).map_err(|e| Error {
580 line: 1 + identifier.is_some() as usize,
581 kind: VTTErrorKind::Parse(e.to_string()),
582 })?
583 } else {
584 Time::parse(end_str.trim(), TIME_FORMAT).map_err(|e| Error {
585 line: 1 + identifier.is_some() as usize,
586 kind: VTTErrorKind::Parse(e.to_string()),
587 })?
588 };
589
590 Ok(VTTLine {
591 identifier,
592 start,
593 end,
594 settings,
595 text: block_lines.collect::<Vec<&str>>().join("\n"),
596 })
597 }
598
/// Parses a percentage such as `40%` or `12.5%` and returns the number in front of the
/// `%` sign. Yields `None` when the sign is missing or the rest is not a valid `f32`.
fn parse_percentage(s: &str) -> Option<f32> {
    let number = s.strip_suffix('%')?;
    number.parse().ok()
}
605}
606
// Declares the error type of this file and its kind enum through the crate's `error!` macro.
// Elsewhere in this file the error is created with `VTTError::new(kind, line)`.
// NOTE(review): the exact items that are generated depend on the macro definition, which is
// not part of this file - confirm there.
error! {
    VTTError => VTTErrorKind {
        // The content does not start with the `WEBVTT` signature.
        InvalidFormat,
        // A block could not be parsed; the string holds the reason.
        Parse(String),
    }
}