Module vtt_to_srt
Convert of vtt to srt format
Expand source code
#!/usr/bin/python
# Jansen A. Simanullang / Jeison Cardoso
"""Convert of vtt to srt format"""
import os
import re
import argparse
from string import Template
from stat import S_ISDIR, ST_MODE, S_ISREG
class VttToStr:
"""Convert vtt to srt"""
def __init__(self) -> None:
pass
def convert_header(self, contents):
"""Convert of vtt header to srt format
:contents -- contents of vtt file
"""
replacement = re.sub(r"WEBVTT\n", "", contents)
replacement = re.sub(r"Kind:[ \-\w]+\n", "", replacement)
replacement = re.sub(r"Language:[ \-\w]+\n", "", replacement)
return replacement
def add_padding_to_timestamp(self, contents):
"""Add 00 to padding timestamp of to srt format
:contents -- contents of vtt file
"""
find_srt = Template(r'$a,$b --> $a,$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n')
minute = r"((?:\d\d:){1}\d\d)"
second = r"((?:\d\d:){0}\d\d)"
padding_minute = find_srt.substitute(a=minute, b=r"(\d{0,3})")
padding_second = find_srt.substitute(a=second, b=r"(\d{0,3})")
replacement = re.sub(
padding_minute, r"00:\1,\2 --> 00:\3,\4\n", contents)
return re.sub(padding_second, r"00:00:\1,\2 --> 00:00:\3,\4\n", replacement)
def convert_timestamp(self, contents):
"""Convert timestamp of vtt file to srt format
:contents -- contents of vtt file
"""
find_vtt = Template(r'$a.$b --> $a.$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n')
all_timestamp = find_vtt.substitute(
a=r"((?:\d\d:){0,2}\d\d)", b=r"(\d{0,3})")
return self.add_padding_to_timestamp(re.sub(all_timestamp, r"\1,\2 --> \3,\4\n", contents))
def convert_content(self, contents):
"""Convert content of vtt file to srt format
:contents -- contents of vtt file
"""
replacement = self.convert_timestamp(contents)
replacement = self.convert_header(replacement)
replacement = re.sub(r"<c[.\w\d]*>", "", replacement)
replacement = re.sub(r"</c>", "", replacement)
replacement = re.sub(r"<\d\d:\d\d:\d\d.\d\d\d>", "", replacement)
replacement = re.sub(
r"::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n", "", replacement)
replacement = re.sub(r"Style:\n##\n", "", replacement)
replacement = self.add_sequence_numbers(replacement)
return replacement
def has_timestamp(self, content):
"""Check if line is a timestamp srt format
:contents -- contents of vtt file
"""
return re.match(r"((\d\d:){2}\d\d),(\d{3}) --> ((\d\d:){2}\d\d),(\d{3})", content) is not None
def add_sequence_numbers(self, contents):
"""Adds sequence numbers to subtitle contents and returns new subtitle contents
:contents -- contents of vtt file
"""
output = ''
lines = contents.split('\n')
i = 1
for line in lines:
if self.has_timestamp(line):
output += str(i) + '\n'
i += 1
output += line + '\n'
return output
def write_file(self, filename: str, data, encoding_format: str = "utf-8"):
"""Create a file with some data
:filename -- filename pat
:data -- data to write
:encoding_format -- encoding format
"""
try:
with open(filename, "w", encoding=encoding_format) as file:
file.writelines(str(data))
except IOError:
filename = filename.split(os.sep)[-1]
with open(filename, "w", encoding=encoding_format) as file:
file.writelines(str(data))
print(f"file created {filename}\n")
def read_file(self, filename: str, encoding_format: str = "utf-8"):
"""Read a file text
:filename -- filename path
:encoding_format -- encoding format
"""
content: str = ''
with open(filename, mode="r", encoding=encoding_format) as file:
print(f"file being read: {filename}\n")
content = file.read()
return content
def process(self, filename: str, encoding_format: str = "utf-8"):
"""Convert vtt file to a srt file
:str_name_file -- filename path
:encoding_format -- encoding format
"""
file_contents: str = self.read_file(filename, encoding_format)
str_data: str = ""
str_data = str_data + self.convert_content(file_contents)
filename = filename.replace(".vtt", ".srt")
self.write_file(filename, str_data, encoding_format)
class ConvertFile:
"""Convert vtt file to srt file"""
def __init__(self, pathname: str, encoding_format: str):
"""Constructor
:pathname -- path to file or directory
:encoding_format -- encoding format
"""
self.pathname = pathname
self.encoding_format = encoding_format
self.vtt_to_str = VttToStr()
def convert(self):
"""Convert vtt file to srt file"""
if ".vtt" in self.pathname:
self.vtt_to_str.process(self.pathname, self.encoding_format)
class ConvertDirectories:
"""Convert vtt files to srt files"""
def __init__(self, pathname: str, enable_recursive: bool, encoding_format: str):
"""Constructor
pathname -- path to file or directory
:enable_recursive -- enable recursive
:encoding_format -- encoding format
"""
self.pathname = pathname
self.enable_recursive = enable_recursive
self.encoding_format = encoding_format
self.vtt_to_str = VttToStr()
def _walk_dir(self, top_most_path: str, callback):
"""Walk a directory
:top_most_path -- parent directory
:callback -- function to call
"""
for file in os.listdir(top_most_path):
pathname = os.path.join(top_most_path, file)
if not os.path.isdir(pathname):
# It"s a file, call the callback function
callback(pathname)
def _walk_tree(self, top_most_path, callback):
"""Recursively descend the directory tree rooted at top_most_path,
calling the callback function for each regular file
:top_most_path -- parent directory
:callback -- function to call
"""
for file in os.listdir(top_most_path):
pathname = os.path.join(top_most_path, file)
mode = os.stat(pathname)[ST_MODE]
if S_ISDIR(mode):
# It's a directory, recurse into it
self._walk_tree(pathname, callback)
elif S_ISREG(mode):
# It's a file, call the callback function
callback(pathname)
else:
# Unknown file type, print a message
print(f"Skipping {pathname}")
def convert_vtt_to_str(self, file: str):
"""Convert vtt file to string
:file -- file to convert
"""
if ".vtt" in file:
try:
self.vtt_to_str.process(file, self.encoding_format)
except UnicodeDecodeError:
print(f"UnicodeDecodeError: {file}")
def _vtt_to_srt_batch(self, directory: str):
"""Walk down directory searching for vtt files
:directory -- path to search
"""
top_most_path = directory
if self.enable_recursive:
self._walk_tree(top_most_path, self.convert_vtt_to_str)
else:
self._walk_dir(top_most_path, self.convert_vtt_to_str)
def convert(self):
"""Convert vtt files to srt files"""
self._vtt_to_srt_batch(self.pathname)
def _show_usage():
"""Show a info message about the usage"""
print("\nUsage:\tvtt_to_srt pathname [-r]\n")
print("\tpathname\t- a file or directory with files to be converted")
print("\t-r\t\t- walk path recursively\n")
def _parse_args():
"""Parse command line arguments"""
parser = argparse.ArgumentParser(
description='Convert vtt files to srt files')
parser.add_argument(
"pathname", help="a file or directory with files to be converted")
parser.add_argument("-r", "--recursive",
help="walk path recursively", action="store_true")
parser.add_argument("-e", "--encoding",
help="encoding format for input and output files")
args = parser.parse_args()
return args
def main():
"""main function"""
args = _parse_args()
pathname = args.pathname
recursive = args.recursive
encoding = args.encoding
if not encoding:
encoding = "utf-8"
if os.path.isfile(pathname):
print(f"file being converted: {pathname}\n")
ConvertFile(pathname, encoding).convert()
if os.path.isdir(pathname):
print(f"directory being converted: {pathname}\n")
ConvertDirectories(pathname, recursive, encoding).convert()
if not os.path.isfile(pathname) and not os.path.isdir(pathname):
print(f"pathname is not a file or directory: {pathname}\n")
_show_usage()
if __name__ == "__main__":
main()
Functions
def main()
-
main function
Expand source code
def main(): """main function""" args = _parse_args() pathname = args.pathname recursive = args.recursive encoding = args.encoding if not encoding: encoding = "utf-8" if os.path.isfile(pathname): print(f"file being converted: {pathname}\n") ConvertFile(pathname, encoding).convert() if os.path.isdir(pathname): print(f"directory being converted: {pathname}\n") ConvertDirectories(pathname, recursive, encoding).convert() if not os.path.isfile(pathname) and not os.path.isdir(pathname): print(f"pathname is not a file or directory: {pathname}\n") _show_usage()
Classes
class ConvertDirectories (pathname: str, enable_recursive: bool, encoding_format: str)
-
Convert vtt files to srt files
Constructor
pathname – path to file or directory :enable_recursive – enable recursive :encoding_format – encoding format
Expand source code
class ConvertDirectories: """Convert vtt files to srt files""" def __init__(self, pathname: str, enable_recursive: bool, encoding_format: str): """Constructor pathname -- path to file or directory :enable_recursive -- enable recursive :encoding_format -- encoding format """ self.pathname = pathname self.enable_recursive = enable_recursive self.encoding_format = encoding_format self.vtt_to_str = VttToStr() def _walk_dir(self, top_most_path: str, callback): """Walk a directory :top_most_path -- parent directory :callback -- function to call """ for file in os.listdir(top_most_path): pathname = os.path.join(top_most_path, file) if not os.path.isdir(pathname): # It"s a file, call the callback function callback(pathname) def _walk_tree(self, top_most_path, callback): """Recursively descend the directory tree rooted at top_most_path, calling the callback function for each regular file :top_most_path -- parent directory :callback -- function to call """ for file in os.listdir(top_most_path): pathname = os.path.join(top_most_path, file) mode = os.stat(pathname)[ST_MODE] if S_ISDIR(mode): # It's a directory, recurse into it self._walk_tree(pathname, callback) elif S_ISREG(mode): # It's a file, call the callback function callback(pathname) else: # Unknown file type, print a message print(f"Skipping {pathname}") def convert_vtt_to_str(self, file: str): """Convert vtt file to string :file -- file to convert """ if ".vtt" in file: try: self.vtt_to_str.process(file, self.encoding_format) except UnicodeDecodeError: print(f"UnicodeDecodeError: {file}") def _vtt_to_srt_batch(self, directory: str): """Walk down directory searching for vtt files :directory -- path to search """ top_most_path = directory if self.enable_recursive: self._walk_tree(top_most_path, self.convert_vtt_to_str) else: self._walk_dir(top_most_path, self.convert_vtt_to_str) def convert(self): """Convert vtt files to srt files""" self._vtt_to_srt_batch(self.pathname)
Methods
def convert(self)
-
Convert vtt files to srt files
Expand source code
def convert(self): """Convert vtt files to srt files""" self._vtt_to_srt_batch(self.pathname)
def convert_vtt_to_str(self, file: str)
-
Convert vtt file to string
:file – file to convert
Expand source code
def convert_vtt_to_str(self, file: str): """Convert vtt file to string :file -- file to convert """ if ".vtt" in file: try: self.vtt_to_str.process(file, self.encoding_format) except UnicodeDecodeError: print(f"UnicodeDecodeError: {file}")
class ConvertFile (pathname: str, encoding_format: str)
-
Convert vtt file to srt file
Constructor
:pathname – path to file or directory :encoding_format – encoding format
Expand source code
class ConvertFile: """Convert vtt file to srt file""" def __init__(self, pathname: str, encoding_format: str): """Constructor :pathname -- path to file or directory :encoding_format -- encoding format """ self.pathname = pathname self.encoding_format = encoding_format self.vtt_to_str = VttToStr() def convert(self): """Convert vtt file to srt file""" if ".vtt" in self.pathname: self.vtt_to_str.process(self.pathname, self.encoding_format)
Methods
def convert(self)
-
Convert vtt file to srt file
Expand source code
def convert(self): """Convert vtt file to srt file""" if ".vtt" in self.pathname: self.vtt_to_str.process(self.pathname, self.encoding_format)
class VttToStr
-
Convert vtt to srt
Expand source code
class VttToStr: """Convert vtt to srt""" def __init__(self) -> None: pass def convert_header(self, contents): """Convert of vtt header to srt format :contents -- contents of vtt file """ replacement = re.sub(r"WEBVTT\n", "", contents) replacement = re.sub(r"Kind:[ \-\w]+\n", "", replacement) replacement = re.sub(r"Language:[ \-\w]+\n", "", replacement) return replacement def add_padding_to_timestamp(self, contents): """Add 00 to padding timestamp of to srt format :contents -- contents of vtt file """ find_srt = Template(r'$a,$b --> $a,$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') minute = r"((?:\d\d:){1}\d\d)" second = r"((?:\d\d:){0}\d\d)" padding_minute = find_srt.substitute(a=minute, b=r"(\d{0,3})") padding_second = find_srt.substitute(a=second, b=r"(\d{0,3})") replacement = re.sub( padding_minute, r"00:\1,\2 --> 00:\3,\4\n", contents) return re.sub(padding_second, r"00:00:\1,\2 --> 00:00:\3,\4\n", replacement) def convert_timestamp(self, contents): """Convert timestamp of vtt file to srt format :contents -- contents of vtt file """ find_vtt = Template(r'$a.$b --> $a.$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') all_timestamp = find_vtt.substitute( a=r"((?:\d\d:){0,2}\d\d)", b=r"(\d{0,3})") return self.add_padding_to_timestamp(re.sub(all_timestamp, r"\1,\2 --> \3,\4\n", contents)) def convert_content(self, contents): """Convert content of vtt file to srt format :contents -- contents of vtt file """ replacement = self.convert_timestamp(contents) replacement = self.convert_header(replacement) replacement = re.sub(r"<c[.\w\d]*>", "", replacement) replacement = re.sub(r"</c>", "", replacement) replacement = re.sub(r"<\d\d:\d\d:\d\d.\d\d\d>", "", replacement) replacement = re.sub( r"::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n", "", replacement) replacement = re.sub(r"Style:\n##\n", "", replacement) replacement = self.add_sequence_numbers(replacement) return replacement def has_timestamp(self, content): """Check if line is a timestamp srt format :contents -- contents of vtt file """ return re.match(r"((\d\d:){2}\d\d),(\d{3}) --> ((\d\d:){2}\d\d),(\d{3})", content) is not None def add_sequence_numbers(self, contents): """Adds sequence numbers to subtitle contents and returns new subtitle contents :contents -- contents of vtt file """ output = '' lines = contents.split('\n') i = 1 for line in lines: if self.has_timestamp(line): output += str(i) + '\n' i += 1 output += line + '\n' return output def write_file(self, filename: str, data, encoding_format: str = "utf-8"): """Create a file with some data :filename -- filename pat :data -- data to write :encoding_format -- encoding format """ try: with open(filename, "w", encoding=encoding_format) as file: file.writelines(str(data)) except IOError: filename = filename.split(os.sep)[-1] with open(filename, "w", encoding=encoding_format) as file: file.writelines(str(data)) print(f"file created {filename}\n") def read_file(self, filename: str, encoding_format: str = "utf-8"): """Read a file text :filename -- filename path :encoding_format -- encoding format """ content: str = '' with open(filename, mode="r", encoding=encoding_format) as file: print(f"file being read: {filename}\n") content = file.read() return content def process(self, filename: str, encoding_format: str = "utf-8"): """Convert vtt file to a srt file :str_name_file -- filename path :encoding_format -- encoding format """ file_contents: str = self.read_file(filename, encoding_format) str_data: str = "" str_data = str_data + self.convert_content(file_contents) filename = filename.replace(".vtt", ".srt") self.write_file(filename, str_data, encoding_format)
Methods
def add_padding_to_timestamp(self, contents)
-
Add 00 to padding timestamp of to srt format
:contents – contents of vtt file
Expand source code
def add_padding_to_timestamp(self, contents): """Add 00 to padding timestamp of to srt format :contents -- contents of vtt file """ find_srt = Template(r'$a,$b --> $a,$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') minute = r"((?:\d\d:){1}\d\d)" second = r"((?:\d\d:){0}\d\d)" padding_minute = find_srt.substitute(a=minute, b=r"(\d{0,3})") padding_second = find_srt.substitute(a=second, b=r"(\d{0,3})") replacement = re.sub( padding_minute, r"00:\1,\2 --> 00:\3,\4\n", contents) return re.sub(padding_second, r"00:00:\1,\2 --> 00:00:\3,\4\n", replacement)
def add_sequence_numbers(self, contents)
-
Adds sequence numbers to subtitle contents and returns new subtitle contents
:contents – contents of vtt file
Expand source code
def add_sequence_numbers(self, contents): """Adds sequence numbers to subtitle contents and returns new subtitle contents :contents -- contents of vtt file """ output = '' lines = contents.split('\n') i = 1 for line in lines: if self.has_timestamp(line): output += str(i) + '\n' i += 1 output += line + '\n' return output
def convert_content(self, contents)
-
Convert content of vtt file to srt format
:contents – contents of vtt file
Expand source code
def convert_content(self, contents): """Convert content of vtt file to srt format :contents -- contents of vtt file """ replacement = self.convert_timestamp(contents) replacement = self.convert_header(replacement) replacement = re.sub(r"<c[.\w\d]*>", "", replacement) replacement = re.sub(r"</c>", "", replacement) replacement = re.sub(r"<\d\d:\d\d:\d\d.\d\d\d>", "", replacement) replacement = re.sub( r"::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n", "", replacement) replacement = re.sub(r"Style:\n##\n", "", replacement) replacement = self.add_sequence_numbers(replacement) return replacement
def convert_header(self, contents)
-
Convert of vtt header to srt format
:contents – contents of vtt file
Expand source code
def convert_header(self, contents): """Convert of vtt header to srt format :contents -- contents of vtt file """ replacement = re.sub(r"WEBVTT\n", "", contents) replacement = re.sub(r"Kind:[ \-\w]+\n", "", replacement) replacement = re.sub(r"Language:[ \-\w]+\n", "", replacement) return replacement
def convert_timestamp(self, contents)
-
Convert timestamp of vtt file to srt format
:contents – contents of vtt file
Expand source code
def convert_timestamp(self, contents): """Convert timestamp of vtt file to srt format :contents -- contents of vtt file """ find_vtt = Template(r'$a.$b --> $a.$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') all_timestamp = find_vtt.substitute( a=r"((?:\d\d:){0,2}\d\d)", b=r"(\d{0,3})") return self.add_padding_to_timestamp(re.sub(all_timestamp, r"\1,\2 --> \3,\4\n", contents))
def has_timestamp(self, content)
-
Check if line is a timestamp srt format
:contents – contents of vtt file
Expand source code
def has_timestamp(self, content): """Check if line is a timestamp srt format :contents -- contents of vtt file """ return re.match(r"((\d\d:){2}\d\d),(\d{3}) --> ((\d\d:){2}\d\d),(\d{3})", content) is not None
def process(self, filename: str, encoding_format: str = 'utf-8')
-
Convert vtt file to a srt file
:str_name_file – filename path :encoding_format – encoding format
Expand source code
def process(self, filename: str, encoding_format: str = "utf-8"): """Convert vtt file to a srt file :str_name_file -- filename path :encoding_format -- encoding format """ file_contents: str = self.read_file(filename, encoding_format) str_data: str = "" str_data = str_data + self.convert_content(file_contents) filename = filename.replace(".vtt", ".srt") self.write_file(filename, str_data, encoding_format)
def read_file(self, filename: str, encoding_format: str = 'utf-8')
-
Read a file text
:filename – filename path :encoding_format – encoding format
Expand source code
def read_file(self, filename: str, encoding_format: str = "utf-8"): """Read a file text :filename -- filename path :encoding_format -- encoding format """ content: str = '' with open(filename, mode="r", encoding=encoding_format) as file: print(f"file being read: {filename}\n") content = file.read() return content
def write_file(self, filename: str, data, encoding_format: str = 'utf-8')
-
Create a file with some data
:filename – filename pat :data – data to write :encoding_format – encoding format
Expand source code
def write_file(self, filename: str, data, encoding_format: str = "utf-8"): """Create a file with some data :filename -- filename pat :data -- data to write :encoding_format -- encoding format """ try: with open(filename, "w", encoding=encoding_format) as file: file.writelines(str(data)) except IOError: filename = filename.split(os.sep)[-1] with open(filename, "w", encoding=encoding_format) as file: file.writelines(str(data)) print(f"file created {filename}\n")