本文整理汇总了 Python 中 pysrt.SubRipFile 类的典型用法代码示例。如果您正苦于以下问题：Python SubRipFile 类的具体用法？Python SubRipFile 怎么用？Python SubRipFile 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 SubRipFile 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: to_srt
def to_srt(df, filename):
    """Write the rows of ``df`` to ``filename`` as a UTF-8 SubRip file.

    Every row is read through ``df.iterrows()`` and must provide ``begin``,
    ``end`` and ``text`` entries.  Both timestamps are passed through
    ``convert_time`` before being handed to ``SubRipItem``.

    Cues are numbered sequentially from 1 in row order, instead of all
    sharing index 0, so the saved file carries valid SRT counters.
    """
    out = SubRipFile(encoding='utf-8')
    # The DataFrame index label is ignored: it need not be an integer, so
    # the cue number comes from enumerate() instead.
    for position, (_, row) in enumerate(df.iterrows(), 1):
        begin = convert_time(row['begin'])
        end = convert_time(row['end'])
        out.append(SubRipItem(position, begin, end, row['text']))
    out.save(filename)
示例2: test_windows1252
def test_windows1252(self):
    """Parse the windows-1252 fixture from a string and check size and EOL.

    Also checks that opening the UTF-8 fixture with ``encoding='ascii'``
    raises ``UnicodeDecodeError``.
    """
    # Read the fixture inside a context manager so the handle is closed
    # deterministically instead of being leaked.
    with codecs.open(self.windows_path, encoding='windows-1252') as fixture:
        srt_string = fixture.read()
    srt_file = SubRipFile.from_string(srt_string, encoding='windows-1252', eol='\r\n')
    # assertEqual is the supported spelling; assertEquals is a deprecated alias.
    self.assertEqual(len(srt_file), 1332)
    self.assertEqual(srt_file.eol, '\r\n')
    self.assertRaises(UnicodeDecodeError, SubRipFile.open,
                      self.utf8_path, encoding='ascii')
示例3: convert
def convert(content, input_format, output_format):
    """
    Translate transcript `content` from `input_format` to `output_format`.

    `input_format` must be 'srt' or 'sjson' and `output_format` must be
    'txt', 'srt' or 'sjson'; both are enforced with `assert`.
    When the two formats are equal, `content` is returned untouched.
    The srt -> sjson conversion is not implemented and raises
    NotImplementedError.
    """
    assert input_format in ('srt', 'sjson')
    assert output_format in ('txt', 'srt', 'sjson')

    # Nothing to convert.
    if input_format == output_format:
        return content

    if input_format == 'srt':
        if output_format == 'sjson':
            raise NotImplementedError
        if output_format == 'txt':
            subtitles = SubRipFile.from_string(content.decode('utf8'))
            return HTMLParser().unescape(subtitles.text)

    if input_format == 'sjson':
        if output_format == 'srt':
            return generate_srt_from_sjson(json.loads(content), speed=1.0)
        if output_format == 'txt':
            lines = json.loads(content)['text']
            return HTMLParser().unescape("\n".join(lines))
示例4: mostrarSubtitulos
def mostrarSubtitulos(self, escena, ruta):
# Show the subtitle whose time window contains the current pygame tick count.
# Parameters: `escena` is stored on self.escena and its draw() is called before
# a subtitle is printed; `ruta` is the path passed to SubRipFile.open.
# State read: self.ok, self.tmp (index of the current subtitle), self.offset,
# self.imprimir, self.entrar.  State written: self.escena, self.tmp, self.ok,
# self.tiempoActual, self.imprimir, self.entrar.
# NOTE(review): the original indentation was lost in this listing, so the exact
# nesting of the statements below cannot be confirmed from here.
# NOTE(review): the subtitle file is opened and parsed inside this function,
# i.e. on each call that reaches the SubRipFile.open line.
if (self.ok==1):
self.escena= escena
#subs = SubRipFile.open(ruta, encoding='iso-8859-1')
subs = SubRipFile.open(ruta, encoding='UTF-8') # With this encoding the accented characters display correctly
#print("Hay" ,subs.__len__()," subtitulos")
#print "SEGUNDOS=", cant_segs
if (self.tmp== subs.__len__()): # when the end of the subtitles is reached
#self.tmp= subs.__len__()-1
self.tmp= 0
self.ok= 0
#print("entro en tiempo " ,self.tiempoActual)
self.tiempoActual= 0
linea= subs[self.tmp]
# Start/end of the current subtitle in milliseconds.
# NOTE(review): only minutes, seconds and milliseconds are summed; the hours
# component of linea.start / linea.end is not included, so timings past one
# hour look wrong — confirm whether that is intended.
tics_ini = (linea.start.minutes*60*1000)+(linea.start.seconds*1000)+linea.start.milliseconds
tics_fin = (linea.end.minutes*60*1000)+(linea.end.seconds*1000)+linea.end.milliseconds
# Compare against elapsed pygame ticks shifted by self.offset.
if ((tics_ini<=(pygame.time.get_ticks()-self.offset)) and ((pygame.time.get_ticks()-self.offset)<=tics_fin)):
if (self.imprimir==1):
self.escena.draw() # redraw the scene
self.printTexto(linea.text) # print the message
self.imprimir= 0
self.tmp= self.tmp+1
self.entrar= 1
else:
# Presumably the "outside the time window" branch: prints an empty string
# once (guarded by self.entrar) and re-arms self.imprimir — verify nesting.
if (self.entrar==1):
self.printTexto("")
self.imprimir= 1
self.entrar=0
示例5: add_videos_to_index
def add_videos_to_index(subtitle_index, output_file, index):
    """Index the words of every subtitle file listed in ``subtitle_index``.

    ``subtitle_index`` is read as CSV.  For each row, ``row[1] + '.en.srt'``
    is the subtitle file and ``row[2]`` is the YouTube video id.  A summary
    row (title, filename, id, views, type, url, ASCII-only text) is written
    to ``output_file``.  Every non-stopword token of the subtitles, with
    surrounding punctuation stripped and lower-cased, is registered through
    ``add_to_index(index, word, url)``.

    Processing is best effort: a row that fails for any reason is skipped.

    Returns:
        The ``index`` object that was passed in.
    """
    # ASCII punctuation stripped from both ends of each word.  The "?@" pair
    # had been mangled into an e-mail placeholder, which made the strip calls
    # eat ordinary letters as well.
    punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    stopwords_path = '/Users/connormendenhall/Python/DaveDaveFind/DaveDaveFind/data/stopwords.csv'
    # Loaded lazily inside the per-row try block so that a missing stopword
    # file still only skips rows, but the file is read once, not per row.
    stopwords = None

    with open(subtitle_index, 'rb') as index_file, \
            open(output_file, 'wt') as info_file:
        vindexReader = csv.reader(index_file)
        vinfoWriter = csv.writer(info_file)
        vinfoWriter.writerow(['title', 'filename', 'id', 'views', 'type', 'url', 'text'])
        for row in vindexReader:
            try:
                filename = row[1] + '.en.srt'
                url = 'http://www.youtube.com/watch?v=' + row[2]
                with open(filename) as srt_file:
                    text_ascii = removeNonAscii(srt_file.read())
                subtitles = SubRipFile.open(filename)
                vinfoWriter.writerow([row[0], row[1], row[2], row[3], row[4], url, text_ascii])
                if stopwords is None:
                    loaded = ['']
                    with open(stopwords_path, 'rb') as f:
                        for stopword in csv.reader(f):
                            loaded.append(stopword[0])
                    stopwords = set(loaded)
                for sentence in subtitles:
                    for word in sentence.text.split():
                        word = word.strip(punctuation).lower()
                        if word not in stopwords:
                            add_to_index(index, word, url)
            except Exception:
                # Deliberate best effort: skip rows whose subtitle file is
                # missing or unparsable, but let KeyboardInterrupt and
                # SystemExit propagate.
                continue
    print("[add_videos_to_index()] Videos added.")
    return index
示例6: convert
def convert(content, input_format, output_format):
    """
    Translate transcript `content` from `input_format` to `output_format`.

    `input_format` must be 'srt' or 'sjson' and `output_format` must be
    'txt', 'srt' or 'sjson'; both are enforced with `assert`.
    When the two formats are equal, `content` is returned untouched.

    Raises:
        TranscriptsGenerationException: when pysrt fails to parse the srt
            content during the srt -> sjson conversion.
    """
    assert input_format in ('srt', 'sjson')
    assert output_format in ('txt', 'srt', 'sjson')

    # Nothing to convert.
    if input_format == output_format:
        return content

    if input_format == 'srt':
        if output_format == 'txt':
            plain = SubRipFile.from_string(content.decode('utf8')).text
            return HTMLParser().unescape(plain)

        if output_format == 'sjson':
            try:
                # ERROR_RAISE makes pysrt raise on malformed input rather
                # than skipping it, so parsing problems surface here.
                # 'utf-8-sig' drops a leading byte order mark, if any.
                parsed_srt = SubRipFile.from_string(
                    content.decode('utf-8-sig'),
                    error_handling=SubRipFile.ERROR_RAISE
                )
            except Error as ex:  # Base exception from pysrt
                raise TranscriptsGenerationException(text_type(ex))
            sjson = generate_sjson_from_srt(parsed_srt)
            return json.dumps(sjson)

    if input_format == 'sjson':
        if output_format == 'txt':
            raw_lines = json.loads(content)['text']
            # Falsy entries (e.g. None) become empty strings so join() works.
            cleaned = [entry if entry else '' for entry in raw_lines]
            return HTMLParser().unescape("\n".join(cleaned))

        if output_format == 'srt':
            return generate_srt_from_sjson(json.loads(content), speed=1.0)
示例7: input_file
def input_file(self):
    """Return the parsed subtitle file, loading and caching it on first use.

    On the first call the raw bytes of ``self.arguments.file`` are passed to
    ``detect`` and the reported encoding is run through
    ``self.normalize_encoding``.  The file is then opened with
    ``SubRipFile.open`` using ``ERROR_LOG`` error handling, and the result is
    stored on ``self._source_file`` for later calls.
    """
    if hasattr(self, '_source_file'):
        return self._source_file

    source_path = self.arguments.file
    with open(source_path, 'rb') as handle:
        raw_bytes = handle.read()
    guessed = detect(raw_bytes).get('encoding')
    normalized = self.normalize_encoding(guessed)
    self._source_file = SubRipFile.open(
        self.arguments.file,
        encoding=normalized,
        error_handling=SubRipFile.ERROR_LOG,
    )
    return self._source_file
示例8: save
def save(self, path):
    """Write the elements in ``self._elements`` to ``path``.

    If ``path`` ends with ``srt`` the elements are written as a SubRip file
    through pysrt, which is verified and imported on demand.  Cues are
    numbered sequentially from 1, instead of all sharing index 0, so the file
    carries valid SRT counters.  Any other path receives tab-separated text
    with the header ``onset``, ``text``, ``duration`` and one line per
    element.

    Each element must expose ``onset``, ``duration`` and ``text``.
    """
    if path.endswith('srt'):
        verify_dependencies(['pysrt'])
        from pysrt import SubRipFile, SubRipItem
        from datetime import time

        out = SubRipFile()
        for position, elem in enumerate(self._elements, 1):
            # _to_tup presumably converts a time offset into the positional
            # arguments of datetime.time — verify against its definition.
            start = time(*self._to_tup(elem.onset))
            end = time(*self._to_tup(elem.onset + elem.duration))
            out.append(SubRipItem(position, start, end, elem.text))
        out.save(path)
    else:
        with open(path, 'w') as f:
            f.write('onset\ttext\tduration\n')
            for elem in self._elements:
                f.write('{}\t{}\t{}\n'.format(elem.onset,
                                              elem.text,
                                              elem.duration))
示例9: test_eol_conversion
def test_eol_conversion(self):
    """Check that saving with ``eol="\\n"`` rewrites CRLF line endings.

    The Windows fixture must report ``\\r\\n`` newlines.  After it is loaded
    with pysrt and saved to ``self.temp_path`` with ``eol="\\n"``, the
    written file must report ``\\n`` newlines.
    """
    # Context managers close both handles even when an assertion fails.
    # NOTE(review): the "rU" mode is kept as in the original; it is
    # deprecated on Python 3, where universal newlines are the default.
    with open(self.windows_path, "rU") as input_file:
        input_file.read()
        self.assertEqual(input_file.newlines, "\r\n")

    srt_file = SubRipFile.open(self.windows_path, encoding="windows-1252")
    srt_file.save(self.temp_path, eol="\n")

    with open(self.temp_path, "rU") as output_file:
        output_file.read()
        self.assertEqual(output_file.newlines, "\n")
def merge_subtitle(sub_a, sub_b, delta, encoding='utf-8'):
    """
    Merge two SRT subtitle tracks written in different languages.

    The two tracks have different timelines, so the merged track starts a
    new cue whenever either source track changes.  As a result the two
    languages do not always switch at the same moment; this cannot be
    avoided.
    See https://github.com/byroot/pysrt/issues/17
        https://github.com/byroot/pysrt/issues/15

    :param sub_a: first track, e.g. obtained with
        SubRipFile.open(sub_a_path, encoding=encoding)
    :param sub_b: second track
    :param delta: intervals that are not longer than this are skipped
    :param encoding: not used by this function
    :return: a SubRipFile holding the merged cues, after clean_indexes()
    """
    boundaries = sorted(
        [cue.start.ordinal for cue in sub_a]
        + [cue.end.ordinal for cue in sub_a]
        + [cue.start.ordinal for cue in sub_b]
        + [cue.end.ordinal for cue in sub_b]
    )
    merged = SubRipFile()
    cursor_a = cursor_b = 0
    # Walk consecutive boundary pairs; each pair is a candidate cue interval.
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(lower)
        end = SubRipTime.from_ordinal(upper)
        if not ((end - start) > delta):
            continue
        # find_subtitle returns the text plus the updated search cursor.
        text_a, cursor_a = find_subtitle(sub_a, start, end, cursor_a)
        text_b, cursor_b = find_subtitle(sub_b, start, end, cursor_b)
        text = join_lines(text_a, text_b)
        if len(text) > 0:
            merged.append(SubRipItem(0, start, end, text))
    merged.clean_indexes()
    return merged
示例11: get_captions
def get_captions(client_name, clip_id):
# Fetch caption JSON for `clip_id` from the host `client_name`, convert the
# text entries into an SRT file under ./data/granicus/srt/<client_name>/, and
# pass that file to push_to_s3.
# Returns None when the HTTP request raises httplib.BadStatusLine,
# (captions, s3_url) when the response status is '200', and ([], '') otherwise.
# NOTE(review): the original indentation was lost in this listing, so the exact
# nesting of the try/except blocks and loops below cannot be confirmed here.
h = httplib2.Http()
g_url = 'http://%s/JSON.php?clip_id=%s' % ( client_name, clip_id)
print "Fetching URL: %s" % g_url
try:
response, j = h.request(g_url)
except httplib.BadStatusLine as exception:
return None
# Destination path, built relative to the current working directory.
dirname = os.getcwd() + "/data/granicus/srt/%s/" % client_name
filename = dirname + "%s.srt" % clip_id
subs = SubRipFile()
if response.get('status') == '200':
captions = []
try:
# The first element of the decoded JSON document is used.
j = json.loads(j, strict=False)[0]
except ValueError:
# Fallback for non-strict JSON: wrap bare lowercase keys in double quotes
# and strip all backslashes, then try to decode again.
ts = re.sub('([{,]\s+)([a-z]+)(: ")', lambda s: '%s"%s"%s' % (s.groups()[0], s.groups()[1], s.groups()[2]), j).replace("\\", "")
try:
j = json.loads(ts, strict=False)[0]
except UnicodeDecodeError:
# Drop undecodable bytes and decode once more.
ts = unicode(ts, errors='ignore')
j = json.loads(ts, strict=False)[0]
except:
# NOTE(review): j is set to False here but iterated right below; iterating
# False raises TypeError — confirm the intended fallback.
j = False
sub_count = 0
for item in j:
if item["type"] == "text":
cap = item["text"]
# Caption time rounded to three decimal places.
offset = round(float(item["time"]), 3)
captions.append({'time': offset, 'text': cap})
end = get_cap_end(j, sub_count)
if end:
subtitle = SubRipItem(index=sub_count, start=SubRipTime(seconds=offset), end=SubRipTime(seconds=end), text=cap)
subs.append(subtitle)
# NOTE(review): whether this increment runs for every item or only for text
# items depends on the lost indentation — verify against get_cap_end's use.
sub_count = sub_count + 1
try:
subs.save(path=filename, encoding="utf-8")
except IOError:
# On IOError, create the directory with a shell `mkdir -p` and retry the save.
# NOTE(review): client_name is interpolated into a shell command; if it can
# come from untrusted input, os.makedirs would avoid shell injection.
p = subprocess.Popen('mkdir -p %s' % dirname, shell=True, stdout=subprocess.PIPE)
t = p.wait()
subs.save(path=filename, encoding="utf-8")
s3_url = push_to_s3(filename, '%s/%s.srt' % (client_name, clip_id))
return (captions, s3_url)
else:
return ([], '')
示例12: save
def save(self, *args, **kwargs):
    """Save the episode, then rebuild its subtitle rows from the SRT file.

    After delegating to the parent ``save``, every existing entry in
    ``self.subtitle_set`` is deleted.  The file at ``self.subtitles.path``
    is then parsed with pysrt and one subtitle is created per cue inside
    ``transaction.commit_on_success()``.  Start and end are stored as their
    ``ordinal`` values.
    """
    super(Episode, self).save(*args, **kwargs)

    # Drop the previous import so the rows mirror the current file.
    self.subtitle_set.all().delete()

    parsed_cues = SubRipFile.open(self.subtitles.path)
    with transaction.commit_on_success():
        for cue in parsed_cues:
            self.subtitle_set.create(
                start=cue.start.ordinal,
                end=cue.end.ordinal,
                text=cue.text,
            )
示例13: merge_subtitle
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle tracks into one SubRipFile.

    All start and end ordinals of both tracks are sorted into a list of
    boundaries.  For each pair of consecutive boundaries longer than
    ``delta``, the matching text of each track is looked up with
    ``find_subtitle`` and combined with ``join_lines``.  Non-empty results
    become cues of the output, on which ``clean_indexes()`` is called before
    it is returned.
    """
    boundaries = []
    for track in (sub_a, sub_b):
        boundaries += [cue.start.ordinal for cue in track]
        boundaries += [cue.end.ordinal for cue in track]
    boundaries.sort()

    merged = SubRipFile()
    pos_a = 0
    pos_b = 0
    for left, right in zip(boundaries, boundaries[1:]):
        start = SubRipTime.from_ordinal(left)
        end = SubRipTime.from_ordinal(right)
        if (end - start) > delta:
            # Each lookup also returns the cursor to resume from next time.
            text_a, pos_a = find_subtitle(sub_a, start, end, pos_a)
            text_b, pos_b = find_subtitle(sub_b, start, end, pos_b)
            combined = join_lines(text_a, text_b)
            if len(combined) > 0:
                merged.append(SubRipItem(0, start, end, combined))
    merged.clean_indexes()
    return merged
示例14: GetSrtCaptions
def GetSrtCaptions(self):
    """Download the ASR captions track as SRT and parse it with pysrt.

    Requests ``self.track_url`` with ``?fmt=srt`` appended, using
    ``self.http`` and ``self.headers``.  On a "200" status the parsed
    captions are stored on ``self.srt_captions``.

    Raises:
      Error: The response status was anything other than "200".
    """
    srt_url = "%s?fmt=srt" % self.track_url
    response_headers, body = self.http.request(srt_url, "GET", headers=self.headers)

    if not response_headers["status"] == "200":
        raise Error("Received HTTP response %s when requesting %s?fmt=srt." % (response_headers["status"], self.track_url))

    self.srt_captions = SubRipFile.from_string(body)
示例15: generate_subs_from_source
def generate_subs_from_source(speed_subs, subs_type, subs_filedata, item, language='en'):
    """Generate transcripts from a source file and save them for `item`.

    The source subtitles are expected to have a speed of 1.

    :param speed_subs: dictionary {speed: sub_id, ...}
    :param subs_type: type of source subs; only "srt" (case-insensitive) is accepted.
    :param subs_filedata: unicode, content of source subs.
    :param item: module object.
    :param language: str, language of translation of transcripts
    :returns: dict with 'start', 'end' and 'text' lists built from the source
        subs; newlines inside each text are replaced with spaces.
    :raises TranscriptsGenerationException: for a non-srt `subs_type`, when
        parsing fails, or when parsing yields a falsy result.
    """
    _ = item.runtime.service(item, "i18n").ugettext

    if subs_type.lower() != 'srt':
        raise TranscriptsGenerationException(_("We support only SubRip (*.srt) transcripts format."))

    try:
        srt_subs_obj = SubRipFile.from_string(subs_filedata)
    except Exception as ex:
        msg = _("Something wrong with SubRip transcripts file during parsing. Inner message is {error_message}").format(
            error_message=ex.message
        )
        raise TranscriptsGenerationException(msg)

    if not srt_subs_obj:
        raise TranscriptsGenerationException(_("Something wrong with SubRip transcripts file during parsing."))

    subs = {
        'start': [cue.start.ordinal for cue in srt_subs_obj],
        'end': [cue.end.ordinal for cue in srt_subs_obj],
        'text': [cue.text.replace('\n', ' ') for cue in srt_subs_obj],
    }

    # One stored transcript per requested speed, derived from the speed-1 subs.
    for speed, subs_id in speed_subs.iteritems():
        scaled = generate_subs(speed, 1, subs)
        save_subs_to_store(scaled, subs_id, item, language)

    return subs