本文整理汇总了 Python 中 pysrt.open 函数的典型用法代码示例。如果您正苦于以下问题:pysrt.open 函数的具体用法是什么?pysrt.open 怎么用?有哪些 pysrt.open 的使用例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
下文一共展示了 pysrt.open 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: get_word_freq_dict
def _open_with_fallback(inputfile):
    """Open a subtitle file, retrying with explicit encodings when needed.

    The file is first opened with pysrt's default encoding handling.  If that
    raises ``UnicodeDecodeError`` or yields no subtitles, ``utf8`` and then
    ``iso-8859-1`` are tried in turn; each retry is announced on stdout.

    :param inputfile: path of the subtitle file to open.
    :return: the first non-empty result, or an empty list when every attempt
        failed to decode or produced no subtitles.
    """
    # ``None`` stands for "let pysrt pick the encoding" (the first attempt).
    for enc in (None, 'utf8', 'iso-8859-1'):
        try:
            if enc is None:
                subs = pysrt.open(inputfile)
            else:
                print('trying with %s' % enc)
                subs = pysrt.open(inputfile, encoding=enc)
        except UnicodeDecodeError:
            continue
        if subs:
            return subs
    return []


def get_word_freq_dict(inputfiles, verification_list):
    """Collect the words of several subtitle files into one dictionary.

    Every whitespace-separated word of every subtitle is handed to
    ``insert_word`` together with the shared dictionary and
    ``verification_list``; what gets stored is decided by ``insert_word``.
    Files that cannot be opened (or contain no subtitles) are reported on
    stdout and skipped.

    :param inputfiles: iterable of subtitle file paths.
    :param verification_list: passed through unchanged to ``insert_word``.
    :return: the dictionary filled by ``insert_word``.
    """
    freq_dict = {}
    for inputfile in inputfiles:
        # Single-argument print() produces the same output on Python 2 and 3.
        print('processing %s' % (inputfile,))
        subs = _open_with_fallback(inputfile)
        if not subs:
            print('couldnt open  %s' % (inputfile,))
            continue
        for sub in subs:
            for w in sub.text.split():
                insert_word(freq_dict, w, verification_list)
    # Summary: number of distinct keys and the sum of all stored values.
    print('%s %s' % (len(freq_dict), sum(freq_dict.values())))
    return freq_dict
示例2: collect_subtitles_lines
def collect_subtitles_lines(self, subtitle_file, file_path=None):
    """Create one ``SubtitlesLine`` row per cue of a subtitle file.

    :param subtitle_file: object providing ``directory``, ``file_name`` and
        ``language``; it is stored on every created line.
    :param file_path: optional explicit path; when omitted the path is built
        from ``subtitle_file.directory`` and ``subtitle_file.file_name``.

    Cues whose creation raises ``ValidationError`` or ``ValueError`` are
    reported on stdout and skipped.
    """
    path = file_path or os.path.join(
        subtitle_file.directory, subtitle_file.file_name)

    try:
        cues = pysrt.open(path)
    except UnicodeDecodeError:
        # Default decoding failed; retry with a single-byte encoding.
        cues = pysrt.open(path, encoding='iso-8859-1')

    def as_timestamp(moment):
        # ``ordinal`` is fed to timedelta as a millisecond count.
        return str(datetime.timedelta(milliseconds=moment.ordinal))

    for cue in cues:
        begin = as_timestamp(cue.start)
        finish = as_timestamp(cue.end)
        body = cue.text
        try:
            line = SubtitlesLine.objects.create(
                subtitlefile=subtitle_file,
                index=cue.index,
                start=begin,
                end=finish,
                text=body,
            )
        except (ValidationError, ValueError):
            print('Ignoring: {t}'.format(t=body.encode('utf8')))
            continue
        # The search vector is attached after creation and saved separately.
        line.text_vector = SearchVector(
            'text', config=subtitle_file.language)
        line.save()
示例3: _detect_subtitle_language
def _detect_subtitle_language(srt_path):
    """Guess the language of a subtitle file from its first five cues.

    :param srt_path: path of the ``.srt`` file to inspect.
    :return: ``Language.fromietf(<code>)`` for the most probable language when
        its probability reaches ``autosubliminal.DETECTEDLANGUAGEPROBABILITY``;
        ``None`` when the file cannot be read, has fewer than five cues,
        nothing is detected, or the probability is too low.
    """
    log.debug('Detecting subtitle language')

    # Load srt file: try utf-8 first with fallback to iso-8859-1.
    # iso-8859-1 assigns a character to every byte value, so decoding with it
    # never fails; trying it first would make the utf-8 attempt unreachable
    # and would silently turn utf-8 files into mojibake before detection.
    subtitle = None
    for encoding in ('utf-8', 'iso-8859-1'):
        try:
            subtitle = pysrt.open(path=srt_path, encoding=encoding)
        except Exception:
            continue
        break
    if subtitle is None:
        # If we can't read it, we can't detect, so return
        return None

    # Read first 5 subtitle lines to determine the language
    if len(subtitle) < 5:
        return None
    # Join with a separator so the last word of one cue is not glued to the
    # first word of the next one.
    text = ' '.join(sub.text for sub in subtitle[0:5])

    # Detect the language with highest probability and return it if it's
    # more than the required minimum probability
    detected_languages = langdetect.detect_langs(text)
    log.debug('Detected subtitle language(s): %s', detected_languages)
    if not detected_languages:
        return None

    # Get first detected language (list is sorted according to probability,
    # highest first)
    detected_language = detected_languages[0]
    if detected_language.prob >= autosubliminal.DETECTEDLANGUAGEPROBABILITY:
        log.debug('Probability of detected subtitle language accepted: %s', detected_language)
        return Language.fromietf(detected_language.lang)

    log.debug('Probability of detected subtitle language too low: %s', detected_language)
    return None
示例4: readAllFiles
def readAllFiles(path):
    """Cut movies and their subtitles into the parts listed in ``*.scenes`` files.

    Walks ``path`` and, for every file name ending in ``.scenes``, reads the
    movie file name from the first line and one ``start,end`` time pair from
    each following line.  For each pair it runs ``ffmpeg`` to write
    ``<path>parted/<name>_<n>.mp4`` and saves the matching slice of
    ``<path><name>.srt`` as ``<path>parted/<name>_<n>.srt``.  When at least
    one subtitle part was written, ``srtToTxt`` is called on ``parted/``.

    :param path: directory prefix.  It is concatenated directly with file
        names (``path + fileName``), so it needs to end with a path separator.
    """
    for root, dirs, files in os.walk(path):
        files.sort()
        for fileread in files:
            if fileread.endswith(".scenes"):
                # NOTE(review): the file is opened relative to ``path``, not
                # ``root``; a .scenes file found in a sub-directory would not
                # be located this way - confirm whether that is intended.
                with open(path + fileread) as a_file:
                    lines = a_file.readlines()
                    # First line: movie file name; remaining lines: scenes.
                    fileName = lines[0].rstrip()
                    del lines[0]
                    number = 1  # 1-based counter used in the part file names
                    listSrt = list()  # subtitle parts written successfully
                    for item in lines:
                        # Each scene line is "<start>,<end>".
                        lhs, rhs = item.split(",", 1)
                        partName, ext = fileName.split(".", 1)
                        newPath = path + 'parted/'
                        if not os.path.exists(newPath):
                            os.makedirs(newPath)
                        newFileNameMovie = newPath + partName + '_' + str(number) + '.mp4'
                        newFileNameSrt = newPath + partName + '_' + str(number) + '.srt'
                        number += 1
                        # Split movie file
                        # ffmpeg -i video.mp4 -ss 00:01:00 -to 00:02:00 -c copy cut.mp4
                        # ffmpeg -i input.avi -c:v libx264 -crf 19 -preset slow -c:a libfaac -b:a 192k -ac 2 out.mp4
                        # NOTE(review): os.system reports failure through its
                        # return value rather than by raising, so this except
                        # probably never fires for ffmpeg errors - confirm.
                        # NOTE(review): the command line is built by string
                        # formatting; file names containing quotes would
                        # break it.
                        try:
                            if ext == 'mp4':
                                # mp4 input: copy the streams without re-encoding.
                                os.system('ffmpeg -i "%s" -ss "%s" -to "%s" -c copy "%s" ' % (
                                    path + fileName, lhs, rhs.rstrip(), newFileNameMovie))
                            else:
                                # Other inputs: re-encode video with libx264, copy audio.
                                os.system('ffmpeg -i "%s" -ss "%s" -to "%s" -c:v libx264 -c:a copy "%s" ' % (
                                    path + fileName, lhs, rhs.rstrip(), newFileNameMovie))
                        except:
                            print "Error with spliting movie file"
                        # Split *.srt file
                        try:
                            #subs = SubRipFile.open(path + partName + '.srt')
                            try:
                                subs = pysrt.open(path + partName + '.srt')
                            except UnicodeDecodeError:
                                subs = pysrt.open(path + partName + '.srt',encoding='iso-8859-1')
                            # Times are "H:M:S"; int() also accepts the
                            # trailing newline left on ``rhs``.
                            Hs, Ms, Ss = lhs.split(":", 2)
                            He, Me, Se = rhs.split(":", 2)
                            part = subs.slice(starts_after={'hours': int(Hs), 'minutes': int(Ms), 'seconds': int(Ss)},
                                              ends_before={'hours': int(He), 'minutes': int(Me), 'seconds': int(Se)})
                            part.save(newFileNameSrt)
                            listSrt.append(newFileNameSrt)
                            # part.shift(hours=-int(Hs), minutes=-int(Ms), seconds=-int(Ss))
                        except:
                            # NOTE(review): bare except hides every error,
                            # including a missing .srt file.
                            print "Error with spliting srt file"
                    if not listSrt:
                        print "Error there are no srt files"
                    else:
                        # The bare string below is disabled code kept by the
                        # original author; it has no runtime effect.
                        """
srtdir = path+'wholeSrt/'
ensure_dir(srtdir)
srtmerge(listSrt, srtdir + partName + '_new.srt', offset=1000)
srtToTxt(path+'wholeSrt/')
"""
                        srtToTxt(newPath)
示例5: srtToTxt
def srtToTxt(dirName):
    """Write the text of every ``*.srt`` file in ``dirName`` to a ``.txt`` file.

    For each ``<name>.srt`` a UTF-8 encoded ``<name>.txt`` is created next to
    it, containing the text of all subtitles written back to back (no
    separator is inserted between subtitles).

    :param dirName: directory that is searched (non-recursively) for
        ``*.srt`` files.
    """
    for infile in glob.glob(os.path.join(dirName, '*.srt')):
        try:
            subs = pysrt.open(infile)
        except UnicodeDecodeError:
            # Default decoding failed; retry with a single-byte encoding.
            subs = pysrt.open(infile, encoding='iso-8859-1')
        # Replace the 4-character ".srt" suffix matched by the glob.
        outfile = infile[:-4] + '.txt'
        # The context manager closes the output file even when a write fails.
        with codecs.open(outfile, "w", encoding="utf-8") as f:
            for sub in subs:
                f.write(sub.text)
示例6: join_srt_files
def join_srt_files(srt_top, srt_btm, srt_out):
    """Merge two subtitle files into a single file.

    The cues of ``srt_top`` are wrapped with ``TOP_SRT_TEMPLATE`` and added to
    the cues of ``srt_btm``; the combined list is sorted, re-indexed and
    written to ``srt_out``.
    """
    upper_cues = pysrt.open(srt_top)
    combined = pysrt.SubRipFile(items=pysrt.open(srt_btm))

    for cue in upper_cues:
        cue.text = TOP_SRT_TEMPLATE.format(cue.text)
        combined.append(cue)

    # Restore ordering and renumber after mixing the two sources.
    combined.sort()
    combined.clean_indexes()
    combined.save(srt_out)
示例7: emptyEntries
def emptyEntries(myFile, keep, verbose):
    """Delete subtitle entries that have no text and rewrite the file.

    Each entry of ``myFile`` is rendered as text and split into lines; an
    entry counts as empty when its third line is a single space or empty and
    its fourth line is empty.  When such entries exist they are removed, the
    file is saved back in place as UTF-8 (after optionally copying the
    original to ``<myFile>.emptyEntries``) and the new entry count is printed.

    :param myFile: path of the subtitle file; read and written as UTF-8.
    :param keep: when true, copy the original file to
        ``<myFile>.emptyEntries`` before it is overwritten.
    :param verbose: when true, print additional progress messages.
    :return: ``True`` if at least one empty entry was found, else ``False``.
    """
    emptyEntryFound = False
    emptyEntries = 0  # counter; shadows the function name inside this scope
    entriesToDelete = []
    if verbose:
        print "--- Searching for empty entries"
    subs = pysrt.open(myFile, encoding='utf-8')  # open the file with pysrt as UTF-8
    entries = len(subs)  # total number of entries
    if verbose:
        print "--- %s entries total" % entries
    for entryNo in range(0, entries):  # visit every entry by position
        subEntry = u"%s" % subs[entryNo]  # textual rendering of one entry
        lines = subEntry.split('\n')  # split the rendering into lines
        lineNo = 0  # index of the line currently inspected
        emptyEntry = False
        for row in lines:  # inspect the lines one by one
            if lineNo == 2:
                # NOTE(review): the first two comparisons are identical as
                # written; one of them may originally have tested a different
                # token that was lost - confirm against the upstream source.
                if (row == " "
                        or row == " "
                        or not row):  # third line is a single space or empty
                    emptyEntry = True
            if emptyEntry and lineNo == 3 and row == "":  # blank third line and empty fourth line
                emptyEntryFound = True
                emptyEntries += 1
                entriesToDelete.append(entryNo)  # remember the position for deletion
            lineNo += 1
    if emptyEntryFound:  # at least one empty entry was found
        print "*** %s empty entries found" % emptyEntries
        # Delete from the highest position down so that the remaining
        # positions stay valid.
        for entryNo in reversed(entriesToDelete):
            # print lineNo
            del subs[entryNo]  # delete the entry
        if keep:
            if verbose:
                print "--- Copying original file to %s.emptyEntries" % myFile
            copyfile(myFile, "%s.emptyEntries" % myFile)
        subs.save(myFile, encoding='utf-8')  # overwrite the file
        subs = pysrt.open(myFile, encoding='utf-8')  # re-open the saved file
        entries = len(subs)  # count the entries again
        print "--- Now has %s entries" % entries
    return emptyEntryFound
示例8: extract_lines
def extract_lines(subtitle_path):
    """Return the text of each cue with tags removed and link cues dropped.

    :param subtitle_path: path of the subtitle file.
    :return: list of cue texts, in file order, with HTML-style tags stripped;
        cues matching ``URL_REGEX`` (usually ads or subtitle credits) are
        left out.
    """
    try:
        cues = pysrt.open(subtitle_path)
    except UnicodeDecodeError:
        cues = pysrt.open(subtitle_path, encoding='latin1')

    # First remove any formatting tags, then filter out the cues that still
    # contain a link.
    untagged = (re.sub('<[^<]+?>', '', cue.text) for cue in cues)
    return [line for line in untagged if not re.search(URL_REGEX, line)]
示例9: cut_subtitle
def cut_subtitle(self):
    """Cut the selected subtitle file to the chosen start/stop times.

    The input path comes from ``self.subtitle_pick``, the output path from
    ``self.save_pick`` plus the input file's extension, and the time range
    from ``self.start`` / ``self.stop``.  The file is opened with the
    encoding guessed by ``chardet``, then ``utf-8``, then ``latin1``; the
    first encoding for which opening and cutting succeed wins.

    :return: ``None`` when the input is not a file or when cutting succeeded;
        the output path after a warning dialog when every encoding failed.
    """
    sbt_in = self.subtitle_pick.get_text()
    if not os.path.isfile(sbt_in):
        return None

    sbt_out = self.save_pick.get_text() + os.path.splitext(sbt_in)[1]
    h1, m1, s1 = self.start.get_h_m_s()
    h2, m2, s2 = self.stop.get_h_m_s()

    import chardet
    # Sniff the encoding from at most the first MiB; the context manager
    # closes the handle instead of leaking it.
    with open(sbt_in, "rb") as raw:
        detected = chardet.detect(raw.read(1024 * 1024))
    enc = detected["encoding"]
    cnf = detected["confidence"]

    e = None  # first error encountered, reported if every encoding fails
    encs = OrderedSet([enc, "utf-8", "latin1"])
    for encoding in encs:
        try:
            logger.info("Trying to open subtitle with encoding %s", encoding)
            subs = pysrt.open(sbt_in, error_handling=pysrt.ERROR_LOG, encoding=encoding)
            subtitle_cut(h1, m1, s1, h2, m2, s2, subs, sbt_out)
            return None
        except Exception as ae:
            e = e or ae
            logger.warning("encoding %s failed", encoding, exc_info=1)

    msg = (
        "Could not open {} with any of the following encodings:\n  {}\n\n"
        "Confidence on {} was {}.\nFirst error was: {}"
    )
    # chardet reports ``None`` when it cannot guess; convert every entry to
    # text so building the message cannot itself fail with a TypeError.
    tried = ", ".join(str(item) for item in encs)
    msg = msg.format(os.path.basename(sbt_in), tried, enc, cnf, str(e))
    QMessageBox.warning(self, "Opening subtitle failed", msg, defaultButton=QMessageBox.NoButton)
    return sbt_out
示例10: start
def start(text_input,language_analysis_stimulated):
    """Replay a subtitle file in real time and store each word's phones as timed memories.

    The loop polls the wall clock; once a subtitle's start is reached its
    text is reduced to ASCII, split into words, and each word is converted
    with ``LanguageAnalyzer.word_to_phones``.  Every resulting phone group is
    written by a separate process through ``LanguageMemoryUtil.add_memory``
    with a start and end time spread over the subtitle's duration.

    :param text_input: path of the subtitle file.
    :param language_analysis_stimulated: shared object whose ``value`` is set
        to 1 while a subtitle is being processed and back to 0 afterwards.
    :raises ValueError: if ``text_input`` does not exist.
    """
    # time.sleep(0.3)  # (disabled) wait for the other processes to start
    t0 = time.time()  # Initiation time
    if os.path.exists(text_input):  # If captions file exist
        subs = pysrt.open(text_input)  # Get whole subtitles
        i = 0  # Step counter
        while i < len(subs):  # While step counter less than amount of subtitles
            time.sleep(0.1)  # Sleep 0.1 s between polls to avoid a busy loop
            # NOTE(review): in pysrt ``SubRipTime.seconds`` appears to be the
            # 0-59 seconds component, not the total elapsed seconds; if so,
            # this comparison is wrong for subtitles after the first minute.
            # Confirm (``ordinal`` is the total in milliseconds).
            if (time.time() - t0 + 0.8) > subs[i].start.seconds:  # If elapsed time (+0.8 s) passed the subtitle's start
                sub_starting_time = datetime.datetime.now()  # Starting time of the memory
                language_analysis_stimulated.value = 1  # Language analysis stimulated
                sub_ending_time = sub_starting_time + datetime.timedelta(seconds=(subs[i].end - subs[i].start).seconds)  # Calculate the ending time by subtitle's delta
                sub = subs[i].text.encode('ascii','ignore')  # Drop non-ASCII characters from the subtitle's text
                # Python 2 ``str.translate(None, chars)`` deletes the listed
                # characters.  NOTE(review): the character list looks garbled
                # (it contains an e-mail placeholder) - confirm the intended
                # punctuation set.
                sub = sub.translate(None, '[email protected]#$?,')
                words = sub.split()
                phone_groups = []
                for word in words:
                    phone_groups.append(LanguageAnalyzer.word_to_phones(word))
                phones = " ".join(phone_groups)
                # NOTE(review): ``len(phones)`` counts the characters of the
                # joined string, while the loop below multiplies by the number
                # of whitespace-separated phones; it is also 0 for a subtitle
                # without words, which would raise ZeroDivisionError. Confirm.
                phone_duration = datetime.timedelta(seconds=(subs[i].end - subs[i].start).seconds) / len(phones)
                starting_time = sub_starting_time
                for word_inphones in phone_groups:
                    ending_time = starting_time + phone_duration * len(word_inphones.split())
                    # Only store words that are non-empty and end before the
                    # subtitle does.
                    if ending_time <= sub_ending_time and word_inphones != "":
                        process5 = multiprocessing.Process(target=LanguageMemoryUtil.add_memory, args=(word_inphones, starting_time, ending_time))  # Define write memory process
                        process5.start()  # Start write memory process
                    # The next word starts 50 ms after this one ends.
                    starting_time = ending_time + datetime.timedelta(milliseconds=50)
                print subs[i].text + "\n"  # Print subtitle's text
                print phones + "\n"
                print "_____________________________________________________________________________________\n"
                language_analysis_stimulated.value = 0  # Language analysis NOT stimulated
                i += 1  # Move on to the next subtitle only after this one was processed
    else:  # If captions file doesn't exist
        raise ValueError('VTT file doesn\'t exist!')  # Raise a ValueError
示例11: _read_subtitle
def _read_subtitle(self, subtitle_filename):
    """Load a subtitle file and group its text lines into dialogs.

    Lines are stripped, cleaned with ``quote_matches`` and blank lines are
    ignored.  A dialog keeps collecting lines until one of them ends with
    ``.``, ``!``, ``?``, ``:`` or ``)``; the next line then opens a new
    dialog.

    :param subtitle_filename: path of the subtitle file (read as iso-8859-1).
    :return: list of dialogs, each a single space-joined string.
    """
    parsed = pysrt.open(subtitle_filename, encoding='iso-8859-1')
    cleaned_lines = [
        quote_matches.sub('', raw.strip()).strip()
        for raw in parsed.text.split('\n')
    ]

    closing_marks = ('.', '!', '?', ':', ')')
    grouped = []
    open_new_group = True
    for line in cleaned_lines:
        if not line:
            # Nothing left on this line after cleaning.
            continue
        if open_new_group:
            grouped.append([line])
        else:
            grouped[-1].append(line)
        # The ending of the current line decides where the next one goes.
        open_new_group = line.endswith(closing_marks)

    # Collapse each group of lines into one dialog string.
    return [' '.join(parts) for parts in grouped]
示例12: handle_subtitle
def handle_subtitle(cls, filename, target=None, to='zh', by_words=True):
    """Add translations to a subtitle file and save the result.

    :param filename: path of the subtitle file to read.
    :param target: output path; defaults to ``<filename>.<to>.srt``.
    :param to: target language passed to ``BaiduTranslate.translate`` in
        whole-text mode and used in the default output name.
    :param by_words: when true, each cue is replaced by its words, with the
        translation appended in parentheses to the words accepted by
        ``cls.is_word_valid``; when false, the translation results are
        appended to the cue text on new lines.
    :return: ``True``.
    """
    subs = pysrt.open(filename)
    words_list = cls.init_word_list()

    for sub in subs:
        if not by_words:
            try:
                translated = BaiduTranslate.translate(sub.text, to=to)
            except requests.exceptions.ReadTimeout:
                # Back off, record the cue that timed out, and move on.
                time.sleep(10)
                BaiduTranslate.log('HTTP TIME OUT : ' + sub.text)
                continue
            for key in translated:
                sub.text += '\n' + translated[key]
            continue

        # Word mode: one word per line is sent to the translator.
        glossary = BaiduTranslate.translate(sub.text.replace(' ', '\n'))
        pieces = []
        for word in glossary:
            if cls.is_word_valid(word, words_list):
                pieces.append(word + '(' + glossary.get(word) + ') ')
            else:
                pieces.append(word + ' ')
        annotated = ''.join(pieces)
        sub.text = annotated
        print(annotated)

    subs.save(target or filename + '.' + to + '.srt')
    return True
示例13: download_sub
def download_sub(self):
    """Download candidate subtitles and keep the first one that validates.

    Candidates come from ``DBSub`` followed by ``OpenSubs``.  With validation
    enabled, the text of each candidate around ``validate.start_min`` is
    passed to ``validate.validate``; the first accepted file ends the search
    with ``self._final(True)``, rejected files are deleted.  If no candidate
    is accepted (or validation is disabled) ``self._final(False)`` is called.
    """
    print('Validation: ' + str(self.validate))
    if self.validate:
        validate = Validate(self.movie_path)

    candidates = chain(DBSub().download_sub(self.movie_path),
                       OpenSubs().download_sub(self.movie_path))

    for file_path in candidates:
        if not self.validate:
            continue

        subs = pysrt.open(file_path)
        window = subs.slice(
            starts_after={'minutes': validate.start_min - 1, 'seconds': 59},
            ends_before={'minutes': validate.start_min, 'seconds': 11})
        # Join the cue texts and collapse every run of whitespace to one space.
        text = ' '.join(' '.join(cue.text for cue in window.data).split())
        print("For file : {} Movie Text is : {}".format(file_path, text))

        if validate.validate(text):
            print("Found validated subtitle")
            self._final(True)
            return
        # Rejected candidate: remove the downloaded file.
        os.remove(file_path)

    self._final(False)
示例14: parseSubs
def parseSubs(subtitles):
    """Parse every file in ``SUB_PATH_BASE_DIR`` and collect its subtitles.

    Each subtitle's end time is extended a little before it is wrapped in a
    ``Subtitle`` object and appended to ``subtitles``; subtitles whose text
    contains no words are skipped.  Files that cannot be parsed are reported
    on stdout and skipped.

    :param subtitles: list that receives the ``Subtitle`` objects (mutated
        in place).
    """
    for filename in os.listdir(SUB_PATH_BASE_DIR):
        print("Parsing srt file: " + filename)
        try:
            subs = pysrt.open(SUB_PATH_BASE_DIR + filename)
        except Exception:
            # Best effort: skip unreadable files, but let KeyboardInterrupt
            # and SystemExit propagate (a bare ``except`` would swallow them).
            print("Could not parse " + filename)
            continue

        last_index = len(subs) - 1
        for i, sub in enumerate(subs):
            if i != last_index:
                # some subbers are crazy impatient! subs drop out prematurely
                # given a threshold for about 2 seconds, we will extend the sub up to
                # 1 second based on the start time of the next subtitle
                nextSub = subs[i + 1]
                timeToNextSub = nextSub.start - sub.end
                # NOTE(review): only the seconds and milliseconds components
                # of the gap are used; any minutes/hours component is ignored
                # - confirm this is intended.
                secondsToNextSub = timeToNextSub.seconds + timeToNextSub.milliseconds / 1000.0
                if secondsToNextSub <= 2:
                    # Short gap: take half of it.
                    sub.end.seconds += secondsToNextSub / 2.0
                else:
                    sub.end.seconds += 1
            if not sub.text.split():
                continue
            subtitles.append(Subtitle(sub, filename))
示例15: parse_srt
def parse_srt(sub_file=None):
    """Read a subtitle file and return its text split into pieces.

    :param sub_file: path of the subtitle file; when falsy, the path returned
        by ``get_random_srt()`` is used.
    :return: list of strings produced by splitting the cleaned text with
        ``split_re`` and joining every piece with the (stripped) piece that
        follows it.
    """
    sub_file = sub_file or get_random_srt()
    debug(u"Using {} as SRT".format(sub_file))
    try:
        subs = srt.open(sub_file)
    except Exception:
        # Retry with latin1, which can decode any byte sequence.  Catching
        # Exception rather than using a bare ``except`` keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        subs = srt.open(sub_file, "latin1")

    # Flatten to a single line, then replace everything ``junk_re`` matches
    # with a space.
    flat_subs = subs.text.replace("\n", " ")
    clean_subs = junk_re.sub(" ", flat_subs)

    # The same iterator is advanced by both the loop and ``next``, so pieces
    # are consumed in pairs: each piece is glued to its successor.
    # presumably ``split_re`` captures its delimiter so text and delimiter
    # alternate - verify against its definition.
    piece_iter = iter(split_re.split(clean_subs))
    return [l + next(piece_iter, '').strip() for l in piece_iter]