本文整理汇总了Python中Text.Text.from_file方法的典型用法代码示例。如果您正苦于以下问题:Python Text.from_file方法的具体用法?Python Text.from_file怎么用?Python Text.from_file使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Text.Text的用法示例。
在下文中一共展示了Text.from_file方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_bisents
# 需要导入模块: from Text import Text [as 别名]
# 或者: from Text.Text import from_file [as 别名]
def extract_bisents(file1, lang1, file2, lang2, alignment_file):
    """Return the aligned sentence pairs of two texts.

    Both texts are loaded with ``Text.from_file`` and the alignment with
    ``Alignment.from_file``; the flat sentence lists of the two texts are
    then handed to the alignment's ``as_pairs`` and its result is returned.

    Each language code must be a ``str`` or ``unicode`` of length 2,
    otherwise an ``AssertionError`` is raised before anything is loaded.
    """
    for code in (lang1, lang2):
        assert isinstance(code, (unicode, str)) and len(code) == 2

    # Load everything first, flatten afterwards (same order as before).
    first_text = Text.from_file(file1, lang1)
    second_text = Text.from_file(file2, lang2)
    loaded_alignment = Alignment.from_file(alignment_file)

    first_sentences = first_text.as_sentences_flat()
    second_sentences = second_text.as_sentences_flat()
    return loaded_alignment.as_pairs(first_sentences, second_sentences)
示例2: fetch_sentences
# 需要导入模块: from Text import Text [as 别名]
# 或者: from Text.Text import from_file [as 别名]
def fetch_sentences(basename, lang):
    """Return the list of sentences for one language version of a text.

    *lang* is a two-letter language code optionally followed by a one-letter
    transformation code; only the combinations listed in the assertion
    below are accepted (anything else raises ``AssertionError``).

    Sentences are read from ``<basename>/<language>.sentences`` (UTF-8, one
    sentence per line, surrounding whitespace stripped).  If that file
    cannot be opened, they are taken from
    ``Text.from_file("<basename>/<language>.txt").as_sentences_flat()``.

    Transformation codes applied to every sentence before returning:

    * ``m`` -- ``metaphone_text(s, lang=<language>)``
    * ``t`` -- ``translit_pl(s, <language>)``
    * ``e`` -- ``expand_cu(s, numbers=True)``
    """
    # Local import: the file's own import block is not visible from here.
    import io

    assert lang in ('pl', 'plm',
                    'cu', 'cum', 'cut', 'cue',
                    'el', 'elm', 'elt'), "invalid lang " + lang
    real_lang = lang[:2]
    transformation = lang[2:]
    basename_with_lang = "%s/%s" % (basename, real_lang)
    try:
        # TODO maybe open ready metaphone files?
        # io.open decodes while reading and exists on Python 2 and 3 alike
        # (the old ``file()`` builtin and ``str.decode`` are Python 2 only).
        # newline='\n' keeps line splitting on '\n' only, as before.
        with io.open("%s.sentences" % basename_with_lang,
                     encoding='utf-8', newline='\n') as f:
            sentences = [line.strip() for line in f]
    except IOError:
        sentences = Text.from_file("%s.txt" % basename_with_lang,
                                   lang=real_lang).as_sentences_flat()

    if transformation == 'm':
        return [metaphone_text(s, lang=real_lang) for s in sentences]
    if transformation == 't':
        return [translit_pl(s, real_lang) for s in sentences]
    if transformation == 'e':
        return [expand_cu(s, numbers=True) for s in sentences]
    return sentences
示例3: export_sentences
# 需要导入模块: from Text import Text [as 别名]
# 或者: from Text.Text import from_file [as 别名]
def export_sentences(input_file, lang, export_type):
    """Print each sentence of *input_file* to stdout, UTF-8 encoded.

    The text is loaded with ``Text.from_file(input_file, lang)`` and walked
    through ``as_sentences(paragraph_separator='¶')``.  When *export_type*
    is ``'hunalign'`` every ``'¶'`` item is replaced by the literal ``'<p>'``
    and every other sentence by the space-joined ``metaphone`` of its
    whitespace-separated words; for any other *export_type* the items are
    printed unchanged.
    """
    from translit.metaphone import metaphone

    text = Text.from_file(input_file, lang)
    for sentence in text.as_sentences(paragraph_separator='¶'):
        if export_type == 'hunalign':
            # NOTE(review): the source lost its indentation; the metaphone
            # branch is assumed to be the `else` of the '¶' test -- confirm.
            sentence = ('<p>' if sentence == '¶'
                        else ' '.join(metaphone(word)
                                      for word in sentence.split()))
        # Parenthesised single argument: same output as the Python 2
        # print statement this code was written for.
        print(sentence.encode('utf-8'))
示例4: open
# 需要导入模块: from Text import Text [as 别名]
# 或者: from Text.Text import from_file [as 别名]
if __name__ == '__main__':
    # Command-line entry point: pretty-print an alignment file next to the
    # two sentence sequences it aligns, followed by the total cost.
    #
    #   argv[1]      alignment file
    #   argv[2:4]    optional: two sentence files (UTF-8, one sentence per
    #                line, must not end in '.txt').  When omitted, the two
    #                texts are derived from an alignment file named
    #                "<dir/>XX-YY.<ext>" as "<dir/>XX.txt" and "<dir/>YY.txt".
    #
    # Bad arguments print the module docstring to stderr; I/O errors print
    # the error itself to stderr.
    import io
    import re
    import sys
    from Text import Text

    try:
        alignment_filename = sys.argv[1]          # IndexError if missing
        sentence_files = sys.argv[2:]
        m = None
        if sentence_files:
            fn1, fn2 = sentence_files             # ValueError unless exactly 2
            assert not fn1.endswith('.txt')
            assert not fn2.endswith('.txt')
        else:
            m = re.match(r'(.*/)?(..)-(..)\.(.*)$', alignment_filename)
            if m is None:
                # Previously crashed with AttributeError on m.group().
                raise ValueError("cannot derive text names from %r"
                                 % (alignment_filename,))
    # Was `except IndexError, ValueError:`, which in Python 2 catches only
    # IndexError and rebinds the name ValueError to the caught exception.
    except (IndexError, ValueError):
        sys.stderr.write("%s\n" % (__doc__,))
    else:
        try:
            if sentence_files:
                sequences = []
                for path in (fn1, fn2):
                    with io.open(path, encoding='utf-8', newline='\n') as f:
                        sequences.append([line.strip() for line in f])
                seq1, seq2 = sequences
            else:
                # group(1) is None when the alignment file has no directory
                # part; without the fallback the path became "None<XX>.txt".
                prefix = m.group(1) or ''
                t1 = Text.from_file("%s%s.txt" % (prefix, m.group(2)),
                                    lang=m.group(2))
                t2 = Text.from_file("%s%s.txt" % (prefix, m.group(3)),
                                    lang=m.group(3))
                seq1 = t1.as_sentences_flat()
                seq2 = t2.as_sentences_flat()
            a = Alignment.from_file(alignment_filename)
            a.pretty_print(seq1, seq2)
            print("Total cost: " + str(sum(c for (_, _, c) in a.data)))
        except IOError as e:
            sys.stderr.write("%s\n" % (e,))
示例5: get_text
# 需要导入模块: from Text import Text [as 别名]
# 或者: from Text.Text import from_file [as 别名]
def get_text(self, lang):
    """Load the text for *lang* via ``Text.from_file``.

    The file location is whatever ``self._p`` returns for the name
    ``"<lang>.txt"``; *lang* is converted with ``str`` for both the file
    name and the ``lang`` argument.
    """
    code = str(lang)
    path = self._p(code + ".txt")
    return Text.from_file(path, lang=code)
示例6: gen
# 需要导入模块: from Text import Text [as 别名]
# 或者: from Text.Text import from_file [as 别名]
def gen():
    """Yield ``(i1, i2, i3)`` triples, starting with ``(0, 0, 0)``.

    Reads ``al12`` (an iterable of ``(i1, i2)`` pairs) and ``map23`` (a
    mapping from an i2 index to an iterable of i3 indices) from the
    enclosing scope.  For every pair, each index after the previous pair's
    i2 up to and including the current i2 is looked up in ``map23``;
    indices missing from the mapping are skipped.
    """
    yield (0, 0, 0)
    last_i2 = 0
    for (i1, i2) in al12:
        for middle in range(last_i2 + 1, i2 + 1):
            try:
                targets = map23[middle]
                for i3 in targets:
                    # NOTE: an extra filter `map31[i3] == i1` was disabled
                    # here in the original code.
                    yield (i1, middle, i3)
            except KeyError:
                # No entry for this middle index: nothing to emit.
                pass
        # NOTE(review): indentation was lost in the source; this update is
        # assumed to run once per (i1, i2) pair -- confirm.
        last_i2 = i2
return Alignment(list(gen()), no_costs=True)
if __name__ == '__main__':
    # Command-line entry point: argv[1] is a directory holding the three
    # pairwise alignments (pl-cu.my, cu-el.my, pl-el.my) and the three
    # texts (pl.txt, cu.txt, el.txt).  The alignments are merged with
    # merge_3_alignments and the result is pretty-printed over the texts.
    import sys

    name = sys.argv[1]

    ladder_pl_cu = Alignment.from_file(name + '/pl-cu.my').as_ladder()
    ladder_cu_el = Alignment.from_file(name + '/cu-el.my').as_ladder()
    ladder_pl_el = Alignment.from_file(name + '/pl-el.my').as_ladder()
    # Swap the two elements of every pair of the third ladder
    # (presumably so it runs el -> pl; confirm against merge_3_alignments).
    ladder_el_pl = [(second, first) for (first, second) in ladder_pl_el]

    merged = merge_3_alignments(ladder_pl_cu, ladder_cu_el, ladder_el_pl)

    pl_sentences = Text.from_file(name + '/pl.txt', lang='pl').as_sentences_flat()
    cu_sentences = Text.from_file(name + '/cu.txt', lang='cu').as_sentences_flat()
    el_sentences = Text.from_file(name + '/el.txt', lang='el').as_sentences_flat()
    merged.pretty_print(pl_sentences, cu_sentences, el_sentences)