本文整理汇总了Python中Text.Text类的典型用法代码示例。如果您正苦于以下问题：Python Text类的具体用法？Python Text怎么用？Python Text使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Text类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_bisents
def extract_bisents(file1, lang1, file2, lang2, alignment_file):
    """Pair up the sentences of two texts according to a stored alignment.

    Both language codes must be two-character strings.  The check is an
    ``assert``, so a bad code raises AssertionError (and the check is
    skipped entirely when Python runs with ``-O``).

    Returns the result of ``Alignment.as_pairs`` applied to the flat
    sentence sequences of the two texts.
    """
    for code in (lang1, lang2):
        assert isinstance(code, (unicode, str)) and len(code) == 2

    first_text = Text.from_file(file1, lang1)
    second_text = Text.from_file(file2, lang2)
    alignment = Alignment.from_file(alignment_file)

    first_sentences = first_text.as_sentences_flat()
    second_sentences = second_text.as_sentences_flat()
    return alignment.as_pairs(first_sentences, second_sentences)
示例2: readFields
def readFields(self, data_input):
    """Load key/value metadata entries from ``data_input`` into ``self._meta``.

    The stream is expected to hold an integer entry count followed by that
    many (key, value) string pairs, each read with ``Text.readString``.

    Raises:
        IOError: if the entry count read from the stream is negative.
    """
    count = data_input.readInt()
    if count < 0:
        raise IOError("Invalid size: %d for file metadata object" % count)

    remaining = count
    while remaining > 0:
        # Key first, then value: this is the order they sit in the stream.
        key = Text.readString(data_input)
        self._meta[key] = Text.readString(data_input)
        remaining -= 1
示例3: _initialize
def _initialize(self, path, start, length):
    """Open ``path`` and parse the file header into reader state.

    Sets ``_stream``, ``_end``, ``_version``, the key/value class names,
    the compression flags and codec, ``_metadata``, optionally ``_sync``,
    and finally ``_header_end`` (the stream position right after the header).

    Args:
        path: location handed to ``self.getStream``.
        start: accepted for interface compatibility; not used in this method.
        length: number of bytes to read from the current position, or 0 to
            read up to the end of the stream.

    Raises:
        VersionPrefixException: the header does not begin with
            ``VERSION_PREFIX``.
        VersionMismatchException: the file's version byte is newer than the
            one in ``VERSION``.
        NotImplementedError: the file predates ``BLOCK_COMPRESS_VERSION``.
    """
    self._stream = self.getStream(path)

    if length == 0:
        self._end = self._stream.getPos() + self._stream.length()
    else:
        self._end = self._stream.getPos() + length

    # Parse Header
    version_block = self._stream.read(len(VERSION))
    prefix_len = len(VERSION_PREFIX)

    # Validate the prefix on the raw header block.  The previous code called
    # startswith() on the single version character, which can never match a
    # multi-character prefix, so every file was rejected.
    if not version_block.startswith(VERSION_PREFIX):
        raise VersionPrefixException(VERSION_PREFIX,
                                     version_block[0:prefix_len])

    # The version is the single character that follows the prefix.
    self._version = version_block[prefix_len]
    if self._version > VERSION[prefix_len]:
        raise VersionMismatchException(VERSION[prefix_len],
                                       self._version)

    if self._version < BLOCK_COMPRESS_VERSION:
        # Same as below, but with UTF8 Deprecated Class
        raise NotImplementedError
    else:
        self._key_class_name = Text.readString(self._stream)
        self._value_class_name = Text.readString(self._stream)

    if ord(self._version) > 2:
        self._decompress = self._stream.readBoolean()
    else:
        self._decompress = False

    if self._version >= BLOCK_COMPRESS_VERSION:
        self._block_compressed = self._stream.readBoolean()
    else:
        self._block_compressed = False

    # setup compression codec
    if self._decompress:
        if self._version >= CUSTOM_COMPRESS_VERSION:
            codec_class = Text.readString(self._stream)
            self._codec = CodecPool().getDecompressor(codec_class)
        else:
            self._codec = CodecPool().getDecompressor()

    self._metadata = Metadata()
    if self._version >= VERSION_WITH_METADATA:
        self._metadata.readFields(self._stream)

    # Compare numerically, as the ord() check above does; comparing the
    # version character directly against the int 1 is not a version test.
    if ord(self._version) > 1:
        self._sync = self._stream.read(SYNC_HASH_SIZE)

    self._header_end = self._stream.getPos()
示例4: fetch_sentences
def fetch_sentences(basename, lang):
    """Return the sentences stored under ``basename`` for ``lang``.

    ``lang`` is a two-letter language code optionally followed by a
    one-letter transformation suffix: ``m`` (metaphone), ``t``
    (transliteration) or ``e`` (expansion).  Sentences are read from
    ``<basename>/<code>.sentences`` when that file can be opened, and
    otherwise derived from ``<basename>/<code>.txt`` through ``Text``.

    An unsupported ``lang`` fails the leading ``assert``.
    """
    assert lang in ('pl', 'plm',
                    'cu', 'cum', 'cut', 'cue',
                    'el', 'elm', 'elt'), "invalid lang " + lang

    real_lang, transformation = lang[:2], lang[2:]
    basename_with_lang = "%s/%s" % (basename, real_lang)

    try:
        # TODO maybe open ready metaphone files?
        with open("%s.sentences" % basename_with_lang) as sentence_file:
            t = [raw.decode('utf-8').strip() for raw in sentence_file]
    except IOError:
        source_text = Text.from_file("%s.txt" % basename_with_lang,
                                     lang=real_lang)
        t = source_text.as_sentences_flat()

    # An empty suffix matches none of the branches and falls through.
    if transformation == 'm':
        return [metaphone_text(s, lang=real_lang) for s in t]
    if transformation == 't':
        return [translit_pl(s, real_lang) for s in t]
    if transformation == 'e':
        return [expand_cu(s, numbers=True) for s in t]
    return t
示例5: main
def main():
    """Summarise the text read from the module-level ``fname``.

    Parses the text into sentences, writes the three debug dumps, and then
    runs the ``getSMMRY`` summariser.
    """
    constants = Constant.constant()
    source = readfromfile(fname)
    summarizer = Text.text(source)

    summarizer.parseSentences()

    # Debug dumps, in the same order as before.
    summarizer.debugSentencesToFile()
    summarizer.debugSentenceContextToFile()
    summarizer.debugTextFreqToFile()

    # summarizer.getSummary() is left disabled, as in the original.
    summarizer.getSMMRY()
示例6: __init__
def __init__(self, model, visitor, obj=None, config=None):
    """Set up the driver from ``model`` geometry and optional prior state.

    Args:
        model: supplies rows, cols, goto, chars and char0 for the text layer.
        visitor: forwarded to the Generic and GenericSerial initialisers.
        obj: optional object to copy port, baud, command_rate and
            layout_timeout from; defaults are used when it is None.
        config: forwarded to the Generic and GenericSerial initialisers.
    """
    self.model = model
    Generic.__init__(self, visitor, obj, config)
    GenericSerial.__init__(self, visitor, obj, config)
    Text.__init__(
        self,
        rows=model.rows,
        cols=model.cols,
        yres=8,
        xres=6,
        goto=model.goto,
        chars=model.chars,
        char0=model.char0,
    )

    if obj is not None:
        # Carry the serial settings over from the supplied object.
        self.port = obj.port
        self.baud = obj.baud
        self.command_rate = obj.command_rate
        self.layout_timeout = obj.layout_timeout
    else:
        self.port = ''
        self.baud = 19200
        self.command_rate = .0165
        self.layout_timeout = 0

    # Command dispatch plumbing: worker task, queue, and a start timestamp.
    self.command_thread = ThreadedTask(self.command_worker, None, 1)
    self.command_queue = Queue.Queue()
    self.command_time = time.time()
示例7: export_sentences
def export_sentences(input_file, lang, export_type):
    """Print every sentence of ``input_file`` as UTF-8, one per line.

    Paragraph boundaries arrive as the separator '¶'.  When ``export_type``
    is 'hunalign', that separator is printed as '<p>' and every other
    sentence is replaced by the space-joined metaphone of its words.  Any
    other export type prints the sentences unchanged.
    """
    from translit.metaphone import metaphone

    text = Text.from_file(input_file, lang)
    hunalign = export_type == 'hunalign'
    for sentence in text.as_sentences(paragraph_separator='¶'):
        if hunalign and sentence == '¶':
            sentence = '<p>'
        elif hunalign:
            sentence = ' '.join(metaphone(word) for word in sentence.split())
        # Single-argument print behaves the same with or without parentheses.
        print(sentence.encode('utf-8'))
示例8: __init__
def __init__(self, visitor, obj=None, config=None):
    """Set up a 4x20 text display driver, optionally copying prior state.

    Args:
        visitor: application object; stored as ``self.app`` and the source
            of ``self.debug``.  Also forwarded to the base initialisers.
        obj: optional object to copy name, port, baud, layout_timeout,
            layouts and write_rate from; defaults are used when it is None.
        config: forwarded to the Generic and GenericSerial initialisers.
    """
    Generic.__init__(self, visitor, obj, config)
    GenericSerial.__init__(self, visitor, obj, config)
    Text.__init__(self, rows=4, cols=20, yres=8, xres=5, goto=2, chars=8,
                  char0=0)

    # Identity test: `== None` would defer to any __eq__ that obj defines.
    if obj is None:
        self.name = 'noname'
        self.port = ''
        self.baud = 19200
        # Default layout timeout. 0 = no transitions. Override at layout level.
        self.layout_timeout = 0
        self.layouts = {}
        self.write_rate = .0165
    else:
        self.name = obj.name
        self.port = obj.port
        self.baud = obj.baud
        self.layout_timeout = obj.layout_timeout
        self.layouts = obj.layouts
        self.write_rate = obj.write_rate

    self.app = visitor
    self.debug = visitor.debug
    self.AddFunction("backlight", 0, self.my_backlight)

    # The writer thread is created here but not started.
    self.write_thread = threading.Thread(target=self.write_worker)
    self.write_active = False
    self.write_queue = Queue.Queue()
示例9: __init__
def __init__(self, darea, rows=4, cols=20, config=None):
    """Initialise the on-screen LCD emulation drawn on ``darea``.

    Loads the font table from 'cfa635_fonts.dat' in the current working
    directory, resets the emulator state, hooks the expose and button-press
    events of ``darea``, and then initialises the Generic and Text bases.

    Args:
        darea: drawing-area widget that the display is rendered on.
        rows: number of text rows.
        cols: number of text columns.
        config: stored as ``self.config`` and passed to ``Generic.__init__``.
    """
    # Context manager so the handle is closed even if unpickling fails.
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only safe while the font file is a trusted, shipped asset.
    with open('cfa635_fonts.dat', 'r') as fonts_file:
        self.lcd_fonts = pickle.load(fonts_file)

    self.ch_data = []
    self.fontP = None
    self.interface = None
    self.current_state = None
    self.previous_state = None
    self.mode_flag = 0
    self.data_latch = None
    self.data_latch_phase = None
    self.debug = False
    self.cursor = {'row': 0, 'col': 0}
    self.lcd_gc = None
    self.rows = rows
    self.cols = cols
    self.dots = {'x': 6, 'y': 8}  # Resolution in lcd pixels. e.g. 5x7
    self.pixels = {'x': 4, 'y': 4}  # Resolution in crt pixels - scaled
    self.contrast = None
    self.dot_color = None
    self.title = None
    self.window = None
    self.w_width = None
    self.w_height = None
    self.disp_type = 0
    self.border = 5

    # Assigned once; the earlier placeholder `self.darea = None` was never
    # read before being overwritten here.
    self.darea = darea
    self.darea.connect('expose-event', self.lcd_expose_event, self)
    self.darea.connect('button-press-event', self.cursor_event)
    self.bg_color = gtk.gdk.color_parse("#78a878")
    self.fg_color = gtk.gdk.color_parse("#113311")
    self.config = config

    Generic.__init__(self, None, config)
    Text.__init__(self, rows=rows, cols=cols, yres=8, xres=6, goto=0,
                  chars=8, char0=0)
示例10: _writeFileHeader
def _writeFileHeader(self):
    """Write the file header to ``self._stream``.

    Order: VERSION, key class name, value class name, the two compression
    flags, the codec class name (only when a codec is set), the metadata
    block, and the sync marker.
    """
    stream = self._stream
    stream.write(VERSION)

    key_class_name = self.getKeyClassName()
    Text.writeString(stream, key_class_name)
    value_class_name = self.getValueClassName()
    Text.writeString(stream, value_class_name)

    stream.writeBoolean(self._compress)
    stream.writeBoolean(self._block_compress)

    if self._codec:
        # The codec name written is fixed, whatever codec object is set.
        codec_class_name = 'org.apache.hadoop.io.compress.DefaultCodec'
        Text.writeString(stream, codec_class_name)

    self._metadata.write(stream)
    stream.write(self._sync)
示例11: _writeFileHeader
def _writeFileHeader(self):
    """Write the file header to ``self._stream``.

    Order: VERSION, key class name, value class name, the two compression
    flags, the class name of the configured codec (only when a codec is
    set), the metadata block, and the sync marker.
    """
    stream = self._stream
    stream.write(VERSION)

    key_class_name = self.getKeyClassName()
    Text.writeString(stream, key_class_name)
    value_class_name = self.getValueClassName()
    Text.writeString(stream, value_class_name)

    stream.writeBoolean(self._compress)
    stream.writeBoolean(self._block_compress)

    if self._codec:
        # The name is derived from the class of the configured codec.
        codec_class_name = hadoopClassName(self._codec.__class__)
        Text.writeString(stream, codec_class_name)

    self._metadata.write(stream)
    stream.write(self._sync)
示例12: __guess_key
def __guess_key(self, min_len=1, max_len=9, display=False):
    """Guess the cipher key by per-column letter-frequency analysis.

    The key length comes from ``guess_key_length``.  For each key position,
    the ciphertext letters at that position are tried against every key
    letter A-Z, and the letter whose decryption is closest to the reference
    frequency table is kept.  A key that is an exact repetition of a shorter
    chunk is then reduced to that chunk.

    Returns:
        The guessed key passed through ``Text.clean_string``, or -1 when no
        key length was found.
    """
    keylen = self.guess_key_length(min_len, max_len, display)
    if keylen == 0:
        print("[!] No key length found.")
        return -1
    if display:
        print("[*] Most probable key length : " + str(keylen) + "\n")

    # Reference letter frequencies in percent (table name suggests French).
    freq_fr = {
        'e': 14.715, 's': 7.948, 'a': 7.636, 'i': 7.529, 't': 7.244,
        'n': 7.095, 'r': 6.553, 'u': 6.311, 'l': 5.456, 'o': 5.378,
        'd': 3.669, 'c': 3.260, 'p': 3.021, 'm': 2.968, 'v': 1.628,
        'q': 1.362, 'f': 1.066, 'b': 0.901, 'g': 0.866, 'h': 0.737,
        'j': 0.545, 'x': 0.387, 'y': 0.308, 'z': 0.136, 'w': 0.114,
        'k': 0.049,
    }

    first_code = ord('A')
    key_letters = []
    for position in range(keylen):
        # Every keylen-th character, starting at this key position.
        column = ''.join([self._s[keylen*row + position]
                          for row in range(self._len//keylen)])
        sub_alphabet = VigenereCipher(column)

        min_differential = 99999
        password_letter = ""
        for code in range(first_code, first_code + 26):
            candidate = chr(code)
            sub_alphabet.key = candidate
            decrypted = VigenereCipher(sub_alphabet.decipher().tostring())
            # Re-encipher after each trial decipher, exactly as before.
            sub_alphabet.encipher()

            freq_s = {k: round((v/decrypted.len)*100, 3)
                      for k, v in dict(decrypted.get_frequencies()).items()}
            differential = sum([abs(freq_fr[k.lower()]-v)
                                for k, v in freq_s.items()])
            if differential < min_differential:
                min_differential = differential
                password_letter = candidate
        key_letters.append(password_letter)
    password = "".join(key_letters)

    # Little hack for repetitive password due to frequency analysis
    for size in range(1, len(password)):
        if len(password) % size != 0:
            continue
        chunks = [password[n*size:(n+1)*size]
                  for n in range(len(password)//size)]
        if all(chunk == chunks[0] for chunk in chunks):
            password = chunks[-1]
            if display:
                print("[*] [UPDATE] Most probable key length : " + str(len(password)) + "\n")
            break

    return Text.clean_string(password)
示例13: write
def write(self, data_output):
    """Write ``self._meta`` to ``data_output``.

    Emits the number of entries as an integer, then each key followed by
    its value via ``Text.writeString``.
    """
    entries = self._meta
    data_output.writeInt(len(entries))

    write_string = Text.writeString
    for name, content in entries.iteritems():
        write_string(data_output, name)
        write_string(data_output, content)
示例14: open
if __name__ == '__main__':
    # Command line: ALIGNMENT_FILE [SENTENCES_1 SENTENCES_2]
    #
    # With two extra arguments, the sentence sequences are read line by line
    # from those files.  Otherwise both are derived from the alignment file
    # name, which must look like "<dir/>xx-yy.<ext>": the texts are then
    # "<dir/>xx.txt" and "<dir/>yy.txt".
    import sys
    import re
    from Text import Text

    try:
        alignment_filename = sys.argv[1]
        if sys.argv[2:]:
            # Raises ValueError unless exactly two file names were given.
            [fn1, fn2] = sys.argv[2:]
            assert not fn1.endswith('.txt')
            assert not fn2.endswith('.txt')
            with open(fn1) as f:
                seq1 = [l.decode('utf-8').strip() for l in f.readlines()]
            with open(fn2) as f:
                seq2 = [l.decode('utf-8').strip() for l in f.readlines()]
        else:
            m = re.match(r'(.*/)?(..)-(..)\.(.*)$', alignment_filename)
            if m is None:
                # Show the usage text instead of failing on None.group().
                raise ValueError("cannot derive text names from %r"
                                 % (alignment_filename,))
            # The directory group is optional and is None when absent;
            # formatting None would produce a path such as "Nonepl.txt".
            directory = m.group(1) or ''
            t1 = Text.from_file("%s%s.txt" % (directory, m.group(2)),
                                lang=m.group(2))
            t2 = Text.from_file("%s%s.txt" % (directory, m.group(3)),
                                lang=m.group(3))
            seq1 = t1.as_sentences_flat()
            seq2 = t2.as_sentences_flat()
        a = Alignment.from_file(alignment_filename)
        a.pretty_print(seq1, seq2)
        print("Total cost: " + str(sum(c for (_, _, c) in a.data)))
    except (IndexError, ValueError):
        # Must be a tuple: "except IndexError, ValueError" caught only
        # IndexError and rebound the name ValueError to the exception.
        # NOTE: UnicodeDecodeError is a ValueError subclass, so undecodable
        # input files also end up here.
        sys.stderr.write("%s\n" % (__doc__,))
    except IOError as e:
        sys.stderr.write("%s\n" % (e,))
示例15: __init__
def __init__(self, ownerDocument, data):
    """Build the node as a Text node, then label it as a CDATA section.

    The node name is written straight into the instance dictionary under
    the literal key '__nodeName'.
    """
    Text.__init__(self, ownerDocument, data)
    attributes = self.__dict__
    attributes['__nodeName'] = "#cdata-section"