本文整理汇总了Python中pyth.plugins.plaintext.writer.PlaintextWriter类的典型用法代码示例。如果您正苦于以下问题：Python PlaintextWriter类的具体用法？Python PlaintextWriter怎么用？Python PlaintextWriter使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PlaintextWriter类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: read_rtf_text
def read_rtf_text(fp, errors='strict', encoding='utf-8'):
    """Return the text-like content of an RTF stream as a decoded string.

    ``fp`` is handed to ``CustomRtf15Reader.read`` together with ``errors``.
    The content of every paragraph is narrowed down to the items accepted by
    ``paragraph_is_text_like``, the document is rendered with
    ``PlaintextWriter`` and the rendered output is decoded with ``encoding``.
    """
    document = CustomRtf15Reader.read(fp, errors=errors)
    for paragraph in document.content:
        paragraph.content = filter(paragraph_is_text_like, paragraph.content)
    rendered = PlaintextWriter.write(document)
    raw_text = rendered.read()
    return raw_text.decode(encoding)
示例2: convert_to_txt
def convert_to_txt(file_path):
    """Extract the text of the file at ``file_path``.

    The loader is chosen from the extension reported by ``_get_extension``:
    ``.txt`` goes through ``open_with_correct_encoding``, ``.docx`` through
    ``_docx_to_txt``, ``.rtf`` through pyth, and anything else is read with
    ``codecs.open`` using ``ENCODING_UTF_8``.

    Returns the extracted text; a ``.txt`` file that cannot be read yields
    ``""``.  Raises ``OSError`` (from ``os.stat``) when ``file_path`` does not
    exist.
    """
    logger.debug("convert_to_txt: %s", file_path)
    if not os.path.exists(file_path):
        # Logged for diagnostics only; os.stat() below still raises.
        logger.error("missing file %s", file_path)
    file_size = os.stat(file_path).st_size
    logger.debug("convert_to_txt: %d bytes at %s", file_size, file_path)
    ext = _get_extension(file_path)
    if ext == '.txt':
        logger.debug("loading txt file")
        try:
            # Only the text is used; the detected encoding and the handle are ignored.
            _encoding, _file_handle, words = open_with_correct_encoding(file_path)
        except Exception:
            logger.error("Wasn't able to read the words from the file %s", file_path)
            words = ""
    elif ext == '.docx':
        logger.debug("loading docx file")
        words = _docx_to_txt(file_path)
    elif ext == '.rtf':
        logger.debug("loading rtf file")
        # Close the source file as soon as pyth has parsed it.
        with open(file_path) as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
        words = PlaintextWriter.write(doc).getvalue()
    else:
        # Use the module logger like every other message in this function.
        logger.warning("Couldn't find an extension on the file, so assuming text")
        with codecs.open(file_path, 'r', ENCODING_UTF_8) as myfile:
            words = myfile.read()
    logger.debug("loaded %d chars", len(words))
    return words
示例3: GetExternal
def GetExternal(version, odl_data, source, class_id):
    """Return the plain text of the "_Art1_RTF" attribute of ``version``.

    ``version[2]`` is scanned for items whose first two fields are
    ``"Attribute"`` and ``"_Art1_RTF"``.  A two-element payload names an
    external RTF file, read from ``source`` (a ``ZipFile`` or a directory
    path); a one-element payload carries the RTF data inline.  The RTF is
    rendered with pyth, double newlines are collapsed to single ones, and the
    result is passed through ``ReplaceTextNames``.

    Returns ``""`` as soon as a matching item has empty data.
    """
    external = ""
    for item in version[2]:
        if item[0] != "Attribute" or item[1] != "_Art1_RTF":
            continue
        payload = item[2]
        if len(payload) == 2:
            if isinstance(source, ZipFile):
                # ZipFile.read() opens, reads and closes the member in one call.
                data = source.read(payload[0])
            else:
                with open(join(source, payload[0]), 'rb') as rtf_file:
                    data = rtf_file.read()
            # NOTE(review): assumed to apply to both file sources -- confirm.
            data = data.replace("\x0c", "")
        elif len(payload) == 1:
            data = payload[0]
        # NOTE(review): a payload of any other length leaves ``data`` unset
        # (or stale from a previous item) -- confirm whether that can occur.
        if data == "":
            return ""
        rtf_buffer = StringIO()
        rtf_buffer.write(data)
        doc = Rtf15Reader.read(rtf_buffer, clean_paragraphs=False)
        external = PlaintextWriter.write(doc).getvalue()
        external = external.replace("\n\n", "\n")
    return ReplaceTextNames(external, version, odl_data, class_id)
示例4: read_recommendations
def read_recommendations(self, file_name):
    """
    Read the targeted values from an RTF file such as
    "WHO Daily Recommended Values.rtf".

    The rendered text is split into entries on blank lines and every entry
    is split on commas: the part of the first field before any "(" is the
    nutrient name and the second field is its value.  The resulting mapping
    is also stored on ``self.target_values``.

    :param file_name: path of the RTF file to read
    :return: tuple ``(target, filtered_col)`` -- the name -> value dictionary
        and the list of names in the order they were read
    :raises IndexError: if an entry contains no comma
    """
    target = {}
    filtered_col = []
    # Close the source file as soon as pyth has parsed it.
    with open(file_name) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    entities = PlaintextWriter.write(doc).getvalue().split('\n\n')
    for item in entities:
        fields = item.split(',')
        name = fields[0].split('(')[0]
        target[name] = fields[1]
        filtered_col.append(name)
    self.target_values = target
    return target, filtered_col
示例5: upload
def upload(request):
    """Handle the three stages of the upload view.

    * A request carrying an uploaded ``file`` stores it under
      ``shake_v3/static/data/``, converts PDF and RTF uploads to text, and
      answers with the JSON-encoded result of ``generate_term_dict`` plus the
      ``fp`` path of the stored document.
    * A request carrying POST data is scored with ``custom_POST_to_score``
      and answered with the matching letter rating.
    * Any other request renders ``upload.html`` with a score of 0.
    """
    # user uploads a document -> convert into a dict of the terms found
    if request.FILES:
        if 'file' in request.FILES:
            f = request.FILES['file']
            fp = 'shake_v3/static/data/' + str(f)
            fp2 = fp[:-3] + 'txt'
            extension = fp[-3:]
            if extension == 'pdf':
                with open(fp, 'wb+') as pdff:
                    for chunk in f.chunks():
                        pdff.write(chunk)
                result = pdf_to_txt(fp)
                with open(fp2, 'wb+') as txtf:
                    txtf.write(result)
            elif extension == 'rtf':
                with open(fp, 'wb+') as rtff:
                    for line in f:
                        rtff.write(line)
                # Re-open for reading and close once pyth has parsed it.
                with open(fp, 'rb') as rtff:
                    doc = Rtf15Reader.read(rtff)
                doctxt = PlaintextWriter.write(doc).getvalue()
                with open(fp2, 'wb+') as txtf:
                    # A single write instead of one write per character.
                    txtf.write(doctxt)
                # The response points at the converted text file.
                f = str(f)[:-4] + ".txt"
                result = doctxt
            else:
                with open(fp2, 'wb+') as txtf:
                    for line in f:
                        txtf.write(line)
                with open(fp2, 'r') as txtf:
                    result = txtf.read()
            response_dict = generate_term_dict(result)
            response_dict['fp'] = 'static/data/' + str(f)
            return HttpResponse(simplejson.dumps(response_dict), mimetype='application/javascript')
        # NOTE(review): FILES without a 'file' entry falls through and
        # returns None -- confirm whether that case needs a response.
    # user indicates terms -> give a grade
    elif request.POST:
        # TODO: implement saving the data
        score = custom_POST_to_score(request)
        return HttpResponse(_rating_for_score(score))
    # display the upload part 1
    else:
        return render_to_response('upload.html', {'score': 0}, context_instance=RequestContext(request))


def _rating_for_score(score):
    """Map a numeric score to its letter rating, from 'A+' down to 'F'."""
    thresholds = (
        (4.5, 'A+'),
        (4, 'A'),
        (3.5, 'B+'),
        (3, 'B'),
        (2.5, 'C+'),
        (2, 'C'),
        (1, 'D'),
    )
    # Thresholds are ordered from highest to lowest; the first one exceeded wins.
    for lower_bound, rating in thresholds:
        if score > lower_bound:
            return rating
    return 'F'
示例6: analyze
def analyze(committeeFile):
    """Extract the participants of a committee RTF file and write them out.

    The file is parsed with pyth; when parsing fails it is copied from
    ``global_options.indir`` to ``global_options.errdir`` and ``False`` is
    returned.  Otherwise the participants returned by ``find_participants``
    are marked as speakers for every line whose text before the first ":"
    occurs in their name, and one line per participant is written to the
    matching path under ``global_options.outdir``.

    Returns ``True`` once the participants file has been written.
    """
    try:
        with open(committeeFile, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        print("%s - skipped..." % committeeFile)
        errFile = committeeFile.replace(global_options.indir, global_options.errdir)
        shutil.copyfile(committeeFile, errFile)
        return False

    docstring = PlaintextWriter.write(doc).getvalue()
    # Dump the extracted text to test.out; the in-memory copy is used below.
    with open("test.out", 'w') as dump:
        dump.write(docstring)

    participants = find_participants(docstring)

    # Getting the indication whether the participant spoke in the committee
    for line in docstring.splitlines():
        if ":" not in line:
            continue
        speaker = line.split(":")[0]
        for p in participants:
            if speaker in p['name']:
                p['speaker'] = True
                p['speak_count'] += 1

    fname = committeeFile.replace(global_options.indir, global_options.outdir)
    # NOTE(review): this replaces every "rtf" in the path, not only the
    # extension -- confirm that no directory name contains "rtf".
    fname = fname.replace("rtf", "txt")
    with codecs.open(fname, "w", "utf-8") as out_file:
        for participant in participants:
            string_builder = []
            for key, val in participant.items():
                if isinstance(val, str):
                    # Strip quotes so they cannot break the quoted output format.
                    val = val.replace("'", "").replace('"', '')
                string_builder.append(u"'%s': '%s'" % (key, print_unicode(val)))
            wrt_ln = ', '.join(string_builder) + ',\n'
            try:
                out_file.write(wrt_ln)
            except UnicodeEncodeError:
                # Fall back to the console for lines the file cannot encode.
                print(wrt_ln)
    verbose("Generated participants file: " + fname)
    return True
示例7: extract_terms
def extract_terms(rtffile):
    """Return every fourth line of the plain text rendered from ``rtffile``.

    ``rtffile`` is passed straight to ``PlaintextWriter.write``.  The rendered
    text is split on newlines and the lines at indexes 0, 4, 8, ... are
    returned as a list.
    """
    plain_text = PlaintextWriter.write(rtffile).getvalue()
    all_lines = plain_text.split('\n')
    return all_lines[::4]
示例8: load_stickies
def load_stickies(path):
    """Return the plain text of every note in the sticky database at ``path``.

    The file content is split into RTF notes by ``parse_sticky_database``;
    each note is parsed with ``Rtf15Reader`` and rendered with
    ``PlaintextWriter``.  The texts are returned as a list, in database order.
    """
    with open(path) as database:
        raw_notes = parse_sticky_database(database.read())
        return [
            PlaintextWriter.write(
                Rtf15Reader.read(StringIO.StringIO(note))
            ).getvalue()
            for note in raw_notes
        ]
示例9: get_rtf_text
def get_rtf_text(path):
    """
    Take the path of an rtf file as an argument and return the text.

    The file is closed as soon as pyth has parsed it; the text is whatever
    ``PlaintextWriter`` renders for the parsed document.
    """
    with open(path) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return PlaintextWriter.write(doc).getvalue()
示例10: readRtf
def readRtf(self, path):
    """Return the plain text of the RTF file at ``path``.

    When the file cannot be opened or parsed, the failure is reported through
    ``self._log`` and a fixed apology string is returned instead of raising.
    """
    try:
        # Open inside the try so that a missing file is handled like a bad one.
        with open(path, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        self._log("Some screwy rtf shit going on with " + path)
        return "Can't process ur shitty rtf <3 dfbot"
    return PlaintextWriter.write(doc).getvalue()
示例11: parse
def parse(self, path):
    """Build a ``Sample`` from the RTF file at ``path``.

    The sample is created with ``path``, ``None`` and the plain text rendered
    from the file.

    :raises NotImplementedError: if ``path`` is a directory
    """
    # Directory
    if os.path.isdir(path):
        raise NotImplementedError()
    # File
    with open(path) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return Sample(path, None, PlaintextWriter.write(doc).getvalue())
示例12: test_read2
def test_read2(self):
    """Hex-escaped bytes in an RTF body (font charset 222) render as the
    expected UTF-8 text."""
    rtf = StringIO("""{\\rtf1\\ansi\\ansicpg1252\\cocoartf1343\\cocoasubrtf160\\cocoascreenfonts1{\\fonttbl\\f0\\fnil\\fcharset222 Thonburi;}
{\\colortbl;\\red255\\green255\\blue255;}
\\pard\\tx560\\tx1120\\tx1680\\tx2240\\tx2800\\tx3360\\tx3920\\tx4480\\tx5040\\tx5600\\tx6160\\tx6720\\pardirnatural\\qc
{\\f0\\fs24 \\cf0 \\'b9\\'e9\\'d3\\'b5\\'a1}""")
    doc = Rtf15Reader.read(rtf)
    text = PlaintextWriter.write(doc).read()
    print(text)
    # assertEqual is the supported name; assertEquals is a deprecated alias.
    self.assertEqual(u"น้ำตก", text.decode('utf8'))
示例13: clean_rtf
def clean_rtf(fname):
    """Parse the RTF file ``fname`` into rows of semicolon-separated values.

    Empty lines of the rendered text are dropped, every remaining line is
    split on ";" and the first and last character of each value are removed.

    Returns a list of rows, each a list of strings.
    """
    with open(fname) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    plain = PlaintextWriter.write(doc).getvalue()
    return [
        [val[1:-1] for val in line.split(";")]
        for line in plain.split("\n")
        if line
    ]
示例14: _rtf_to_txt
def _rtf_to_txt(file_path, dst_dir, file_name):
    """
    Uses the pyth python module to extract text from a rtf file and save
    to .txt in dst_dir.

    When ``file_name`` is None the base name of ``file_path`` is used.  A
    trailing ".rtf" in the name is replaced by ".txt", and the extracted text
    is passed through ``unidecode`` before it is written.

    Returns 0.
    """
    if file_name is None:
        file_name = os.path.split(file_path)[1]
    file_dst = os.path.join(dst_dir, re.sub(r'\.rtf$', '.txt', file_name))
    # Close the source file as soon as pyth has parsed it.
    with open(file_path) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    txt = unidecode(PlaintextWriter.write(doc).getvalue())
    with open(file_dst, 'w') as f:
        f.write(txt)
    return 0
示例15: _convert_rtf_to_text
def _convert_rtf_to_text(self, password=None):
    """Render ``self.cvFile`` (RTF) as plain text into ``self.scratchDir``.

    The output name is the input base name without its last dot-separated
    component, followed by the current Unix time in whole seconds and
    ".txt".  The output path is stored on ``self.cvTextFile``.  ``password``
    is accepted but not used.

    Returns 0.
    """
    input_rtf = self.cvFile
    # Close the source file as soon as pyth has parsed it.
    with open(input_rtf) as rtf_file:
        rtf = Rtf15Reader.read(rtf_file)
    # Drop the last dot-separated component (the extension) of the base name.
    stem = ".".join(os.path.basename(input_rtf).split(".")[:-1])
    timestamp = str(int(time.time()))
    self.cvTextFile = self.scratchDir + os.path.sep + stem + timestamp + r".txt"
    with open(self.cvTextFile, "w") as fw:
        fw.write(PlaintextWriter.write(rtf).getvalue())
    return 0