本文整理汇总了 Python 中 pyth.plugins.rtf15.reader.Rtf15Reader.read 方法的典型用法代码示例。如果您正苦于以下问题：Python Rtf15Reader.read 方法的具体用法？Python Rtf15Reader.read 怎么用？Python Rtf15Reader.read 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pyth.plugins.rtf15.reader.Rtf15Reader 的用法示例。
在下文中一共展示了 Rtf15Reader.read 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: rtf
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def rtf(f):
    """Return the text of the RTF file at path ``f`` as a single string.

    The file is parsed with ``Rtf15Reader``; the content of every text run
    of every top-level element is concatenated with no separator.
    """
    # Open inside ``with`` so the handle is closed even if parsing fails.
    with open(f, "rb") as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return "".join(
        "".join(text.content)
        for element in doc.content
        for text in element.content
    )
示例2: get_one_month_from_rtf
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def get_one_month_from_rtf(url):
    """Download the RTF document at ``url`` and return its table as a DataFrame.

    Paragraphs are scanned until one whose repr contains both "capacity" and
    "use cna" (case-insensitive) is found; that paragraph is treated as the
    table header.  Every later paragraph whose first text piece splits into
    exactly seven tab-separated fields becomes one row.

    :param url: address passed to ``urllib2.urlopen``.
    :return: DataFrame with the columns ``prison_name``, ``baseline_cna``,
        ``in_use_cna``, ``operational_capacity``, ``population``,
        ``perc_pop_to_used_cna`` and ``perc_acc_available``; the ``perc_*``
        columns are divided by 100.
    """
    response = urllib2.urlopen(url)
    try:
        rtf_file = StringIO(response.read())
    finally:
        # Close the network handle explicitly instead of leaking it.
        response.close()
    doc = Rtf15Reader.read(rtf_file)

    final_data = []
    header = False
    for paragraph in doc.content:
        full_p = repr(paragraph.content).lower()
        if "capacity" in full_p and "use cna" in full_p:
            header = True
            continue
        if header:
            row = paragraph.content[0].content[0].split("\t")
            if len(row) == 7:
                final_data.append(row)

    df = pd.DataFrame(final_data, columns=["prison_name", "baseline_cna", "in_use_cna", "operational_capacity", "population", "perc_pop_to_used_cna", "perc_acc_available"])
    # Strip percent signs and thousands separators from every column but the name.
    df.iloc[:, 1:] = df.iloc[:, 1:].replace("%", "", regex=True).replace(",", "", regex=True)
    for column in df.columns:
        # Same effect as the deprecated ``errors='ignore'``: a column that
        # cannot be parsed as numbers is left untouched.
        try:
            df[column] = pd.to_numeric(df[column])
        except (ValueError, TypeError):
            pass
    cols = [column for column in df.columns if "perc" in column]
    df.loc[:, cols] = df.loc[:, cols] / 100
    return df
示例3: convert_to_txt
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def convert_to_txt(file_path):
    """Load the document at ``file_path`` and return its text.

    The loader is chosen from the extension reported by ``_get_extension``:
    ``.txt`` goes through ``open_with_correct_encoding``, ``.docx`` through
    ``_docx_to_txt``, ``.rtf`` through ``Rtf15Reader``/``PlaintextWriter``;
    anything else is read as UTF-8 text.

    :param file_path: path of the file to load.
    :return: the extracted text; ``""`` when a ``.txt`` file cannot be read.
    :raises OSError: when ``file_path`` does not exist (raised by ``os.stat``
        after the error has been logged).
    """
    logger.debug("convert_to_txt: %s", file_path)
    words = None
    if not os.path.exists(file_path):
        logger.error("missing file %s", file_path)
    file_size = os.stat(file_path).st_size
    logger.debug("convert_to_txt: %d bytes at %s", file_size, file_path)
    ext = _get_extension(file_path)
    if ext == '.txt':
        logger.debug("loading txt file")
        try:
            # NOTE(review): the returned file handle is never closed here --
            # confirm whether open_with_correct_encoding leaves it open.
            _encoding, _file_handle, words = open_with_correct_encoding(file_path)
        except Exception:
            # Deliberate best-effort: an unreadable text file yields "".
            logger.error("Wasn't able to read the words from the file %s", file_path)
            words = ""
    elif ext == '.docx':
        logger.debug("loading docx file")
        words = _docx_to_txt(file_path)
    elif ext == '.rtf':
        logger.debug("loading rtf file")
        # Binary mode, and closed deterministically once parsing is done.
        with open(file_path, 'rb') as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
        words = PlaintextWriter.write(doc).getvalue()
    else:
        # Use the module logger like every other message in this function.
        logger.warning("Couldn't find an extension on the file, so assuming text")
        with codecs.open(file_path, 'r', ENCODING_UTF_8) as myfile:
            words = myfile.read()
    logger.debug("loaded %d chars", len(words))
    return words
示例4: GetExternal
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def GetExternal(version, odl_data, source, class_id):
    """Return the plain text of the ``_Art1_RTF`` attribute of ``version``.

    ``version[2]`` is scanned for items whose first two fields are
    ``"Attribute"`` and ``"_Art1_RTF"``.  The third field of such an item
    either holds the RTF text itself (one element) or names a file that
    contains it (two elements, the first being the file name).  Named files
    are read from ``source``, which is either a ``ZipFile`` or a directory
    path.  If several items match, the last one wins.

    :return: ``""`` as soon as a matching item carries empty data; otherwise
        the result of ``ReplaceTextNames`` applied to the extracted text
        (which is ``""`` when no item matched).
    """
    external = ""
    for item in version[2]:
        if item[0] != "Attribute" or item[1] != "_Art1_RTF":
            continue
        payload = item[2]
        if len(payload) == 2:
            if isinstance(source, ZipFile):
                # ZipFile.read opens and closes the member for us.
                data = source.read(payload[0])
            else:
                with open(join(source, payload[0]), 'rb') as rtf_file:
                    data = rtf_file.read()
            # Drop form-feed characters before parsing.
            data = data.replace("\x0c", "")
        elif len(payload) == 1:
            data = payload[0]
        else:
            # Unknown payload shape: skip it rather than hit an unbound (or
            # stale, from a previous iteration) ``data``.
            continue
        if data == "":
            return ""
        doc = Rtf15Reader.read(StringIO(data), clean_paragraphs=False)
        external = PlaintextWriter.write(doc).getvalue()
        external = external.replace("\n\n", "\n")
    return ReplaceTextNames(external, version, odl_data, class_id)
示例5: upload
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def _rating_for_score(score):
    """Map a numeric score to its letter grade ('A+' down to 'F')."""
    thresholds = (
        (4.5, 'A+'),
        (4, 'A'),
        (3.5, 'B+'),
        (3, 'B'),
        (2.5, 'C+'),
        (2, 'C'),
        (1, 'D'),
    )
    for lower_bound, rating in thresholds:
        if score > lower_bound:
            return rating
    return 'F'


def upload(request):
    """Handle the upload page.

    * Request with an uploaded ``file``: store it under
      ``shake_v3/static/data/``, convert it to text (PDF via ``pdf_to_txt``,
      RTF via pyth, anything else copied as-is) and respond with the JSON
      term dictionary built by ``generate_term_dict``.
    * Request with POST data: respond with the letter grade for the score
      computed by ``custom_POST_to_score``.
    * Otherwise: render ``upload.html`` with a score of 0.
    """
    # user uploads a document -> convert into a dict of the terms found
    if request.FILES:
        # NOTE(review): when files are present but none is named 'file' the
        # view falls through and returns None, exactly as before.
        if 'file' in request.FILES:
            result = ''
            f = request.FILES['file']
            fp = 'shake_v3/static/data/' + str(f)
            # Same path with the last three characters replaced by "txt".
            fp2 = fp[:-3] + 'txt'
            extension = fp[-3:]
            if extension == 'pdf':
                with open(fp, 'wb+') as pdff:
                    for chunk in f.chunks():
                        pdff.write(chunk)
                result = pdf_to_txt(fp)
                with open(fp2, 'wb+') as txtf:
                    txtf.write(result)
            elif extension == 'rtf':
                with open(fp, 'wb+') as rtff:
                    for line in f:
                        rtff.write(line)
                # Close the handle after parsing instead of leaking it.
                with open(fp, 'rb') as rtff:
                    doc = Rtf15Reader.read(rtff)
                doctxt = PlaintextWriter.write(doc).getvalue()
                with open(fp2, 'wb+') as txtf:
                    # One write instead of one write per character.
                    txtf.write(doctxt)
                # The response should point at the converted .txt file.
                f = str(f)[:-4] + ".txt"
                result = doctxt
            else:
                with open(fp2, 'wb+') as txtf:
                    for line in f:
                        txtf.write(line)
                with open(fp2, 'r') as txtf:
                    result = txtf.read()
            response_dict = generate_term_dict(result)
            response_dict['fp'] = 'static/data/' + str(f)
            return HttpResponse(simplejson.dumps(response_dict), mimetype='application/javascript')
    # user indicates terms -> give a grade
    elif request.POST:
        # TODO: implement saving the data
        score = custom_POST_to_score(request)
        return HttpResponse(_rating_for_score(score))
    # display the upload part 1
    else:
        score = 0
        return render_to_response('upload.html', {'score': score}, context_instance=RequestContext(request))
示例6: compute
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def compute(self):
    """ compute() -> None
    Send the contents of the "File" input to a rich-text spreadsheet cell.

    The "Format" input selects how the file is interpreted: 'html' content
    is passed through unchanged, 'rtf' content is converted to XHTML with
    the pyth library.  Any other format raises ModuleError.
    """
    path = self.get_input("File").name
    text_format = self.get_input("Format")
    with open(path, 'rb') as stream:
        if text_format == 'rtf':
            try:
                py_import('pyth', {'pip': 'pyth'})
            except ImportError:
                raise ModuleError(self, "'rtf' format requires the pyth "
                                        "Python library")
            # pyth is importable from here on.
            from pyth.plugins.rtf15.reader import Rtf15Reader
            from pyth.plugins.xhtml.writer import XHTMLWriter
            converted = XHTMLWriter.write(Rtf15Reader.read(stream))
            markup = converted.read()  # bytes
        elif text_format == 'html':
            markup = stream.read()  # bytes
        else:
            raise ModuleError(self, "'%s' format is unknown" % text_format)
    self.displayAndWait(RichTextCellWidget, (markup,))
示例7: read_recommendations
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def read_recommendations(self, file_name):
    """
    Read the targeted values from an RTF file such as
    "WHO Daily Recommended Values.rtf".

    The document is converted to plain text and split on blank lines; each
    entry is expected to look like ``Name(unit),value``.  The text before the
    first ``(`` is the nutrient name and the second comma-separated field is
    its value.  Entries without a comma (for example a trailing blank entry)
    are skipped.

    The resulting dictionary is also stored on ``self.target_values``.

    :param file_name: path of the RTF file to read
    :return: tuple ``(target, filtered_col)`` -- the name -> value dictionary
        and the list of names in document order
    """
    # Binary mode, and closed as soon as parsing is done.
    with open(file_name, 'rb') as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    entities = PlaintextWriter.write(doc).getvalue().split('\n\n')

    target = {}
    filtered_col = []
    for item in entities:
        fields = item.split(',')
        if len(fields) < 2:
            # No value field: indexing fields[1] would raise IndexError.
            continue
        name = fields[0].split('(')[0]
        target[name] = fields[1]
        filtered_col.append(name)
    self.target_values = target
    return target, filtered_col
示例8: main
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def main():
    '''
    Purpose::
        Extract terms from the RTF file named on the command line and write
        them, one per line and UTF-8 encoded, to ``US_legal_lexicon.txt``.
    Input::
        Command-line arguments as returned by ``parse_arguments()``; the
        ``url`` attribute is used as the path of the RTF file.
    Output::
        Returns 1 when no input was given or the file cannot be read,
        None on success.
    Assumptions::
        ``extract_terms`` yields unicode strings.
    '''
    # Get arguments
    args = parse_arguments()
    url = args.url
    if not url:
        # Previously this fell through to an unbound ``url``.
        print('No input file given')
        return 1
    # Get file and read it into structure
    try:
        with open(url, 'rb') as rtffile:
            judges = extract_terms(Rtf15Reader.read(rtffile))
    except IOError as e:
        # IOError has no ``reason`` attribute; report the exception itself.
        print('An error occured fetching %s \n %s' % (url, e))
        return 1
    # Print data
    with open('US_legal_lexicon.txt', 'w') as f:
        for i in judges:
            f.write(i.encode('utf8') + '\n')
示例9: decode_cell
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def decode_cell(cell):
    '''Convert one matched RTF cell into whitespace-normalised plain text.'''
    # Replace \u / \N escape prefixes (single or doubled backslash) with a
    # space, then resolve the remaining backslash escapes.
    # NOTE(review): presumably done so pyth accepts the input -- confirm.
    escaped = re.sub(r'\\u|\\\\u|\\N|\\\\N', ' ', cell)
    escaped = escaped.decode('unicode_escape')
    # Keep printable characters only (relies on Python 2 ``filter`` returning
    # a string for string input).
    printable_only = filter(lambda ch: ch in string.printable, escaped)
    parsed = Rtf15Reader.read(StringIO(printable_only))
    # Flatten document -> paragraph -> run -> text piece into one list.
    pieces = [
        piece
        for paragraph in parsed.content
        for run in paragraph.content
        for piece in run.content
    ]
    # Join the pieces and strip the underscores left in the text.
    joined = ' '.join(pieces).replace('_', '')
    # Drop the final character, then collapse runs of whitespace.
    return ' '.join(joined[:-1].split())
示例10: analyze
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def analyze(committeeFile):
    """Extract the participants of a committee protocol and write them out.

    The RTF file is converted to plain text, ``find_participants`` builds the
    participant records, and every line containing ``":"`` marks the
    participant(s) whose name contains the text before the first colon as a
    speaker.  One line per participant is then written (UTF-8) to the
    matching path under ``global_options.outdir``.

    :param committeeFile: path of the RTF protocol inside
        ``global_options.indir``.
    :return: True on success; False when the file cannot be parsed, in which
        case it is copied to ``global_options.errdir``.
    """
    try:
        with open(committeeFile, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        print("%s - skipped..." % committeeFile)
        errFile = committeeFile.replace(global_options.indir, global_options.errdir)
        shutil.copyfile(committeeFile, errFile)
        return False

    # Keep the text in memory instead of round-tripping it through a
    # fixed-name scratch file in the working directory.
    text = PlaintextWriter.write(doc).getvalue()
    participants = find_participants(text)

    # Getting the indication whether the participant spoke in the committee
    for line in text.splitlines():
        if ":" not in line:
            continue
        speaker = line.split(":")[0]
        for p in participants:
            if speaker in p['name']:
                p['speaker'] = True
                p['speak_count'] += 1

    fname = committeeFile.replace(global_options.indir, global_options.outdir)
    # NOTE(review): this replaces every "rtf" substring of the path, not only
    # the extension -- confirm that is intended.
    fname = fname.replace("rtf", "txt")
    with codecs.open(fname, "w", "utf-8") as out:
        for participant in participants:
            fields = []
            for key, val in participant.iteritems():
                if val is None:
                    continue
                if isinstance(val, str):
                    # Quotes would break the quoted output format.
                    val = val.replace("'", "").replace('"', '')
                fields.append(u"'%s': '%s'" % (key, print_unicode(val)))
            wrt_ln = ', '.join(fields) + ',\n'
            try:
                out.write(wrt_ln)
            except UnicodeEncodeError:
                # Best effort: show the line that could not be written.
                print(wrt_ln)
    verbose("Generated participants file: " + fname)
    return True
示例11: test_inline_png
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def test_inline_png(self):
    """The inline PNG of the sample RTF is read as an Image with the expected properties."""
    sample_with_image = os.path.join(os.path.abspath(os.path.dirname(__file__)), "rtfs", "sample-with-image.rtf")
    with open(sample_with_image, 'rb') as rtf:
        doc = Rtf15Reader.read(rtf)
    # First top-level node whose first child is an image.
    image = next(node.content[0] for node in doc.content if isinstance(node.content[0], pyth.document.Image))
    expected = {'pngblip': True, 'picw': '20714', 'picwgoal': '750', 'pich': '12143',
                'pichgoal': '750', 'picscaley': '100', 'picscalex': '100'}
    # assertEquals is a deprecated alias; assertEqual is the supported name.
    self.assertEqual(expected, image.properties)
示例12: test_inline_png
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def test_inline_png(self):
    """The XHTML written for the sample RTF embeds the PNG inline at 50x50 px."""
    here = os.path.abspath(os.path.dirname(__file__))
    sample_path = os.path.join(here, "rtfs", "sample-with-image.rtf")
    with open(sample_path, 'rb') as stream:
        parsed = Rtf15Reader.read(stream)
    markup = XHTMLWriter.write(parsed).getvalue()
    expected_fragments = (
        '<img src="data:image/png;base64,',
        'width:50px',
        'height:50px',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, markup)
示例13: load_stickies
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def load_stickies(path):
    """Return the plain text of every sticky found in the database at ``path``.

    The file content is handed to ``parse_sticky_database``; each RTF string
    it produces is parsed with pyth and rendered as plain text.
    """
    with open(path) as database:
        raw = database.read()
        return [
            PlaintextWriter.write(
                Rtf15Reader.read(StringIO.StringIO(sticky_rtf))
            ).getvalue()
            for sticky_rtf in parse_sticky_database(raw)
        ]
示例14: rtf
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def rtf(f):
    """Return the text runs of the RTF file at path ``f`` joined by CRLF."""
    with open(f, "rb") as handle:
        document = Rtf15Reader.read(handle)
    runs = [
        ''.join(run.content)
        for block in document.content
        for run in block.content
    ]
    return '\r\n'.join(runs)
示例15: parse
# 需要导入模块: from pyth.plugins.rtf15.reader import Rtf15Reader [as 别名]
# 或者: from pyth.plugins.rtf15.reader.Rtf15Reader import read [as 别名]
def parse(self, path):
    """Parse the RTF file at ``path`` into a ``Sample``.

    :param path: path of an RTF file.
    :return: ``Sample(path, None, <plain text of the document>)``.
    :raises NotImplementedError: when ``path`` is a directory.
    """
    # Directory
    if os.path.isdir(path):
        raise NotImplementedError()
    # File: binary mode, closed as soon as parsing is done.
    with open(path, 'rb') as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return Sample(path, None, PlaintextWriter.write(doc).getvalue())