当前位置: 首页>>代码示例>>Python>>正文


Python reader.Rtf15Reader类代码示例

本文整理汇总了Python中pyth.plugins.rtf15.reader.Rtf15Reader的典型用法代码示例。如果您正苦于以下问题:Python Rtf15Reader类的具体用法?Python Rtf15Reader怎么用?Python Rtf15Reader使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Rtf15Reader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: rtf

def rtf(f):
    """Return the text of the RTF file at path ``f`` as one string.

    The file is parsed with ``Rtf15Reader``. For every element of the
    document, the ``content`` of each of its text runs is joined, and all
    of those pieces are concatenated with no separator.
    """
    # Use a context manager so the handle is closed even if parsing fails
    # (the previous version left the file open).
    with open(f, "rb") as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return "".join(
        "".join(text.content)
        for element in doc.content
        for text in element.content
    )
开发者ID:jmunoz298,项目名称:libreQDA,代码行数:7,代码来源:text_extraction.py

示例2: get_one_month_from_rtf

def get_one_month_from_rtf(url):
    """Download an RTF report from ``url`` and return its table as a DataFrame.

    The document is scanned paragraph by paragraph. Once a paragraph whose
    repr contains both "capacity" and "use cna" has been seen (the table
    header), the first text piece of every following paragraph is split on
    tabs; rows with exactly seven fields are kept.

    "%" and "," are stripped from every column but the first, each column is
    converted to numeric where possible, and every column whose name contains
    "perc" is divided by 100.
    """
    response = urllib2.urlopen(url)
    try:
        # Buffer the payload so the reader gets a seekable file-like object.
        rtf_file = StringIO(response.read())
    finally:
        # The network handle was previously never closed.
        response.close()
    doc = Rtf15Reader.read(rtf_file)

    final_data = []
    header = False
    for c in doc.content:
        full_p = repr(c.content).lower()
        if "capacity" in full_p and "use cna" in full_p:
            # Header paragraph: start collecting from the next paragraph on.
            header = True
            continue

        if header:
            row = re.split(r"\t", c.content[0].content[0])
            if len(row) == 7:
                final_data.append(row)

    columns = [
        "prison_name",
        "baseline_cna",
        "in_use_cna",
        "operational_capacity",
        "population",
        "perc_pop_to_used_cna",
        "perc_acc_available",
    ]
    df = pd.DataFrame(final_data, columns=columns)

    df.iloc[:, 1:] = df.iloc[:, 1:].replace("%", "", regex=True).replace(",", "", regex=True)

    for c in df.columns:
        # Equivalent to the deprecated ``errors='ignore'``: a column that
        # cannot be parsed as numbers is left untouched.
        try:
            df[c] = pd.to_numeric(df[c])
        except (ValueError, TypeError):
            pass

    cols = [c for c in df.columns if "perc" in c]
    df.loc[:, cols] = df.loc[:, cols] / 100
    return df
开发者ID:RobinL,项目名称:prison_population_viewer,代码行数:32,代码来源:get_one_month.py

示例3: convert_to_txt

def convert_to_txt(file_path):
    """Return the textual content of the document at ``file_path``.

    The loader is chosen from the extension reported by ``_get_extension``:
    ``.txt`` goes through ``open_with_correct_encoding``, ``.docx`` through
    ``_docx_to_txt``, ``.rtf`` through ``Rtf15Reader``/``PlaintextWriter``;
    anything else is read as UTF-8 text.

    A ``.txt`` file that cannot be read yields ``""`` (the failure is logged).
    A missing file is logged and then ``os.stat`` raises ``OSError``.
    """
    logger.debug("convert_to_txt: %s" % file_path)
    words = None
    if not os.path.exists(file_path):
        logger.error("missing file %s", file_path)
    # os.stat raises OSError for a missing path, so the error above is
    # followed by an exception rather than a silent continue.
    file_size = os.stat(file_path).st_size
    logger.debug("convert_to_txt: %d bytes at %s", file_size, file_path)
    ext = _get_extension(file_path)
    if ext == '.txt':
        logger.debug("loading txt file")
        try:
            # NOTE(review): file_handle is returned but never used or closed
            # here - confirm whether open_with_correct_encoding closes it.
            encoding, file_handle, words = open_with_correct_encoding(file_path)
        except Exception:
            logger.error("Wasn't able to read the words from the file %s" % file_path)
            words = ""
    elif ext == '.docx':
        logger.debug("loading docx file")
        words = _docx_to_txt(file_path)
    elif ext == '.rtf':
        logger.debug("loading rtf file")
        # Close the handle once parsing is done instead of leaking it.
        with open(file_path) as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
        words = PlaintextWriter.write(doc).getvalue()
    else:
        # Use the module logger like every other message in this function.
        logger.warning("Couldn't find an extension on the file, so assuming text")
        with codecs.open(file_path, 'r', ENCODING_UTF_8) as myfile:
            words = myfile.read()
    logger.debug("loaded %d chars" % len(words))
    return words
开发者ID:c4fcm,项目名称:DataBasic,代码行数:29,代码来源:filehandler.py

示例4: GetExternal

def GetExternal(version, odl_data, source, class_id):
    """Extract the plain text of the ``_Art1_RTF`` attribute(s) of ``version``.

    ``version[2]`` is iterated; every item whose first two fields are
    ``"Attribute"`` and ``"_Art1_RTF"`` carries the RTF either inline
    (``item[2]`` has one entry) or as a file name (``item[2]`` has two
    entries). A file name is resolved inside ``source``, which is either a
    ``ZipFile`` or a directory path.

    Returns ``""`` as soon as an attribute with empty RTF data is found;
    otherwise returns ``ReplaceTextNames`` applied to the text of the last
    matching attribute (or to ``""`` when none matched).
    """
    external = ""

    for item in version[2]:
        if item[0] != "Attribute" or item[1] != "_Art1_RTF":
            continue

        if len(item[2]) == 2:
            if isinstance(source, ZipFile):
                member = source.open(item[2][0])
                try:
                    data = member.read()
                finally:
                    member.close()
            else:
                with open(join(source, item[2][0]), 'rb') as f:
                    data = f.read()
            # Drop form-feed characters before parsing.
            data = data.replace("\x0c", "")
        elif len(item[2]) == 1:
            data = item[2][0]
        else:
            # Unknown attribute layout: previously this fell through with
            # ``data`` unbound (NameError) or left over from an earlier item.
            continue

        if data == "":
            return ""

        buf = StringIO()
        buf.write(data)
        doc = Rtf15Reader.read(buf, clean_paragraphs = False)
        external = PlaintextWriter.write(doc).getvalue()
        external = external.replace("\n\n", "\n")

    return ReplaceTextNames(external, version, odl_data, class_id)
开发者ID:jeroenk,项目名称:artisanConvert,代码行数:29,代码来源:odl_extract.py

示例5: upload

def upload(request):
    """Handle the upload page: document upload, grading, or initial display.

    * With uploaded files: the ``'file'`` upload is stored under
      ``shake_v3/static/data/``, converted to text (PDF via ``pdf_to_txt``,
      RTF via ``Rtf15Reader``/``PlaintextWriter``, anything else copied as
      is), and the result of ``generate_term_dict`` is returned as JSON with
      an extra ``'fp'`` entry pointing at the stored file.
    * With POST data only: the score from ``custom_POST_to_score`` is mapped
      to a letter grade, which is returned as the response body.
    * Otherwise: ``upload.html`` is rendered with a score of 0.
    """
    # user uploads a document -> convert into a dict of the terms found
    if request.FILES:
        if 'file' not in request.FILES:
            # Previously this path crashed with a NameError because neither
            # ``result`` nor ``f`` had been assigned.
            return HttpResponse("missing 'file' upload", status=400)

        f = request.FILES['file']
        # NOTE(review): the client-supplied name is used to build the path -
        # confirm it is sanitised before it reaches this point.
        fp = 'shake_v3/static/data/' + str(f)
        fp2 = fp[:-3] + 'txt'
        extension = fp[-3:]
        if extension == 'pdf':
            with open(fp, 'wb+') as pdff:
                for chunk in f.chunks():
                    pdff.write(chunk)
            result = pdf_to_txt(fp)
            with open(fp2, 'wb+') as txtf:
                txtf.write(result)
        elif extension == 'rtf':
            with open(fp, 'wb+') as rtff:
                for line in f:
                    rtff.write(line)
            # Re-open for reading inside a context manager so it is closed.
            with open(fp, 'rb') as rtff:
                doc = Rtf15Reader.read(rtff)
            doctxt = PlaintextWriter.write(doc).getvalue()
            with open(fp2, 'wb+') as txtf:
                # One write instead of one write per character.
                txtf.write(doctxt)
            f = str(f)[:-4] + ".txt"
            result = doctxt
        else:
            with open(fp2, 'wb+') as txtf:
                for line in f:
                    txtf.write(line)
            with open(fp2, 'r') as txtf:
                result = txtf.read()
        response_dict = generate_term_dict(result)
        response_dict['fp'] = 'static/data/' + str(f)
        return HttpResponse(simplejson.dumps(response_dict), mimetype='application/javascript')
    # user indicates terms -> give a grade
    elif request.POST:
        #TO DO: implement saving the data
        score = custom_POST_to_score(request)
        # (exclusive lower bound, grade), highest first; anything that does
        # not exceed the last bound is an 'F'.
        grade_floors = (
            (4.5, 'A+'),
            (4, 'A'),
            (3.5, 'B+'),
            (3, 'B'),
            (2.5, 'C+'),
            (2, 'C'),
            (1, 'D'),
        )
        rating = 'F'
        for floor, grade in grade_floors:
            if score > floor:
                rating = grade
                break
        return HttpResponse(rating)
    # display the upload part 1
    else:
        score = 0
        return render_to_response('upload.html', {'score': score}, context_instance = RequestContext(request))
开发者ID:vickimo,项目名称:shakev3,代码行数:60,代码来源:views.py

示例6: compute

    def compute(self):
        """Load the "File" input and show it in a ``RichTextCellWidget``.

        The "Format" input selects how the file is interpreted: ``'html'``
        passes the raw bytes through, ``'rtf'`` converts the document to
        XHTML with pyth. Any other format raises ``ModuleError``, as does a
        missing pyth installation.
        """
        path = self.get_input("File").name
        text_format = self.get_input("Format")

        with open(path, 'rb') as fp:
            if text_format not in ('html', 'rtf'):
                raise ModuleError(self, "'%s' format is unknown" % text_format)

            if text_format == 'html':
                # Raw bytes, handed to the widget unchanged.
                html = fp.read()
            else:
                try:
                    py_import('pyth', {'pip': 'pyth'})
                except ImportError:
                    raise ModuleError(
                        self,
                        "'rtf' format requires the pyth Python library")
                from pyth.plugins.rtf15.reader import Rtf15Reader
                from pyth.plugins.xhtml.writer import XHTMLWriter
                document = Rtf15Reader.read(fp)
                # The writer returns a file-like object; read() yields bytes.
                html = XHTMLWriter.write(document).read()

        self.displayAndWait(RichTextCellWidget, (html,))
开发者ID:hjanime,项目名称:VisTrails,代码行数:25,代码来源:richtext.py

示例7: read_recommendations

 def read_recommendations(self, file_name):
     """
     Read the targeted nutrient values from an RTF file such as
     "WHO Daily Recommended Values.rtf".

     The plain text of the document is split on blank lines; each entry is
     split on commas. The nutrient name is the first field up to (not
     including) any "(" and the value is the second field.

     :param file_name: path of the RTF file to read
     :return: tuple ``(target, filtered_col)`` where ``target`` maps nutrient
              name to value and ``filtered_col`` lists the names in file
              order; ``target`` is also stored on ``self.target_values``
     """
     # Close the handle once parsing is done instead of leaking it.
     with open(file_name) as rtf_file:
         doc = Rtf15Reader.read(rtf_file)
     entities = PlaintextWriter.write(doc).getvalue().split('\n\n')

     target = dict()
     filtered_col = list()
     for item in entities:
         if not item.strip():
             # A blank entry has no "name,value" pair to parse.
             continue
         splited = item.split(',')
         name = splited[0].split('(')[0]
         target[name] = splited[1]
         filtered_col.append(name)
     self.target_values = target
     return target, filtered_col
开发者ID:Basit-qc,项目名称:WHO---Food-Menu,代码行数:27,代码来源:buildmenu.py

示例8: main

def main():
    '''
    Purpose::
        Parse the RTF file named by the ``url`` argument, extract terms from
        it with ``extract_terms`` and write them, one per line and UTF-8
        encoded, to ``US_legal_lexicon.txt``.

    Input::
        Command-line arguments as returned by ``parse_arguments``.

    Output::
        Returns 1 when no file was given or the file could not be read;
        returns None after the lexicon file has been written.

    Assumptions::
        ``extract_terms`` yields text items that support ``.encode('utf8')``.
    '''
    # Get arguments
    args = parse_arguments()
    if not args.url:
        # ``url`` used to stay unbound here and crash with a NameError below.
        print('No input file given')
        return 1
    url = args.url

    # Get file and read it into structure
    try:
        with open(url, 'rb') as rtffile:
            judges = extract_terms(Rtf15Reader.read(rtffile))
    except IOError as e:
        # IOError has no ``reason`` attribute; format the exception itself so
        # the handler does not raise AttributeError.
        print('An error occured fetching %s \n %s' % (url, e))
        return 1

    # Write data; the context manager closes the file even if a write fails.
    with open('US_legal_lexicon.txt', 'w') as f:
        for i in judges:
            f.write((i).encode('utf8') + '\n')
开发者ID:JonathanBowker,项目名称:memex-gate,代码行数:32,代码来源:scrape_legal_lexicon.py

示例9: decode_cell

def decode_cell(cell):
    '''Convert one RTF-encoded cell to whitespace-normalised plain text.'''

    # Blank out \u / \\u / \N / \\N sequences, then resolve the remaining
    # escapes and keep printable characters only before handing to pyth.
    blanked = re.sub(r'\\u|\\\\u|\\N|\\\\N', ' ', cell)
    unescaped = blanked.decode('unicode_escape')
    printable_only = filter(lambda ch: ch in string.printable, unescaped)
    document = Rtf15Reader.read(StringIO(printable_only))

    # Flatten paragraph -> run -> text piece into a single list of pieces.
    pieces = []
    for paragraph in document.content:
        for run in paragraph.content:
            pieces.extend(run.content)

    # Join the pieces and drop every underscore.
    text = ' '.join(pieces).replace('_', '')

    # Drop the final character, then collapse all runs of whitespace.
    return ' '.join(text[:-1].split())
开发者ID:icdoctor2017,项目名称:DrLulz,代码行数:32,代码来源:Lulz_working.py

示例10: analyze

def analyze(committeeFile):
    """Extract the participants of a committee protocol and write them out.

    The RTF file ``committeeFile`` is converted to plain text, participants
    are found with ``find_participants``, and every participant whose name
    contains the text before the first ":" of some line is flagged as a
    speaker (``'speaker'`` set, ``'speak_count'`` incremented per line).
    One line per participant is written, UTF-8 encoded, to the matching
    ``.txt`` file under ``global_options.outdir``.

    Returns False when the RTF could not be parsed (the file is then copied
    to ``global_options.errdir``); True otherwise.
    """
    try:
        with open(committeeFile, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt.
        print("%s - skipped..." % committeeFile)
        errFile = committeeFile.replace(global_options.indir, global_options.errdir)
        shutil.copyfile(committeeFile, errFile)
        return False

    docstring = PlaintextWriter.write(doc).getvalue()

    # Keep the plain-text dump on disk as before, but work on the in-memory
    # text instead of re-reading the file twice.
    with open("test.out", 'w') as dump:
        dump.write(docstring)

    participants = find_participants(docstring)

    # Getting the indication whether the participant spoke in the committee
    for line in docstring.splitlines():
        if ":" not in line:
            continue
        participant = line.split(":")[0]
        if not participant:
            # An empty prefix is a substring of every name and would mark
            # everybody as a speaker.
            continue
        for p in participants:
            if participant in p['name']:
                p['speaker'] = True
                p['speak_count'] += 1

    fname = committeeFile.replace(global_options.indir, global_options.outdir)
    # Swap only the extension; replacing every "rtf" would also rewrite
    # directory names that happen to contain it.
    if fname.endswith(".rtf"):
        fname = fname[:-len(".rtf")] + ".txt"

    with codecs.open(fname, "w", "utf-8") as out:
        for participant in participants:
            string_builder = []
            for key, val in participant.items():
                if val is None:
                    continue
                if isinstance(val, str):
                    # Strip quotes so they cannot break the quoted output.
                    val = val.replace("'", "").replace('"', '')
                string_builder.append(u"'%s': '%s'" % (key, print_unicode(val)))
            wrt_ln = ', '.join(string_builder) + ',\n'
            try:
                out.write(wrt_ln)
            except UnicodeEncodeError:
                print(wrt_ln)

    verbose("Generated participants file: " + fname)
    return True
开发者ID:assafsinvani,项目名称:gknesset,代码行数:59,代码来源:analyze_protocols.py

示例11: test_inline_png

 def test_inline_png(self):
     """The reader exposes the inline PNG of the sample with its properties."""
     sample_with_image = os.path.join(os.path.abspath(os.path.dirname(__file__)), "rtfs", "sample-with-image.rtf")
     with open(sample_with_image, 'rb') as rtf:
         doc = Rtf15Reader.read(rtf)
     # First paragraph whose leading node is an image.
     image = next(node.content[0] for node in doc.content if isinstance(node.content[0], pyth.document.Image))
     expected = {'pngblip': True, 'picw': '20714', 'picwgoal': '750', 'pich': '12143',
                 'pichgoal': '750', 'picscaley': '100', 'picscalex': '100'}
     # assertEquals is a deprecated alias that newer unittest versions drop.
     self.assertEqual(expected, image.properties)
开发者ID:kippr,项目名称:pyth,代码行数:8,代码来源:test_readrtf15.py

示例12: test_inline_png

 def test_inline_png(self):
     """The XHTML output embeds the sample PNG as a 50px by 50px data URI."""
     here = os.path.abspath(os.path.dirname(__file__))
     rtf_path = os.path.join(here, "rtfs", "sample-with-image.rtf")
     with open(rtf_path, 'rb') as handle:
         markup = XHTMLWriter.write(Rtf15Reader.read(handle)).getvalue()
     expected_fragments = (
         '<img src="data:image/png;base64,',
         'width:50px',
         'height:50px',
     )
     for fragment in expected_fragments:
         self.assertIn(fragment, markup)
开发者ID:kippr,项目名称:pyth,代码行数:8,代码来源:test_writexhtml.py

示例13: load_stickies

def load_stickies(path):
    """Return the plain text of every sticky stored in the database at ``path``.

    The file is read in full, split into RTF documents by
    ``parse_sticky_database``, and each document is converted to plain text.
    """
    with open(path) as fd:
        raw_database = fd.read()
    return [
        PlaintextWriter.write(Rtf15Reader.read(StringIO.StringIO(rtf))).getvalue()
        for rtf in parse_sticky_database(raw_database)
    ]
开发者ID:alexflint,项目名称:sticky-sync,代码行数:8,代码来源:client.py

示例14: rtf

def rtf(f):
    """Return the text of the RTF file at path ``f``, one text run per line.

    Each text run of each document element is joined into a string, and
    the resulting pieces are separated with CRLF.
    """
    with open(f, "rb") as handle:
        document = Rtf15Reader.read(handle)
    pieces = [
        ''.join(run.content)
        for block in document.content
        for run in block.content
    ]
    return '\r\n'.join(pieces)
开发者ID:DarioGT,项目名称:libreQDA,代码行数:8,代码来源:text_extraction.py

示例15: parse

	def parse(self, path):
		"""Parse the RTF file at ``path`` into a ``Sample``.

		The sample is built from the path, ``None``, and the plain text of
		the document. Directories are not supported and raise
		``NotImplementedError``.
		"""
		# Directory
		if os.path.isdir(path):
			raise NotImplementedError()
		# File: close the handle once parsing is done instead of leaking it.
		with open(path) as rtf_file:
			doc = Rtf15Reader.read(rtf_file)
		return Sample(path, None, PlaintextWriter.write(doc).getvalue())
开发者ID:hcouch21,项目名称:styloproject,代码行数:9,代码来源:RtfParser.py


注:本文中的pyth.plugins.rtf15.reader.Rtf15Reader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。