当前位置: 首页>>代码示例>>Python>>正文


Python writer.PlaintextWriter类代码示例

本文整理汇总了 Python 中 pyth.plugins.plaintext.writer.PlaintextWriter 的典型用法代码示例。如果您正苦于以下问题：Python PlaintextWriter 类的具体用法？Python PlaintextWriter 怎么用？Python PlaintextWriter 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了 PlaintextWriter 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: read_rtf_text

def read_rtf_text(fp, errors='strict', encoding='utf-8'):
    """Return the plain text of an RTF source as a decoded string.

    The source is parsed with ``CustomRtf15Reader``; inside every paragraph
    only the items accepted by ``paragraph_is_text_like`` are kept, and the
    document is then rendered with ``PlaintextWriter``.

    :param fp: RTF source handed to ``CustomRtf15Reader.read``
        (presumably a file-like object -- confirm against the reader).
    :param errors: forwarded unchanged to ``CustomRtf15Reader.read``.
    :param encoding: codec used to decode what the writer produced.
    :return: the rendered text, decoded with ``encoding``.
    """
    doc = CustomRtf15Reader.read(fp, errors=errors)

    for paragraph in doc.content:
        # Store a real list: on Python 3 ``filter`` returns a lazy, one-shot
        # iterator, while on Python 2 it returned a list. The comprehension
        # gives the same list on both.
        paragraph.content = [
            item for item in paragraph.content
            if paragraph_is_text_like(item)
        ]

    return PlaintextWriter.write(doc).read().decode(encoding)
开发者ID:labhackercd,项目名称:novo-retorica,代码行数:7,代码来源:utils.py

示例2: convert_to_txt

def convert_to_txt(file_path):
    """Extract the text content of the file at ``file_path``.

    The loader is picked from the extension reported by ``_get_extension``:
    ``.txt`` goes through ``open_with_correct_encoding``, ``.docx`` through
    ``_docx_to_txt``, ``.rtf`` through ``Rtf15Reader`` / ``PlaintextWriter``,
    and anything else is read with ``codecs.open`` using ``ENCODING_UTF_8``.

    :param file_path: path of the document to load.
    :return: the extracted text; an empty string when a ``.txt`` file could
        not be read.
    :raises OSError: from ``os.stat`` when ``file_path`` does not exist.
    """
    logger.debug("convert_to_txt: %s", file_path)
    if not os.path.exists(file_path):
        # This only adds context to the log; os.stat() below still raises
        # for a missing path, exactly as before.
        logger.error("missing file %s", file_path)
    file_size = os.stat(file_path).st_size
    logger.debug("convert_to_txt: %d bytes at %s", file_size, file_path)

    words = None
    ext = _get_extension(file_path)
    if ext == '.txt':
        logger.debug("loading txt file")
        try:
            # NOTE(review): the returned handle is not closed here -- confirm
            # whether open_with_correct_encoding expects the caller to do it.
            _encoding, _file_handle, words = open_with_correct_encoding(file_path)
        except Exception:
            logger.error("Wasn't able to read the words from the file %s", file_path)
            words = ""
    elif ext == '.docx':
        logger.debug("loading docx file")
        words = _docx_to_txt(file_path)
    elif ext == '.rtf':
        logger.debug("loading rtf file")
        # Context manager so the RTF handle is released even if parsing fails.
        with open(file_path) as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
            words = PlaintextWriter.write(doc).getvalue()
    else:
        # Use the module logger like every other message in this function
        # (the original called the root ``logging`` module here).
        logger.warning("Couldn't find an extension on the file, so assuming text")
        with codecs.open(file_path, 'r', ENCODING_UTF_8) as myfile:
            words = myfile.read()
    logger.debug("loaded %d chars", len(words))
    return words
开发者ID:c4fcm,项目名称:DataBasic,代码行数:29,代码来源:filehandler.py

示例3: GetExternal

def GetExternal(version, odl_data, source, class_id):
    """Return the plain text of the ``_Art1_RTF`` attribute of ``version``.

    ``version[2]`` is scanned for items whose first two fields are
    ``"Attribute"`` and ``"_Art1_RTF"``. The third field of such an item is
    either a one-element sequence holding the RTF data itself, or a
    two-element sequence whose first element names a file inside ``source``.
    The RTF is rendered to plain text, doubled newlines are collapsed, and
    the result is passed through ``ReplaceTextNames``.

    :param version: record whose third element is the list of items to scan.
    :param odl_data: forwarded unchanged to ``ReplaceTextNames``.
    :param source: a ``ZipFile`` or a directory path that holds the external
        RTF files.
    :param class_id: forwarded unchanged to ``ReplaceTextNames``.
    :return: ``""`` as soon as a matching attribute has empty data, otherwise
        the value returned by ``ReplaceTextNames``.
    """
    external = ""

    for item in version[2]:
        if item[0] != "Attribute" or item[1] != "_Art1_RTF":
            continue

        payload = item[2]
        if len(payload) == 2:
            if isinstance(source, ZipFile):
                data = source.open(payload[0]).read()
            else:
                # Context manager so the handle is closed even if read() fails.
                with open(join(source, payload[0]), 'rb') as rtf_file:
                    data = rtf_file.read()
            # Form feeds are stripped from externally stored RTF only.
            data = data.replace("\x0c", "")
        elif len(payload) == 1:
            data = payload[0]
        else:
            # Unexpected payload shape: previously this left ``data`` unbound
            # (or stale from an earlier item); skip the item instead.
            continue

        if data == "":
            return ""

        rtf_buffer = StringIO()
        rtf_buffer.write(data)
        doc = Rtf15Reader.read(rtf_buffer, clean_paragraphs = False)
        external = PlaintextWriter.write(doc).getvalue()
        external = external.replace("\n\n", "\n")

    return ReplaceTextNames(external, version, odl_data, class_id)
开发者ID:jeroenk,项目名称:artisanConvert,代码行数:29,代码来源:odl_extract.py

示例4: read_recommendations

 def read_recommendations(self, file_name):
     """
     Read the targeted values from the file "WHO Daily Recommended Values.rtf".

     The RTF is rendered to plain text and split on blank lines. Each entry
     is split on commas: the text before the first "(" of the first field is
     the nutrient name, and the second field is its value.

     The resulting mapping is also stored on ``self.target_values``.

     :param file_name: path of the RTF file to read.
     :return: tuple ``(target, filtered_col)`` where ``target`` maps nutrient
         name to value and ``filtered_col`` lists the names in file order.
     :raises IndexError: when a non-blank entry contains no comma.
     """
     # Context manager so the RTF handle is closed once parsing is done.
     with open(file_name) as rtf_file:
         doc = Rtf15Reader.read(rtf_file)
     entities = PlaintextWriter.write(doc).getvalue().split('\n\n')

     target = dict()
     filtered_col = list()
     for item in entities:
         if not item.strip():
             # A blank entry has no value field and used to raise IndexError.
             continue
         fields = item.split(',')
         name = fields[0].split('(')[0]
         target[name] = fields[1]
         filtered_col.append(name)
     self.target_values = target
     return target, filtered_col
开发者ID:Basit-qc,项目名称:WHO---Food-Menu,代码行数:27,代码来源:buildmenu.py

示例5: upload

def upload(request):
	"""Handle the upload page.

	Three cases, checked in order:

	* an uploaded ``file`` is present: it is stored under
	  ``shake_v3/static/data/``, converted to text (PDF via ``pdf_to_txt``,
	  RTF via ``Rtf15Reader`` / ``PlaintextWriter``, anything else copied
	  as-is), and the dict built by ``generate_term_dict`` is returned as
	  JSON with an extra ``fp`` key;
	* POST data is present: the score from ``custom_POST_to_score`` is
	  mapped to a letter grade, which is returned as the response body;
	* otherwise ``upload.html`` is rendered with a score of 0.

	:param request: the incoming Django request.
	:return: an ``HttpResponse`` or the rendered upload page.
	"""
	# user uploads a document -> convert into a dict of the terms found
	# Both conditions are checked together: a request whose FILES lacks a
	# 'file' entry used to hit a NameError on ``result`` / ``f``.
	if request.FILES and 'file' in request.FILES:
		f = request.FILES['file']
		# NOTE(review): the client-supplied name is used to build the path --
		# confirm it is sanitised before it reaches this view.
		fp = 'shake_v3/static/data/' + str(f)
		fp2 = fp[:-3] + 'txt'
		if fp.endswith('pdf'):
			with open(fp, 'wb+') as pdff:
				for chunk in f.chunks():
					pdff.write(chunk)
			result = pdf_to_txt(fp)
			with open(fp2, 'wb+') as txtf:
				txtf.write(result)
		elif fp.endswith('rtf'):
			with open(fp, 'wb+') as rtff:
				for line in f:
					rtff.write(line)
			# Re-open for reading inside a context manager so it gets closed.
			with open(fp, 'rb') as rtff:
				doc = Rtf15Reader.read(rtff)
			doctxt = PlaintextWriter.write(doc).getvalue()
			with open(fp2, 'wb+') as txtf:
				# One write; iterating the string wrote it character by character.
				txtf.write(doctxt)
			f = str(f)[:-4] + ".txt"
			result = doctxt
		else:
			with open(fp2, 'wb+') as txtf:
				for line in f:
					txtf.write(line)
			with open(fp2, 'r') as txtf:
				result = txtf.read()
		response_dict = generate_term_dict(result)
		response_dict['fp'] = 'static/data/' + str(f)
		return HttpResponse(simplejson.dumps(response_dict), mimetype='application/javascript')
	# user indicates terms -> give a grade
	elif request.POST:
		#TO DO: implement saving the data
		score = custom_POST_to_score(request)
		if score > 4.5:
			rating = 'A+'
		elif score > 4:
			rating = 'A'
		elif score > 3.5:
			rating = 'B+'
		elif score > 3:
			rating = 'B'
		elif score > 2.5:
			rating = 'C+'
		elif score > 2:
			rating = 'C'
		elif score > 1:
			rating = 'D'
		else:
			rating = 'F'
		return HttpResponse(rating)
	# display the upload part 1
	else:
		score = 0
		return render_to_response('upload.html', {'score': score}, context_instance = RequestContext(request))
开发者ID:vickimo,项目名称:shakev3,代码行数:60,代码来源:views.py

示例6: analyze

def analyze(committeeFile):
    """Extract the participants of one committee protocol and write them out.

    The RTF protocol is rendered to plain text, ``find_participants`` builds
    the participant records, and every line of the form ``<name>: ...`` marks
    the matching participants as speakers and bumps their ``speak_count``.
    The records are written, one per line, to a UTF-8 file whose path is the
    input path with ``global_options.indir`` replaced by
    ``global_options.outdir`` and a trailing ``rtf`` replaced by ``txt``.

    :param committeeFile: path of the RTF protocol to analyze.
    :return: ``True`` on success; ``False`` when the RTF could not be read,
        in which case the file is copied to the ``global_options.errdir``
        counterpart of its path.
    """
    try:
        # Context manager so the handle is released whether or not parsing works.
        with open(committeeFile, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("%s - skipped..." % committeeFile)
        errFile = committeeFile.replace(global_options.indir, global_options.errdir)
        shutil.copyfile(committeeFile, errFile)
        return False

    docstring = PlaintextWriter.write(doc).getvalue()

    # The plain-text dump is still written as before, but the text is now
    # used straight from memory instead of being re-read from disk twice.
    with open("test.out", 'w') as dump:
        dump.write(docstring)

    participants = find_participants(docstring)

    # Getting the indication whether the participant spoke in the committee
    for line in docstring.splitlines():
        if ":" not in line:
            continue
        speaker = line.split(":")[0]
        for p in participants:
            if speaker in p['name']:
                p['speaker'] = True
                p['speak_count'] += 1

    fname = committeeFile.replace(global_options.indir, global_options.outdir)
    # Swap only the trailing extension; replacing every "rtf" would also
    # rewrite directory or file names that merely contain it.
    if fname.endswith("rtf"):
        fname = fname[:-len("rtf")] + "txt"

    with codecs.open(fname, "w", "utf-8") as out:
        for participant in participants:
            fields = []
            for key, val in participant.items():
                if val is None:
                    continue
                if isinstance(val, str):
                    # Strip quotes so they cannot break the quoted output.
                    val = val.replace("'", "").replace('"', '')
                fields.append(u"'%s': '%s'" % (key, print_unicode(val)))
            wrt_ln = ', '.join(fields) + ',\n'
            try:
                out.write(wrt_ln)
            except UnicodeEncodeError:
                print(wrt_ln)

    verbose("Generated participants file: " + fname)
    return True
开发者ID:assafsinvani,项目名称:gknesset,代码行数:59,代码来源:analyze_protocols.py

示例7: extract_terms

def extract_terms(rtffile):
    """Return every fourth line of the plain-text rendering of ``rtffile``.

    ``rtffile`` is passed directly to ``PlaintextWriter.write``. The rendered
    text is split on newlines, and the lines at indexes 0, 4, 8, ... are
    returned as a list.
    """
    plain_text = PlaintextWriter.write(rtffile).getvalue()
    all_lines = plain_text.split('\n')
    # Start at the first line and take every fourth one through the end.
    return all_lines[::4]
开发者ID:JonathanBowker,项目名称:memex-gate,代码行数:8,代码来源:scrape_legal_lexicon.py

示例8: load_stickies

def load_stickies(path):
    """Return the plain text of every note in the sticky database at ``path``.

    The file content is handed to ``parse_sticky_database``. Each RTF chunk
    it yields is parsed with ``Rtf15Reader`` and rendered with
    ``PlaintextWriter``. The texts are returned as a list in the same order.
    """
    with open(path) as database:
        raw = database.read()
        return [
            PlaintextWriter.write(
                Rtf15Reader.read(StringIO.StringIO(chunk))
            ).getvalue()
            for chunk in parse_sticky_database(raw)
        ]
开发者ID:alexflint,项目名称:sticky-sync,代码行数:8,代码来源:client.py

示例9: get_rtf_text

def get_rtf_text(path):
	"""
	Take the path of an rtf file as an argument and return the text

	:param path: path of the RTF file to read.
	:return: the document rendered by ``PlaintextWriter``.
	"""
	# Context manager so the file handle is closed once the RTF is parsed.
	with open(path) as rtf_file:
		doc = Rtf15Reader.read(rtf_file)

	return PlaintextWriter.write(doc).getvalue()
开发者ID:vignesh117,项目名称:MusicalText,代码行数:9,代码来源:makecorpusfirstset.py

示例10: readRtf

 def readRtf(self, path):
     """Return the plain text of the RTF file at ``path``.

     If the file cannot be opened or parsed, the failure is reported through
     ``self._log`` and a fixed fallback message is returned instead of
     raising.

     :param path: path of the RTF file to read.
     :return: the rendered text, or the fallback message on failure.
     """
     try:
         # Context manager so the handle is closed even when parsing fails.
         with open(path, "rb") as rtf_file:
             doc = Rtf15Reader.read(rtf_file)
     except Exception:
         # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt and
         # SystemExit are no longer swallowed.
         self._log("Some screwy rtf shit going on with " + path)
         return "Can't process ur shitty rtf <3 dfbot"
     return PlaintextWriter.write(doc).getvalue()
开发者ID:danielhfrank,项目名称:Tumbox,代码行数:9,代码来源:tumbox.py

示例11: parse

	def parse(self, path):
		"""Build a ``Sample`` from the RTF file at ``path``.

		:param path: path of an RTF file; directories are not supported.
		:return: ``Sample(path, None, text)`` where ``text`` is the document
			rendered by ``PlaintextWriter``.
		:raises NotImplementedError: when ``path`` is a directory.
		"""
		# Directory
		if os.path.isdir(path):
			raise NotImplementedError()
		# File
		# Context manager so the handle is closed once the RTF is parsed.
		with open(path) as rtf_file:
			doc = Rtf15Reader.read(rtf_file)
		return Sample(path, None, PlaintextWriter.write(doc).getvalue())
开发者ID:hcouch21,项目名称:styloproject,代码行数:9,代码来源:RtfParser.py

示例12: test_read2

    def test_read2(self):
        rtf = StringIO("""{\\rtf1\\ansi\\ansicpg1252\\cocoartf1343\\cocoasubrtf160\\cocoascreenfonts1{\\fonttbl\\f0\\fnil\\fcharset222 Thonburi;}
{\\colortbl;\\red255\\green255\\blue255;}
\\pard\\tx560\\tx1120\\tx1680\\tx2240\\tx2800\\tx3360\\tx3920\\tx4480\\tx5040\\tx5600\\tx6160\\tx6720\\pardirnatural\\qc

{\\f0\\fs24 \\cf0 \\'b9\\'e9\\'d3\\'b5\\'a1}""")
        doc = Rtf15Reader.read(rtf)
        text = PlaintextWriter.write(doc).read()
        print text
        self.assertEquals(u"น้ำตก", text.decode('utf8'))
开发者ID:pphetra,项目名称:pyth,代码行数:10,代码来源:test_readosxrtf.py

示例13: clean_rtf

def clean_rtf(fname):
    """Parse a semicolon-separated table stored in an RTF file.

    The document is rendered to plain text and empty lines are dropped. Each
    remaining line is split on ``;``, and the first and last character of
    every field are removed.

    :param fname: path of the RTF file to read.
    :return: list of rows, each a list of the trimmed field strings.
    """
    # Context manager so the file handle is closed once the RTF is parsed.
    with open(fname) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    plain = PlaintextWriter.write(doc).getvalue()
    return [
        [val[1:-1] for val in line.split(";")]
        for line in plain.split("\n")
        if line
    ]
开发者ID:embr,项目名称:nonce,代码行数:11,代码来源:upload_jami_rtf.py

示例14: _rtf_to_txt

def _rtf_to_txt(file_path, dst_dir, file_name):
    """
    Uses the pyth python module to extract text from a rtf file and save
    to .txt in dst_dir.

    :param file_path: path of the RTF file to convert.
    :param dst_dir: directory that receives the text file.
    :param file_name: name used to build the output name (a trailing
        ".rtf" becomes ".txt"); when None, the last component of
        ``file_path`` is used.
    :return: always 0.
    """
    if file_name is None:
        file_name = os.path.split(file_path)[1]
    file_dst = os.path.join(dst_dir, re.sub(r'\.rtf$', '.txt', file_name))
    # Context manager so the source handle is closed once the RTF is parsed.
    with open(file_path) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    txt = unidecode(PlaintextWriter.write(doc).getvalue())
    with open(file_dst, 'w') as f:
        f.write(txt)
    return 0
开发者ID:rjweiss,项目名称:rosetta,代码行数:14,代码来源:converters.py

示例15: _convert_rtf_to_text

    def _convert_rtf_to_text(self, password=None):
        """Render the RTF file ``self.cvFile`` to a text file in ``self.scratchDir``.

        The output name is the input file name without its last
        dot-separated part, followed by the current Unix time in whole
        seconds and ``.txt``. The chosen path is stored in
        ``self.cvTextFile`` before the file is written.

        :param password: not used by this method.
        :return: always 0.
        """
        input_rtf = self.cvFile
        # Context manager so the source handle is closed once the RTF is parsed.
        with open(input_rtf) as rtf_file:
            rtf = Rtf15Reader.read(rtf_file)

        input_filename = os.path.basename(input_rtf)
        # Drop the last dot-separated part (the extension) of the file name.
        base_name = ".".join(input_filename.split(".")[:-1])
        # The timestamp suffix keeps repeated conversions from colliding.
        timestamp = str(int(time.time()))
        self.cvTextFile = self.scratchDir + os.path.sep + base_name + timestamp + ".txt"
        with open(self.cvTextFile, "w") as fw:
            fw.write(PlaintextWriter.write(rtf).getvalue())
        return (0)
开发者ID:arshpreetsingh,项目名称:cv-parser,代码行数:17,代码来源:cvparser.py


注:本文中的pyth.plugins.plaintext.writer.PlaintextWriter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。