本文整理汇总了Python中philologic.OHCOVector.Record类的典型用法代码示例。如果您正苦于以下问题:Python Record类的具体用法?Python Record怎么用?Python Record使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Record类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: prev_next_obj
def prev_next_obj(loader_obj, text, depth=5):
    """Thread 'prev'/'next' sibling links through the parsed object records.

    Streams text['sortedtoms'] (tab-delimited: type, word, id, attrib), holds
    back the last record seen for each object type, and flushes it once its
    successor is known so both links can be filled in.  Finally rewrites
    text['sortedtoms'] via a shell pipeline, keeping only doc/div/para lines.

    Python 2 code: uses the `print >> fh` statement and `eval()`.

    :param loader_obj: loader instance; only its `sort_by_id` sort flags are used
    :param text: dict of per-text file paths ('raw', 'sortedtoms')
    :param depth: how many leading object types (doc..word) get prev/next links
    """
    object_types = ['doc', 'div1', 'div2', 'div3', 'para', 'sent', 'word'][:depth]
    record_dict = {}  # per-type buffer: last record of each type, awaiting its successor
    temp_file = text['raw'] + '.tmp'
    output_file = open(temp_file, 'w')
    for line in open(text['sortedtoms']):
        # NOTE(review): `type` and `id` shadow builtins -- kept as-is.
        type, word, id, attrib = line.split('\t')
        id = id.split()  # philo id is a space-separated tuple of ints-as-strings
        record = Record(type, word, id)
        # NOTE(review): eval() executes text from the toms file; safe only
        # because the file is produced by this same loader, never user input.
        record.attrib = eval(attrib)
        if type in record_dict:
            # The buffered record's successor is the current one.
            record_dict[type].attrib['next'] = ' '.join(id)
            if type in object_types:
                print >> output_file, record_dict[type]
            else:
                # Types beyond `depth` are emitted without navigation links.
                del record_dict[type].attrib['next']
                del record_dict[type].attrib['prev']
                print >> output_file, record_dict[type]
            record.attrib['prev'] = ' '.join(record_dict[type].id)
            record_dict[type] = record
        else:
            # First record of this type: no predecessor.
            record.attrib['prev'] = ''
            record_dict[type] = record
    # Flush the final buffered record of each type; none has a successor.
    object_types.reverse()
    for obj in object_types:
        record_dict[obj].attrib['next'] = ''
        print >> output_file, record_dict[obj]
    output_file.close()
    os.remove(text['sortedtoms'])
    # Re-sort and filter through the shell; loader_obj.sort_by_id supplies the
    # `sort` flags.  NOTE(review): paths are interpolated unquoted into a shell
    # command -- assumes loader-controlled, space-free paths.
    tomscommand = "cat %s | egrep \"^doc|^div|^para\" | sort %s > %s" % (temp_file,loader_obj.sort_by_id,text["sortedtoms"])
    os.system(tomscommand)
    os.remove(temp_file)
示例2: normalize_these_columns
def normalize_these_columns(loader_obj,text):
    """Propagate selected metadata columns from div1/div2 down to div3 records.

    Rewrites text['sortedtoms'] in place: remembers the most recent value of
    each column (from `columns`, a free variable presumably captured from the
    enclosing loader closure -- not visible here) at the div1/div2 level, and
    fills it into any div3 record that lacks it.

    Python 2 code: uses the `print >> fh` statement and `eval()`.
    """
    current_values = {}  # column name -> value inherited from the enclosing div
    tmp_file = open(text["sortedtoms"] + ".tmp","w")
    for column in columns:
        current_values[column] = ""
    for line in open(text["sortedtoms"]):
        # NOTE(review): `type` and `id` shadow builtins -- kept as-is.
        type, word, id, attrib = line.split('\t')
        id = id.split()
        record = Record(type, word, id)
        # eval() on loader-generated data; see security note in prev_next_obj.
        record.attrib = eval(attrib)
        if type == "div1":
            # div1 resets every tracked column (blank when absent).
            for column in columns:
                if column in record.attrib:
                    current_values[column] = record.attrib[column]
                else:
                    current_values[column] = ""
        elif type == "div2":
            # div2 only overrides columns it actually defines.
            for column in columns:
                if column in record.attrib:
                    current_values[column] = record.attrib[column]
        elif type == "div3":
            # div3 inherits any column it does not define itself.
            for column in columns:
                if column not in record.attrib:
                    record.attrib[column] = current_values[column]
        print >> tmp_file, record
    tmp_file.close()
    os.remove(text["sortedtoms"])
    os.rename(text["sortedtoms"] + ".tmp",text["sortedtoms"])
示例3: inner_prev_next_obj
def inner_prev_next_obj(loader_obj, text):
    """Thread 'prev'/'next' sibling links through object records (closure variant).

    Same algorithm as prev_next_obj, but the set of linked object types comes
    from `types`, a free variable presumably captured from an enclosing
    closure (not visible here), and the final filter pattern is built from
    loader_obj.types instead of being hard-coded.

    Python 2 code: uses the `print >> fh` statement and `eval()`.
    """
    record_dict = {}  # per-type buffer: last record of each type, awaiting its successor
    temp_file = text['raw'] + '.tmp'
    output_file = open(temp_file, 'w')
    for line in open(text['sortedtoms']):
        # NOTE(review): `type` and `id` shadow builtins -- kept as-is.
        type, word, id, attrib = line.split('\t')
        id = id.split()
        record = Record(type, word, id)
        # eval() on loader-generated data; see security note in prev_next_obj.
        record.attrib = eval(attrib)
        if type in record_dict:
            record_dict[type].attrib['next'] = ' '.join(id)
            if type in types:
                print >> output_file, record_dict[type]
            else:
                # Unlinked types are emitted without navigation attributes.
                del record_dict[type].attrib['next']
                del record_dict[type].attrib['prev']
                print >> output_file, record_dict[type]
            record.attrib['prev'] = ' '.join(record_dict[type].id)
            record_dict[type] = record
        else:
            record.attrib['prev'] = ''
            record_dict[type] = record
    # Flush the last buffered record of each type; KeyError means that type
    # never occurred in this text, which is fine.
    types.reverse()
    for obj in types:
        try:
            record_dict[obj].attrib['next'] = ''
            print >> output_file, record_dict[obj]
        except KeyError:
            pass
    output_file.close()
    os.remove(text['sortedtoms'])
    # Keep only the configured object types, re-sorted by philo id.
    type_pattern = "|".join("^%s" % t for t in loader_obj.types)
    tomscommand = "cat %s | egrep \"%s\" | sort %s > %s" % (temp_file,type_pattern,loader_obj.sort_by_id,text["sortedtoms"])
    os.system(tomscommand)
    os.remove(temp_file)
示例4: load_record
def load_record(line):
    """Parse one tab-delimited toms line into a Record.

    The line holds four fields (type, word, id, attributes); the id field is a
    space-separated philo id and the attributes are deserialized with loads().
    The navigation links are initialized empty for later threading.
    """
    kind, token, object_id, serialized = line.split('\t')
    parsed = Record(kind, token, object_id.split())
    parsed.attrib = loads(serialized)
    for link in ("prev", "next"):
        parsed.attrib[link] = ""
    return parsed
示例5: load_record
def load_record(line):
    """Deserialize a tab-delimited object line into a Record.

    Splits the line into its four fields, space-splits the id field into the
    philo id tuple, decodes the attribute payload with loads(), and blanks the
    'prev'/'next' links so a later pass can fill them in.
    """
    fields = line.split('\t')
    rec_type, token, rec_id, serialized = fields
    rec = Record(rec_type, token, rec_id.split())
    rec.attrib = loads(serialized)
    rec.attrib["prev"] = ""
    rec.attrib["next"] = ""
    return rec
示例6: fix_pages
def fix_pages(loader_obj,text,depth=4):
    """Unfinished, do not use"""
    # NOTE(review): abandoned stub -- it reads and parses every record but
    # never writes anything back or closes temp_file; left byte-identical.
    object_types = ['doc', 'div1', 'div2', 'div3', 'para', 'sent', 'word'][:depth]
    current_page = 0;
    temp_file = open(text["sortedtoms"] + ".tmp","w")
    for line in open(text["sortedtoms"]):
        # NOTE(review): `type` and `id` shadow builtins -- kept as-is.
        type, word, id, attrib = line.split('\t')
        id = id.split()
        record = Record(type, word, id)
        record.attrib = eval(attrib)
示例7: tag_words
def tag_words(loader_obj, text):
    """Run TreeTagger over the word tokens and merge POS/lemma tags back in.

    Two passes over text['raw']: first, pipe every lowercased word token into
    a TreeTagger subprocess (restarting it every `maxlines` lines to avoid
    garbage output); second, re-read the object file in step with the tagger
    output and attach 'pos'/'lemma' attributes to each word record.

    Python 2 code: `print >>`, bytes/str `.decode`.  Free variables
    (`tt_path`, `param_file`, `maxlines`, `Popen`, `PIPE`, `loads`) are
    presumably supplied by the enclosing module/closure -- not visible here.
    """
    # Set up the treetagger process
    tt_args = [tt_path, "-token", "-lemma", "-prob", '-no-unknown', "-threshold", ".01", param_file]
    ttout_fh = open(text["raw"] + ".ttout", "w")
    tt_worker = Popen(tt_args, stdin=PIPE, stdout=ttout_fh)
    raw_fh = open(text["raw"], "r")
    line_count = 0
    # read through the object file, pass the words to treetagger
    for line in raw_fh:
        # NOTE(review): `type` and `id` shadow builtins -- kept as-is.
        type, word, id, attrib = line.split('\t')
        id = id.split()
        if type == "word":
            # Lowercase in unicode space, ignoring undecodable bytes.
            word = word.decode('utf-8', 'ignore').lower().encode('utf-8')
            # close and re-open the treetagger process to prevent garbage
            # output.
            if line_count > maxlines:
                tt_worker.stdin.close()
                tt_worker.wait()
                new_ttout_fh = open(text["raw"] + ".ttout", "a")
                tt_worker = Popen(tt_args, stdin=PIPE, stdout=new_ttout_fh)
                line_count = 0
            print >> tt_worker.stdin, word
            line_count += 1
    # finish tagging
    tt_worker.stdin.close()
    tt_worker.wait()
    # go back through the object file, and add the treetagger results to
    # each word
    tmp_fh = open(text["raw"] + ".tmp", "w")
    tag_fh = open(text["raw"] + ".ttout", "r")
    for line in open(text["raw"], "r"):
        type, word, id, attrib = line.split('\t')
        id = id.split()
        record = Record(type, word, id)
        record.attrib = loads(attrib)
        if type == "word":
            # Consume one tagger line per word; format is "token\tpos lemma prob".
            tag_l = tag_fh.readline()
            next_word, tag = tag_l.split("\t")[0:2]
            pos, lem, prob = tag.split(" ")
            # The tagger output must stay in lockstep with the word stream;
            # any mismatch aborts the whole pass (leaving .tmp/.ttout behind).
            if next_word != word.decode('utf-8', 'ignore').lower().encode('utf-8'):
                print >> sys.stderr, "TREETAGGER ERROR:", next_word, " != ", word, pos, lem
                return
            else:
                record.attrib["pos"] = pos
                record.attrib["lemma"] = lem
                print >> tmp_fh, record
        else:
            # Non-word records pass through unchanged.
            print >> tmp_fh, record
    os.remove(text["raw"])
    os.rename(text["raw"] + ".tmp", text["raw"])
    os.remove(text["raw"] + ".ttout")
示例8: normalize_unicode_raw_words
def normalize_unicode_raw_words(loader_obj, text):
    """Lowercase every word token in text['raw'], rewriting the file in place.

    Python 2 code: lowercases via UTF-8 decode/encode round-trip and writes
    with the `print >> fh` statement; attributes are parsed with `eval()` on
    loader-generated data.
    """
    tmp_file = open(text["raw"] + ".tmp","w")
    for line in open(text["raw"]):
        # NOTE(review): `id` shadows a builtin -- kept as-is.
        rec_type, word, id, attrib = line.split('\t')
        id = id.split()
        if rec_type == "word":
            # Lowercase in unicode space, then back to UTF-8 bytes.
            word = word.decode("utf-8").lower().encode("utf-8")
        record = Record(rec_type, word, id)
        record.attrib = eval(attrib)
        print >> tmp_file, record
    os.remove(text["raw"])
    os.rename(text["raw"] + ".tmp",text["raw"])
示例9: normalize_unicode_raw_words
def normalize_unicode_raw_words(loader_obj, text):
    """Lowercase every word token in the raw object file, rewriting it in place.

    Each line carries (type, word, id, attrib) separated by tabs; word tokens
    are lowercased via a UTF-8 decode/encode round-trip, every record is
    re-serialized to a scratch file, and the scratch file then replaces the
    original.
    """
    destination = open(text["raw"] + ".tmp", "w")
    with open(text["raw"]) as source:
        for raw_line in source:
            kind, token, object_id, serialized = raw_line.split('\t')
            if kind == "word":
                # Lowercase in unicode space, then back to UTF-8 bytes.
                token = token.decode("utf-8").lower().encode("utf-8")
            entry = Record(kind, token, object_id.split())
            entry.attrib = loads(serialized)
            print(entry, file=destination)
    destination.close()
    os.remove(text["raw"])
    os.rename(text["raw"] + ".tmp", text["raw"])
示例10: normalize_unicode_raw_words
def normalize_unicode_raw_words(loader_obj, text):
    """Rewrite text['raw'] in place with all word tokens lowercased.

    Streams the tab-delimited object file into a scratch file, lowercasing the
    token of every 'word' record and deserializing the attribute payload with
    loads(), then atomically swaps the scratch file over the original.
    """
    raw_path = text["raw"]
    out_handle = open(raw_path + ".tmp", "w")
    with open(raw_path) as in_handle:
        for current_line in in_handle:
            kind, token, object_id, serialized = current_line.split('\t')
            if kind == "word":
                token = token.lower()
            entry = Record(kind, token, object_id.split())
            entry.attrib = loads(serialized)
            print(entry, file=out_handle)
    out_handle.close()
    os.remove(raw_path)
    os.rename(raw_path + ".tmp", raw_path)
示例11: inner_make_object_ancestors
def inner_make_object_ancestors(loader_obj, text):
    """Attach '<type>_ancestor' philo ids to every word record.

    For each object type in `types` (a free variable presumably captured from
    an enclosing closure, as is `type_depth` -- not visible here), truncates
    the word's philo id to that type's depth and zero-pads it to 7 fields.

    Python 2 code: uses the `print >> fh` statement and `eval()`.
    """
    temp_file = text['words'] + '.tmp'
    output_file = open(temp_file, 'w')
    for line in open(text['words']):
        # NOTE(review): `type` and `id` shadow builtins, and the inner loop
        # below rebinds `type` again -- kept byte-identical.
        type, word, id, attrib = line.split('\t')
        id = id.split()
        record = Record(type, word, id)
        record.attrib = eval(attrib)
        for type in types:
            zeros_to_add = ['0' for i in range(7 - type_depth[type])]
            # NOTE(review): `id` keeps its truncated value across iterations,
            # so later (deeper) types slice an already-padded id -- behavior
            # depends on the ordering of `types`; preserved as-is.
            philo_id = id[:type_depth[type]] + zeros_to_add
            record.attrib[type + '_ancestor'] = ' '.join(philo_id)
        print >> output_file, record
    output_file.close()
    os.remove(text['words'])
    os.rename(temp_file, text['words'])
示例12: inner_make_object_ancestors
def inner_make_object_ancestors(loader_obj, text):
    """Stamp each record with the philo id of its ancestor at every object type.

    For every type in philo_types, the record's id is truncated to that type's
    depth (philo_type_depth) and right-padded with '0' up to 7 fields; the
    result is stored as '<type>_ancestor'.  Note that philo_id is rebound on
    each iteration, so deeper types slice the already-padded id -- the outcome
    depends on the ordering of philo_types, exactly as in the original.
    """
    words_path = text['words']
    scratch_path = words_path + '.tmp'
    scratch = open(scratch_path, 'w')
    with open(words_path) as words_file:
        for raw_line in words_file:
            philo_type, word, philo_id, attrib = raw_line.split('\t')
            philo_id = philo_id.split()
            record = Record(philo_type, word, philo_id)
            record.attrib = loads(attrib)
            for philo_type in philo_types:
                depth = philo_type_depth[philo_type]
                philo_id = philo_id[:depth] + ['0'] * (7 - depth)
                record.attrib[philo_type + '_ancestor'] = ' '.join(philo_id)
            print(record, file=scratch)
    scratch.close()
    os.remove(words_path)
    os.rename(scratch_path, words_path)
示例13: smash_these_unicode_columns
def smash_these_unicode_columns(loader_obj, text):
    """Add accent-stripped, lowercased '<column>_norm' variants of metadata columns.

    For every column in `columns` present on a record, the value is decoded
    from UTF-8, lowercased, NFKD-decomposed with its combining marks removed,
    re-encoded to UTF-8, and stored under '<column>_norm'.  The sorted toms
    file is rewritten in place via a scratch file.
    """
    toms_path = text["sortedtoms"]
    scratch = open(toms_path + ".tmp", "w")
    for raw_line in open(toms_path):
        rec_type, token, object_id, serialized = raw_line.split('\t')
        rec = Record(rec_type, token, object_id.split())
        rec.attrib = loads(serialized)
        for column in columns:
            if column in rec.attrib:
                folded = rec.attrib[column].decode("utf-8").lower()
                # Drop combining marks left over from NFKD decomposition.
                stripped = ''.join(
                    ch for ch in unicodedata.normalize("NFKD", folded)
                    if not unicodedata.combining(ch)
                )
                rec.attrib[column + "_norm"] = stripped.encode("utf-8")
        print(rec, file=scratch)
    scratch.close()
    os.remove(toms_path)
    os.rename(toms_path + ".tmp", toms_path)
示例14: make_word_counts
def make_word_counts(loader_obj, text, depth=5):
    """Attach a 'word_count' attribute to each container object record.

    Keeps one running counter per tracked depth; every word increments all of
    them, and when an object of the type at depth d appears, that record takes
    the words accumulated since the previous object of the same type and the
    counter resets.  Rewrites text['raw'] in place.

    Python 2 code: uses the `print >> fh` statement and `eval()`.
    """
    object_types = ['doc', 'div1', 'div2', 'div3', 'para', 'sent', 'word']
    counts = [0 for i in range(depth)]  # one word tally per tracked object depth
    temp_file = text['raw'] + '.tmp'
    output_file = open(temp_file, 'w')
    for line in open(text['raw']):
        # NOTE(review): `type` and `id` shadow builtins -- kept as-is.
        type, word, id, attrib = line.split('\t')
        id = id.split()
        record = Record(type, word, id)
        record.attrib = eval(attrib)
        for d,count in enumerate(counts):
            if type == 'word':
                # A word feeds every depth's counter.
                counts[d] += 1
            elif type == object_types[d]:
                # This record takes and resets its depth's tally.
                record.attrib['word_count'] = counts[d]
                counts[d] = 0
        print >> output_file, record
    output_file.close()
    os.remove(text['raw'])
    os.rename(temp_file, text['raw'])
示例15: make_word_counts
def make_word_counts(loader_obj, text, depth=4):
    """Attach a 'word_count' attribute to each container object record.

    Variant of make_word_counts with a default depth of 4: one running word
    tally per tracked depth, credited to (and reset by) each new object of
    the type at that depth.  Rewrites text['raw'] in place.

    Python 2 code: uses the `print >> fh` statement and `eval()`.
    """
    object_types = ["doc", "div1", "div2", "div3", "para", "sent", "word"]
    counts = [0 for i in range(depth)]  # one word tally per tracked object depth
    temp_file = text["raw"] + ".tmp"
    output_file = open(temp_file, "w")
    for line in open(text["raw"]):
        # NOTE(review): `type` and `id` shadow builtins -- kept as-is.
        type, word, id, attrib = line.split("\t")
        id = id.split()
        record = Record(type, word, id)
        record.attrib = eval(attrib)
        for d, count in enumerate(counts):
            if type == "word":
                # A word feeds every depth's counter.
                counts[d] += 1
            elif type == object_types[d]:
                # This record takes and resets its depth's tally.
                record.attrib["word_count"] = counts[d]
                counts[d] = 0
        print >> output_file, record
    output_file.close()
    os.remove(text["raw"])
    os.rename(temp_file, text["raw"])