Python codecs.open方法代码示例

本文整理汇总了Python中codecs.open方法的典型用法代码示例。如果您正苦于以下问题：Python codecs.open方法的具体用法？Python codecs.open怎么用？Python codecs.open使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类codecs的用法示例。

在下文中一共展示了codecs.open方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: loadWordNet

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def loadWordNet(self):
    """
    Load the Chinese WordNet file into ``self.known``.

    Reads the file at ``self.wordnet_txt`` (decoded as UTF-8), where every
    data line has the form ``synset<TAB>lemma``, and stores a dict that maps
    each whitespace-stripped lemma to the list of synsets it appears with,
    in file order.  Blank lines and lines starting with a BOM followed by
    ``#`` are skipped.

    Raises:
        ValueError: if a data line does not split into exactly two
            tab-separated fields (the original author notes that the data
            file contains no three-field rows).
    """
    # Use a context manager so the handle is closed even if a malformed
    # line raises half-way through the file.
    with codecs.open(self.wordnet_txt, "rb", "utf-8") as f:
        self.known = dict()
        for l in f:
            if l.startswith('\ufeff#') or not l.strip():
                continue
            synset, lemma = l.strip().split("\t")
            # setdefault creates the per-lemma list on first sight.
            self.known.setdefault(lemma.strip(), []).append(synset)

开发者ID:Coldog2333，项目名称:Financial-NLP，代码行数:26，代码来源:NLP.py

示例2: txt2sentence

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def txt2sentence(self, filename):
    """
    Read a pre-segmented text file and return its sentences.

    The file is decoded as UTF-8 first; if that fails with a decoding
    error it is re-read as GBK.  Every line is stripped, lines of at most
    one character are dropped, and the rest is split on single spaces.

    Args:
        filename: path of the file to read.

    Returns:
        A list of sentences, each sentence being a list of word strings.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as fp:
            lines = fp.readlines()
    except UnicodeDecodeError:
        # Only a decoding failure justifies the GBK retry; other errors
        # (missing file, permissions) would fail identically a second time.
        with open(filename, 'r', encoding='gbk') as fp:
            lines = fp.readlines()

    sentences = []
    for line in lines:
        # Text-mode reading already normalises line endings, so strip()
        # removes every trailing '\n' / '\r'.
        line = line.strip()
        if len(line) <= 1:
            continue
        sentences.append(line.split(' '))
    return sentences

开发者ID:Coldog2333，项目名称:Financial-NLP，代码行数:22，代码来源:NLP.py

示例3: get_perf

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def get_perf(filename):
    ''' run conlleval.pl perl script to obtain
    precision/recall and F1 score

    The contents of ``filename`` are piped to the script's stdin and the
    first output line containing ``accuracy`` is parsed.

    Returns a dict with float values under the keys ``'p'``, ``'r'`` and
    ``'f1'``.

    Raises IndexError if the script output contains no ``accuracy`` line.
    '''
    _conlleval = PREFIX + 'conlleval'
    if not isfile(_conlleval):
        # NOTE(review): the script is downloaded and chmod-ed as
        # 'conlleval.pl' in the current directory, but executed as
        # PREFIX + 'conlleval' -- confirm the two paths are meant to match.
        #download('http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl') 
        os.system('wget https://www.comp.nus.edu.sg/%7Ekanmy/courses/practicalNLP_2008/packages/conlleval.pl')
        chmod('conlleval.pl', stat.S_IRWXU) # give the execute permissions

    # Read the input before spawning perl so the handle is closed promptly
    # and no child process is left behind if the file cannot be read.
    with open(filename) as f:
        predictions = f.read()

    # universal_newlines=True makes the pipes text-mode, so the same
    # str-based code works on both Python 2 and Python 3.
    proc = subprocess.Popen(["perl", _conlleval],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = proc.communicate(predictions)

    out = []
    for line in stdout.split('\n'):
        if 'accuracy' in line:
            out = line.split()
            break

    # out = ['accuracy:', '16.26%;', 'precision:', '0.00%;', 'recall:', '0.00%;', 'FB1:', '0.00']
    # [:-2] strips the trailing '%;' from the percentage fields.
    precision = float(out[3][:-2])
    recall    = float(out[5][:-2])
    f1score   = float(out[7])

    return {'p':precision, 'r':recall, 'f1':f1score}

开发者ID:lingluodlut，项目名称:Att-ChemdNER，代码行数:25，代码来源:utils.py

示例4: save_mappings

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def save_mappings(self, id_to_word, id_to_char, id_to_tag):
#{{{
    """
    Remember the three id lookup tables on the instance and pickle them
    to ``self.mappings_path`` so the model can be reused later.
    """
    self.id_to_word, self.id_to_char, self.id_to_tag = (
        id_to_word, id_to_char, id_to_tag
    )
    with open(self.mappings_path, 'wb') as out:
        payload = dict(
            id_to_word=self.id_to_word,
            id_to_char=self.id_to_char,
            id_to_tag=self.id_to_tag,
        )
        cPickle.dump(payload, out)
#}}}

开发者ID:lingluodlut，项目名称:Att-ChemdNER，代码行数:18，代码来源:model.py

示例5: load_sentences

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def load_sentences(path, lower, zeros):
#{{{
    """
    Load sentences. A line must contain at least a word and its tag.
    Sentences are separated by empty lines.

    Args:
        path: UTF-8 file to read.
        lower: not used by this function.
        zeros: when true, every line is passed through ``zero_digits``
            before being split.

    Returns:
        A list of sentences; each sentence is a list of whitespace-split
        lines.  Sentences whose first field contains ``DOCSTART`` are
        dropped.

    Raises:
        AssertionError: if a non-empty line has fewer than two fields.
    """
    sentences = []
    sentence = []
    # Context manager so the file is closed even when the assertion fires.
    with codecs.open(path, 'r', 'utf8') as f:
        for line in f:
            line = zero_digits(line.rstrip()) if zeros else line.rstrip()
            if line:
                word = line.split()
                assert len(word) >= 2
                sentence.append(word)
                continue
            # An empty line closes the current sentence, if there is one.
            if sentence:
                if 'DOCSTART' not in sentence[0][0]:
                    sentences.append(sentence)
                sentence = []
    # Flush the last sentence when the file does not end with a blank line.
    if sentence and 'DOCSTART' not in sentence[0][0]:
        sentences.append(sentence)
    return sentences
#}}}

开发者ID:lingluodlut，项目名称:Att-ChemdNER，代码行数:26，代码来源:loader.py

示例6: find_version

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def find_version(*file_paths):
    """Return the ``__version__`` string declared in a file.

    Args:
        *file_paths: path components, joined onto the module-level ``here``
            directory.

    Returns:
        The text between the quotes of the first line of the form
        ``__version__ = 'ver'``.

    Raises:
        RuntimeError: if the file cannot be read or contains no such line.
    """
    # Open in Latin-1 so that we avoid encoding errors.
    # Use codecs.open for Python 2 compatibility
    try:
        with codecs.open(os.path.join(here, *file_paths), 'r', 'latin1') as f:
            version_file = f.read()
    except (IOError, OSError):
        # Only I/O failures are translated; a bare ``except`` would also
        # hide KeyboardInterrupt and genuine programming errors.
        raise RuntimeError("Unable to find version string.")

    # The version line must have the form
    # __version__ = 'ver'
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                              version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")


# Get the long description from the relevant file

开发者ID:NatanaelAntonioli，项目名称:L.E.S.M.A，代码行数:22，代码来源:setup.py

示例7: _add_missing_init_py

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def _add_missing_init_py(self, paths):
    """Add missing __init__.py files in the module subdirectories.

    Args:
        paths: file paths; the directory of each one is checked.

    Returns:
        A list with one result dict (keys ``path``, ``created``, ``diff``,
        ``error``) for every ``__init__.py`` that was created.
    """
    results = []

    # Avoid adding an init on repo level if setup.py or other script on the
    # top level has changed.  Every occurrence of the root is filtered out:
    # list.remove() would drop only the first one, so two changed top-level
    # files would still get a repo-level __init__.py.
    folders = [
        folder
        for folder in (os.path.dirname(p) for p in paths)
        if folder != self.cmd_root
    ]

    for folder in folders:
        init_py = os.path.join(folder, "__init__.py")
        if os.path.exists(init_py):
            continue
        # Opening for writing is enough to create the empty file.
        with codecs.open(init_py, 'w', 'utf-8'):
            pass
        results.append({
            'path': init_py,
            'created': True,
            'diff': diff('', ''),
            'error': None,
        })
    return results

开发者ID:ContinuumIO，项目名称:ciocheck，代码行数:26，代码来源:formatters.py

示例8: __init__

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def __init__(self, stop_words_file = None, allow_speech_tags = util.allow_speech_tags):
    """
    Keyword arguments:
    stop_words_file    -- path of the stop-word file, UTF-8 encoded, one
                          stop word per line. If it is not a str, the
                          default stop-word file is used.
    allow_speech_tags  -- list of part-of-speech tags, used for filtering
    """

    allow_speech_tags = [util.as_text(item) for item in allow_speech_tags]

    self.default_speech_tag_filter = allow_speech_tags
    self.stop_words = set()
    self.stop_words_file = get_default_stop_words_file()
    if isinstance(stop_words_file, str):
        self.stop_words_file = stop_words_file
    # Undecodable bytes are ignored ('ignore' error handler); the context
    # manager closes the file once all words are loaded.
    with codecs.open(self.stop_words_file, 'r', 'utf-8', 'ignore') as f:
        self.stop_words.update(word.strip() for word in f)

开发者ID:ouprince，项目名称:text-rank，代码行数:18，代码来源:Segmentation.py

示例9: write_to_conll

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def write_to_conll(outf, fsp, firstex, sentid):
    """Write one sentence to a UTF-8 file as 15 tab-separated columns.

    Args:
        outf: path of the output file.
        fsp: object exposing ``sent.size()`` and ``info_at_idx(i)``; the
            latter must return the 7-tuple
            ``(token, postag, nltkpostag, nltklemma, lu, frm, role)``.
        firstex: when true the file is truncated, otherwise the sentence
            is appended.
        sentid: sentence number; ``sentid - 1`` is written in the FEAT
            column.

    One line is written per token, followed by an empty line that marks
    the end of the sentence.
    """
    mode = "w" if firstex else "a"

    # A separate name for the handle keeps the ``outf`` path intact; the
    # ``with`` block closes the file, so no explicit close() is needed.
    with codecs.open(outf, mode, "utf-8") as handle:
        for i in range(fsp.sent.size()):
            token, postag, nltkpostag, nltklemma, lu, frm, role = fsp.info_at_idx(i)

            columns = [
                str(i+1),          # ID = 0
                # The codecs writer does the encoding; pre-encoding the
                # token produced bytes that cannot be joined with text.
                token,             # FORM = 1
                "_", nltklemma,    # LEMMA PLEMMA = 2,3
                postag, nltkpostag,  # POS PPOS = 4,5
                str(sentid-1), "_",  # FEAT PFEAT = 6,7 ~ replacing FEAT with sentence number
                "_", "_",          # HEAD PHEAD = 8,9
                "_", "_",          # DEPREL PDEPREL = 10,11
                lu, frm,           # FILLPRED PRED = 12,13
                role,              # APREDS = 14
            ]
            handle.write("\t".join(columns) + "\n")

        handle.write("\n") # end of sentence

开发者ID:swabhs，项目名称:open-sesame，代码行数:23，代码来源:preprocess.py

示例10: load_data_and_labels

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def _read_short_lines(path, max_len=100):
    """Return the stripped lines of the UTF-8 file *path* shorter than *max_len*."""
    with codecs.open(path, "r", "utf-8") as f:
        stripped = [s.strip() for s in f.readlines()]
    return [s for s in stripped if len(s) < max_len]


def load_data_and_labels():
    """
    Loads MR polarity data from files, splits the data into characters and
    generates labels.

    ``get_chinese_text()`` is called first, then ``./data/pos.txt`` and
    ``./data/neg.txt`` are read; lines of 100 characters or more are
    discarded.

    Returns:
        ``[x_text, y]`` where ``x_text`` is a list of sentences, each a list
        of single characters (positives first), and ``y`` is an array with
        one row per sentence: ``[0, 1]`` for positive, ``[1, 0]`` for
        negative.
    """
    # download dataset
    get_chinese_text()

    # Load data from files
    positive_examples = _read_short_lines("./data/pos.txt")
    negative_examples = _read_short_lines("./data/neg.txt")

    # Split by characters
    x_text = [list(s) for s in positive_examples + negative_examples]

    # Generate labels.  Building a single array from the combined list also
    # works when one of the two classes is empty, where concatenating a
    # 2-D block with an empty 1-D one would raise.
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.array(positive_labels + negative_labels)
    return [x_text, y]

开发者ID:awslabs，项目名称:dynamic-training-with-apache-mxnet-on-aws，代码行数:27，代码来源:data_helpers.py

示例11: sendImage

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def sendImage(self, to_, path):
    """Send the file at *path* to *to_* as an image upload.

    A message with ``contentType=1`` is sent first; its id is then used as
    the ``oid`` of the upload request posted through ``self.post_content``.

    Returns:
        True when the upload answers with status code 201.

    Raises:
        Exception: if the upload answers with any other status code.
    """
    import os  # local import: only needed for the size lookup below

    M = Message(to=to_, contentType = 1)
    M.contentMetadata = None
    M.contentPreview = None
    M_id = self.Talk.client.sendMessage(0, M).id

    # Open the file once and close it when the upload is done; the size is
    # taken from the file system instead of reading the whole file a
    # second time.
    with open(path, 'rb') as media:
        params = {
            'name': 'media',
            'oid': M_id,
            'size': os.fstat(media.fileno()).st_size,
            'type': 'image',
            'ver': '1.0',
        }
        data = {
            'params': json.dumps(params)
        }
        r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn',
                              data=data, files={'file': media})
    if r.status_code != 201:
        raise Exception('Upload image failure.')
    return True

开发者ID:CyberTKR，项目名称:CyberTK-Self，代码行数:24，代码来源:Self.py

示例12: sendAudio

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def sendAudio(self, to_, path):
    """Send the file at *path* to *to_* as an audio upload.

    A message with ``contentType=3`` is sent first; its id is then used as
    the ``oid`` of the upload request posted through ``self.post_content``.
    The response object is printed.

    Returns:
        True when the upload answers with status code 201 (matching
        ``sendImage`` and ``sendVoice``).

    Raises:
        Exception: if the upload answers with any other status code.
    """
    import os  # local import: only needed for the size lookup below

    M = Message(to=to_, text=None, contentType = 3)
    M_id = self.Talk.client.sendMessage(0, M).id

    # Open the file once and close it when the upload is done; the size is
    # taken from the file system instead of reading the whole file a
    # second time.
    with open(path, 'rb') as media:
        params = {
            'name': 'media',
            'oid': M_id,
            'size': os.fstat(media.fileno()).st_size,
            'type': 'audio',
            'ver': '1.0',
        }
        data = {
            'params': json.dumps(params)
        }
        r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn',
                              data=data, files={'file': media})
    # Call syntax prints the same text on Python 2 and is valid on Python 3.
    print(r)
    if r.status_code != 201:
        raise Exception('Upload audio failure.')
    return True

开发者ID:CyberTKR，项目名称:CyberTK-Self，代码行数:23，代码来源:Self.py

示例13: sendVoice

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def sendVoice(self, to_, path):
    """Send the file at *path* to *to_* as a voice-message upload.

    A message with ``contentType=3`` is sent through ``self._client``; its
    id is then used as the ``oid`` of the upload request posted through
    ``self.post_content``.

    Returns:
        True when the upload answers with status code 201.

    Raises:
        Exception: if the upload answers with any other status code.
    """
    import os  # local import: only needed for the size lookup below

    M = Message(to=to_, text=None, contentType = 3)
    M.contentPreview = None
    M_id = self._client.sendMessage(0, M).id

    # Open the file once and close it when the upload is done; the size is
    # taken from the file system instead of reading the whole file a
    # second time.
    with open(path, 'rb') as media:
        params = {
            'name': 'voice_message',
            'oid': M_id,
            'size': os.fstat(media.fileno()).st_size,
            'type': 'audio',
            'ver': '1.0',
        }
        data = {
            'params': json.dumps(params)
        }
        r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn',
                              data=data, files={'file': media})
    if r.status_code != 201:
        raise Exception('Upload voice failure.')
    return True