当前位置: 首页>>代码示例>>Python>>正文

Python codecs.open方法代码示例

本文整理汇总了Python中codecs.open方法的典型用法代码示例。如果您正苦于以下问题:Python codecs.open方法的具体用法?Python codecs.open怎么用?Python codecs.open使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在codecs的用法示例。


示例1: loadWordNet

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def loadWordNet(self):
        load zh_wordnet into the object.
        f = codecs.open(self.wordnet_txt, "rb", "utf-8")
        self.known = dict()
        #self.known = set()
        for l in f:
            if l.startswith('\ufeff#') or not l.strip():
            row = l.strip().split("\t")
            #if len(row) == 2:
            #    (synset, lemma) = row 
            #elif len(row) == 3:
            #    (synset, lemma, status) = row #根本就没有三个东西的项
            #    print("illformed line: ", l.strip())
            #if not (synset.strip(), lemma.strip()) in self.known:
            #    self.known.add((synset.strip(), lemma.strip()))
            if not lemma.strip() in self.known.keys():

示例2: txt2sentence

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def txt2sentence(self, filename):    
        read a <cut_file> and return an iterator sentences
        (that is a list of some lists, and the second 'list' is a list of words ).

        for line in lines:
            line = line.strip()
            if len(line)<=1:
            line=line.replace('\n','').replace('\r','').split(' ')
        return sentences 

示例3: get_perf

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def get_perf(filename):
    """Run the conlleval perl script on ``filename`` and parse its scores.

    The contents of ``filename`` are piped to ``perl <PREFIX>conlleval`` and
    the last output line containing the word ``accuracy`` is parsed.

    Args:
        filename: path of the prediction file to feed to the script.

    Returns:
        dict with float values under the keys ``'p'`` (precision),
        ``'r'`` (recall) and ``'f1'`` (FB1 score).

    Raises:
        IOError/OSError: if ``filename`` cannot be read.
        IndexError: if the script output contains no ``accuracy`` line.
    """
    _conlleval = PREFIX + 'conlleval'
    if not isfile(_conlleval):
        # NOTE(review): the download lands in the current directory as
        # 'conlleval.pl', while the script executed below is PREFIX +
        # 'conlleval' -- confirm the two paths are meant to coincide.
        os.system('wget https://www.comp.nus.edu.sg/%7Ekanmy/courses/practicalNLP_2008/packages/conlleval.pl')
        chmod('conlleval.pl', stat.S_IRWXU) # give the execute permissions

    # Read the input first (and close it) so that an unreadable file fails
    # before a perl process is spawned and left waiting on stdin.
    with open(filename, 'rb') as pred_file:
        payload = pred_file.read()

    proc = subprocess.Popen(["perl", _conlleval], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # The pipes are binary: send bytes and decode the reply so the parsing
    # below works on text under both Python 2 and Python 3.
    stdout, _ = proc.communicate(payload)

    out = []
    for line in stdout.decode('utf-8', 'replace').split('\n'):
        if 'accuracy' in line:
            out = line.split()
    # out = ['accuracy:', '16.26%;', 'precision:', '0.00%;', 'recall:', '0.00%;', 'FB1:', '0.00']
    precision = float(out[3][:-2])  # strip the trailing '%;'
    recall = float(out[5][:-2])
    f1score = float(out[7])

    return {'p': precision, 'r': recall, 'f1': f1score}

示例4: save_mappings

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def save_mappings(self, id_to_word, id_to_char, id_to_tag):
        """Store the id mappings on the instance and pickle them to disk.

        We need to save the mappings if we want to use the model later.

        Args:
            id_to_word: mapping stored as ``self.id_to_word``.
            id_to_char: mapping stored as ``self.id_to_char``.
            id_to_tag: mapping stored as ``self.id_to_tag``.

        The three mappings are written as one dict (keys ``'id_to_word'``,
        ``'id_to_char'``, ``'id_to_tag'``) to ``self.mappings_path``.
        """
        self.id_to_word = id_to_word
        self.id_to_char = id_to_char
        self.id_to_tag = id_to_tag
        with open(self.mappings_path, 'wb') as f:
            mappings = {
                'id_to_word': self.id_to_word,
                'id_to_char': self.id_to_char,
                'id_to_tag': self.id_to_tag,
            }
            cPickle.dump(mappings, f)

示例5: load_sentences

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def load_sentences(path, lower, zeros):
    """Load sentences from a UTF-8 file of whitespace-separated columns.

    A line must contain at least a word and its tag.
    Sentences are separated by empty lines.

    Args:
        path: file to read.
        lower: not used by this function; kept for interface compatibility.
        zeros: when true, each line is passed through ``zero_digits`` before
            being split.

    Returns:
        A list of sentences; each sentence is a list of token rows, and each
        row is the list of columns from one line. Sentences whose first
        column of the first row contains ``'DOCSTART'`` are dropped.

    Raises:
        AssertionError: if a non-empty line has fewer than two columns.
    """
    sentences = []
    sentence = []
    # Use a context manager so the file is closed even if a line is malformed.
    with codecs.open(path, 'r', 'utf8') as handle:
        for line in handle:
            line = zero_digits(line.rstrip()) if zeros else line.rstrip()
            if not line:
                # Blank line: the current sentence (if any) is complete.
                if len(sentence) > 0:
                    if 'DOCSTART' not in sentence[0][0]:
                        sentences.append(sentence)
                    sentence = []
            else:
                word = line.split()
                assert len(word) >= 2
                sentence.append(word)
    # The file may not end with a blank line; flush the last sentence.
    if len(sentence) > 0:
        if 'DOCSTART' not in sentence[0][0]:
            sentences.append(sentence)
    return sentences

示例6: find_version

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def find_version(*file_paths):
    """Return the ``__version__`` string declared in a source file.

    Args:
        *file_paths: path components, joined onto the module-level ``here``
            directory, locating the file to scan.

    Returns:
        The text between the quotes of the ``__version__ = '...'`` line.

    Raises:
        RuntimeError: if the file cannot be read or holds no such line.
    """
    # Open in Latin-1 so that we avoid encoding errors.
    # Use codecs.open for Python 2 compatibility
    try:
        with codecs.open(os.path.join(here, *file_paths), 'r', 'latin1') as f:
            version_file = f.read()
    except (IOError, OSError):
        raise RuntimeError("Unable to find version string.")

    # The version line must have the form
    # __version__ = 'ver'
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                              version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")

# Get the long description from the relevant file 

示例7: _add_missing_init_py

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def _add_missing_init_py(self, paths):
        """Add missing __init__.py files in the module subdirectories.

        Args:
            paths: iterable of file paths; the directory of each path is
                checked for an ``__init__.py``.

        Returns:
            A list with one dict per file created, holding the keys
            ``'path'``, ``'created'``, ``'diff'`` and ``'error'``.
        """
        results = []
        folders = [os.path.dirname(p) for p in paths]

        # Avoid adding an init on repo level if setup.py or other script on the
        # top level has changed
        if self.cmd_root in folders:
            folders = [folder for folder in folders if folder != self.cmd_root]

        for folder in folders:
            init_py = os.path.join(folder, "__init__.py")
            exists = os.path.exists(init_py)
            if not exists:
                # Opening for write and closing immediately creates an
                # empty file.
                with codecs.open(init_py, 'w', 'utf-8'):
                    pass
                result = {
                    'path': init_py,
                    'created': not exists,
                    'diff': diff('', ''),
                    'error': None,
                }
                results.append(result)
        return results

示例8: __init__

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def __init__(self, stop_words_file = None, allow_speech_tags = util.allow_speech_tags):
        Keyword arguments:
        stop_words_file    -- 保存停止词的文件路径,utf8编码,每行一个停止词。若不是str类型,则使用默认的停止词
        allow_speech_tags  -- 词性列表,用于过滤
        allow_speech_tags = [util.as_text(item) for item in allow_speech_tags]

        self.default_speech_tag_filter = allow_speech_tags
        self.stop_words = set()
        self.stop_words_file = get_default_stop_words_file()
        if type(stop_words_file) is str:
            self.stop_words_file = stop_words_file
        for word in codecs.open(self.stop_words_file, 'r', 'utf-8', 'ignore'):

示例9: write_to_conll

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def write_to_conll(outf, fsp, firstex, sentid):
    """Write one sentence to ``outf`` as 15 tab-separated columns per token.

    Args:
        outf: path of the output file (written as UTF-8).
        fsp: object exposing ``sent.size()`` (number of tokens) and
            ``info_at_idx(i)``, which returns the 7-tuple
            ``(token, postag, nltkpostag, nltklemma, lu, frm, role)``.
        firstex: when true the file is truncated; otherwise it is appended to.
        sentid: 1-based sentence number; ``sentid - 1`` is written in the
            FEAT column.

    One line is written per token, followed by a blank line that terminates
    the sentence.
    """
    mode = "w" if firstex else "a"

    # The writer encodes to UTF-8 itself, so text is passed through as-is;
    # pre-encoding the token breaks on non-ASCII input.
    with codecs.open(outf, mode, "utf-8") as conll_file:
        for i in range(fsp.sent.size()):
            token, postag, nltkpostag, nltklemma, lu, frm, role = fsp.info_at_idx(i)

            columns = [
                str(i + 1),       # ID = 0
                token,            # FORM = 1
                "_", nltklemma,   # LEMMA PLEMMA = 2,3
                postag, nltkpostag,  # POS PPOS = 4,5
                str(sentid - 1), "_",  # FEAT PFEAT = 6,7 ~ replacing FEAT with sentence number
                "_", "_",         # HEAD PHEAD = 8,9
                "_", "_",         # DEPREL PDEPREL = 10,11
                lu, frm,          # FILLPRED PRED = 12,13
                role,             # APREDS = 14
            ]
            conll_file.write("\t".join(columns) + "\n")

        conll_file.write("\n") # end of sentence

示例10: load_data_and_labels

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def load_data_and_labels():
    """Load polarity data from files, split it into characters, build labels.

    Reads ``./data/pos.txt`` and ``./data/neg.txt`` (UTF-8, one example per
    line), strips each line and keeps only examples shorter than 100
    characters.

    Returns:
        ``[x_text, y]`` where ``x_text`` is a list of examples, each a list
        of single characters (positives first, then negatives), and ``y`` is
        an integer array of shape ``(len(x_text), 2)`` holding ``[0, 1]``
        for positive and ``[1, 0]`` for negative examples.
    """
    def _read_examples(path):
        # Close the file deterministically instead of leaking the handle.
        with codecs.open(path, "r", "utf-8") as handle:
            stripped = [s.strip() for s in handle.readlines()]
        return [s for s in stripped if len(s) < 100]

    # Load data from files
    positive_examples = _read_examples("./data/pos.txt")
    negative_examples = _read_examples("./data/neg.txt")

    # Split by characters
    x_text = [list(s) for s in positive_examples + negative_examples]

    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    # Build from the combined list and force two columns so that an empty
    # class does not cause a dimension-mismatch error.
    y = np.array(positive_labels + negative_labels, dtype=int).reshape(-1, 2)
    return [x_text, y]

示例11: sendImage

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def sendImage(self, to_, path):
      """Send the image file at ``path`` to ``to_``.

      A message with ``contentType = 1`` is sent first; its id is then used
      as the ``oid`` of the upload request.

      Args:
         to_: recipient passed to ``Message(to=...)``.
         path: path of the image file to upload.

      Returns:
         True when the upload responds with status 201.

      Raises:
         Exception: if the upload response status is not 201.
      """
      import os

      M = Message(to=to_,contentType = 1)
      M.contentMetadata = None
      M.contentPreview = None
      M_id = self.Talk.client.sendMessage(0,M).id
      # Open the file once and close it after the upload; take the size from
      # the open handle instead of reading the whole file a second time.
      with open(path, 'rb') as media:
         files = {
            'file': media,
         }
         params = {
            'name': 'media',
            'oid': M_id,
            'size': os.fstat(media.fileno()).st_size,
            'type': 'image',
            'ver': '1.0',
         }
         data = {
            'params': json.dumps(params)
         }
         r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
      if r.status_code != 201:
         raise Exception('Upload image failure.')
      return True

示例12: sendAudio

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def sendAudio(self, to_, path):
        """Send the audio file at ``path`` to ``to_``.

        A message with ``contentType = 3`` is sent first; its id is then
        used as the ``oid`` of the upload request. The upload response is
        printed.

        Args:
            to_: recipient passed to ``Message(to=...)``.
            path: path of the audio file to upload.

        Raises:
            Exception: if the upload response status is not 201.
        """
        import os

        M = Message(to=to_, text=None, contentType = 3)
        M_id = self.Talk.client.sendMessage(0,M).id
        # Open the file once and close it after the upload; take the size
        # from the open handle instead of reading the file a second time.
        with open(path, 'rb') as media:
            files = {
                'file': media,
            }
            params = {
                'name': 'media',
                'oid': M_id,
                'size': os.fstat(media.fileno()).st_size,
                'type': 'audio',
                'ver': '1.0',
            }
            data = {
                'params': json.dumps(params)
            }

            r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
        # Call form prints the same text on Python 2 and also parses on 3.
        print(r)
        if r.status_code != 201:
            raise Exception('Upload audio failure.')

示例13: sendVoice

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def sendVoice(self, to_, path):
        """Send the voice-message file at ``path`` to ``to_``.

        A message with ``contentType = 3`` is sent first; its id is then
        used as the ``oid`` of the upload request.

        Args:
            to_: recipient passed to ``Message(to=...)``.
            path: path of the audio file to upload.

        Returns:
            True when the upload responds with status 201.

        Raises:
            Exception: if the upload response status is not 201.
        """
        import os

        M = Message(to=to_, text=None, contentType = 3)
        M.contentPreview = None
        M_id = self._client.sendMessage(0,M).id
        # Open the file once and close it after the upload; take the size
        # from the open handle instead of reading the file a second time.
        with open(path, 'rb') as media:
            files = {
                'file': media,
            }
            params = {
                'name': 'voice_message',
                'oid': M_id,
                'size': os.fstat(media.fileno()).st_size,
                'type': 'audio',
                'ver': '1.0',
            }
            data = {
                'params': json.dumps(params)
            }
            r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
        if r.status_code != 201:
            raise Exception('Upload voice failure.')
        return True

示例14: bod2darknet

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def bod2darknet(subpath, label, extractclassname,
                outpath=r'/home/dj/data/bod-subset/labels'):
    """Convert polygon label files into one ``<class id> <box...>`` file each.

    Every file found under ``os.path.join(subpath, label)`` is parsed with
    ``parse_bod_poly``; a text file with the same base name is written to
    ``outpath``, one line per kept object.

    Args:
        subpath: dataset directory containing the label folder.
        label: name of the label folder inside ``subpath``.
        extractclassname: sequence of class names to keep; an object's class
            id is its index in this sequence.
        outpath: directory receiving the converted ``.txt`` files (defaults
            to the previously hard-coded location).

    Objects are skipped when their class is not in ``extractclassname`` or
    when any box value, after division by 1024, falls outside the open
    interval (0, 1).
    """
    labelpath = os.path.join(subpath, label)
    filelist = GetFileFromThisRootDir(labelpath)
    for fullname in filelist:
        objects = parse_bod_poly(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        with open(os.path.join(outpath, name + '.txt'), 'w') as f_out:
            for obj in objects:
                poly = obj['poly']
                # NOTE(review): 1024 is presumably the image side length in
                # pixels -- confirm against the dataset.
                bbox = np.array(dots4ToRecC(poly)) / 1024
                if np.any((bbox <= 0) | (bbox >= 1)):
                    continue
                if obj['name'] not in extractclassname:
                    # Skip unlisted classes rather than reusing the class id
                    # left over from a previous object.
                    continue
                class_id = extractclassname.index(obj['name'])
                outline = str(class_id) + ' ' + ' '.join(map(str, bbox))
                f_out.write(outline + '\n')

示例15: bodpolyToRec

# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import open [as 别名]
def bodpolyToRec(self, label):
        """Rewrite each polygon label file as a ``dots4ToRec8`` box file.

        For every base name in ``self.namelist`` the file
        ``<self.basepath>/<label>/<basename>.txt`` is parsed with
        ``parse_bod_poly`` and a UTF-16 file of the same name is written to
        ``<self.basepath>/ReclabelTxt``. Each output line holds the
        space-separated box values, the object name and, when truthy, the
        ``difficult`` value.

        Args:
            label: name of the label folder inside ``self.basepath``.
        """
        Recpath = os.path.join(self.basepath, r'ReclabelTxt')
        for basename in self.namelist:
            objects = parse_bod_poly(os.path.join(self.basepath, label, basename + '.txt'))
            # Context manager guarantees each output file is flushed and
            # closed before moving on to the next one.
            with codecs.open(os.path.join(Recpath, basename + '.txt'), 'w', 'utf_16') as f_out:
                for obj in objects:
                    bbox = list(map(str, dots4ToRec8(obj['poly'])))
                    name = obj['name']
                    difficult = obj['difficult']
                    outline = ' '.join(bbox) + ' ' + name
                    if difficult:
                        outline = outline + ' ' + str(difficult)
                    f_out.write(outline + '\n')
