This article collects typical code examples of the codecs.open method in Python. If you have been wondering how exactly codecs.open works, how to call it, or what it looks like in real code, the hand-picked examples below should help. You can also explore further usage examples for the codecs module that the method belongs to.
Fifteen code examples of codecs.open are shown below, sorted by popularity by default. Upvote the ones you like or find useful; your feedback helps the system recommend better Python code examples.
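Before diving into the examples, here is a minimal, self-contained sketch of what codecs.open itself does (the file name demo.txt is just a placeholder): it returns a wrapped stream that decodes on read and encodes on write, so your code handles unicode text regardless of the on-disk encoding. On Python 3 the built-in open(..., encoding=...) covers the same use case, but codecs.open remains common in code that also has to run on Python 2.

import codecs

# Write unicode text; codecs.open encodes it to UTF-8 on the way out.
with codecs.open('demo.txt', 'w', 'utf-8') as f:
    f.write(u'codecs.open demo\n')

# Read it back; the stream decodes UTF-8 and yields unicode lines.
with codecs.open('demo.txt', 'r', 'utf-8') as f:
    for line in f:
        print(line.strip())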
Example 1: loadWordNet
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
def loadWordNet(self):
    """
    Load zh_wordnet into the object:
    collect the entries of the cow-not-full file into a lemma -> synset-list mapping.
    """
    f = codecs.open(self.wordnet_txt, "rb", "utf-8")
    self.known = dict()
    #self.known = set()
    for l in f:
        if l.startswith('\ufeff#') or not l.strip():
            continue
        row = l.strip().split("\t")
        (synset, lemma) = row
        #if len(row) == 2:
        #    (synset, lemma) = row
        #elif len(row) == 3:
        #    (synset, lemma, status) = row  # there are no three-column entries at all
        #else:
        #    print("illformed line: ", l.strip())
        #if not (synset.strip(), lemma.strip()) in self.known:
        #    self.known.add((synset.strip(), lemma.strip()))
        if not lemma.strip() in self.known.keys():
            self.known[lemma.strip()] = []
        self.known[lemma.strip()].append(synset)
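For orientation, a hypothetical usage sketch follows (the wrapper class name, constructor and file path are made up; the method only needs an object whose wordnet_txt attribute points at the tab-separated synset<TAB>lemma file):

helper = ZhWordNetHelper(wordnet_txt='cow-not-full.txt')  # hypothetical class and path
helper.loadWordNet()
synsets = helper.known.get(u'lemma', [])  # all synset ids recorded for this lemma, or []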
Example 2: txt2sentence
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
def txt2sentence(self, filename):
    """
    Read a <cut_file> and return its sentences as a list of lists,
    where each inner list contains the words of one sentence.
    """
    sentences = []
    try:
        fp = open(filename, 'r', encoding='utf-8')
        lines = fp.readlines()
    except UnicodeDecodeError:
        # fall back to GBK when the file is not UTF-8 encoded
        fp = open(filename, 'r', encoding='gbk')
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if len(line) <= 1:
            continue
        line = line.replace('\n', '').replace('\r', '').split(' ')
        sentences.append(line)
    return sentences
Example 3: get_perf
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import os, stat, subprocess; from os.path import isfile;
# from os import chmod; PREFIX is a module-level path prefix defined elsewhere.
def get_perf(filename):
    ''' run conlleval.pl perl script to obtain
    precision/recall and F1 score '''
    _conlleval = PREFIX + 'conlleval'
    if not isfile(_conlleval):
        #download('http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl')
        os.system('wget https://www.comp.nus.edu.sg/%7Ekanmy/courses/practicalNLP_2008/packages/conlleval.pl')
        chmod('conlleval.pl', stat.S_IRWXU)  # give the execute permissions

    out = []
    proc = subprocess.Popen(["perl", _conlleval], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate(open(filename).read())
    for line in stdout.split('\n'):
        if 'accuracy' in line:
            out = line.split()
            break
    # out = ['accuracy:', '16.26%;', 'precision:', '0.00%;', 'recall:', '0.00%;', 'FB1:', '0.00']
    precision = float(out[3][:-2])
    recall = float(out[5][:-2])
    f1score = float(out[7])
    return {'p': precision, 'r': recall, 'f1': f1score}
Example 4: save_mappings
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import cPickle (Python 2); it uses the built-in open rather than codecs.open.
def save_mappings(self, id_to_word, id_to_char, id_to_tag):
    #{{{
    """
    We need to save the mappings if we want to use the model later.
    """
    self.id_to_word = id_to_word
    self.id_to_char = id_to_char
    self.id_to_tag = id_to_tag
    with open(self.mappings_path, 'wb') as f:
        mappings = {
            'id_to_word': self.id_to_word,
            'id_to_char': self.id_to_char,
            'id_to_tag': self.id_to_tag,
        }
        cPickle.dump(mappings, f)
    #}}}
Example 5: load_sentences
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes the zero_digits() helper from the same project.
def load_sentences(path, lower, zeros):
    #{{{
    """
    Load sentences. A line must contain at least a word and its tag.
    Sentences are separated by empty lines.
    """
    sentences = []
    sentence = []
    for line in codecs.open(path, 'r', 'utf8'):
        line = zero_digits(line.rstrip()) if zeros else line.rstrip()
        if not line:
            if len(sentence) > 0:
                if 'DOCSTART' not in sentence[0][0]:
                    sentences.append(sentence)
                sentence = []
        else:
            word = line.split()
            assert len(word) >= 2
            sentence.append(word)
    if len(sentence) > 0:
        if 'DOCSTART' not in sentence[0][0]:
            sentences.append(sentence)
    return sentences
    #}}}
Example 6: find_version
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import os, re; `here` is the directory containing setup.py.
def find_version(*file_paths):
    # Open in Latin-1 so that we avoid encoding errors.
    # Use codecs.open for Python 2 compatibility
    try:
        f = codecs.open(os.path.join(here, *file_paths), 'r', 'latin1')
        version_file = f.read()
        f.close()
    except (IOError, OSError):
        raise RuntimeError("Unable to find version string.")
    # The version line must have the form
    # __version__ = 'ver'
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                              version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")

# Get the long description from the relevant file
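To make the pattern concrete, here is what a matching file and call might look like (the package name and layout are hypothetical):

# mypackage/__init__.py  (hypothetical)
__version__ = '1.2.3'

# in setup.py, with `here` set to os.path.abspath(os.path.dirname(__file__)):
version = find_version('mypackage', '__init__.py')  # -> '1.2.3'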
Example 7: _add_missing_init_py
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import os; a diff() helper; self.cmd_root points at the repository root.
def _add_missing_init_py(self, paths):
    """Add missing __init__.py files in the module subdirectories."""
    results = []
    folders = [os.path.dirname(p) for p in paths]
    # Avoid adding an init on repo level if setup.py or other script on the
    # top level has changed
    if self.cmd_root in folders:
        folders.remove(self.cmd_root)
    for folder in folders:
        init_py = os.path.join(folder, "__init__.py")
        exists = os.path.exists(init_py)
        if not exists:
            with codecs.open(init_py, 'w', 'utf-8') as handle:
                handle.flush()
        result = {
            'path': init_py,
            'created': not exists,
            'diff': diff('', ''),
            'error': None,
        }
        results.append(result)
    return results
Example 8: __init__
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes the module's own util helpers and get_default_stop_words_file().
def __init__(self, stop_words_file=None, allow_speech_tags=util.allow_speech_tags):
    """
    Keyword arguments:
    stop_words_file   -- path of the stop-words file (UTF-8 encoded, one stop word per line).
                         If it is not of type str, the default stop-words file is used.
    allow_speech_tags -- list of part-of-speech tags used for filtering.
    """
    allow_speech_tags = [util.as_text(item) for item in allow_speech_tags]
    self.default_speech_tag_filter = allow_speech_tags
    self.stop_words = set()
    self.stop_words_file = get_default_stop_words_file()
    if type(stop_words_file) is str:
        self.stop_words_file = stop_words_file
    for word in codecs.open(self.stop_words_file, 'r', 'utf-8', 'ignore'):
        self.stop_words.add(word.strip())
Example 9: write_to_conll
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# Python 2 snippet (uses xrange); fsp provides per-token fields via info_at_idx().
def write_to_conll(outf, fsp, firstex, sentid):
    mode = "a"
    if firstex:
        mode = "w"

    with codecs.open(outf, mode, "utf-8") as outf:
        for i in xrange(fsp.sent.size()):
            token, postag, nltkpostag, nltklemma, lu, frm, role = fsp.info_at_idx(i)

            outf.write(str(i + 1) + "\t")  # ID = 0
            outf.write(token.encode('utf-8') + "\t")  # FORM = 1
            outf.write("_\t" + nltklemma + "\t")  # LEMMA PLEMMA = 2,3
            outf.write(postag + "\t" + nltkpostag + "\t")  # POS PPOS = 4,5
            outf.write(str(sentid - 1) + "\t_\t")  # FEAT PFEAT = 6,7 ~ replacing FEAT with sentence number
            outf.write("_\t_\t")  # HEAD PHEAD = 8,9
            outf.write("_\t_\t")  # DEPREL PDEPREL = 10,11
            outf.write(lu + "\t" + frm + "\t")  # FILLPRED PRED = 12,13
            outf.write(role + "\n")  # APREDS = 14
        outf.write("\n")  # end of sentence
        outf.close()  # redundant here: the with-block already closes the file
Example 10: load_data_and_labels
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import numpy as np; get_chinese_text() downloads the data files.
def load_data_and_labels():
    """
    Loads MR polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    # download dataset
    get_chinese_text()
    # Load data from files
    positive_examples = list(codecs.open("./data/pos.txt", "r", "utf-8").readlines())
    positive_examples = [s.strip() for s in positive_examples]
    positive_examples = [pe for pe in positive_examples if len(pe) < 100]
    negative_examples = list(codecs.open("./data/neg.txt", "r", "utf-8").readlines())
    negative_examples = [s.strip() for s in negative_examples]
    negative_examples = [ne for ne in negative_examples if len(ne) < 100]
    # Split by words
    x_text = positive_examples + negative_examples
    # x_text = [clean_str(sent) for sent in x_text]
    x_text = [list(s) for s in x_text]
    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x_text, y]
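A quick sketch of what the return value looks like, assuming get_chinese_text() has placed pos.txt and neg.txt under ./data (the printed content is illustrative only):

x_text, y = load_data_and_labels()
print(len(x_text), y.shape)  # number of sentences and an (N, 2) one-hot label array
print(x_text[0][:5])         # each sentence is a list of single characters
print(y[0])                  # [0, 1] marks a positive example, [1, 0] a negative one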
Example 11: sendImage
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import json; the Message class and self.Talk client of the LINE library in use.
def sendImage(self, to_, path):
    M = Message(to=to_, contentType=1)
    M.contentMetadata = None
    M.contentPreview = None
    M_id = self.Talk.client.sendMessage(0, M).id
    files = {
        'file': open(path, 'rb'),
    }
    params = {
        'name': 'media',
        'oid': M_id,
        'size': len(open(path, 'rb').read()),
        'type': 'image',
        'ver': '1.0',
    }
    data = {
        'params': json.dumps(params)
    }
    r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
    if r.status_code != 201:
        raise Exception('Upload image failure.')
    return True
Example 12: sendAudio
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import json; the Message class and self.Talk client of the LINE library in use (Python 2).
def sendAudio(self, to_, path):
    M = Message(to=to_, text=None, contentType=3)
    M_id = self.Talk.client.sendMessage(0, M).id
    files = {
        'file': open(path, 'rb'),
    }
    params = {
        'name': 'media',
        'oid': M_id,
        'size': len(open(path, 'rb').read()),
        'type': 'audio',
        'ver': '1.0',
    }
    data = {
        'params': json.dumps(params)
    }
    r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
    print r
    if r.status_code != 201:
        raise Exception('Upload audio failure.')
Example 13: sendVoice
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import json; the Message class and self._client of the LINE library in use.
def sendVoice(self, to_, path):
    M = Message(to=to_, text=None, contentType=3)
    M.contentPreview = None
    M_id = self._client.sendMessage(0, M).id
    files = {
        'file': open(path, 'rb'),
    }
    params = {
        'name': 'voice_message',
        'oid': M_id,
        'size': len(open(path, 'rb').read()),
        'type': 'audio',
        'ver': '1.0',
    }
    data = {
        'params': json.dumps(params)
    }
    r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
    if r.status_code != 201:
        raise Exception('Upload voice failure.')
    return True
Example 14: bod2darknet
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import os; import numpy as np; the project's GetFileFromThisRootDir,
# parse_bod_poly and dots4ToRecC helpers.
def bod2darknet(subpath, label, extractclassname):
    labelpath = os.path.join(subpath, label)
    filelist = GetFileFromThisRootDir(labelpath)
    outpath = r'/home/dj/data/bod-subset/labels'
    for fullname in filelist:
        objects = parse_bod_poly(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        with open(os.path.join(outpath, name + '.txt'), 'w') as f_out:
            for obj in objects:
                poly = obj['poly']
                bbox = np.array(dots4ToRecC(poly)) / 1024
                if (sum(bbox <= 0) + sum(bbox >= 1)) >= 1:
                    continue
                if (obj['name'] in extractclassname):
                    id = extractclassname.index(obj['name'])
                else:
                    continue
                outline = str(id) + ' ' + ' '.join(list(map(str, bbox)))
                f_out.write(outline + '\n')
Example 15: bodpolyToRec
# Required import: import codecs [as alias]
# Or: from codecs import open [as alias]
# This snippet also assumes: import os; the project's parse_bod_poly and dots4ToRec8 helpers.
def bodpolyToRec(self, label):
    Recpath = os.path.join(self.basepath, r'ReclabelTxt')
    for basename in self.namelist:
        # objects = parse_bod_poly(os.path.join(self.labelpath, basename + '.txt'))
        objects = parse_bod_poly(os.path.join(self.basepath, label, basename + '.txt'))
        f_out = codecs.open(os.path.join(Recpath, basename + '.txt'), 'w', 'utf_16')
        for obj in objects:
            bbox = dots4ToRec8(obj['poly'])
            name = obj['name']
            difficult = obj['difficult']
            bbox = list(map(str, bbox))
            outline = ' '.join(bbox)
            outline = outline + ' ' + name
            if difficult:
                outline = outline + ' ' + str(difficult)
            f_out.write(outline + '\n')
        f_out.close()  # close each per-image output file before moving on