This article collects typical usage examples of the Python function nltk.data.gzip_open_unicode. If you have been wondering what data.gzip_open_unicode actually does and how to use it, the hand-picked code examples below may help. You can also explore the other contents of nltk.data, the module in which this function is defined.
The following shows 1 code example of data.gzip_open_unicode, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
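Before the full example, here is a minimal sketch of the function on its own. It is not taken from the example below: the file name events.gz and the text written to it are made up for illustration. gzip_open_unicode wraps a gzip-compressed stream so that you read and write str rather than bytes (UTF-8 by default).

from nltk.data import gzip_open_unicode

# Write unicode text into a gzip-compressed file (the path is hypothetical).
with gzip_open_unicode('events.gz', 'w') as f:
    f.write('féature 1\n')

# Read it back; the wrapper decodes the compressed bytes to str.
with gzip_open_unicode('events.gz', 'r') as f:
    print(f.read())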
Example 1: train
# Required import: from nltk import data [as alias]
# Or: from nltk.data import gzip_open_unicode [as alias]
# (The snippet below also relies on os, tempfile, numpy, and the nltk.classify
#  helpers call_tadm, write_tadm_file, parse_tadm_weights and
#  TadmEventMaxentFeatureEncoding.)
@classmethod
def train(cls, train_toks, **kwargs):
    algorithm = kwargs.get('algorithm', 'tao_lmvm')
    trace = kwargs.get('trace', 3)
    encoding = kwargs.get('encoding', None)
    labels = kwargs.get('labels', None)
    sigma = kwargs.get('gaussian_prior_sigma', 0)
    count_cutoff = kwargs.get('count_cutoff', 0)
    max_iter = kwargs.get('max_iter')
    ll_delta = kwargs.get('min_lldelta')

    # Construct an encoding from the training data.
    if not encoding:
        encoding = TadmEventMaxentFeatureEncoding.train(train_toks,
                                                        count_cutoff,
                                                        labels=labels)

    trainfile_fd, trainfile_name = \
        tempfile.mkstemp(prefix='nltk-tadm-events-', suffix='.gz')
    weightfile_fd, weightfile_name = \
        tempfile.mkstemp(prefix='nltk-tadm-weights-')

    # Write the training events to a gzip-compressed unicode text file.
    trainfile = gzip_open_unicode(trainfile_name, 'w')
    write_tadm_file(train_toks, encoding, trainfile)
    trainfile.close()

    # Build the command-line options for the external tadm trainer.
    options = []
    options.extend(['-monitor'])
    options.extend(['-method', algorithm])
    if sigma:
        options.extend(['-l2', '%.6f' % sigma**2])
    if max_iter:
        options.extend(['-max_it', '%d' % max_iter])
    if ll_delta:
        options.extend(['-fatol', '%.6f' % abs(ll_delta)])
    options.extend(['-events_in', trainfile_name])
    options.extend(['-params_out', weightfile_name])
    if trace < 3:
        options.extend(['2>&1'])
    else:
        options.extend(['-summary'])

    # Run the external tadm binary, then read back the learned weights.
    call_tadm(options)

    with open(weightfile_name, 'r') as weightfile:
        weights = parse_tadm_weights(weightfile)

    os.remove(trainfile_name)
    os.remove(weightfile_name)

    # Convert from base-e to base-2 weights.
    weights *= numpy.log2(numpy.e)

    # Build the classifier.
    return cls(encoding, weights)
######################################################################
#{ Demo
######################################################################
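As a closing note, the following hypothetical sketch shows how a train classmethod like the one above is typically invoked. It is not part of the example on this page: the toy train_toks data is invented, TadmMaxentClassifier is assumed to be importable from nltk.classify.maxent, and training only succeeds if the external tadm executable is installed where NLTK can find it.

from nltk.classify.maxent import TadmMaxentClassifier

# Toy (featureset, label) pairs in the shape the nltk.classify API expects.
train_toks = [({'word': 'good'}, 'pos'),
              ({'word': 'bad'}, 'neg')]

# Requires the external tadm binary; gaussian_prior_sigma is optional.
classifier = TadmMaxentClassifier.train(train_toks, gaussian_prior_sigma=1.0)
print(classifier.classify({'word': 'good'}))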