本文整理汇总了Python中nltk.compat.izip方法的典型用法代码示例。如果您正苦于以下问题：Python compat.izip方法的具体用法？Python compat.izip怎么用？Python compat.izip使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块nltk.compat的用法示例。
在下文中一共展示了compat.izip方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: are_files_identical
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def are_files_identical(filename1, filename2, debug=False):
    """
    Compare two files, ignoring line order, carriage returns and any
    other leading/trailing whitespace on each line.

    Both files are read in binary mode, their lines are sorted, and the
    sorted lines are compared pairwise after stripping.  The files are
    only considered identical when every pair matches *and* both files
    contain the same number of lines.

    :param filename1: Path of the first file (reported as the output file).
    :param filename2: Path of the second file (reported as the reference
        file).
    :param debug: If true, print a description of the first difference
        found.
    :type debug: bool
    :return: ``True`` if the files match, ``False`` otherwise.
    :rtype: bool
    """
    with open(filename1, "rb") as fileA:
        linesA = sorted(fileA.readlines())
    with open(filename2, "rb") as fileB:
        linesB = sorted(fileB.readlines())

    for lineA, lineB in zip(linesA, linesB):
        if lineA.strip() != lineB.strip():
            if debug:
                print("Error while comparing files. " +
                      "First difference at line below.")
                print("=> Output file line: {0}".format(lineA))
                print("=> Refer. file line: {0}".format(lineB))
            return False

    # Pairwise iteration stops at the shorter file, so surplus lines in the
    # longer one have to be detected separately.
    if len(linesA) != len(linesB):
        if debug:
            print("Error while comparing files. " +
                  "Files have a different number of lines.")
        return False
    return True
示例2: accuracy
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def accuracy(reference, test):
    """
    Given a list of reference values and a corresponding list of test
    values, return the fraction of corresponding values that are
    equal.  In particular, return the fraction of indices
    ``0 <= i < len(test)`` such that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")
    num_correct = sum(x == y for x, y in izip(reference, test))
    # Convert before dividing so the result is a true fraction even where
    # ``/`` applied to two ints truncates (Python 2 without true division).
    return float(num_correct) / len(test)
示例3: log_likelihood
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def log_likelihood(reference, test):
    """
    Return the average log likelihood of the reference values under the
    corresponding test probability distributions.

    Each reference value is scored with the ``logprob`` method of the
    distribution at the same position; the scores are summed and divided
    by the number of reference values.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions over values to
        compare against the corresponding reference values.
    :type test: list(ProbDistI)
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    """
    num_values = len(reference)
    if num_values != len(test):
        raise ValueError("Lists must have the same length.")

    # Accumulate dist.logprob(val) over the aligned (value, distribution) pairs.
    running_total = 0
    for value, distribution in izip(reference, test):
        running_total = running_total + distribution.logprob(value)
    return running_total / num_values
示例4: accuracy
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def accuracy(reference, test):
    """
    Return the fraction of positions at which ``reference`` and ``test``
    hold equal values, i.e. the fraction of indices
    ``0 <= i < len(test)`` such that ``test[i] == reference[i]``.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    """
    total = len(test)
    if len(reference) != total:
        raise ValueError("Lists must have the same length.")
    matches = sum(
        ref_value == test_value
        for ref_value, test_value in izip(reference, test)
    )
    return float(matches) / total
示例5: log_likelihood
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def log_likelihood(reference, test):
    """
    Compute the mean of ``dist.logprob(val)`` over aligned pairs of a
    reference value and a test probability distribution.

    :param reference: A list of reference values
    :type reference: list
    :param test: A list of probability distributions over values to
        compare against the corresponding reference values.
    :type test: list(ProbDistI)
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")

    # One log probability per (reference value, distribution) pair.
    log_probs = [dist.logprob(val) for (val, dist) in izip(reference, test)]
    return sum(log_probs) / len(reference)
示例6: buildClassifier_score
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def buildClassifier_score(trainSet, devtestSet, classifier):
    """
    Train ``classifier`` on ``trainSet`` and score it on ``devtestSet``.

    The estimator is wrapped in ``SklearnClassifier`` (presumably NLTK's
    scikit-learn adapter -- confirm against the file's imports), trained,
    and then used to label the dev-test feature sets.

    :param trainSet: Labeled training data, passed unchanged to the
        wrapper's ``train`` method.
    :param devtestSet: A non-empty sequence of ``(featureset, label)``
        pairs used for evaluation.
    :param classifier: The estimator to wrap in ``SklearnClassifier``.
    :return: The value of ``accuracy_score`` for the gold labels versus
        the predicted labels.
    """
    # Split the evaluation pairs into parallel tuples of feature sets and labels.
    dev_featuresets, dev_labels = zip(*devtestSet)

    wrapped = SklearnClassifier(classifier)
    wrapped.train(trainSet)

    predictions = wrapped.classify_many(dev_featuresets)
    return accuracy_score(dev_labels, predictions)
示例7: train
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def train(self, labeled_featuresets):
    """
    Fit the wrapped scikit-learn estimator on labeled feature sets.

    The pairs are split into feature sets and labels, which are run
    through ``self._vectorizer.fit_transform`` and
    ``self._encoder.fit_transform`` respectively before being handed to
    ``self._clf.fit``.

    :param labeled_featuresets: A list of ``(featureset, label)``
        where each ``featureset`` is a dict mapping strings to either
        numbers, booleans or strings.
    :return: This object, after fitting.
    """
    featuresets, labels = list(compat.izip(*labeled_featuresets))
    feature_matrix = self._vectorizer.fit_transform(featuresets)
    encoded_labels = self._encoder.fit_transform(labels)
    self._clf.fit(feature_matrix, encoded_labels)
    return self
示例8: generate_chomsky
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def generate_chomsky(times=5, line_length=72):
    """
    Print randomly assembled text built from the module's phrase tables.

    Each of ``leadins``, ``subjects``, ``verbs`` and ``objects`` is split
    into stripped lines and shuffled.  The shuffled lists are then read in
    parallel, taking one phrase from each per step for at most ``times``
    steps, and the phrases are joined with spaces and wrapped for printing.

    :param times: Maximum number of phrase groups to emit.
    :param line_length: Width passed to ``textwrap.fill``.
    """
    shuffled_columns = []
    for phrase_block in (leadins, subjects, verbs, objects):
        phrases = [str.strip(line) for line in phrase_block.splitlines()]
        random.shuffle(phrases)
        shuffled_columns.append(phrases)

    # Each group holds one phrase from every column, in column order.
    groups = islice(izip(*shuffled_columns), 0, times)
    text = " ".join(chain(*groups))
    print(textwrap.fill(text, line_length))
示例9: _tag
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def _tag(self, unlabeled_sequence):
    """
    Pair every item of ``unlabeled_sequence`` with the entry at the same
    position in the result of ``self._best_path(unlabeled_sequence)``.

    :param unlabeled_sequence: The items to be tagged.
    :return: A list of ``(item, path_entry)`` tuples.
    :rtype: list(tuple)
    """
    best_path = self._best_path(unlabeled_sequence)
    paired = izip(unlabeled_sequence, best_path)
    return list(paired)
示例10: test
# 需要导入模块: from nltk import compat [as 别名]
# 或者: from nltk.compat import izip [as 别名]
def test(self, test_sequence, verbose=False, **kwargs):
    """
    Tag a labeled test set with this tagger and print its accuracy.

    The test data is first passed through ``self._transform``.  The words
    of every sentence are tagged with ``self._tag``, and the predicted
    tags are compared with the gold tags using ``accuracy``.  The result
    is printed as a percentage together with the token count.

    :param test_sequence: a sequence of labeled test instances
    :type test_sequence: list(list)
    :param verbose: boolean flag; when true, the gold tagging, the bare
        tokens, the predicted tagging and the entropy of every sentence
        are printed before the accuracy summary
    :type verbose: bool
    :param kwargs: accepted but not used by this method
    """
    gold_sents = self._transform(test_sequence)

    # Tag the bare words of each gold sentence.
    predicted_sents = [
        self._tag([word for (word, tag) in sent]) for sent in gold_sents
    ]

    if verbose:
        for gold_sent, predicted_sent in izip(gold_sents, predicted_sents):
            gold_text = ' '.join(
                '%s/%s' % (token, tag) for (token, tag) in gold_sent)
            print('Test:', gold_text)
            print()

            untagged_text = ' '.join(
                "%s" % token for (token, tag) in gold_sent)
            print('Untagged:', untagged_text)
            print()

            predicted_text = ' '.join(
                '%s/%s' % (token, tag) for (token, tag) in predicted_sent)
            print('HMM-tagged:', predicted_text)
            print()

            unlabeled = [(token, None) for (token, tag) in predicted_sent]
            print('Entropy:', self.entropy(unlabeled))
            print()

            print('-' * 60)

    # Flatten the per-sentence tags into two parallel lists.
    gold_tags = [tag for sent in gold_sents for (word, tag) in sent]
    predicted_tags = [tag for sent in predicted_sents for (word, tag) in sent]

    acc = accuracy(gold_tags, predicted_tags)
    count = sum(len(sent) for sent in gold_sents)
    print('accuracy over %d tokens: %.2f' % (count, acc * 100))