本文整理汇总了Python中corpus.Corpus类的典型用法代码示例。如果您正苦于以下问题：Python corpus.Corpus的具体用法？Python corpus.Corpus怎么用？Python corpus.Corpus使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块corpus的用法示例。
在下文中一共展示了corpus.Corpus类的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: params
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def params(self):
    """Wait for the BEGIN_EXECUTION command on stdin and return its parameters.

    Reads stdin one line at a time and parses every non-empty line as JSON.
    Reading stops at the first line that is empty after stripping (which
    includes end of input), so this call may block while the client is silent.
    Lines without a "command" key and unrecognised commands are skipped.

    Returns:
        The "parameterMap" value of the first BEGIN_EXECUTION message (also
        stored on ``self.scriptParams``), or ``None`` when ABORT_EXECUTION or
        END_EXECUTION arrives first or the input ends.
    """
    line = sys.stdin.readline().strip()
    while line:
        # Only byte strings need decoding: a text-mode stdin on Python 3
        # already yields str, which the utf8 codec refuses to decode.
        if isinstance(line, bytes):
            line = codecs.decode(line, "utf8")
        if line:
            input_json = json.loads(line)
            if "command" in input_json:
                command = input_json["command"]
                if command == "BEGIN_EXECUTION":
                    # Still constructed, as before; the instance itself is
                    # not used by this method.
                    Corpus(input_json)
                    self.scriptParams = input_json["parameterMap"]
                    return self.scriptParams
                if command in ("ABORT_EXECUTION", "END_EXECUTION"):
                    return None
        line = sys.stdin.readline().strip()
    return None
示例2: load_ibm
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def load_ibm(ibm_path='/home/mgimenez/Dev/corpora/Quora/IBM'):
    """Load the IBM train, dev and test splits and index them with one vocabulary.

    Args:
        ibm_path: Directory containing ``train.tsv``, ``dev.tsv`` and
            ``test.tsv``. Defaults to the location that used to be
            hard-coded, so existing zero-argument calls behave as before.

    Returns:
        An 8-tuple: train non-similar data, train similar data, dev
        non-similar data, dev similar data, test similar data, test
        non-similar data, the vocabulary processor and the sequence length
        returned by ``build_vocabulary``.
    """
    train = Corpus('ibm', join(ibm_path, 'train.tsv'))
    dev = Corpus('ibm', join(ibm_path, 'dev.tsv'))
    test = Corpus('ibm', join(ibm_path, 'test.tsv'))

    # The vocabulary is built from the training split only and then applied
    # to all three splits.
    vocab_processor, seq_len = build_vocabulary(train.sim_data,
                                                train.non_sim_data)
    for split in (train, dev, test):
        split.to_index(vocab_processor)

    # NOTE(review): the test pair is returned as (sim, non_sim) while the
    # train and dev pairs - and load_quora - use (non_sim, sim). The order is
    # kept as-is because callers are not visible here; confirm it is intended.
    return (train.non_sim_data, train.sim_data,
            dev.non_sim_data, dev.sim_data,
            test.sim_data, test.non_sim_data,
            vocab_processor, seq_len)
示例3: fit
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def fit(self, corpus, valid_split=0.0, algorithm='GS', n_iter=1000, verbose=True):
    """
    Fit the model on the leading part of ``corpus``.

    :param corpus:
        a ``Corpus`` instance; its ``V``, ``M`` and ``docs`` attributes are read
    :param valid_split:
        fraction held out from fitting; only the first
        ``int(corpus.M * (1 - valid_split))`` entries of ``corpus.docs`` are used
    :param algorithm:
        'GS' -> Gibbs sampling (delegates to ``_fit_GS``)
        'VI' -> Variational Inference (accepted, but currently does nothing)
    :param n_iter:
        forwarded to ``_fit_GS``
    :param verbose:
        forwarded to ``_fit_GS``; True: print log information
    :return: ``self``
    :raises ValueError: if ``algorithm`` is neither 'GS' nor 'VI'
    """
    assert isinstance(corpus, Corpus), 'Input should be Corpus type'

    self.valid_split = valid_split
    vocab_size = corpus.V
    self.V = vocab_size
    n_train = int(corpus.M * (1 - valid_split))

    if algorithm not in ('GS', 'VI'):
        raise ValueError("algorithm must be either 'GS' or 'VI'")
    if algorithm == 'GS':
        self._fit_GS(corpus.docs[:n_train], vocab_size, n_iter, verbose)
    # 'VI' is a placeholder: nothing is fitted for it.
    return self
示例4: start
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def start(self):
    """Process commands and documents from stdin until execution ends.

    Each non-empty stdin line is parsed as JSON and stored verbatim on
    ``self.input_line``. Reading stops at the first line that is empty after
    stripping (which includes end of input).

    * BEGIN_EXECUTION: builds ``self.corpus``, stores "parameterMap" on
      ``self.scriptParams``, then calls ``self.init`` and
      ``self.beginExecution``.
    * ABORT_EXECUTION / END_EXECUTION: calls ``self.abortExecution`` or
      ``self.endExecution`` and returns.
    * A message without a "command" key is loaded as a document, passed to
      ``self.execute``, and the document's ``logger`` is written to stdout
      as one JSON line. This branch reads ``self.scriptParams``, so it
      relies on BEGIN_EXECUTION having been seen first.
    """
    line = sys.stdin.readline().strip()
    while line:
        # Only byte strings need decoding: a text-mode stdin on Python 3
        # already yields str, which the utf8 codec refuses to decode.
        if isinstance(line, bytes):
            line = codecs.decode(line, "utf8")
        if line:
            self.input_line = line
            input_json = json.loads(line)
            if "command" in input_json:
                command = input_json["command"]
                if command == "BEGIN_EXECUTION":
                    self.corpus = Corpus(input_json)
                    self.scriptParams = input_json["parameterMap"]
                    self.init(**fill_params(self.scriptParams, self.init))
                    self.beginExecution()
                elif command == "ABORT_EXECUTION":
                    self.abortExecution()
                    return
                elif command == "END_EXECUTION":
                    self.endExecution()
                    return
            else:
                self.document = Document.load(input_json)
                annotation_sets = self.document.annotationSets
                self.inputAS = annotation_sets[self.scriptParams.get("inputAS", None)]
                self.outputAS = annotation_sets[self.scriptParams.get("outputAS", None)]
                self.execute(self.document, **fill_params(self.scriptParams, self.execute))
                # Single-argument call form: valid on Python 2 and Python 3.
                print(json.dumps(self.document.logger))
                # Flush so the reply is not left sitting in the stdout buffer.
                sys.stdout.flush()
        line = sys.stdin.readline().strip()
示例5: __iter__
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def __iter__(self):
    """Yield one document per document message read from stdin.

    Each non-empty stdin line is parsed as JSON. Iteration ends at the first
    line that is empty after stripping (which includes end of input) or when
    an ABORT_EXECUTION / END_EXECUTION command is read. A BEGIN_EXECUTION
    command constructs a ``Corpus`` and stores "parameterMap" on
    ``self.scriptParams``.

    A message without a "command" key is loaded with ``Document.load`` and
    yielded; once the consumer resumes the generator, the document's
    ``logger`` is written to stdout as one JSON line. If an
    ``InvalidOffsetException`` is raised while loading, or is thrown into
    the generator at the yield, a diagnostic goes to stderr and an empty
    JSON list is written to stdout instead.
    """
    line = sys.stdin.readline().strip()
    while line:
        # Only byte strings need decoding: a text-mode stdin on Python 3
        # already yields str, which the utf8 codec refuses to decode.
        if isinstance(line, bytes):
            line = codecs.decode(line, "utf8")
        if line:
            input_json = json.loads(line)
            if "command" in input_json:
                command = input_json["command"]
                if command == "BEGIN_EXECUTION":
                    # Still constructed, as before; the instance itself is
                    # not used by this method.
                    Corpus(input_json)
                    self.scriptParams = input_json["parameterMap"]
                elif command in ("ABORT_EXECUTION", "END_EXECUTION"):
                    return
            else:
                try:
                    document = Document.load(input_json)
                    yield document
                    print(json.dumps(document.logger))
                except InvalidOffsetException as e:
                    # ``message`` is not a standard exception attribute on
                    # Python 3; fall back to str(e) when it is missing.
                    detail = getattr(e, "message", str(e))
                    sys.stderr.write("InvalidOffsetException prevented reading a document " + detail + "\n")
                    print(json.dumps([]))
                # Flush so the reply is not left sitting in the stdout buffer.
                sys.stdout.flush()
        line = sys.stdin.readline().strip()
示例6: load_quora
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def load_quora(quora_path='/home/mgimenez/Dev/corpora/Quora/quora_duplicate_questions.tsv'):
    """Load the Quora duplicate-questions file and return its partitions.

    Args:
        quora_path: Path of the TSV file handed to ``Corpus``. Defaults to
            the location that used to be hard-coded, so existing
            zero-argument calls behave as before.

    Returns:
        The 8-tuple produced by ``Corpus.make_partitions_quora``, unpacked
        here as: train non-similar, train similar, dev non-similar, dev
        similar, test non-similar, test similar, the vocabulary processor
        and the sequence length.
    """
    dataset = Corpus('quora', quora_path)
    # Unpacking into named parts documents the element order and fails
    # loudly if the partitioner ever returns a different number of items.
    (train_non_sim, train_sim,
     dev_non_sim, dev_sim,
     test_non_sim, test_sim,
     vocab_processor, seq_len) = dataset.make_partitions_quora()
    return (train_non_sim, train_sim,
            dev_non_sim, dev_sim,
            test_non_sim, test_sim,
            vocab_processor, seq_len)
示例7: train
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def train(self, dot_path):
    """Store every pair of consecutive statements found in a corpus.

    Loads the corpus at ``dot_path`` with ``Corpus.load_corpus`` and, within
    each conversation, calls ``self.storage.add(previous, current)`` for each
    statement that has a predecessor. Both statements are UTF-8 encoded
    before being stored. Progress messages are printed at start and end.

    Args:
        dot_path: Corpus location, passed unchanged to ``load_corpus``.
    """
    print("Begin train...")
    corpus = Corpus()
    corpus_data = corpus.load_corpus(dot_path)
    for data in corpus_data:
        for conversation in data:
            # Only the immediately preceding statement is ever needed, so
            # remember just that one instead of the whole history.
            previous = None
            for text in conversation:
                statement = text.encode('utf-8')  # encode once, use twice
                if previous is not None:
                    self.storage.add(previous, statement)
                previous = statement
    print("End of train!")
示例8: main
# 需要导入模块: import corpus [as 别名]
# 或者: from corpus import Corpus [as 别名]
def main():
    """Build the corpus, train the model and report accuracy and AUC.

    Reads eight positional command-line arguments from ``sys.argv``:

    1. QAfile - first argument to ``Corpus``
    2. ReviewFile - second argument to ``Corpus``
    3. minReview (int) - third argument to ``Corpus``
    4. k (int) - first argument to ``Model``
    5. numiter (int) - second argument to ``Model``
    6. Lambda (float) - third argument to ``Model``
    7. predictionsOut - where predictions are saved (skipped when empty)
    8. rankingOut - where the top-10 ranking is saved (skipped when empty)

    Corpus statistics, then validation/test accuracy and AUC, are printed to
    stdout. Every ``print`` uses the single-argument call form so the
    function parses on both Python 2 and Python 3.
    """
    QAfile = sys.argv[1]
    ReviewFile = sys.argv[2]
    minReview = int(sys.argv[3])
    k = int(sys.argv[4])
    numiter = int(sys.argv[5])
    Lambda = float(sys.argv[6])
    predictionsOut = sys.argv[7]
    rankingOut = sys.argv[8]

    corpus = Corpus(QAfile, ReviewFile, minReview)
    corpus.construct_QAnswersAndQPerItem()
    corpus.construct_SentencesAndSPerItem()
    corpus.Calculate_PairWiseFeature()

    print("Vocabulary Size: " + str(corpus.Map.V))
    print("Number of Questions: " + str(len(corpus.QAnswers)))
    print("Number of Reviews: " + str(len(corpus.Sentences)))
    print("Number of Items " + str(len(corpus.Map.ItemIDMap)))
    print("Avg review length " + str(sum(corpus.Avgdl.values())/len(corpus.Avgdl)))

    model = Model(k, numiter, Lambda, corpus)
    sess = model.train_model()
    print("\nModel is trained and optimal model loaded!\n")

    valid_accuracy, test_accuracy, topRanked = model.valid_test_perf(sess)

    if predictionsOut:
        model.save_predictions(topRanked, predictionsOut)

    if rankingOut:
        # The ranking file gets a fresh top-10 list, not the one returned by
        # valid_test_perf above.
        topRanked = model.top_ranked(sess, 10)
        model.save_top_ranked(topRanked, rankingOut)

    print("Predictions are saved\n")

    valid_AUC, test_AUC = model.AUC(sess)

    print("-----------------------------------------------")
    print("----------------------------------------------\n")
    print("Accuracy: ")
    print("\tValidation: "+str(valid_accuracy))
    print("\tTest: "+str(test_accuracy))
    print("\n")
    print("AUC: ")
    print("\tValidation: "+str(valid_AUC))
    print("\tTest: "+str(test_AUC))
    print("\n")
    print("-----------------------------------------------")
    print("----------------------------------------------\n")