This page collects typical usage examples of the Python class providedcode.transitionparser.TransitionParser. If you are unsure what TransitionParser is for, how to call it, or what working code that uses it looks like, the curated class examples below may help.
In total, 15 TransitionParser code examples are shown, sorted by popularity by default.
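Before the individual examples, here is a minimal sketch, assembled only from calls that appear in the examples below, of the usual train / save / load / parse / evaluate cycle. The Swedish corpus helpers and the featureextractor / transition modules are taken from the examples themselves; treat this as an illustrative sketch rather than a canonical recipe.

# Minimal sketch of the typical TransitionParser workflow, assembled from the
# calls used in the examples below; module and corpus names are taken from them.
import random
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == '__main__':
    # train on a small random sample of parsed sentences
    data = dataset.get_swedish_train_corpus().parsed_sents()
    random.seed(1234)
    subdata = random.sample(data, 200)

    tp = TransitionParser(Transition, FeatureExtractor)
    tp.train(subdata)
    tp.save('swedish.model')

    # reload the model and parse held-out data
    tp = TransitionParser.load('swedish.model')
    testdata = dataset.get_swedish_test_corpus().parsed_sents()
    parsed = tp.parse(testdata)

    # score the parses and write CoNLL output
    ev = DependencyEvaluator(testdata, parsed)
    print "UAS: {} \nLAS: {}".format(*ev.eval())
    with open('test.conll', 'w') as f:
        for p in parsed:
            f.write(p.to_conll(10).encode('utf-8'))
            f.write('\n')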
Example 1: main
def main():
    if len(sys.argv) < 4:
        print """
        Usage:
            python parse.py in.model > out.conll

        Input can be provided manually via the command prompt or piped directly
        to the script using cat.
        """
    # END if

    if sys.stdin.isatty():
        rawtext = [raw_input("Please type a sentence!")]
    else:
        rawtext = sys.stdin.read()
    # END if

    out_filename = sys.argv[3]
    model_filename = sys.argv[1]

    try:
        tp = TransitionParser.load(model_filename)
        parsed = tp.parse(rawtext)

        with open(out_filename, 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')
            # END for
        # END with
    except Exception:
        print "Error."
Example 2: main
def main():
    try:
        sentences = sys.stdin.readlines()
        model_file = sys.argv[1]
    except:
        raise ValueError('''Usage: cat <file of sentences> | python parse.py <model_file>
            or, python parse.py <model_file>, type sentences and hit Ctrl+d''')

    if not os.path.isfile(model_file):
        raise ValueError('cannot find the model file')

    # scrub the list / remove line breaks
    sentences = [sent.rstrip() for sent in sentences]

    # generate dependency graph objects from the sentences
    depgraphs = [DependencyGraph.from_sentence(sent) for sent in sentences]

    # load the model and parse
    tp = TransitionParser.load(model_file)
    parsed = tp.parse(depgraphs)

    # print to stdout;
    # can cat this into a CoNLL file for viewing with MaltEval
    for p in parsed:
        print(p.to_conll(10).encode('utf-8'))
    return
Example 3: evaluate_parse
def evaluate_parse(partIdx):
    if partIdx == 3:
        print 'Evaluating your swedish model ... '
        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        if not os.path.exists('./swedish.model'):
            print 'No model. Please save your model as swedish.model in the current directory before submission.'
            sys.exit(0)
        tp = TransitionParser.load('swedish.model')
        parsed = tp.parse(testdata)
        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        print 'UAS:', uas
        print 'LAS:', las
        swed_score = (min(las, 0.7) / 0.7) ** 2
        return swed_score

    if partIdx == 1:
        print 'Evaluating your english model ... '
        testdata = dataset.get_english_test_corpus().parsed_sents()
        if not os.path.exists('./english.model'):
            print 'No model. Please save your model as english.model in the current directory before submission.'
            sys.exit(0)
        tp = TransitionParser.load('english.model')
        parsed = tp.parse(testdata)
        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        print 'UAS:', uas
        print 'LAS:', las
        eng_score = (min(las, 0.7) / 0.7) ** 2
        return eng_score

    if partIdx == 2:
        print 'Evaluating your danish model ... '
        testdata = dataset.get_danish_test_corpus().parsed_sents()
        if not os.path.exists('./danish.model'):
            print 'No model. Please save your model as danish.model in the current directory before submission.'
            sys.exit(0)
        tp = TransitionParser.load('danish.model')
        parsed = tp.parse(testdata)
        ev = DependencyEvaluator(testdata, parsed)
        uas, las = ev.eval()
        print 'UAS:', uas
        print 'LAS:', las
        dan_score = (min(las, 0.7) / 0.7) ** 2
        return dan_score
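Every branch above converts the labeled attachment score into a grade with the same formula, (min(las, 0.7) / 0.7) ** 2, so anything at or above LAS = 0.7 earns full credit. A quick worked example with purely illustrative numbers:

# Illustrative values only, not results from an actual run.
las = 0.63
score = (min(las, 0.7) / 0.7) ** 2    # (0.63 / 0.7) ** 2 = 0.9 ** 2 = 0.81
las = 0.85
score = (min(las, 0.7) / 0.7) ** 2    # capped at 0.7 / 0.7 = 1.0, so score = 1.0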
Example 4: parse
def parse(argv):
    if len(argv) != 2:
        sys.exit("python parse.py language.model")

    # data = dataset.get_english_train_corpus().parsed_sents()
    # random.seed(1234)
    # subdata = random.sample(data, 200)

    language_model = argv[1]
    try:
        sentences = sys.stdin.readlines()
        for i, sentence in enumerate(sentences):
            dg = DependencyGraph.from_sentence(sentence)
            tp = TransitionParser.load(language_model)
            parsed = tp.parse([dg])
            print parsed[0].to_conll(10).encode('utf-8')

            # tp = TransitionParser(Transition, FeatureExtractor)
            # tp.train(subdata)
            # tp.save('english.model')
            # testdata = dataset.get_swedish_test_corpus().parsed_sents()
            # tp = TransitionParser.load('english.model')
            # parsed = tp.parse(testdata)

            # open a new file for writing on the first sentence
            if i == 0:
                with open('test.conll', 'w') as f:
                    for p in parsed:
                        f.write(p.to_conll(10).encode('utf-8'))
                        f.write('\n')
            # append for the remaining sentences
            else:
                with open('test.conll', 'a') as f:
                    for p in parsed:
                        f.write(p.to_conll(10).encode('utf-8'))
                        f.write('\n')

        # ev = DependencyEvaluator(testdata, parsed)
        # print "UAS: {} \nLAS: {}".format(*ev.eval())
    except NotImplementedError:
        print """
Example 5: train_model
def train_model(lang, training_set='train'):
    # load and sample the data
    data = get_data(lang, dataset=training_set).parsed_sents()
    if len(data) > 200:
        random.seed(1234)
        subdata = random.sample(data, 200)
    else:
        subdata = data

    # train the model and save it
    tp = TransitionParser(Transition, FeatureExtractor)
    tp.train(subdata)
    tp.save('{0}.model'.format(lang))

    # test performance on new data
    if lang != 'english':
        testdata = get_data(lang, dataset='test').parsed_sents()
    # english test data is not available,
    # so find a subset of the training data
    # that is disjoint from the data used for training
    else:
        not_in_training = [sent for sent in data if sent not in subdata]
        testdata = random.sample(not_in_training, 200)

    parsed = tp.parse(testdata)
    ev = DependencyEvaluator(testdata, parsed)

    # store and print the results
    with open('results.txt', 'a') as results_file:
        results_file.write('{0} model:\n'.format(lang))
        results_file.write("UAS: {} \nLAS: {}\n".format(*ev.eval()))
    print '{0} model:\n'.format(lang)
    print "UAS: {} \nLAS: {}\n".format(*ev.eval())

    return ev.eval()[1]
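The helper above is self-contained apart from get_data and the imports it relies on; a hypothetical driver loop (the language list and the printing are assumptions, not part of the original snippet) could look like this:

# Hypothetical driver for train_model; assumes get_data, TransitionParser,
# Transition, FeatureExtractor, DependencyEvaluator and random are importable.
if __name__ == '__main__':
    las_by_lang = {}
    for lang in ['english', 'swedish', 'danish']:
        las_by_lang[lang] = train_model(lang)   # returns the LAS for that language
    print las_by_lang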
Example 6: TransitionParser
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition
from providedcode.dependencygraph import DependencyGraph

if __name__ == '__main__':
    # data = dataset.get_swedish_train_corpus().parsed_sents()
    # data = dataset.get_english_train_corpus().parsed_sents()
    # data = dataset.get_dutch_train_corpus().parsed_sents()
    data = dataset.get_danish_train_corpus().parsed_sents()
    random.seed(1234)
    subdata = random.sample(data, 200)

    try:
        tp = TransitionParser(Transition, FeatureExtractor)
        tp.train(subdata)
        # tp.save('swedish.model')
        # tp.save('english.model')
        tp.save('danish.model')

        # testdata = dataset.get_swedish_test_corpus().parsed_sents()
        testdata = dataset.get_danish_test_corpus().parsed_sents()
        # tp = TransitionParser.load('badfeatures.model')
        # testdata = dataset.get_english_test_corpus().parsed_sents()
        # tp = TransitionParser.load('english.model')
        parsed = tp.parse(testdata)

        with open('test.conll', 'w') as f:
            for p in parsed:
Example 7:
import sys
from providedcode.transitionparser import TransitionParser
from providedcode.dependencygraph import DependencyGraph

if __name__ == '__main__':
    sentences = sys.stdin.readlines()
    tp = TransitionParser.load(sys.argv[1])
    for sentence in sentences:
        dg = DependencyGraph.from_sentence(sentence)
        parsed = tp.parse([dg])
        print parsed[0].to_conll(10).encode('utf-8')
        # print '\n'
Example 8: sentences
from providedcode.dependencygraph import DependencyGraph
from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition
import sys

if __name__ == "__main__":
    try:
        # parsing arbitrary sentences (english):
        fromInput = "".join(sys.stdin.readlines())
        # print fromInput
        sentence = DependencyGraph.from_sentence(fromInput)

        tp = TransitionParser.load("english.model")
        parsed = tp.parse([sentence])
        print parsed[0].to_conll(10).encode("utf-8")
    except NotImplementedError:
        print """
        This file is currently broken! We removed the implementation of Transition
        (in transition.py), which tells the transitionparser how to go from one
        Configuration to another Configuration. This is an essential part of the
        arc-eager dependency parsing algorithm, so you should probably fix that :)

        The algorithm is described in great detail here:
            http://aclweb.org/anthology//C/C12/C12-1059.pdf

        We also haven't actually implemented most of the features for the
        support vector machine (in featureextractor.py), so as you might expect the
        evaluator is going to give you somewhat bad results...
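The message above points to the arc-eager transition system that transition.py is expected to implement (see the linked paper). As a rough, self-contained illustration only, and explicitly not the course's actual Transition/Configuration API, the four arc-eager operations act on a stack, a buffer, and a set of arcs roughly along these lines:

# Rough sketch of the four arc-eager operations on a toy configuration.
# This is NOT the course's Transition/Configuration interface, just an illustration;
# preconditions (e.g. ROOT handling, "has a head" checks) are omitted.
class ToyConfiguration(object):
    def __init__(self, words):
        self.stack = [0]                              # 0 is the artificial ROOT
        self.buffer = list(range(1, len(words) + 1))  # word indices left to process
        self.arcs = []                                # (head, relation, dependent) triples

def shift(conf):
    # move the first buffer item onto the stack
    conf.stack.append(conf.buffer.pop(0))

def left_arc(conf, relation):
    # the top of the stack becomes a dependent of the first buffer item
    dependent = conf.stack.pop()
    conf.arcs.append((conf.buffer[0], relation, dependent))

def right_arc(conf, relation):
    # the first buffer item becomes a dependent of the top of the stack,
    # and is then pushed onto the stack
    dependent = conf.buffer.pop(0)
    conf.arcs.append((conf.stack[-1], relation, dependent))
    conf.stack.append(dependent)

def reduce(conf):
    # pop the stack once its top word already has a head
    conf.stack.pop()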
Example 9: DependencyGraphs
if __name__ == '__main__':
    # 'data' is parsed sentences converted into DependencyGraph objects.
    model_dict = {
        'english': ('english.model', dataset.get_english_train_corpus, dataset.get_english_test_corpus),
        'danish': ('danish.model', dataset.get_danish_train_corpus, dataset.get_danish_test_corpus),
        'swedish': ('swedish.model', dataset.get_swedish_train_corpus, dataset.get_swedish_test_corpus)
    }

    for model_type, model_tuple in model_dict.iteritems():
        model, data, testdata = model_tuple[0], model_tuple[1]().parsed_sents(), model_tuple[2]().parsed_sents()
        random.seed(1234)
        subdata = random.sample(data, 200)  # 200 randomly selected DependencyGraphs (sentences) for model training.

        try:
            tp = TransitionParser(Transition, FeatureExtractor)
            tp.train(subdata)                  # train with the 200 randomly selected dependency graphs (sentences).
            tp.save(model)                     # save the trained model.
            tp = TransitionParser.load(model)  # load the trained model for parsing.
            parsed = tp.parse(testdata)        # parse the test data.

            with open('test.conll', 'w') as f:
                for p in parsed:
                    f.write(p.to_conll(10).encode('utf-8'))
                    f.write('\n')

            # evaluate the test parse result here...
            ev = DependencyEvaluator(testdata, parsed)
            print 'Model: {}'.format(model_type)
Example 10: TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == "__main__":
    data = dataset.get_swedish_train_corpus().parsed_sents()
    random.seed(1234)
    subdata = random.sample(data, 200)

    try:
        # tp = TransitionParser(Transition, FeatureExtractor)
        # tp.train(subdata)
        # tp.save('swedish.model')

        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        tp = TransitionParser.load("badfeatures.model")
        parsed = tp.parse(testdata)

        with open("test.conll", "w") as f:
            for p in parsed:
                f.write(p.to_conll(10).encode("utf-8"))
                f.write("\n")

        ev = DependencyEvaluator(testdata, parsed)
        print "LAS: {} \nUAS: {}".format(*ev.eval())

        # parsing arbitrary sentences (english):
        # sentence = DependencyGraph.from_sentence('Hi, this is a test')
        # tp = TransitionParser.load('english.model')
Example 11: MODEL
subdata = random.sample(data, 200)  # use this subdata for bad features and swedish

# NEED DANISH AND ENGLISH
data_e = dataset.get_english_train_corpus().parsed_sents()
random.seed(1234)
subdata_e = random.sample(data_e, 200)

data_d = dataset.get_danish_train_corpus().parsed_sents()
random.seed(1234)
subdata_d = random.sample(data_d, 200)

try:
    # BAD FEATURES MODEL (SWEDISH DATA)
    print "Starting Bad Features"
    testdata = dataset.get_swedish_test_corpus().parsed_sents()
    tp = TransitionParser.load('badfeatures.model')
    parsed = tp.parse(testdata)

    with open('test.conll', 'w') as f:
        for p in parsed:
            f.write(p.to_conll(10).encode('utf-8'))
            f.write('\n')

    ev = DependencyEvaluator(testdata, parsed)
    print "Bad Features Results"
    print "UAS: {} \nLAS: {}".format(*ev.eval())
    t1 = time.time()
    print "Time: " + str(t1 - t0) + '\n'

    # SWEDISH FEATURE MODELS
Example 12: TransitionParser
from transition import Transition

if __name__ == '__main__':
    # data = dataset.get_swedish_train_corpus().parsed_sents()
    data = dataset.get_english_train_corpus().parsed_sents()
    # data = dataset.get_danish_train_corpus().parsed_sents()
    random.seed(1234)
    subdata = random.sample(data, 200)
    # For Swedish, sample 223 sentences to get 200 projective ones:
    # subdata = random.sample(data, 223)
    # For Danish, sample 236 sentences to get 200 projective ones:
    # subdata = random.sample(data, 236)

    try:
        tp = TransitionParser(Transition, FeatureExtractor)
        tp.train(subdata)
        # tp.save('swedish.model')
        # tp.save('english.model')
        # tp.save('danish.model')

        # testdata = dataset.get_swedish_test_corpus().parsed_sents()
        testdata = dataset.get_english_dev_corpus().parsed_sents()
        # testdata = dataset.get_danish_test_corpus().parsed_sents()
        # tp = TransitionParser.load('badfeatures.model')
        parsed = tp.parse(testdata)

        with open('test.conll', 'w') as f:
            for p in parsed:
Example 13: TransitionParser
from transition import Transition

if __name__ == '__main__':
    data = dataset.get_swedish_train_corpus().parsed_sents()
    # data = dataset.get_english_test_corpus().parsed_sents()
    # data = dataset.get_danish_train_corpus().parsed_sents()
    random.seed(1234)
    subdata = random.sample(data, 200)

    try:
        tp = TransitionParser(Transition, FeatureExtractor)
        tp.train(subdata)
        tp.save('swedish.model')
        # tp.save('english.model')
        # tp.save('danish.model')

        testdata = dataset.get_swedish_test_corpus().parsed_sents()
        # tp = TransitionParser.load('badfeatures.model')
        parsed = tp.parse(testdata)

        with open('test.conll', 'w') as f:
            for p in parsed:
                f.write(p.to_conll(10).encode('utf-8'))
                f.write('\n')
Example 14: handle_input
def handle_input(input_file, model_file):
    tp = TransitionParser.load(model_file)
    for line in input_file:
        sentence = DependencyGraph.from_sentence(line)
        parsed = tp.parse([sentence])
        print parsed[0].to_conll(10).encode('utf-8')
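The original snippet shows only the function itself; a hypothetical way to wire it to stdin and a command-line argument (this wiring is an assumption, not part of the example) would be:

# Hypothetical wiring for handle_input; argument handling is an assumption.
if __name__ == '__main__':
    import sys
    handle_input(sys.stdin, sys.argv[1])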
Example 15: TransitionParser
scoreWeight = {'swedish': 25.,
               'danish': 25.,
               'english': 50.}

totalPoints = 0
for testName in tests.keys():
    data = tests[testName]().parsed_sents()
    data_1h = data[0:(len(data) / 2)]
    data_2h = data[(len(data) / 2):-1]
    random.seed(99999)
    traindata = random.sample(data_1h, 200)
    testdata = random.sample(data_2h, 800)

    try:
        print "Training {0} model...".format(testName)
        tp = TransitionParser(Transition, MyFeatureExtractor)
        tp.train(traindata)
        tp.save(testName + ".model")

        print "Testing {0} model...".format(testName)
        parsed = tp.parse(testdata)

        # with open('test.conll', 'w') as f:
        #     for p in parsed:
        #         f.write(p.to_conll(10).encode('utf-8'))
        #         f.write('\n')

        ev = DependencyEvaluator(testdata, parsed)
        print "Test Results For: {0}".format(testName)
        (uas, las) = ev.eval()
        points = scoreWeight[testName] * (min(0.7, las) / 0.7) ** 2