This article collects typical usage examples of the nltk.parse.stanford.StanfordParser class in Python. If you are unsure what stanford.StanfordParser is for, how to call it, or what real-world usage looks like, the curated code samples below may help. You can also explore further examples from the module in which the class lives, nltk.parse.stanford.
Seven code examples of stanford.StanfordParser are shown below, sorted by popularity by default.
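Before the individual examples, a minimal usage sketch may help; it assumes the Stanford Parser and model jars are already discoverable (for instance via CLASSPATH, which several of the examples below arrange through os.environ):

from nltk.parse.stanford import StanfordParser

# Assumes the Stanford Parser jars can already be found (e.g. on the CLASSPATH).
parser = StanfordParser(model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')

# raw_parse returns an iterator of nltk.tree.Tree constituency trees for the sentence.
for tree in parser.raw_parse('The quick brown fox jumps over the lazy dog.'):
    print(tree)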
Example 1: __init__
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
    self.datasets_path = datasets_path
    self.corpus_name = corpus_name
    self.corpus_path = path.join(datasets_path, corpus_name)
    self.docs_path = path.join(self.corpus_path, "docs")
    self.topics_file = path.join(self.corpus_path, "topics.xml")
    self.models_path = path.join(self.corpus_path, "models")
    self.smodels_path = path.join(self.corpus_path, "smodels")
    self.jar_path = path.join(PROJECT_PATH, "summarizer", "jars")
    os.environ['CLASSPATH'] = self.jar_path
    self.cleaned_path = path.join(datasets_path, "processed")

    if parse_type == 'parse':
        if lang == 'english':
            self.parser = stanford.StanfordParser(model_path="%s/englishPCFG.ser.gz" % (self.jar_path))
        if lang == 'german':
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
        # self.cleaned_path = path.join(datasets_path, "processed.parse")

    if parse_type == 'props':  # TODO
        if lang == 'english':
            self.props_parser = ClausIE.get_instance()
        if lang == 'german':
            self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
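The constructor above only wires the parser up. A minimal sketch of how the configured parser might then be invoked; the instance name preprocessor and the sample sentences are assumptions, not part of the original class:

sentences = [
    "Berlin is the capital of Germany.",
    "The parser produces one constituency tree per sentence.",
]
# raw_parse_sents yields one iterator of trees per input sentence.
for tree_iter in preprocessor.parser.raw_parse_sents(sentences):
    for tree in tree_iter:
        print(tree)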
Example 2: __init__
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def __init__(self):
    """
    Initialize the SVO methods.
    """
    self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
    self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
    self.adjective_types = ["JJ", "JJR", "JJS"]
    self.pred_verb_phrase_siblings = None
    self.parser = stanford.StanfordParser()
    self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
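A short sketch of how the two attributes might work together on raw text; svo stands for an instance of the class above and the sentences are made up:

text = "John threw the ball. Mary caught it."
for sentence in svo.sent_detector.tokenize(text):
    for tree in svo.parser.raw_parse(sentence):
        # tree.pos() yields (token, POS tag) pairs; the SVO logic keys off these tag sets.
        tags = [tag for _, tag in tree.pos()]
        nouns = [t for t in tags if t in svo.noun_types]
        verbs = [t for t in tags if t in svo.verb_types]
        print(nouns)
        print(verbs)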
Example 3: main
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def main(argv):
    debug = False
    try:
        opts, args = getopt.getopt(argv, "hd", ["help", "debug"])
    except getopt.GetoptError as e:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ["-h", "--help"]:
            usage()
            sys.exit(2)
        if opt in ["-d", "--debug"]:
            debug = True

    parser = stanford.StanfordParser()
    line = raw_input("Enter line: ")
    while line != 'stop':
        sent = list(parser.raw_parse(line))[0]
        if debug:
            print sent  # print parse tree
        if sent[0].label() == "SBARQ":
            print answer(sent)
        else:
            try:
                describe(sent)
            except ValueError as e:
                print "Error describing sentence. " + str(e)
        if debug:
            print smap  # print semantic map
        line = raw_input("Enter line: ")
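For context on the label check above: raw_parse yields a tree rooted at ROOT, so sent[0] is the top-level clause, typically S for declaratives and SBARQ for wh-questions. A small sketch continuing with the parser created in main (the question is only an example):

tree = next(parser.raw_parse("What is the capital of France?"))
print(tree.label())     # ROOT
print(tree[0].label())  # usually SBARQ for a wh-question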
Example 4: clear_data
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def clear_data(self):
    self.parser = stanford.StanfordParser(
        model_path=r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    self.first_NP = ''
    self.first_VP = ''
    self.parse_tree = None
    self.subject = RDF_Triple.RDF_SOP('subject')
    self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
    self.Object = RDF_Triple.RDF_SOP('object')
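clear_data only resets state; the first_NP and first_VP attributes suggest the subject and predicate are later read off the first NP and VP subtrees of the parse tree. A hedged sketch of one way such subtrees could be located (the helper below is illustrative and not from the original class):

def first_subtree(tree, label):
    """Return the first subtree carrying the given label, or None."""
    for subtree in tree.subtrees():
        if subtree.label() == label:
            return subtree
    return None

# rdf is an assumed instance name; rdf.parse_tree would be populated elsewhere via rdf.parser.
# first_np = first_subtree(rdf.parse_tree, 'NP')
# first_vp = first_subtree(rdf.parse_tree, 'VP')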
Example 5: setup_extractor
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def setup_extractor(self):
    self.splitter = PunktSentenceSplitter(self.language)
    self.parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                                 path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                                 java_options=' -mx2G -Djava.ext.dirs=dev/')
    self.token_to_lemma = {}
    for lemma, tokens in self.lemma_to_token.iteritems():
        for t in tokens:
            self.token_to_lemma[t] = lemma
    self.all_verbs = set(self.token_to_lemma.keys())
Example 6: main
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def main(corpus, verbs, processes, outfile, sub_sentences):
    """ Compute the LU distribution in the corpus, i.e. how many LUs per sentence
    """
    global splitter, tagger, parser, all_verbs
    splitter = PunktSentenceSplitter('en')
    tagger = TTPosTagger('en')
    parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                            path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                            java_options=' -mx1G -Djava.ext.dirs=dev/')  # no way to make classpath work
    all_verbs = reduce(lambda x, y: x.union(y), imap(set, json.load(verbs).values()), set())
    all_verbs.discard('be')
    all_verbs.discard('have')

    args = load_corpus(corpus, 'bio', text_only=True)
    worker = worker_with_sub_sentences if sub_sentences else worker_with_sentences
    counter = defaultdict(int)

    for i, counts in enumerate(parallel.map(worker, args, processes)):
        for k, v in counts.iteritems():
            counter[k] += v
        if (i + 1) % 10000 == 0:
            logger.info('Processed %d documents', i + 1)

    counter = OrderedDict(sorted(counter.items(), key=lambda (k, v): k))
    for k, v in counter.iteritems():
        print k, v
    json.dump(counter, outfile, indent=2)
Example 7: __init__
# Required import: from nltk.parse import stanford [as alias]
# Or: from nltk.parse.stanford import StanfordParser [as alias]
def __init__(self):
    self.parser = StanfordParser()
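With no arguments, the constructor has to locate the Stanford jars on its own; NLTK's lookup checks the CLASSPATH as well as environment variables such as STANFORD_PARSER and STANFORD_MODELS. A minimal sketch with placeholder paths (not from the original code):

import os
from nltk.parse.stanford import StanfordParser

# Placeholder locations -- point these at your own Stanford Parser download.
os.environ['STANFORD_PARSER'] = '/path/to/stanford-parser.jar'
os.environ['STANFORD_MODELS'] = '/path/to/stanford-parser-models.jar'

parser = StanfordParser()  # defaults to the englishPCFG model inside the models jar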