This article compiles typical usage examples of the Python nltk.ChartParser method. If you are unsure what nltk.ChartParser does or how to use it, the curated code examples below may help. You can also explore further usage examples from the nltk module, where this method is defined.
The following presents 8 code examples of the nltk.ChartParser method, sorted by popularity by default.
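Before the examples, here is a minimal, self-contained sketch of the typical nltk.ChartParser workflow (the toy grammar and sentence are invented for illustration): build a grammar with nltk.CFG.fromstring, wrap it in a ChartParser, and iterate over the parse trees that parse() yields for a token list.

import nltk

# Toy grammar, for illustration only
toy_grammar = nltk.CFG.fromstring("""
S -> NP VP
NP -> 'she' | 'cats'
VP -> V NP
V -> 'likes'
""")

toy_parser = nltk.ChartParser(toy_grammar)
for tree in toy_parser.parse(['she', 'likes', 'cats']):
    print(tree)  # (S (NP she) (VP (V likes) (NP cats)))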
Example 1: to_one_hot
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def to_one_hot(smiles, MaxNumSymbols, check=True):
    """ Encode a list of smiles strings to one-hot vectors """
    assert type(smiles) == list
    prod_map = {}
    for ix, prod in enumerate(zinc_grammar.GCFG.productions()):
        prod_map[prod] = ix
    tokenize = get_zinc_tokenizer(zinc_grammar.GCFG)
    tokens = list(map(tokenize, smiles))
    parser = nltk.ChartParser(zinc_grammar.GCFG)
    parse_trees = [next(parser.parse(t)) for t in tokens]
    productions_seq = [tree.productions() for tree in parse_trees]
    #if check:
    #    print(productions_seq)
    indices = [np.array([prod_map[prod] for prod in entry], dtype=int) for entry in productions_seq]
    # NCHARS (size of the one-hot axis) is defined elsewhere in the module
    one_hot = np.zeros((len(indices), MaxNumSymbols, NCHARS), dtype=np.float32)
    for i in range(len(indices)):
        num_productions = len(indices[i])
        one_hot[i][np.arange(num_productions), indices[i]] = 1.
        # remaining positions are filled in the last column (the padding production)
        one_hot[i][np.arange(num_productions, MaxNumSymbols), -1] = 1.
    return one_hot
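A hypothetical call for the function above, assuming the surrounding module provides zinc_grammar, get_zinc_tokenizer and NCHARS; the SMILES string and the MaxNumSymbols value below are made-up illustrative inputs, not taken from the original code.

one_hot = to_one_hot(['CC(=O)O'], MaxNumSymbols=277)  # illustrative values
print(one_hot.shape)  # (1, 277, NCHARS)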
Example 2: encode
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def encode(smiles):
    GCFG = smiles_grammar.GCFG
    tokenize = get_smiles_tokenizer(GCFG)
    tokens = tokenize(smiles)
    parser = nltk.ChartParser(GCFG)
    parse_tree = next(parser.parse(tokens))
    productions_seq = parse_tree.productions()
    productions = GCFG.productions()
    prod_map = {}
    for ix, prod in enumerate(productions):
        prod_map[prod] = ix
    indices = np.array([prod_map[prod] for prod in productions_seq], dtype=int)
    return indices
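A minimal sketch of the reverse lookup, assuming only the GCFG grammar used by encode() above: the returned indices point into GCFG.productions() in the same order, so the production sequence can be recovered directly (the helper name is made up here).

def indices_to_productions(indices, GCFG):
    # map production indices back to the grammar's production objects
    prods = GCFG.productions()
    return [prods[i] for i in indices]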
Example 3: parse
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def parse(sent):
    parser = nltk.ChartParser(grammar)
    tokens = nltk.wordpunct_tokenize(sent)
    return parser.parse(tokens)
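Hypothetical usage of parse() above, assuming the module-level grammar can derive the sentence; parser.parse() returns an iterator over parse trees, which is simply empty when no derivation exists.

for tree in parse('the dog barks'):  # example sentence, assumed to be covered by grammar
    print(tree)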
Example 4: load
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def load(self, filepath):
    cfg_string = ''.join(list(open(filepath).readlines()))

    # parse from nltk
    cfg_grammar = nltk.CFG.fromstring(cfg_string)
    # self.cfg_parser = cfg_parser = nltk.RecursiveDescentParser(cfg_grammar)
    self.cfg_parser = cfg_parser = nltk.ChartParser(cfg_grammar)

    # our info for rule matching
    self.head_to_rules = head_to_rules = {}
    self.valid_tokens = valid_tokens = set()
    rule_ranges = {}
    total_num_rules = 0
    first_head = None
    for line in cfg_string.split('\n'):
        if len(line.strip()) > 0:
            head, rules = line.split('->')
            head = Nonterminal(head.strip())  # remove space
            rules = [_.strip() for _ in rules.split('|')]  # split and remove space
            rules = [tuple([Nonterminal(_) if not _.startswith("'") else _[1:-1] for _ in rule.split()]) for rule in rules]
            head_to_rules[head] = rules

            for rule in rules:
                for t in rule:
                    if isinstance(t, str):
                        valid_tokens.add(t)

            if first_head is None:
                first_head = head
            rule_ranges[head] = (total_num_rules, total_num_rules + len(rules))
            total_num_rules += len(rules)

    self.first_head = first_head
    self.rule_ranges = rule_ranges
    self.total_num_rules = total_num_rules
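Hypothetical usage of the structures built by load(); the wrapper class name GrammarInfo and the file name grammar.cfg are made up for illustration.

g = GrammarInfo()                    # hypothetical class that defines load()
g.load('grammar.cfg')                # hypothetical file, one production rule per line
print(g.first_head)                  # head of the first rule in the file
print(g.rule_ranges[g.first_head])   # (start, end) index range of that head's rules
print(sorted(g.valid_tokens))        # all terminal tokens found in the grammar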
Example 5: get_parser
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def get_parser(production_file):
    prods = [_.strip() for _ in open(production_file).readlines()] + ['Nothing -> None']
    string = '\n'.join(prods)
    GCFG = nltk.CFG.fromstring(string)
    parser = nltk.ChartParser(GCFG)
    return parser
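Hypothetical usage of get_parser(), assuming productions.txt lists one CFG production per line; the token list below is a placeholder that the grammar is assumed to derive.

parser = get_parser('productions.txt')   # hypothetical production file
tokens = ['x']                           # placeholder token sequence
tree = next(parser.parse(tokens))        # first parse tree, if one exists
print(tree.productions())                # production sequence of the derivation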
Example 6: load
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def load(self, filepath):
    cfg_string = ''.join(list(open(filepath).readlines()))

    # parse from nltk
    cfg_grammar = nltk.CFG.fromstring(cfg_string)
    # self.cfg_parser = cfg_parser = nltk.RecursiveDescentParser(cfg_grammar)
    self.cfg_parser = cfg_parser = nltk.ChartParser(cfg_grammar)

    # our info for rule matching
    self.head_to_rules = head_to_rules = {}
    self.valid_tokens = valid_tokens = set()
    rule_ranges = {}
    total_num_rules = 0
    first_head = None
    for line in cfg_string.split('\n'):
        if len(line.strip()) > 0:
            head, rules = line.split('->')
            head = Nonterminal(head.strip())  # remove space
            rules = [_.strip() for _ in rules.split('|')]  # split and remove space
            rules = [
                tuple([Nonterminal(_) if not _.startswith("'") else _[1:-1] for _ in rule.split()])
                for rule in rules
            ]
            head_to_rules[head] = rules

            for rule in rules:
                for t in rule:
                    if isinstance(t, str):
                        valid_tokens.add(t)

            if first_head is None:
                first_head = head
            rule_ranges[head] = (total_num_rules, total_num_rules + len(rules))
            total_num_rules += len(rules)

    self.first_head = first_head
    self.rule_ranges = rule_ranges
    self.total_num_rules = total_num_rules
Example 7: SizeOneHot
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def SizeOneHot(smiles, check=True):
    """ Return the length of the production sequence for the first SMILES string """
    assert type(smiles) == list
    prod_map = {}
    for ix, prod in enumerate(zinc_grammar.GCFG.productions()):
        prod_map[prod] = ix
    tokenize = get_zinc_tokenizer(zinc_grammar.GCFG)
    tokens = list(map(tokenize, smiles))
    parser = nltk.ChartParser(zinc_grammar.GCFG)
    parse_trees = [next(parser.parse(t)) for t in tokens]
    productions_seq = [tree.productions() for tree in parse_trees]
    indices = [np.array([prod_map[prod] for prod in entry], dtype=int) for entry in productions_seq]
    return len(indices[0])
# SINGLE EXAMPLE
#smile = [L[0]]
##smile = ['C']
#one_hot_single = to_one_hot(smile, )
#print(one_hot_single.shape)
#print(one_hot_single)
# GOING THROUGH ALL OF ZINC....
#OH = np.zeros((len(L),MAX_LEN,NCHARS))
#for i in range(0, len(L), 100):
# print('Processing: i=[' + str(i) + ':' + str(i+100) + ']')
# onehot = to_one_hot(L[i:i+100], False)
# OH[i:i+100,:,:] = onehot
#
#h5f = h5py.File('zinc_grammar_dataset.h5','w')
#h5f.create_dataset('data', data=OH)
#h5f.close()
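A hedged sketch tying Example 7 to Example 1: SizeOneHot can size the padding dimension per molecule before to_one_hot encodes the whole batch. The SMILES list L is assumed to exist as in the commented-out driver code above.

max_len = max(SizeOneHot([s]) for s in L)            # longest production sequence in the batch
one_hot_batch = to_one_hot(L, MaxNumSymbols=max_len)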
Example 8: __init__
# Required import: import nltk [as alias]
# Or: from nltk import ChartParser [as alias]
def __init__(self, cfg_grammar=None, test_file=None):
    super().__init__()
    self.name = 'cfg'
    if cfg_grammar is None:
        cfg_grammar = """
          S -> S PLUS x | S SUB x | S PROD x | S DIV x | x | '(' S ')'
          PLUS -> '+'
          SUB -> '-'
          PROD -> '*'
          DIV -> '/'
          x -> 'x' | 'y'
        """
    self.grammar = nltk.CFG.fromstring(cfg_grammar)
    self.parser = nltk.ChartParser(self.grammar)
    self.test_file = test_file
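A self-contained check of the default arithmetic grammar above, written directly against nltk rather than the (unshown) class: ChartParser handles the left-recursive rules without trouble, which is presumably why it is preferred over the commented-out RecursiveDescentParser in Examples 4 and 6.

import nltk

arith_parser = nltk.ChartParser(nltk.CFG.fromstring("""
S -> S PLUS x | S SUB x | S PROD x | S DIV x | x | '(' S ')'
PLUS -> '+'
SUB -> '-'
PROD -> '*'
DIV -> '/'
x -> 'x' | 'y'
"""))
trees = list(arith_parser.parse(['x', '+', 'y', '*', 'x']))
print(len(trees))  # at least one tree: the token list is derivable from S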