This article collects typical usage examples of the Python method utils.datasets.Datasets.get_index. If you are wondering how Datasets.get_index is used in practice, the curated examples below should help; you can also explore further usage examples of the class it belongs to, utils.datasets.Datasets.
The following shows 11 code examples of Datasets.get_index, ordered by popularity by default. Upvoting the examples you find useful helps surface better Python code examples.
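All of the examples below follow the same basic pattern: activate the dataset tied to the current Django session, then read the active index and mapping with get_index() and get_mapping(). Here is a minimal sketch of that pattern; the view name and the way the result is used are illustrative only and do not come from any of the examples:

from django.http import HttpResponse
from utils.datasets import Datasets

def show_active_dataset(request):  # hypothetical view, for illustration only
    ds = Datasets().activate_dataset(request.session)  # dataset selected earlier in the session
    index = ds.get_index()      # Elasticsearch index name of the active dataset
    mapping = ds.get_mapping()  # mapping (document type) of the active dataset
    return HttpResponse('Active dataset: {0}/{1}'.format(index, mapping))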
Example 1: get_all_matched_rows
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def get_all_matched_rows(query, request, inclusive_instructions):
    buffer_ = StringIO()
    writer = csv.writer(buffer_)

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)

    features = sorted([field['path'] for field in es_m.get_mapped_fields()])

    query['size'] = ES_SCROLL_BATCH
    writer.writerow(features)

    request_url = os.path.join(es_url, ds.get_index(), ds.get_mapping(), '_search?scroll=1m')
    response = requests.get(request_url, data=json.dumps(query)).json()

    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']
    scroll_payload = json.dumps({'scroll': '1m', 'scroll_id': scroll_id})

    while hits:
        for hit in hits:
            feature_dict = {}
            row = []
            for feature_name in features:
                feature_path = feature_name.split('.')
                parent_source = hit['_source']
                for path_component in feature_path:
                    if path_component in parent_source:
                        parent_source = parent_source[path_component]
                    else:
                        parent_source = ""
                        break
                content = parent_source
                row.append(content)
                feature_dict[feature_name] = content
            layer_dict = matcher.LayerDict(feature_dict)
            if inclusive_instructions.match(layer_dict):
                writer.writerow([element.encode('utf-8') if isinstance(element, unicode) else element for element in row])
        buffer_.seek(0)
        data = buffer_.read()
        buffer_.seek(0)
        buffer_.truncate()
        yield data
        response = requests.get(os.path.join(es_url, '_search', 'scroll'), data=scroll_payload).json()
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
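Because get_all_matched_rows is a generator that yields CSV fragments one scroll batch at a time, a natural way to consume it (not shown in the original) is to stream the rows straight back to the browser. A sketch, assuming the query and inclusive_instructions objects are built elsewhere and using Django's StreamingHttpResponse:

from django.http import StreamingHttpResponse

def export_matched_rows(request, query, inclusive_instructions):  # hypothetical wrapper view
    rows = get_all_matched_rows(query, request, inclusive_instructions)
    response = StreamingHttpResponse(rows, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="matched_rows.csv"'
    return response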
Example 2: index
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def index(request):
    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    fields = get_fields(es_m)

    datasets = Datasets().get_allowed_datasets(request.user)
    language_models = Task.objects.filter(task_type='train_model').filter(status__iexact='completed').order_by('-pk')

    preprocessors = collect_map_entries(preprocessor_map)
    enabled_preprocessors = [preprocessor for preprocessor in preprocessors]

    # Hide the fact graph if no fact_str_val field is present in the fields
    display_fact_graph = 'hidden'
    for i in fields:
        if json.loads(i['data'])['type'] == "fact_str_val":
            display_fact_graph = ''
            break

    template_params = {'display_fact_graph': display_fact_graph,
                       'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'fields': fields,
                       'searches': Search.objects.filter(author=request.user),
                       'lexicons': Lexicon.objects.all().filter(author=request.user),
                       'dataset': ds.get_index(),
                       'language_models': language_models,
                       'allowed_datasets': datasets,
                       'enabled_preprocessors': enabled_preprocessors,
                       'task_params': task_params}

    template = loader.get_template('searcher.html')
    return HttpResponse(template.render(template_params, request))
Example 3: get_grammar_listing
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def get_grammar_listing(request):
    ds = Datasets().activate_dataset(request.session)
    dataset = ds.get_index()
    mapping = ds.get_mapping()

    grammars = Grammar.objects.filter(author=request.user, dataset__index=dataset, dataset__mapping=mapping).order_by('-last_modified')
    grammar_json = json.dumps([{'id': grammar.id, 'name': grammar.name, 'last_modified': grammar.last_modified.strftime("%d/%m/%y %H:%M:%S")} for grammar in grammars])

    return HttpResponse(grammar_json)
Example 4: parse_request
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def parse_request(self, request):
    self.lookup_types = request.POST['lookup_types'].split(',')
    self.key_constraints = request.POST['key_constraints'].split(',')
    self.content = request.POST['content'].split('\n')[-1].strip()
    print(self.content)

    ds = Datasets().activate_dataset(request.session)
    self.dataset = ds.get_index()
    self.mapping = ds.get_mapping()
    self.es_m = ES_Manager(self.dataset, self.mapping)
    self.user = request.user
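The POST fields that parse_request reads are only implied by the code above; a payload built along these lines would satisfy it (the field names come from the example, the values are purely illustrative):

# Illustrative POST data for parse_request; only the keys are taken from the example above.
post_data = {
    'lookup_types': 'phrase,lemmas',      # comma-separated list, split on ','
    'key_constraints': 'PER,ORG',         # comma-separated list, split on ','
    'content': 'first line\nlast line',   # only the last line is kept, stripped of whitespace
}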
Example 5: get_example_texts
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def get_example_texts(request, field, value):
    ds = Datasets().activate_dataset(request.session)
    dataset = ds.get_index()
    mapping = ds.get_mapping()

    query = json.dumps({"size": 10, "highlight": {"fields": {field: {}}}, "query": {"match": {field: value}}})
    response = ES_Manager.plain_scroll(es_url, dataset, mapping, query)

    matched_sentences = []
    for hit in response['hits']['hits']:
        for match in hit['highlight'].values():
            matched_sentences.append(match[0])

    return matched_sentences
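get_example_texts returns at most ten highlighted snippets from documents whose field matches the given value. A call like the following would collect them for display; the field name and value here are made up:

# Illustrative call; 'lemmas' and 'bank' are invented field/value examples.
snippets = get_example_texts(request, 'lemmas', 'bank')
for snippet in snippets:
    print(snippet)  # each snippet is the first highlight fragment of one hit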
Example 6: get_table_header
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def get_table_header(request):
    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)

    # get column names from the ES mapping
    fields = es_m.get_column_names()

    template_params = {'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'fields': fields,
                       'searches': Search.objects.filter(author=request.user),
                       'columns': [{'index': index, 'name': field_name} for index, field_name in enumerate(fields)],
                       'dataset': ds.get_index(),
                       'mapping': ds.get_mapping()}

    template = loader.get_template('searcher_results.html')
    return HttpResponse(template.render(template_params, request))
Example 7: __init__
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def __init__(self, request):
    ds = Datasets().activate_dataset(request.session)
    self.dataset = ds.get_index()
    self.mapping = ds.get_mapping()
    self.es_m = ES_Manager(self.dataset, self.mapping)

    # PREPARE AGGREGATION
    self.es_params = request.POST
    interval = self.es_params["interval_1"]
    self.daterange = self._get_daterange(self.es_params)
    self.ranges, self.date_labels = self._get_date_intervals(self.daterange, interval)
    self.agg_query = self.prepare_agg_query()

    # EXECUTE AGGREGATION
    agg_results = self.aggregate()

    # PARSE RESPONSES INTO JSON OBJECT
    self.agg_data = self.parse_responses(agg_results)
Example 8: save_grammar
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def save_grammar(request):
    grammar_dict = json.loads(request.POST['json'])
    grammar_id = grammar_dict[0]['id']

    if grammar_id == 'new':
        name = grammar_dict[0]['text']
        ds = Datasets().activate_dataset(request.session)
        dataset = ds.get_index()
        mapping = ds.get_mapping()
        grammar = Grammar(name=name, json='', author=request.user, dataset=Dataset.objects.filter(index=dataset, mapping=mapping)[0])
        grammar.save()
        grammar_dict[0]['id'] = grammar.id
    else:
        grammar = Grammar.objects.get(id=grammar_id)

    grammar.json = json.dumps(grammar_dict)
    grammar.save()

    return HttpResponse(json.dumps({'id': grammar.id}))
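save_grammar only ever inspects grammar_dict[0], so request.POST['json'] is expected to be a JSON list whose first element carries the id (or the string 'new') and, for new grammars, the name. A payload roughly like the following would work; everything beyond the keys read by the example is illustrative:

import json

# Illustrative request.POST['json'] value for save_grammar.
payload = json.dumps([
    {'id': 'new', 'text': 'My grammar'},      # 'new' triggers creation; 'text' becomes the grammar name
    {'id': 1, 'text': 'match', 'parent': 0},  # remaining nodes are simply stored back as the grammar JSON
])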
Example 9: find_mappings
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def find_mappings(request):
    try:
        slop = int(request.POST['slop'])
        max_len = int(request.POST['max_len'])
        min_len = int(request.POST['min_len'])
        min_freq = int(request.POST['min_freq'])
        match_field = request.POST['match_field']
        description = request.POST['description']

        batch_size = 50

        # Define selected mapping
        ds = Datasets().activate_dataset(request.session)
        dataset = ds.get_index()
        mapping = ds.get_mapping()

        lexicon = []
        word_index = {}
        num_lexicons = 0

        for i, lexicon_id in enumerate(request.POST.getlist('lexicons[]')):
            num_lexicons += 1
            for word in Word.objects.filter(lexicon=lexicon_id):
                word = word.wrd
                lexicon.append(word)
                if word not in word_index:
                    word_index[word] = []
                word_index[word].append(i)

        lexicon = list(set(lexicon))

        if min_len > num_lexicons:
            min_len = num_lexicons

        mwe_counter = 0
        group_counter = 0
        phrases = []
        final = {}
        data = []

        new_run = Run(minimum_frequency=min_freq, maximum_length=max_len, minimum_length=min_len, run_status='running', run_started=datetime.now(), run_completed=None, user=request.user, description=description)
        new_run.save()

        logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'MINE MWEs', 'event': 'mwe_mining_started', 'args': {'user_name': request.user.username, 'run_id': new_run.id, 'slop': slop, 'min_len': min_len, 'max_len': max_len, 'min_freq': min_freq, 'match_field': match_field, 'desc': description}}))

        for i in range(min_len, max_len + 1):
            print('Permutation len:', i)
            for permutation in itertools.permutations(lexicon, i):
                word_indices = list(flatten([word_index[word] for word in permutation]))
                if len(word_indices) == len(set(word_indices)):
                    permutation = ' '.join(permutation)
                    if slop > 0:
                        query = {"query": {"match_phrase": {match_field: {"query": permutation, "slop": slop}}}}
                    else:
                        query = {"query": {"match_phrase": {match_field: {"query": permutation}}}}
                    data.append(json.dumps({"index": dataset, "mapping": mapping}) + '\n' + json.dumps(query))
                    phrases.append(permutation)
                    if len(data) == batch_size:
                        for j, response in enumerate(ES_Manager.plain_multisearch(es_url, dataset, mapping, data)):
                            try:
                                if response['hits']['total'] >= min_freq:
                                    sorted_phrase = ' '.join(sorted(phrases[j].split(' ')))
                                    sorted_conceptualised_phrase = conceptualise_phrase(sorted_phrase, request.user)
                                    if sorted_conceptualised_phrase not in final:
                                        final[sorted_conceptualised_phrase] = {'total_freq': 0, 'mwes': [], 'display_name': {'freq': 0, 'label': False}, 'id': group_counter}
                                        group_counter += 1
                                    final[sorted_conceptualised_phrase]['total_freq'] += response['hits']['total']
                                    final[sorted_conceptualised_phrase]['mwes'].append({'mwe': phrases[j], 'freq': response['hits']['total'], 'accepted': False, 'id': mwe_counter})
                                    mwe_counter += 1
                                    final[sorted_conceptualised_phrase]['mwes'].sort(reverse=True, key=lambda k: k['freq'])
                                    if response['hits']['total'] > final[sorted_conceptualised_phrase]['display_name']['freq']:
                                        final[sorted_conceptualised_phrase]['display_name']['freq'] = response['hits']['total']
                                        final[sorted_conceptualised_phrase]['display_name']['label'] = phrases[j]
                            except KeyError as e:
                                raise e
                        data = []
                        phrases = []
            logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'MINE MWEs', 'event': 'mwe_mining_progress', 'args': {'user_name': request.user.username, 'run_id': new_run.id}, 'data': {'permutations_processed': i + 1 - min_len, 'total_permutations': max_len - min_len + 1}}))

        m_response = ES_Manager.plain_multisearch(es_url, dataset, mapping, data)
        for j, response in enumerate(m_response):
            try:
                if response['hits']['total'] >= min_freq:
                    sorted_phrase = ' '.join(sorted(phrases[j].split(' ')))
                    sorted_conceptualised_phrase = conceptualise_phrase(sorted_phrase, request.user)
                    if sorted_conceptualised_phrase not in final:
                        final[sorted_conceptualised_phrase] = {'total_freq': 0, 'mwes': [], 'display_name': {'freq': 0, 'label': False}, 'id': group_counter}
                        group_counter += 1
                    final[sorted_conceptualised_phrase]['total_freq'] += response['hits']['total']
                    final[sorted_conceptualised_phrase]['mwes'].append({'mwe': phrases[j], 'freq': response['hits']['total'], 'accepted': False, 'id': mwe_counter})
                    mwe_counter += 1
                    final[sorted_conceptualised_phrase]['mwes'].sort(reverse=True, key=lambda k: k['freq'])
                    if response['hits']['total'] > final[sorted_conceptualised_phrase]['display_name']['freq']:
                        final[sorted_conceptualised_phrase]['display_name']['freq'] = response['hits']['total']
                        final[sorted_conceptualised_phrase]['display_name']['label'] = phrases[j]
            except KeyError as e:
                raise e

        for key in final:
            final[key]['concept_name'] = {'freq': -1, 'label': ''}

        r = Run.objects.get(pk=new_run.pk)
        r.run_completed = datetime.now()
        r.run_status = 'completed'
        r.results = json.dumps(final)
        r.save()

        logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'MINE MWEs', 'event': 'mwe_mining_completed', 'args': {'user_name': request.user.username, 'run_id': new_run.id}}))

    except Exception as e:
#.........the rest of this code is omitted.........
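Each entry appended to data in this example is a two-line string: a header naming the index and mapping, followed by the match_phrase query itself, which is how plain_multisearch appears to expect its batch. A sketch of building one such entry; the index, mapping, field, and phrase are illustrative:

import json

header = json.dumps({"index": "my_dataset", "mapping": "my_mapping"})  # illustrative names
query = json.dumps({"query": {"match_phrase": {"text": {"query": "central bank", "slop": 2}}}})
entry = header + '\n' + query  # one element of the `data` batch handed to ES_Manager.plain_multisearch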
Example 10: get_table_data
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def get_table_data(request):
    query_data = {}
    query_data['search_id'] = request.GET['search_id']
    query_data['polarity'] = request.GET['polarity']
    query_data['requested_page'] = int(request.GET['iDisplayStart']) / int(request.GET['iDisplayLength']) + 1
    query_data['page_length'] = int(request.GET['iDisplayLength'])

    if request.GET['is_test'] == 'true':
        query_data['inclusive_metaquery'] = json.loads(request.GET['inclusive_test_grammar'])
        query_data['inclusive_grammar_id'] = -1
        query_data['exclusive_grammar_id'] = -1
        query_data['features'] = sorted(extract_layers(query_data['inclusive_metaquery']))
    else:
        query_data['inclusive_grammar_id'] = request.GET['inclusive_grammar_id']
        query_data['exclusive_grammar_id'] = request.GET['exclusive_grammar_id']
        query_data['inclusive_metaquery'] = generate_metaquery_dict(int(query_data['inclusive_grammar_id']), request.user, component={})
        query_data['exclusive_metaquery'] = generate_metaquery_dict(int(query_data['exclusive_grammar_id']), request.user, component={})
        query_data['features'] = sorted(extract_layers(query_data['inclusive_metaquery']) | extract_layers(query_data['exclusive_metaquery']))

    GrammarPageMapping.objects.filter(search_id=query_data['search_id'],
                                      inclusive_grammar=query_data['inclusive_grammar_id'],
                                      exclusive_grammar=query_data['exclusive_grammar_id'],
                                      polarity=query_data['polarity'], author=request.user).delete()

    ds = Datasets().activate_dataset(request.session)
    query_data['dataset'] = ds.get_index()
    query_data['mapping'] = ds.get_mapping()

    component_query = ElasticGrammarQuery(query_data['inclusive_metaquery'], None).generate()

    es_m = ds.build_manager(ES_Manager)

    if query_data['search_id'] != '-1':
        saved_query = json.loads(Search.objects.get(pk=query_data['search_id']).query)
        es_m.load_combined_query(saved_query)
        if query_data['polarity'] == 'positive':
            es_m.merge_combined_query_with_query_dict(component_query)
    else:
        #es_m.combined_query = {"main": {"query": {"bool": {"should": [{"match_all":{}}], "must": [], "must_not": []}}},
        #                       "facts": {"include": [], 'total_include': 0,
        #                                 "exclude": [], 'total_exclude': 0}}
        es_m.combined_query = {"main": {"query": {"match_all": {}}}}
        if query_data['polarity'] == 'positive':
            es_m.combined_query = component_query

    # Add paging data to the query
    #es_m.set_query_parameter('from', request.session['grammar_'+polarity+'_cursor'])
    es_m.set_query_parameter('size', request.GET['iDisplayLength'])
    es_m.set_query_parameter('_source', query_data['features'])

    query_data['inclusive_instructions'] = generate_instructions(query_data['inclusive_metaquery'])
    query_data['exclusive_instructions'] = {}  # generate_instructions(query_data['exclusive_metaquery'])

    data = scroll_data(es_m.combined_query['main'], request, query_data)
    data['sEcho'] = request.GET['sEcho']

    return HttpResponse(json.dumps(data, ensure_ascii=False))
Example 11: facts_agg
# Required module import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import get_index [as alias]
def facts_agg(es_params, request):
    logger = LogManager(__name__, 'FACTS AGGREGATION')

    distinct_values = []
    query_results = []
    lexicon = []
    aggregation_data = es_params['aggregate_over']
    aggregation_data = json.loads(aggregation_data)
    original_aggregation_field = aggregation_data['path']
    aggregation_field = 'texta_link.facts'

    try:
        aggregation_size = 50
        aggregations = {"strings": {es_params['sort_by']: {"field": aggregation_field, 'size': 0}},
                        "distinct_values": {"cardinality": {"field": aggregation_field}}}

        # Define selected mapping
        ds = Datasets().activate_dataset(request.session)
        dataset = ds.get_index()
        mapping = ds.get_mapping()
        date_range = ds.get_date_range()
        es_m = ES_Manager(dataset, mapping, date_range)

        for item in es_params:
            if 'saved_search' in item:
                s = Search.objects.get(pk=es_params[item])
                name = s.description
                saved_query = json.loads(s.query)
                es_m.load_combined_query(saved_query)
                es_m.set_query_parameter('aggs', aggregations)
                response = es_m.search()

                # Filter response
                bucket_filter = '{0}.'.format(original_aggregation_field.lower())
                final_bucket = []
                for b in response['aggregations']['strings']['buckets']:
                    if bucket_filter in b['key']:
                        fact_name = b['key'].split('.')[-1]
                        b['key'] = fact_name
                        final_bucket.append(b)
                final_bucket = final_bucket[:aggregation_size]
                response['aggregations']['distinct_values']['value'] = len(final_bucket)
                response['aggregations']['strings']['buckets'] = final_bucket

                normalised_counts, labels = normalise_agg(response, es_m, es_params, 'strings')
                lexicon = list(set(lexicon + labels))
                query_results.append({'name': name, 'data': normalised_counts, 'labels': labels})
                distinct_values.append({'name': name, 'data': response['aggregations']['distinct_values']['value']})

        es_m.build(es_params)
        # FIXME
        # this is confusing for the user
        if not es_m.is_combined_query_empty():
            es_m.set_query_parameter('aggs', aggregations)
            response = es_m.search()

            # Filter response
            bucket_filter = '{0}.'.format(original_aggregation_field.lower())
            final_bucket = []
            for b in response['aggregations']['strings']['buckets']:
                if bucket_filter in b['key']:
                    fact_name = b['key'].split('.')[-1]
                    b['key'] = fact_name
                    final_bucket.append(b)
            final_bucket = final_bucket[:aggregation_size]
            response['aggregations']['distinct_values']['value'] = len(final_bucket)
            response['aggregations']['strings']['buckets'] = final_bucket

            normalised_counts, labels = normalise_agg(response, es_m, es_params, 'strings')
            lexicon = list(set(lexicon + labels))
            query_results.append({'name': 'Query', 'data': normalised_counts, 'labels': labels})
            distinct_values.append({'name': 'Query', 'data': response['aggregations']['distinct_values']['value']})

        data = [a + zero_list(len(query_results)) for a in map(list, zip(*[lexicon]))]
        data = [['Word'] + [query_result['name'] for query_result in query_results]] + data

        for i, word in enumerate(lexicon):
            for j, query_result in enumerate(query_results):
                for k, label in enumerate(query_result['labels']):
                    if word == label:
                        data[i + 1][j + 1] = query_result['data'][k]

        logger.set_context('user_name', request.user.username)
        logger.info('facts_aggregation_queried')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('user_name', request.user.username)
        logger.exception('facts_aggregation_query_failed')

    table_height = len(data) * 15
    table_height = table_height if table_height > 500 else 500

    return {'data': [data[0]] + sorted(data[1:], key=lambda x: sum(x[1:]), reverse=True), 'height': table_height, 'type': 'bar', 'distinct_values': json.dumps(distinct_values)}
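The data table assembled above starts with a header row of result-set names and is followed by one row per fact label, with the counts produced by normalise_agg filled into the matching columns. Its shape might therefore look roughly like this; the labels and numbers below are invented for illustration:

# Illustrative shape of the 'data' table built by facts_agg (all values invented).
data = [
    ['Word', 'My saved search', 'Query'],  # header: one column per result set
    ['PERSON', 0.52, 0.61],                # one row per fact label
    ['ORG', 0.31, 0.18],
]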