This article collects typical usage examples of the Python method utils.datasets.Datasets.build_manager. If you are wondering what Datasets.build_manager does, how to call it, or what real usage looks like, the curated code examples below should help. You can also explore further usage examples of the containing class, utils.datasets.Datasets.
Fifteen code examples of Datasets.build_manager are shown below, sorted by popularity by default.
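All of the examples share one core pattern: activate one or more datasets on a Datasets instance, then pass the ES_Manager class to build_manager to obtain a manager bound to the active indices. A minimal sketch of that pattern, assuming ES_Manager is importable from utils.es_manager (the examples never show its import path) and given a Django request plus query parameters:

from utils.datasets import Datasets
from utils.es_manager import ES_Manager  # import path assumed, not shown in the examples

def build_es_manager(request, es_params):
    # Activate the dataset selected in the user's session...
    ds = Datasets().activate_dataset(request.session)
    # ...and wrap the active dataset(s) in an Elasticsearch manager.
    es_m = ds.build_manager(ES_Manager)
    # Build a combined query from the request parameters.
    es_m.build(es_params)
    return es_m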
Example 1: api_mass_train_tagger
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def api_mass_train_tagger(request, user, params):
    """ Apply mass train tagger (via auth_token)
    """
    # Read all params
    dataset_id = params.get('dataset', None)
    selected_tags = set(params.get('tags', []))
    field = params.get("field", None)
    normalizer_opt = params.get("normalizer_opt", "0")
    classifier_opt = params.get("classifier_opt", "0")
    reductor_opt = params.get("reductor_opt", "0")
    extractor_opt = params.get("extractor_opt", "0")
    retrain_only = params.get("retrain_only", False)

    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        error = {'error': 'invalid dataset parameter'}
        data_json = json.dumps(error)
        return HttpResponse(data_json, status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    data = mass_helper.schedule_tasks(selected_tags, normalizer_opt, classifier_opt, reductor_opt, extractor_opt, field, dataset_id, user)
    data_json = json.dumps(data)
    return HttpResponse(data_json, status=200, content_type='application/json')
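Note that build_manager receives the manager class itself (ES_Manager), not an instance: the Datasets object appears to construct the manager around whatever datasets are currently active, which is why every example activates a dataset first and why this one checks ds.is_active() before building.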
Example 2: api_tag_list
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def api_tag_list(request, user, params):
    """ Get list of available tags for API user (via auth_token)
    """
    dataset_id = params['dataset']
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        error = {'error': 'invalid dataset parameter'}
        data_json = json.dumps(error)
        return HttpResponse(data_json, status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    tag_set = mass_helper.get_unique_tags()
    tag_frequency = mass_helper.get_tag_frequency(tag_set)
    tag_models = set(tagger.description for tagger in Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value))

    data = []
    for tag in sorted(tag_frequency.keys()):
        count = tag_frequency[tag]
        has_model = tag in tag_models
        doc = {'description': tag,
               'count': count,
               'has_model': has_model}
        data.append(doc)

    data_json = json.dumps(data)
    return HttpResponse(data_json, status=200, content_type='application/json')
Example 3: export_matched_data
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def export_matched_data(request):
    search_id = request.GET['search_id']
    inclusive_metaquery = json.loads(request.GET['inclusive_grammar'])

    ds = Datasets().activate_dataset(request.session)
    component_query = ElasticGrammarQuery(inclusive_metaquery, None).generate()
    es_m = ds.build_manager(ES_Manager)

    if search_id == '-1':  # Full search
        es_m.combined_query = component_query
    else:
        saved_query = json.loads(Search.objects.get(pk=search_id).query)
        es_m.load_combined_query(saved_query)
        es_m.merge_combined_query_with_query_dict(component_query)

    inclusive_instructions = generate_instructions(inclusive_metaquery)

    response = StreamingHttpResponse(get_all_matched_rows(es_m.combined_query['main'], request, inclusive_instructions), content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="%s"' % 'extracted.csv'
    return response
Example 4: run
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def run(self, task_id):
    self.task_id = task_id
    self.task_obj = Task.objects.get(pk=self.task_id)
    params = json.loads(self.task_obj.parameters)
    self.task_obj.update_status(Task.STATUS_RUNNING)

    try:
        ds = Datasets().activate_datasets_by_id(params['dataset'])
        es_m = ds.build_manager(ES_Manager)
        # es_m.load_combined_query(self._parse_query(params))

        self.es_m = es_m
        self.params = params
        result = self._start_subworker()

        self.task_obj.result = result
        self.task_obj.update_status(Task.STATUS_COMPLETED, set_time_completed=True)
    except TaskCanceledException as e:
        # If here, task was canceled while processing
        # Delete task
        self.task_obj.delete()
        logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'PROCESSOR WORK', 'event': 'management_worker_canceled', 'data': {'task_id': self.task_id}}))
        print("--- Task canceled")
    except Exception as e:
        logging.getLogger(ERROR_LOGGER).exception(json.dumps(
            {'process': 'PROCESSOR WORK', 'event': 'manager_worker_failed', 'data': {'task_id': self.task_id}}), exc_info=True)
        # declare the job as failed.
        self.task_obj.result = json.dumps({'error': repr(e)})
        self.task_obj.update_status(Task.STATUS_FAILED, set_time_completed=True)

    print('Done with management task')
Example 5: index
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def index(request):
    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    fields = get_fields(es_m)
    datasets = Datasets().get_allowed_datasets(request.user)
    language_models = Task.objects.filter(task_type='train_model').filter(status__iexact='completed').order_by('-pk')

    preprocessors = collect_map_entries(preprocessor_map)
    enabled_preprocessors = list(preprocessors)

    # Hide fact graph if no fact_str_val field is present
    display_fact_graph = 'hidden'
    for i in fields:
        if json.loads(i['data'])['type'] == "fact_str_val":
            display_fact_graph = ''
            break

    template_params = {'display_fact_graph': display_fact_graph,
                       'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'fields': fields,
                       'searches': Search.objects.filter(author=request.user),
                       'lexicons': Lexicon.objects.all().filter(author=request.user),
                       'dataset': ds.get_index(),
                       'language_models': language_models,
                       'allowed_datasets': datasets,
                       'enabled_preprocessors': enabled_preprocessors,
                       'task_params': task_params}

    template = loader.get_template('searcher.html')
    return HttpResponse(template.render(template_params, request))
Example 6: get_all_rows
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def get_all_rows(es_params, request):
    features = es_params['features']

    # Prepare in-memory csv writer.
    buffer_ = StringIO()
    writer = csv.writer(buffer_)
    # Write the header row.
    writer.writerow(features)

    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    es_m.set_query_parameter('size', ES_SCROLL_BATCH)

    # Fetch the initial scroll results.
    response = es_m.scroll()
    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']

    while hits:
        process_hits(hits, features, write=True, writer=writer)

        # Yield the buffered rows for the StreamingHttpResponse.
        yield _get_buffer_data(buffer_)

        # Continue with the scroll.
        response = es_m.scroll(scroll_id=scroll_id)
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
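Because get_all_rows is a generator, the CSV can be streamed to the client batch by batch instead of being buffered in full; Example 3 above wires an equivalent generator into a StreamingHttpResponse. A possible caller, assuming a Django view with StreamingHttpResponse imported (the filename is illustrative):

def export_rows(request):
    es_params = request.POST  # as in Examples 9 and 14
    response = StreamingHttpResponse(get_all_rows(es_params, request), content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="export.csv"'
    return response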
Example 7: index
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def index(request):
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    fields = get_fields(es_m)
    datasets = Datasets().get_allowed_datasets(request.user)
    language_models = Task.objects.filter(task_type=TaskTypes.TRAIN_MODEL.value).filter(status__iexact=Task.STATUS_COMPLETED).order_by('-pk')

    # Hide fact graph if no fact_str_val field is present
    display_fact_graph = 'hidden'
    for i in fields:
        if json.loads(i['data'])['type'] == "fact_str_val":
            display_fact_graph = ''
            break

    template_params = {'display_fact_graph': display_fact_graph,
                       'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'fields': fields,
                       'searches': Search.objects.filter(author=request.user),
                       'lexicons': Lexicon.objects.all().filter(author=request.user),
                       'language_models': language_models,
                       'allowed_datasets': datasets,
                       }

    template = loader.get_template('searcher.html')
    return HttpResponse(template.render(template_params, request))
Example 8: run
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def run(self, task_id):
    self.task_id = task_id
    task = Task.objects.get(pk=self.task_id)
    params = json.loads(task.parameters)
    task.update_status(Task.STATUS_RUNNING)

    try:
        ds = Datasets().activate_datasets_by_id(params['dataset'])
        es_m = ds.build_manager(ES_Manager)
        es_m.load_combined_query(self._parse_query(params))

        self.es_m = es_m
        self.params = params

        valid, msg = self._check_if_request_bad(self.params)
        if valid:
            self._preprocessor_worker()
        else:
            raise UserWarning(msg)
    except TaskCanceledException as e:
        # If here, task was canceled while processing
        # Delete task
        task = Task.objects.get(pk=self.task_id)
        task.delete()
        logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'PROCESSOR WORK', 'event': 'processor_worker_canceled', 'data': {'task_id': self.task_id}}), exc_info=True)
        print("--- Task canceled")
    except Exception as e:
        logging.getLogger(ERROR_LOGGER).exception(json.dumps(
            {'process': 'PROCESSOR WORK', 'event': 'processor_worker_failed', 'data': {'task_id': self.task_id}}), exc_info=True)
        # declare the job as failed.
        task = Task.objects.get(pk=self.task_id)
        task.result = json.dumps({'error': repr(e)})
        task.update_status(Task.STATUS_FAILED, set_time_completed=True)
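Example 8 follows the same worker skeleton as Example 4, but it additionally restores a combined query parsed from the task parameters (load_combined_query) and validates the request before handing off to the preprocessor worker.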
Example 9: mlt_query
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def mlt_query(request):
    logger = LogManager(__name__, 'SEARCH MLT')
    es_params = request.POST
    mlt_fields = [json.loads(field)['path'] for field in es_params.getlist('mlt_fields')]

    handle_negatives = request.POST['handle_negatives']
    docs_accepted = [a.strip() for a in request.POST['docs'].split('\n') if a]
    docs_rejected = [a.strip() for a in request.POST['docs_rejected'].split('\n') if a]

    # stopwords
    stopword_lexicon_ids = request.POST.getlist('mlt_stopword_lexicons')
    stopwords = []
    for lexicon_id in stopword_lexicon_ids:
        lexicon = Lexicon.objects.get(id=int(lexicon_id))
        words = Word.objects.filter(lexicon=lexicon)
        stopwords += [word.wrd for word in words]

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)

    response = es_m.more_like_this_search(mlt_fields, docs_accepted=docs_accepted, docs_rejected=docs_rejected, handle_negatives=handle_negatives, stopwords=stopwords)

    documents = []
    for hit in response['hits']['hits']:
        fields_content = get_fields_content(hit, mlt_fields)
        documents.append({'id': hit['_id'], 'content': fields_content})

    template_params = {'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'documents': documents}
    template = loader.get_template('mlt_results.html')
    return HttpResponse(template.render(template_params, request))
Example 10: save
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def save(request):
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)

    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']
        s_content = json.dumps([request.POST[x] for x in request.POST.keys() if 'match_txt' in x])
        search = Search(author=request.user, search_content=s_content, description=desc,
                        dataset=Dataset.objects.get(pk=int(request.session['dataset'])), query=json.dumps(q))
        search.save()
        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')
    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')

    return HttpResponse()
Example 11: api_document_tags_list
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def api_document_tags_list(request, user, params):
    """ Get document tags (via auth_token)
    """
    dataset_id = params.get('dataset', None)
    document_ids = params.get('document_ids', None)

    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        error = {'error': 'invalid dataset parameter'}
        data_json = json.dumps(error)
        return HttpResponse(data_json, status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    resp = mass_helper.get_document_by_ids(document_ids)

    data = []
    for doc in resp['hits']['hits']:
        for f in doc['_source'].get('texta_facts', []):
            if f['fact'] == 'TEXTA_TAG':
                doc_id = doc['_id']
                doc_path = f['doc_path']
                doc_tag = f['str_val']
                data.append({'document_id': doc_id, 'field': doc_path, 'tag': doc_tag})

    data_json = json.dumps(data)
    return HttpResponse(data_json, status=200, content_type='application/json')
Example 12: get_all_rows
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def get_all_rows(es_params, request):
    buffer_ = StringIO()
    writer = csv.writer(buffer_)

    # Sort the features up front so the header row matches the column order used below.
    features = sorted(es_params['features'])
    writer.writerow(features)

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    es_m.set_query_parameter('size', ES_SCROLL_BATCH)

    response = es_m.scroll()
    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']

    while hits:
        process_hits(hits, features, write=True, writer=writer)

        # Drain the buffer and yield its contents.
        buffer_.seek(0)
        data = buffer_.read()
        buffer_.seek(0)
        buffer_.truncate()
        yield data

        response = es_m.scroll(scroll_id=scroll_id)
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
Example 13: __init__
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def __init__(self, params):
    self.field = json.loads(params['field'])['path']
    query = json.loads(Search.objects.get(pk=int(params['search'])).query)

    # Define selected mapping
    ds = Datasets().activate_dataset_by_id(params['dataset'])
    self.es_m = ds.build_manager(ES_Manager)
    self.es_m.load_combined_query(query)
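This constructor restores a query that was previously persisted by a view like Example 10's save: the Search row stores the combined query as JSON, and load_combined_query places it back onto a freshly built manager.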
Example 14: get_query
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def get_query(request):
    es_params = request.POST
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    # GET ONLY MAIN QUERY
    query = es_m.combined_query['main']
    return HttpResponse(json.dumps(query))
Example 15: get_all_matched_rows
# Required import: from utils.datasets import Datasets [as alias]
# Or: from utils.datasets.Datasets import build_manager [as alias]
def get_all_matched_rows(query, request, inclusive_instructions):
    buffer_ = StringIO()
    writer = csv.writer(buffer_)

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)

    features = sorted([field['path'] for field in es_m.get_mapped_fields()])
    query['size'] = ES_SCROLL_BATCH
    writer.writerow(features)

    # Query Elasticsearch directly instead of going through es_m.scroll().
    request_url = os.path.join(es_url, ds.get_index(), ds.get_mapping(), '_search?scroll=1m')
    response = requests.get(request_url, data=json.dumps(query)).json()

    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']
    scroll_payload = json.dumps({'scroll': '1m', 'scroll_id': scroll_id})

    while hits:
        for hit in hits:
            feature_dict = {}
            row = []
            for feature_name in features:
                # Walk the dotted feature path down through the document source.
                feature_path = feature_name.split('.')
                parent_source = hit['_source']
                for path_component in feature_path:
                    if path_component in parent_source:
                        parent_source = parent_source[path_component]
                    else:
                        parent_source = ""
                        break
                content = parent_source
                row.append(content)
                feature_dict[feature_name] = content
            layer_dict = matcher.LayerDict(feature_dict)
            if inclusive_instructions.match(layer_dict):
                # NOTE: the unicode check means this example was written for Python 2.
                writer.writerow([element.encode('utf-8') if isinstance(element, unicode) else element for element in row])

        buffer_.seek(0)
        data = buffer_.read()
        buffer_.seek(0)
        buffer_.truncate()
        yield data

        response = requests.get(os.path.join(es_url, '_search', 'scroll'), data=scroll_payload).json()
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
        # Refresh the payload in case Elasticsearch returned a new scroll id.
        scroll_payload = json.dumps({'scroll': '1m', 'scroll_id': scroll_id})
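Unlike Examples 6 and 12, which scroll through es_m.scroll(), this example talks to the Elasticsearch REST API directly with requests, joining the index and mapping from ds.get_index() and ds.get_mapping() onto es_url (presumably a module-level setting). The manager returned by build_manager is used only to enumerate the mapped fields for the CSV header.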