本文整理汇总了Python中utils.datasets.Datasets.activate_datasets_by_id方法的典型用法代码示例。如果您正苦于以下问题:Python Datasets.activate_datasets_by_id方法的具体用法?Python Datasets.activate_datasets_by_id怎么用?Python Datasets.activate_datasets_by_id使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils.datasets.Datasets的用法示例。
在下文中一共展示了Datasets.activate_datasets_by_id方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: api_mass_train_tagger
# 需要导入模块: from utils.datasets import Datasets [as 别名]
# 或者: from utils.datasets.Datasets import activate_datasets_by_id [as 别名]
def api_mass_train_tagger(request, user, params):
    """ Apply mass train tagger (via auth_token)

    Schedules training tasks for the selected tags on the given dataset.
    Returns HTTP 400 (JSON error) when the dataset id does not resolve to
    an active dataset, otherwise HTTP 200 with the scheduling result.
    """
    # Read all params
    dataset_id = params.get('dataset', None)
    selected_tags = set(params.get('tags', []))
    field = params.get("field", None)
    normalizer_opt = params.get("normalizer_opt", "0")
    classifier_opt = params.get("classifier_opt", "0")
    reductor_opt = params.get("reductor_opt", "0")
    extractor_opt = params.get("extractor_opt", "0")
    # NOTE(review): a 'retrain_only' parameter was previously read here but
    # never used; the dead local has been removed — confirm it was not meant
    # to be forwarded to schedule_tasks.
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        error = {'error': 'invalid dataset parameter'}
        return HttpResponse(json.dumps(error), status=400, content_type='application/json')
    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    data = mass_helper.schedule_tasks(selected_tags, normalizer_opt, classifier_opt,
                                      reductor_opt, extractor_opt, field, dataset_id, user)
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
示例2: api_document_tags_list
# 需要导入模块: from utils.datasets import Datasets [as 别名]
# 或者: from utils.datasets.Datasets import activate_datasets_by_id [as 别名]
def api_document_tags_list(request, user, params):
    """ Get document tags (via auth_token)

    Fetches the requested documents and returns their TEXTA_TAG facts as a
    flat list of {document_id, field, tag} records.
    """
    dataset_id = params.get('dataset', None)
    document_ids = params.get('document_ids', None)
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Reject the request if the dataset id did not resolve to an active dataset
    if not ds.is_active():
        return HttpResponse(json.dumps({'error': 'invalid dataset parameter'}),
                            status=400, content_type='application/json')
    es_m = ds.build_manager(ES_Manager)
    resp = MassHelper(es_m).get_document_by_ids(document_ids)
    # One output record per TEXTA_TAG fact found on each document
    data = [{'document_id': hit['_id'], 'field': fact['doc_path'], 'tag': fact['str_val']}
            for hit in resp['hits']['hits']
            for fact in hit['_source'].get('texta_facts', [])
            if fact['fact'] == 'TEXTA_TAG']
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
示例3: api_tag_list
# 需要导入模块: from utils.datasets import Datasets [as 别名]
# 或者: from utils.datasets.Datasets import activate_datasets_by_id [as 别名]
def api_tag_list(request, user, params):
    """ Get list of available tags for API user (via auth_token)

    Returns, for every unique tag in the dataset, its document count and
    whether a trained tagger model exists for it.
    """
    ds = Datasets()
    ds.activate_datasets_by_id(params['dataset'], use_default=False)
    # Reject the request if the dataset id did not resolve to an active dataset
    if not ds.is_active():
        return HttpResponse(json.dumps({'error': 'invalid dataset parameter'}),
                            status=400, content_type='application/json')
    es_m = ds.build_manager(ES_Manager)
    helper = MassHelper(es_m)
    tag_frequency = helper.get_tag_frequency(helper.get_unique_tags())
    # Tags that already have a training task (i.e. a model) associated
    trained = {tagger.description
               for tagger in Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value)}
    data = [{'description': tag,
             'count': tag_frequency[tag],
             'has_model': tag in trained}
            for tag in sorted(tag_frequency)]
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
示例4: api_search_list
# 需要导入模块: from utils.datasets import Datasets [as 别名]
# 或者: from utils.datasets.Datasets import activate_datasets_by_id [as 别名]
def api_search_list(request, user, params):
    """ Get list of available searches for API user (via auth_token)

    Returns one {dataset, search, description} record per saved search
    belonging to the given dataset.
    """
    dataset_id = int(params['dataset'])
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Reject the request if the dataset id did not resolve to an active dataset
    if not ds.is_active():
        return HttpResponse(json.dumps({'error': 'invalid dataset parameter'}),
                            status=400, content_type='application/json')
    # Build response structure
    data = [{'dataset': dataset_id,
             'search': search.id,
             'description': search.description}
            for search in Search.objects.filter(dataset=Dataset(pk=dataset_id))]
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
示例5: api_field_list
# 需要导入模块: from utils.datasets import Datasets [as 别名]
# 或者: from utils.datasets.Datasets import activate_datasets_by_id [as 别名]
def api_field_list(request, user, params):
    """ Get list of available fields for API user (via auth_token)

    Returns the sorted field paths of the dataset's mapping.
    """
    ds = Datasets()
    ds.activate_datasets_by_id(params['dataset'], use_default=False)
    # Reject the request if the dataset id did not resolve to an active dataset
    if not ds.is_active():
        return HttpResponse(json.dumps({'error': 'invalid dataset parameter'}),
                            status=400, content_type='application/json')
    es_m = ds.build_manager(ES_Manager)
    paths = sorted(field['path'] for field in get_fields(es_m))
    return HttpResponse(json.dumps(paths), status=200, content_type='application/json')
示例6: api_tag_feedback
# 需要导入模块: from utils.datasets import Datasets [as 别名]
# 或者: from utils.datasets.Datasets import activate_datasets_by_id [as 别名]
def api_tag_feedback(request, user, params):
    """ Apply tag feedback (via auth_token)

    Records the user's feedback on a tagger decision; if the document is not
    yet in the dataset, it is indexed (with the tag attached as a TEXTA_TAG
    fact when the prediction is positive).
    Currently working correctly with 1 tag per document. Needs further development.
    """
    decision_id = params.get('decision_id', None)
    if not decision_id:
        return HttpResponse(json.dumps({'error': 'no decision ID supported'}),
                            status=400, content_type='application/json')
    doc_path = params.get('doc_path', None)
    if not doc_path:
        return HttpResponse(json.dumps({'error': 'no doc_path supported. cannot index feedback'}),
                            status=400, content_type='application/json')
    prediction = params.get('prediction', None)
    # NOTE(review): a falsy prediction (e.g. 0) is rejected here — confirm
    # that 0 is never a valid prediction value.
    if not prediction:
        return HttpResponse(json.dumps({'error': 'no prediction supported'}),
                            status=400, content_type='application/json')
    feedback_obj = TagFeedback.update(user, decision_id, prediction)
    # Retrieve dataset id and tagger name from the training task's parameters.
    # Use a distinct name here: the original code rebound the `params`
    # argument, clobbering the request parameters.
    task_params = json.loads(Task.objects.get(pk=feedback_obj.tagger.pk).parameters)
    dataset_id = task_params['dataset']
    tagger_name = task_params['description']
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        return HttpResponse(json.dumps({'error': 'invalid dataset parameter'}),
                            status=400, content_type='application/json')
    document = json.loads(feedback_obj.document)
    in_dataset = int(feedback_obj.in_dataset)
    data = {'success': True}
    # Index the document only once: skip if already present in ES
    if in_dataset == 0:
        es_m = ds.build_manager(ES_Manager)
        if prediction > 0:
            # Positive feedback: attach the tag as a TEXTA_TAG fact before indexing
            new_fact = {"fact": "TEXTA_TAG", "str_val": tagger_name, "doc_path": doc_path, "spans": "[[0,0]]"}
            document['texta_facts'] = [new_fact]
        es_m.add_document(document)
        feedback_obj.in_dataset = 1
        feedback_obj.save()
        data['feedback_indexed'] = True
    else:
        data['feedback_indexed'] = False
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
示例7: api_hybrid_tagger
# 需要导入模块: from utils.datasets import Datasets [as 别名]
# 或者: from utils.datasets.Datasets import activate_datasets_by_id [as 别名]
def api_hybrid_tagger(request, user, params):
    """ Apply hybrid tagger (via auth_token)

    Finds documents similar to a saved search, tallies their TEXTA_TAG
    facts on the given field, selects the tags that pass the frequency
    threshold (limited to the most frequent `max_taggers`), and queues a
    text_tagger preprocessor task using the completed taggers for those
    tags. The response explains the selection per tag.
    """
    DEFAULT_TAGS_THRESHOLD = 50
    DEFAULT_MAX_TAGGERS = 20
    dataset_id = params['dataset']
    search = params['search']
    field = params['field']
    max_taggers = int(params.get('max_taggers', DEFAULT_MAX_TAGGERS))
    min_count_threshold = int(params.get('min_count_threshold', DEFAULT_TAGS_THRESHOLD))
    params.setdefault('description', "via API call")
    # Parameter projection for the preprocessor task
    task_type = TaskTypes.APPLY_PREPROCESSOR
    params["preprocessor_key"] = "text_tagger"
    params["text_tagger_feature_names"] = params['field']
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Reject the request if the dataset id did not resolve to an active dataset
    if not ds.is_active():
        return HttpResponse(json.dumps({'error': 'invalid dataset parameter'}),
                            status=400, content_type='application/json')
    param_query = json.loads(Search.objects.get(pk=int(search)).query)
    es_m = ds.build_manager(ES_Manager)
    es_m.load_combined_query(param_query)
    # Get similar documents in a neighborhood of size 1000
    response = es_m.more_like_this_search([field], search_size=1000)
    # Tally TEXTA_TAG facts attached to the queried field
    tag_freq = {}
    for doc in response['hits']['hits']:
        for fact in doc['_source'].get('texta_facts', []):
            if fact['fact'] == 'TEXTA_TAG' and fact['doc_path'] == field:
                tag_freq[fact['str_val']] = tag_freq.get(fact['str_val'], 0) + 1
    # Keep only the most frequent tags to limit the number of taggers
    ranked = sorted(tag_freq, key=tag_freq.get, reverse=True)
    top_tags = set(ranked[:max_taggers])
    # Perform tag selection, recording the decision for each tag
    data = {'task': {}, 'explain': []}
    candidate_tags = set()
    for tag, count in tag_freq.items():
        selected = 1 if (count >= min_count_threshold and tag in top_tags) else 0
        if selected:
            candidate_tags.add(tag)
        data['explain'].append({'tag': tag,
                                'selected': selected,
                                'count': count})
    # Completed training tasks whose description matches a selected tag
    completed = Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value).filter(status=Task.STATUS_COMPLETED)
    taggers = [tagger.id for tagger in completed if tagger.description in candidate_tags]
    if len(taggers) > 0:
        params['text_tagger_taggers'] = taggers
        # Create and queue the execution task
        task_id = create_task(task_type, params['description'], params, user)
        task = Task.get_by_id(task_id)
        task.update_status(Task.STATUS_QUEUED)
        # Return a reference to the queued task
        data['task'] = {
            'task_id': task_id,
            'task_type': task_type,
            'status': task.status,
            'user': task.user.username
        }
    else:
        # No taggers were selected
        data['task'] = {"error": "no similar documents have tags count above threshold"}
    # Generate response
    data['min_count_threshold'] = min_count_threshold
    data['max_taggers'] = max_taggers
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')