当前位置: 首页>>代码示例>>Python>>正文


Python datasets.Datasets类代码示例

本文整理汇总了Python中utils.datasets.Datasets的典型用法代码示例。如果您正苦于以下问题:Python Datasets类的具体用法?Python Datasets怎么用?Python Datasets使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Datasets类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: mlt_query

def mlt_query(request):
    logger = LogManager(__name__, 'SEARCH MLT')
    es_params = request.POST

    mlt_fields = [json.loads(field)['path'] for field in es_params.getlist('mlt_fields')]

    handle_negatives = request.POST['handle_negatives']
    docs_accepted = [a.strip() for a in request.POST['docs'].split('\n') if a]
    docs_rejected = [a.strip() for a in request.POST['docs_rejected'].split('\n') if a]

    # stopwords
    stopword_lexicon_ids = request.POST.getlist('mlt_stopword_lexicons')
    stopwords = []

    for lexicon_id in stopword_lexicon_ids:
        lexicon = Lexicon.objects.get(id=int(lexicon_id))
        words = Word.objects.filter(lexicon=lexicon)
        stopwords+=[word.wrd for word in words]

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)

    response = es_m.more_like_this_search(mlt_fields,docs_accepted=docs_accepted,docs_rejected=docs_rejected,handle_negatives=handle_negatives,stopwords=stopwords)

    documents = []
    for hit in response['hits']['hits']:
        fields_content = get_fields_content(hit,mlt_fields)
        documents.append({'id':hit['_id'],'content':fields_content})

    template_params = {'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'documents':documents}
    template = loader.get_template('mlt_results.html')
    return HttpResponse(template.render(template_params, request))
开发者ID:cbentes,项目名称:texta,代码行数:35,代码来源:views.py

示例2: run

    def run(self, task_id):
        self.task_id = task_id
        task = Task.objects.get(pk=self.task_id)
        params = json.loads(task.parameters)
        task.update_status(Task.STATUS_RUNNING)

        try:
            ds = Datasets().activate_datasets_by_id(params['dataset'])
            es_m = ds.build_manager(ES_Manager)
            es_m.load_combined_query(self._parse_query(params))

            self.es_m = es_m
            self.params = params
            valid, msg = self._check_if_request_bad(self.params)
            if valid:
                self._preprocessor_worker()
            else:
                raise UserWarning(msg)

        except TaskCanceledException as e:
            # If here, task was canceled while processing
            # Delete task
            task = Task.objects.get(pk=self.task_id)
            task.delete()
            logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'PROCESSOR WORK', 'event': 'processor_worker_canceled', 'data': {'task_id': self.task_id}}), exc_info=True)
            print("--- Task canceled")

        except Exception as e:
            logging.getLogger(ERROR_LOGGER).exception(json.dumps(
                {'process': 'PROCESSOR WORK', 'event': 'processor_worker_failed', 'data': {'task_id': self.task_id}}), exc_info=True)
            # declare the job as failed.
            task = Task.objects.get(pk=self.task_id)
            task.result = json.dumps({'error': repr(e)})
            task.update_status(Task.STATUS_FAILED, set_time_completed=True)
开发者ID:ekt68,项目名称:texta,代码行数:34,代码来源:preprocessor_worker.py

示例3: save

def save(request):
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)

    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']
        s_content = json.dumps([request.POST[x] for x in request.POST.keys() if 'match_txt' in x])
        search = Search(author=request.user,search_content=s_content,description=desc,dataset=Dataset.objects.get(pk=int(request.session['dataset'])),query=json.dumps(q))
        search.save()
        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')

    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')

    return HttpResponse()
开发者ID:cbentes,项目名称:texta,代码行数:26,代码来源:views.py

示例4: run

    def run(self, task_id):
        self.task_id = task_id
        self.task_obj = Task.objects.get(pk=self.task_id)
        params = json.loads(self.task_obj.parameters)
        self.task_obj.update_status(Task.STATUS_RUNNING)

        try:
            ds = Datasets().activate_datasets_by_id(params['dataset'])
            es_m = ds.build_manager(ES_Manager)
            # es_m.load_combined_query(self._parse_query(params))
            self.es_m = es_m
            self.params = params

            result = self._start_subworker()
            self.task_obj.result = result
            self.task_obj.update_status(Task.STATUS_COMPLETED, set_time_completed=True)

        except TaskCanceledException as e:
            # If here, task was canceled while processing
            # Delete task
            self.task_obj.delete()
            logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'PROCESSOR WORK', 'event': 'management_worker_canceled', 'data': {'task_id': self.task_id}}))
            print("--- Task canceled")

        except Exception as e:
            logging.getLogger(ERROR_LOGGER).exception(json.dumps(
                {'process': 'PROCESSOR WORK', 'event': 'manager_worker_failed', 'data': {'task_id': self.task_id}}), exc_info=True)
            # declare the job as failed.
            self.task_obj.result = json.dumps({'error': repr(e)})
            self.task_obj.update_status(Task.STATUS_FAILED, set_time_completed=True)
        print('Done with management task')
开发者ID:ekt68,项目名称:texta,代码行数:31,代码来源:management_worker.py

示例5: index

def index(request):
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    fields = get_fields(es_m)

    datasets = Datasets().get_allowed_datasets(request.user)
    language_models = Task.objects.filter(task_type=TaskTypes.TRAIN_MODEL.value).filter(status__iexact=Task.STATUS_COMPLETED).order_by('-pk')

    # Hide fact graph if no facts_str_val is present in fields
    display_fact_graph = 'hidden'
    for i in fields:
        if json.loads(i['data'])['type'] == "fact_str_val":
            display_fact_graph = ''
            break

    template_params = {'display_fact_graph': display_fact_graph,
                       'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'fields': fields,
                       'searches': Search.objects.filter(author=request.user),
                       'lexicons': Lexicon.objects.all().filter(author=request.user),
                       'language_models': language_models, 
                       'allowed_datasets': datasets,                       
                       }

    template = loader.get_template('searcher.html')

    return HttpResponse(template.render(template_params, request))
开发者ID:ekt68,项目名称:texta,代码行数:28,代码来源:views.py

示例6: api_document_tags_list

def api_document_tags_list(request, user, params):
    """ Get document tags (via auth_token)
    """
    dataset_id = params.get('dataset', None)
    document_ids = params.get('document_ids', None)

    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        error = {'error': 'invalid dataset parameter'}
        data_json = json.dumps(error)
        return HttpResponse(data_json, status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    resp = mass_helper.get_document_by_ids(document_ids)

    data = []
    for doc in resp['hits']['hits']:
        for f in doc['_source'].get('texta_facts', []):
            if f['fact'] == 'TEXTA_TAG':
                doc_id = doc['_id']
                doc_path = f['doc_path']
                doc_tag = f['str_val']
                data.append({ 'document_id': doc_id, 'field': doc_path, 'tag': doc_tag})

    data_json = json.dumps(data)
    return HttpResponse(data_json, status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:29,代码来源:api_v1.py

示例7: get_all_rows

def get_all_rows(es_params, request):
    buffer_ = StringIO()
    writer = csv.writer(buffer_)

    writer.writerow([feature for feature in es_params['features']])

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)

    es_m.set_query_parameter('size', ES_SCROLL_BATCH)

    features = sorted(es_params['features'])

    response = es_m.scroll()

    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']

    while hits:
        process_hits(hits, features, write=True, writer=writer)

        buffer_.seek(0)
        data = buffer_.read()
        buffer_.seek(0)
        buffer_.truncate()
        yield data

        response = es_m.scroll(scroll_id=scroll_id)
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
开发者ID:cbentes,项目名称:texta,代码行数:31,代码来源:export_pages.py

示例8: index

def index(request):
    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    fields = get_fields(es_m)

    datasets = Datasets().get_allowed_datasets(request.user)
    language_models = Task.objects.filter(task_type='train_model').filter(status__iexact='completed').order_by('-pk')
    
    preprocessors = collect_map_entries(preprocessor_map)
    enabled_preprocessors = [preprocessor for preprocessor in preprocessors]

    # Hide fact graph if no facts_str_val is present in fields
    display_fact_graph = 'hidden'
    for i in fields:
        if json.loads(i['data'])['type'] == "fact_str_val":
            display_fact_graph = ''
            break

    template_params = {'display_fact_graph': display_fact_graph,
                       'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'fields': fields,
                       'searches': Search.objects.filter(author=request.user),
                       'lexicons': Lexicon.objects.all().filter(author=request.user),
                       'dataset': ds.get_index(),
                       'language_models': language_models, 
                       'allowed_datasets': datasets,                       
                       'enabled_preprocessors': enabled_preprocessors,
                       'task_params': task_params}

    template = loader.get_template('searcher.html')

    return HttpResponse(template.render(template_params, request))
开发者ID:cbentes,项目名称:texta,代码行数:33,代码来源:views.py

示例9: api_tag_list

def api_tag_list(request, user, params):
    """ Get list of available tags for API user (via auth_token)
    """
    dataset_id = params['dataset']
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
            error = {'error': 'invalid dataset parameter'}
            data_json = json.dumps(error)
            return HttpResponse(data_json, status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    tag_set = mass_helper.get_unique_tags()
    tag_frequency = mass_helper.get_tag_frequency(tag_set)
    tag_models = set([tagger.description for tagger in Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value)])

    data = []
    for tag in sorted(tag_frequency.keys()):
        count = tag_frequency[tag]
        has_model = tag in tag_models
        doc = {'description': tag,
               'count': count,
               'has_model': has_model}
        data.append(doc)
    data_json = json.dumps(data)
    return HttpResponse(data_json, status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:28,代码来源:api_v1.py

示例10: api_mass_train_tagger

def api_mass_train_tagger(request, user, params):
    """ Apply mass train tagger (via auth_token)
    """
    # Read all params
    dataset_id = params.get('dataset', None)
    selected_tags = set(params.get('tags', []))
    field = params.get("field", None)
    normalizer_opt = params.get("normalizer_opt", "0")
    classifier_opt = params.get("classifier_opt", "0")
    reductor_opt = params.get("reductor_opt", "0")
    extractor_opt = params.get("extractor_opt", "0")
    retrain_only = params.get("retrain_only", False)

    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
            error = {'error': 'invalid dataset parameter'}
            data_json = json.dumps(error)
            return HttpResponse(data_json, status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    
    data = mass_helper.schedule_tasks(selected_tags, normalizer_opt, classifier_opt, reductor_opt, extractor_opt, field, dataset_id, user)
    data_json = json.dumps(data)
    return HttpResponse(data_json, status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:27,代码来源:api_v1.py

示例11: export_matched_data

def export_matched_data(request):
    search_id = request.GET['search_id']

    inclusive_metaquery = json.loads(request.GET['inclusive_grammar'])

    ds = Datasets().activate_dataset(request.session)

    component_query = ElasticGrammarQuery(inclusive_metaquery, None).generate()

    es_m = ds.build_manager(ES_Manager)

    if search_id == '-1': # Full search
        es_m.combined_query = component_query
    else:
        saved_query = json.loads(Search.objects.get(pk=search_id).query)
        es_m.load_combined_query(saved_query)
        es_m.merge_combined_query_with_query_dict(component_query)

    inclusive_instructions = generate_instructions(inclusive_metaquery)

    response = StreamingHttpResponse(get_all_matched_rows(es_m.combined_query['main'], request, inclusive_instructions), content_type='text/csv')

    response['Content-Disposition'] = 'attachment; filename="%s"' % ('extracted.csv')

    return response
开发者ID:cbentes,项目名称:texta,代码行数:25,代码来源:views.py

示例12: api_search_list

def api_search_list(request, user, params):
    """ Get list of available searches for API user (via auth_token)
    """

    # Read all params
    dataset_id = int(params['dataset'])

    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
            error = {'error': 'invalid dataset parameter'}
            data_json = json.dumps(error)
            return HttpResponse(data_json, status=400, content_type='application/json')

    # Build response structure
    data = []
    dataset = Dataset(pk=dataset_id)
    search_list = list(Search.objects.filter(dataset=dataset))
    for search in search_list:
        row = {
            'dataset': dataset_id,
            'search': search.id,
            'description': search.description
        }
        data.append(row)

    data_json = json.dumps(data)
    return HttpResponse(data_json, status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:29,代码来源:api_v1.py

示例13: get_all_rows

def get_all_rows(es_params, request):
    features = es_params['features']

    # Prepare in-memory csv writer.
    buffer_ = StringIO()
    writer = csv.writer(buffer_)

    # Write the first headers.
    writer.writerow(features)

    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    es_m.set_query_parameter('size', ES_SCROLL_BATCH)

    # Fetch the initial scroll results.
    response = es_m.scroll()
    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']

    while hits:
        process_hits(hits, features, write=True, writer=writer)
        # Return some data with the StreamingResponce
        yield _get_buffer_data(buffer_)

        # Continue with the scroll.
        response = es_m.scroll(scroll_id=scroll_id)
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
开发者ID:ekt68,项目名称:texta,代码行数:29,代码来源:export_pages.py

示例14: get_query

def get_query(request):
    es_params = request.POST
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    # GET ONLY MAIN QUERY
    query = es_m.combined_query['main']
    return HttpResponse(json.dumps(query))
开发者ID:ekt68,项目名称:texta,代码行数:8,代码来源:views.py

示例15: __init__

    def __init__(self, params):
        self.field = json.loads(params['field'])['path']
        query = json.loads(Search.objects.get(pk=int(params['search'])).query)

        # Define selected mapping
        ds = Datasets().activate_dataset_by_id(params['dataset'])
        self.es_m = ds.build_manager(ES_Manager)
        self.es_m.load_combined_query(query)
开发者ID:cbentes,项目名称:texta,代码行数:8,代码来源:data_manager.py


注:本文中的utils.datasets.Datasets类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。