当前位置: 首页>>代码示例>>Python>>正文


Python ES.force_bulk方法代码示例

本文整理汇总了Python中pyes.ES.force_bulk方法的典型用法代码示例。如果您正苦于以下问题:Python ES.force_bulk方法的具体用法?Python ES.force_bulk怎么用?Python ES.force_bulk使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyes.ES的用法示例。


在下文中一共展示了ES.force_bulk方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import force_bulk [as 别名]
class Indexer:
    def __init__(self, es_host, batch_mode=True, batch_size=100):
        self.client = ES(es_host)
        self.batch_mode = batch_mode
        self.client.bulk_size = int(batch_size)

    def bulk_index(self, index, type, shapefile, sleep_time=0.1):
        print 'Indexing [%s] docs into [%s] from %s' % (type, index, shapefile)

        index_count = 0

        id_re = re.compile('^.*?"id"\s*:\s*"([^"]+)"')
        parens_re = re.compile('\(.*?\)')

        for line in input(shapefile):
            id = id_re.match(line).group(1)

            # cleanup any lines that contain parentheticals
            line = parens_re.sub('', line).strip()

            # sweet dec/encodings bro
            line = line.decode('latin-1').encode('utf-8')
            id = id.decode('latin-1').encode('utf-8')

            try:
                self.client.index(line, index, type, id, bulk=self.batch_mode)
            except UnicodeDecodeError as e:
                print "Error processing line with id %s: %s" % (id, e.message)
            except NoServerAvailable as e:
                print "The server failed to respond while indexing %s: [%s]. Sleeping %d seconds and retrying..." % (id, e.message, sleep_time)
                sleep(5)
                try:
                    print "Retrying indexing of %s" % id
                    self.client.index(line, index, type, id, bulk=self.batch_mode)
                except NoServerAvailable as e:
                    print "Failed to reconnect again. Skipping indexing %s" % id
                except Exception as e:
                    print "This happened: %s" % e

            index_count += 1
            if index_count % int(self.client.bulk_size) == 0:
                print 'Indexing batch of %d, starting from %s' % (self.client.bulk_size, id)
                sleep(sleep_time)

        # index remaining bulk entries
        self.client.force_bulk()
开发者ID:knockrentals,项目名称:us-shapes,代码行数:48,代码来源:indexer.py

示例2: ES

# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import force_bulk [as 别名]
# Demonstrates pyes document management: immediate and buffered (bulk)
# indexing, scripted updates, deletes, an explicit bulk flush, and cleanup.
es = ES()

index_name = "my_index"
type_name = "my_type"

from utils_pyes import create_and_add_mapping

create_and_add_mapping(es, index_name, type_name)

# (document, doc_type, id, parent, buffered) tuples, indexed in order.
docs = [
    ({"name": "Joe Tester", "parsedtext": "Joe Testere nice guy", "uuid": "11111", "position": 1},
     type_name, 1, None, False),
    ({"name": "data1", "value": "value1"}, type_name + "2", 1, 1, False),
    ({"name": "Bill Baloney", "parsedtext": "Bill Testere nice guy", "uuid": "22222", "position": 2},
     type_name, 2, None, True),
    ({"name": "data2", "value": "value2"}, type_name + "2", 2, 2, True),
    ({"name": "Bill Clinton", "parsedtext": """Bill is not
        nice guy""", "uuid": "33333", "position": 3}, type_name, 3, None, True),
]
for doc, doc_type, doc_id, parent, buffered in docs:
    if parent is None:
        es.index(doc=doc, index=index_name, doc_type=doc_type, id=doc_id, bulk=buffered)
    else:
        es.index(doc=doc, index=index_name, doc_type=doc_type, id=doc_id, parent=parent, bulk=buffered)

# Flush the buffered index operations.
es.force_bulk()

# Scripted updates: one sent immediately, one queued in the bulk buffer.
es.update(index=index_name, doc_type=type_name, id=2, script='ctx._source.position += 1')
es.update(index=index_name, doc_type=type_name, id=2, script='ctx._source.position += 1', bulk=True)

# Deletes: one queued, one immediate.
es.delete(index=index_name, doc_type=type_name, id=1, bulk=True)
es.delete(index=index_name, doc_type=type_name, id=3)

# Flush the remaining buffered operations and make them searchable.
es.force_bulk()
es.indices.refresh(index_name)

# Clean up the demo index.
es.indices.delete_index(index_name)
开发者ID:Hafizirshaid,项目名称:elasticsearch-cookbook-second-edition,代码行数:33,代码来源:document_management_pyes.py

示例3:

# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import force_bulk [as 别名]
dataset = shelve.open("samples.shelve")

mapping = { u'description': {'boost': 1.0,
                 'index': 'analyzed',
                 'store': 'yes',
                 'type': u'string',
                 "term_vector" : "with_positions_offsets"
                 },
         u'name': {'boost': 1.0,
                    'index': 'analyzed',
                    'store': 'yes',
                    'type': u'string',
                    "term_vector" : "with_positions_offsets"
                    },
         u'age': {'store': 'yes',
                    'type': u'integer'},    
                    }
conn.create_index("test-index")
conn.put_mapping("test-type", {'properties':mapping}, ["test-index"])

start = datetime.now()
for k, userdata in dataset.items():
#    conn.index(userdata, "test-index", "test-type", k)
    conn.index(userdata, "test-index", "test-type", k, bulk=True)
conn.force_bulk()
end = datetime.now()

print "time:", end-start
dataset.close()

开发者ID:akheron,项目名称:pyes,代码行数:31,代码来源:performance.py

示例4: ProcessSpiderData

# 需要导入模块: from pyes import ES [as 别名]
# 或者: from pyes.ES import force_bulk [as 别名]
class ProcessSpiderData(Task):
    def run(self, spider_name):
        """Re-index one spider's scraped items into a fresh Elasticsearch index.

        Builds a new timestamped index, bulk-indexes every item from the
        spider's stored source, repoints the spider-name alias to the new
        index, deletes the old indices it replaced, and persists a
        bz2-compressed, hex-encoded JSON backup of the processed items.
        Always returns True.
        """
        cities = []
        backup_source = []
        backup_created_date = None

        self.elastic = ES(settings.SEARCH_HOSTS, timeout=22.0, bulk_size=1500)
        java = JavaInterface()

        self.extractor = java.ArticleSentencesExtractor.INSTANCE
        self.logger = ProcessSpiderData.get_logger()

        spider = Data.objects.get(name=spider_name)
        source = spider.source

        if spider and len(source):
            backup_created_date = spider.created_date
            # Timestamped name so the alias can be swapped atomically below.
            index_new = '%s_%d' % (spider.name, int(time.time()))

            # create new index (not connected to alias)
            # NOTE(review): `mapping` is a module-level name defined outside
            # this excerpt.
            self.elastic.create_index(index_new)
            self.elastic.put_mapping('job', {'job':{'properties':mapping}}, index_new)

            for item in source:
                # Extract article text, then geocode the item's city (if any).
                item = self._process_content(item)
                item = self._get_location(item)

                if item.has_key('city'):
                    cities.append(item['city'])

                self._create_index(index_new, item)
                backup_source.append(item)

            # save new index (in bulk): flush pyes' buffered index operations
            self.elastic.force_bulk()

            # repoint the alias from the old indices to the new one
            indices_old = self.elastic.get_alias(spider.name)
            self.elastic.set_alias(spider.name, [index_new])

            # delete all indices previously behind the alias
            for index in indices_old:
                self.elastic.delete_index_if_exists(index)

            # optimize (segment merge) and refresh the new index
            self.elastic.optimize(index_new, refresh=True)

        # save backup (currently processed data)
        if len(backup_source) and backup_created_date:
            self._process_cities(set(cities), spider_name)
            cache.clear()

            # get_or_create returns an (object, created) tuple; only the
            # object is used.
            obj = DataBackup.objects.get_or_create(
                name=spider_name,
                created_date=backup_created_date
            )

            # Store the backup as hex-encoded bz2-compressed JSON.
            obj[0].source = binascii.hexlify(bz2.compress(
                JSONEncoder().encode(backup_source)
            ))

            obj[0].save()

        # force java & ES garbage collection
        self.elastic.connection.close()
        del self.extractor
        del java

        return True

    def _process_content(self, item):
        """Replace item['content'] with extracted article sentences; no-op when empty."""
        content = item['content']
        if content:
            item['content'] = self.extractor.getText(jpype.JString(content))
        return item

    def _get_location(self, item):
        """Geocode item['city'] (when present) into item['pin'] lat/lon.

        Best-effort: any geocoding failure (network error, no result,
        unexpected response shape) leaves the item unchanged.
        """
        if 'city' not in item:
            return item

        try:
            geo = geocoders.GeoNames()
            places = geo.geocode(item['city'].encode('utf-8'), exactly_one=False)

            if places:
                # geopy returns either one (place, (lat, lon)) pair or a
                # list of them when exactly_one=False.
                place, (lat, lon) = places[0] if isinstance(places, list) else places
                if place:
                    item['pin'] = {
                        'location': {'lat': lat, 'lon': lon}
                    }
        except Exception:
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; still deliberately best-effort.
            pass
        return item

    def _create_index(self, index, item):
        id = item['id']
        del item['id']

        try:
            self.elastic.get(index, 'job', id)
        except ElasticSearchException:
            self.elastic.index(
                dumps(item, cls=DjangoJSONEncoder),
#.........这里部分代码省略.........
开发者ID:mkramb,项目名称:zaposlim.se,代码行数:103,代码来源:process.py


注:本文中的pyes.ES.force_bulk方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。