当前位置: 首页>>代码示例>>Python>>正文


Python extraction.SlybotIBLExtractor类代码示例

本文整理汇总了Python中slybot.plugins.scrapely_annotations.extraction.SlybotIBLExtractor的典型用法代码示例。如果您正苦于以下问题:Python SlybotIBLExtractor类的具体用法?Python SlybotIBLExtractor怎么用?Python SlybotIBLExtractor使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了SlybotIBLExtractor类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_extractor_w_empty_string_extraction

    def test_extractor_w_empty_string_extraction(self):
        """Check 'name' extraction when a digits-only regex is bound to 'gender'.

        A descriptor with an optional 'gender' and a required 'name' text
        field is built, extractor 1 (regex "([0-9]+)") is applied to
        'gender', and the first item extracted from ``self.target2`` must
        carry ``[u'Name Olivia']`` under 'name'.
        """
        gender_field = {'required': False, 'type': 'text', 'vary': False}
        name_field = {'required': True, 'type': 'text', 'vary': False}
        item_descriptor = create_slybot_item_descriptor(
            {'fields': {'gender': gender_field, 'name': name_field}})

        digits_only = {1: {"regular_expression": "([0-9]+)"}}
        apply_extractors(item_descriptor, {"gender": [1]}, digits_only)

        # Each template entry is (template page, descriptors, version string).
        templates = [
            (self.template2, {'#default': item_descriptor}, '0.12.0'),
        ]
        extracted_items = SlybotIBLExtractor(templates).extract(self.target2)[0]
        self.assertEqual(extracted_items[0]['name'], [u'Name Olivia'])
开发者ID:FFFFFurry,项目名称:portia,代码行数:26,代码来源:test_extractors.py

示例2: test_extract_missing_schema

 def test_extract_missing_schema(self):
     """Extract with an empty descriptor mapping and expect raw markup.

     With ``{}`` passed as descriptors, the second extracted item must hold
     the same raw HTML snippet under 'full_name', 'first_name' and
     'last_name'.
     """
     expected_markup = (
         '<span itemprop="name">'
         '<span itemprop="givenName">Joe</span>'
         ' '
         '<span itemprop="familyName">Smith</span>'
         '</span>'
     )
     templates = [(sample_411, {}, '0.13.0')]
     items = SlybotIBLExtractor(templates).extract(page_411)[0]
     second_item = items[1]
     for field_name in ('full_name', 'first_name', 'last_name'):
         self.assertEqual(second_item[field_name], [expected_markup])
开发者ID:daqv,项目名称:portia-dashboard,代码行数:8,代码来源:test_multiple_item_extraction.py

示例3: test_per_annotation_extractors

 def test_per_annotation_extractors(self):
     """Check extraction when extractors are attached per annotation.

     Eight extractors (type extractors and regular expressions) are added
     to the default descriptor via ``add_extractors_to_descriptors``; the
     first item extracted from ``self.target3`` (with its '_template' key
     removed) must equal the expected name/url/title/price/date mapping.
     """
     schema = {
         'fields': {
             'url': {
                 'required': False,
                 'type': 'text',
                 'vary': False,
             },
             'name': {
                 'required': True,
                 'type': 'text',
                 'vary': False,
             }
         }
     }
     # Regex patterns are raw strings so that "\." and "\d" reach the regex
     # engine untouched instead of relying on invalid string escapes.
     extractors = {
         '1': {
             'type_extractor': 'url'
         },
         '2': {
             'regular_expression': r'(.*)\.html'
         },
         '3': {
             'regular_expression': 'Name: (.*)'
         },
         '4': {
             'type_extractor': 'text'
         },
         '5': {
             'type_extractor': 'price'
         },
         '6': {
             'type_extractor': 'number'
         },
         '7': {
             'type_extractor': 'date'
         },
         '8': {
             'regular_expression': r'(\d+)-'
         }
     }
     descriptors = {'#default': create_slybot_item_descriptor(schema)}
     add_extractors_to_descriptors(descriptors, extractors)
     ibl_extractor = SlybotIBLExtractor([
         (self.template3, descriptors, '0.13.0')
     ])
     result = {'name': [u'Olivia'], 'url': [u'http://www.test.com/olivia'],
               'title': [u'Name: Olivia'], 'price': [u'2016'],
               'date': [datetime(2016, 3, 17, 20, 25)]}
     data = ibl_extractor.extract(self.target3)[0][0]
     # '_template' is dropped before comparing against the expected fields.
     del data['_template']
     self.assertEqual(data, result)
开发者ID:01-,项目名称:portia,代码行数:52,代码来源:test_extractors.py

示例4: test_default_type_extractor

    def test_default_type_extractor(self):
        """Extract 'gender' with a regex although the schema declares no fields.

        The descriptor is created from an empty 'fields' mapping; extractor 1
        is then applied to 'gender' and the first item extracted from
        ``self.target`` must contain ``[u'Male']``.
        """
        item_descriptor = create_slybot_item_descriptor({'fields': {}})
        gender_regex = {"regular_expression": "Gender\\s+(Male|Female)"}
        apply_extractors(item_descriptor, {"gender": [1]}, {1: gender_regex})

        templates = [(self.template, {'#default': item_descriptor}, '0.12.0')]
        extraction = SlybotIBLExtractor(templates).extract(self.target)
        first_item = extraction[0][0]
        self.assertEqual(first_item['gender'], [u'Male'])
开发者ID:FFFFFurry,项目名称:portia,代码行数:13,代码来源:test_extractors.py

示例5: test_required_annotation

 def test_required_annotation(self):
     """Compare extraction of ``page_daft`` with two different templates.

     With ``sample_daft`` 5 items are expected, each containing 'ber',
     'address' and 'price_change'.  With ``sample_daft_no_requireds``
     8 items are expected, each containing 'ber' and 'address', and at
     least one of them lacking 'price_change'.
     """
     extractor = SlybotIBLExtractor([(sample_daft, {}, '0.13.0')])
     data = extractor.extract(page_daft)[0]
     self.assertEqual(len(data), 5)
     # TestCase assertions are used instead of bare ``assert`` so the
     # checks still run when Python is started with -O.
     self.assertTrue(all('ber' in house for house in data))
     self.assertTrue(all('address' in house for house in data))
     self.assertTrue(all('price_change' in house for house in data))
     extractor = SlybotIBLExtractor([(sample_daft_no_requireds, {},
                                      '0.13.0')])
     data = extractor.extract(page_daft)[0]
     self.assertEqual(len(data), 8)
     self.assertTrue(all('ber' in house for house in data))
     self.assertTrue(all('address' in house for house in data))
     self.assertTrue(any('price_change' not in house for house in data))
开发者ID:FrankieChan885,项目名称:portia,代码行数:14,代码来源:test_multiple_item_extraction.py

示例6: test_required_annotation

 def test_required_annotation(self):
     """Check item counts and non-empty 'rank'/'description' on two pages.

     The same extractor is run against ``target1`` (10 items expected) and
     ``target2`` (5 items expected); every item must have truthy 'rank' and
     'description' values.
     """
     templates = [(simple_template, simple_descriptors, '0.13.0')]
     ibl_extractor = SlybotIBLExtractor(templates)

     def check_page(page, expected_count):
         # extract() is unpacked as a pair; only the first element is used.
         items, _ = ibl_extractor.extract(page)
         self.assertEqual(len(items), expected_count)
         for field_name in ('rank', 'description'):
             self.assertTrue(all(field_name in item and item[field_name]
                                 for item in items))

     check_page(target1, 10)
     check_page(target2, 5)
开发者ID:daqv,项目名称:portia-dashboard,代码行数:14,代码来源:test_multiple_item_extraction.py

示例7: test_extract_single_attribute_to_multiple_fields

 def test_extract_single_attribute_to_multiple_fields(self):
     """Check that extracted fields are keyed by their schema 'name'.

     'first_name' and 'last_name' declare the display names u'prénom' and
     'nom'; the second item extracted from ``page_411`` must expose
     u'Joe Smith' under 'full_name', u'Joe' under u'prénom' and u'Smith'
     under 'nom'.
     """
     # Raw strings keep "\s" as a regex whitespace class rather than an
     # invalid string escape.
     extractors = {'1': {'regular_expression': r'(.*)\s'},
                   '2': {'regular_expression': r'\s(.*)'}}
     descriptors = {'#default': create_slybot_item_descriptor({'fields': {
         'full_name': {'type': 'text', 'required': False, 'vary': False},
         'first_name': {'type': 'text', 'required': False, 'vary': False,
                        'name': u'prénom'},
         'last_name': {'type': 'text', 'required': False, 'vary': False,
                       'name': 'nom'},
         'address': {'type': 'text', 'required': False, 'vary': False}}})}
     add_extractors_to_descriptors(descriptors, extractors)
     extractor = SlybotIBLExtractor([(sample_411, descriptors, '0.13.0')])
     data = extractor.extract(page_411)[0]
     self.assertEqual(data[1]['full_name'], [u'Joe Smith'])
     self.assertEqual(data[1][u'prénom'], [u'Joe'])
     self.assertEqual(data[1]['nom'], [u'Smith'])
开发者ID:FrankieChan885,项目名称:portia,代码行数:16,代码来源:test_multiple_item_extraction.py

示例8: test_text_type_w_regex

    def test_text_type_w_regex(self):
        """Apply a regex extractor to a 'text' typed 'gender' field.

        The first item extracted from ``self.target`` must contain
        ``[u'Male']`` under 'gender'.
        """
        gender_field = {'required': False, 'type': 'text', 'vary': False}
        item_descriptor = create_slybot_item_descriptor(
            {"fields": {'gender': gender_field}})

        regex_extractors = {
            1: {"regular_expression": "Gender\\s+(Male|Female)"},
        }
        apply_extractors(item_descriptor, {"gender": [1]}, regex_extractors)

        templates = [(self.template, {'#default': item_descriptor}, '0.12.0')]
        extraction = SlybotIBLExtractor(templates).extract(self.target)
        first_item = extraction[0][0]
        self.assertEqual(first_item['gender'], [u'Male'])
开发者ID:FFFFFurry,项目名称:portia,代码行数:17,代码来源:test_extractors.py

示例9: test_raw_type_w_regex

    def test_raw_type_w_regex(self):
        """Apply a regex extractor to a 'raw' typed 'gender' field.

        The regex captures a ``<td>`` element, and the first item extracted
        from ``self.target`` must contain ``[u'<td >Male</td>']`` under
        'gender'.
        """
        schema = {
            'fields': {
                'gender': {
                    'required': False,
                    'type': 'raw',
                    'vary': False,
                }
            }
        }
        descriptor = create_slybot_item_descriptor(schema)
        # Raw string so "\s" is passed to the regex engine verbatim instead
        # of being an invalid string escape.
        extractors = {
            1: {
                "regular_expression": r"Gender.*(<td\s*>(?:Male|Female)</td>)"
            }
        }
        apply_extractors(descriptor, {"gender": [1]}, extractors)

        ibl_extractor = SlybotIBLExtractor([(self.template, {'#default': descriptor})])
        self.assertEqual(ibl_extractor.extract(self.target)[0][0]['gender'], [u'<td >Male</td>'])
开发者ID:TimoC1982,项目名称:portia,代码行数:18,代码来源:test_extractors.py

示例10: test_extract_missing_schema

 def test_extract_missing_schema(self):
     """Extract with an empty descriptor mapping and expect the full name.

     With ``{}`` passed as descriptors, the second extracted item must hold
     ``[u'Joe Smith']`` under 'full_name', 'first_name' and 'last_name'.
     """
     templates = [(sample_411, {}, '0.13.0')]
     items = SlybotIBLExtractor(templates).extract(page_411)[0]
     for field_name in ('full_name', 'first_name', 'last_name'):
         self.assertEqual(items[1][field_name], [u'Joe Smith'])
开发者ID:FrankieChan885,项目名称:portia,代码行数:6,代码来源:test_multiple_item_extraction.py


注:本文中的slybot.plugins.scrapely_annotations.extraction.SlybotIBLExtractor类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。