當前位置: 首頁>>代碼示例>>Python>>正文


Python template.TemplateMaker類代碼示例

本文整理匯總了Python中scrapely.template.TemplateMaker的典型用法代碼示例。如果您正苦於以下問題:Python TemplateMaker類的具體用法?Python TemplateMaker怎麼用?Python TemplateMaker使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了TemplateMaker類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_annotations

 def test_annotations(self):
     # Annotate with best_match=False, then expect two "field1" annotation
     # entries to be reported for the fixture page.
     maker = TemplateMaker(self.PAGE)
     maker.annotate("field1", best_match("text to annotate"), best_match=False)
     # Only the first element of each item returned by annotations() is compared.
     found = []
     for entry in maker.annotations():
         found.append(entry[0])
     expected = [
         {u"annotations": {u"content": u"field1"}},
         {u"annotations": {u"content": u"field1"}},
     ]
     self.assertEqual(found, expected)
開發者ID:netconstructor,項目名稱:scrapely,代碼行數:7,代碼來源:test_template.py

示例2: test_annotations

 def test_annotations(self):
     # With best_match=False the fixture page is expected to end up with two
     # 'field1' annotations.
     template_maker = TemplateMaker(self.PAGE)
     scorer = best_match('text to annotate')
     template_maker.annotate('field1', scorer, best_match=False)
     field1_annotation = {u'annotations': {u'content': u'field1'}}
     # Keep only the first element of every item yielded by annotations().
     reported = [item[0] for item in template_maker.annotations()]
     self.assertEqual(reported, [field1_annotation, dict(field1_annotation)])
開發者ID:scrapy,項目名稱:scrapely,代碼行數:7,代碼來源:test_template.py

示例3: test_selected_data

 def test_selected_data(self):
     # Select fragments by score function and compare the markup that
     # selected_data() returns for each selected index.
     maker = TemplateMaker(self.PAGE)
     expected = [
         u'<p>Some text to annotate here</p>',
         u'<p>Another text to annotate there</p>',
     ]
     positions = maker.select(best_match('text to annotate'))
     rendered = []
     for position in positions:
         rendered.append(maker.selected_data(position))
     self.assertEqual(rendered, expected)
開發者ID:scrapy,項目名稱:scrapely,代碼行數:7,代碼來源:test_template.py

示例4: test_annotate_ignore_unpaired

 def test_annotate_ignore_unpaired(self):
     # Annotate the text around an unpaired <img /> tag, build an extractor
     # from the resulting template and extract from the same page.
     maker = TemplateMaker(self.PAGE)
     maker.annotate('field1', best_match("and that's"), best_match=False)
     template = maker.get_template()
     extractor = InstanceBasedLearningExtractor([(template, None)])
     extracted = extractor.extract(self.PAGE)[0]
     expected = [
         {u'field1': [u"More text with unpaired tag <img />and that's it"]},
     ]
     self.assertEqual(extracted, expected)
開發者ID:scrapy,項目名稱:scrapely,代碼行數:7,代碼來源:test_template.py

示例5: test_annotate_multiple

 def test_annotate_multiple(self):
     # Annotate with best_match=False and check that extraction from the
     # same page returns both expected 'field1' values.
     maker = TemplateMaker(self.PAGE)
     maker.annotate('field1', best_match('text to annotate'), best_match=False)
     # The extractor is built from (template, None) pairs.
     template_pairs = [(maker.get_template(), None)]
     extractor = InstanceBasedLearningExtractor(template_pairs)
     first_result = extractor.extract(self.PAGE)[0]
     expected_values = [
         u'Some text to annotate here',
         u'Another text to annotate there',
     ]
     self.assertEqual(first_result, [{u'field1': expected_values}])
開發者ID:scrapy,項目名稱:scrapely,代碼行數:7,代碼來源:test_template.py

示例6: test_annotate_multiple

 def test_annotate_multiple(self):
     # Annotate with best_match=False, train an extractor on the single
     # resulting template and check both "field1" values come back.
     template_maker = TemplateMaker(self.PAGE)
     scorer = best_match("text to annotate")
     template_maker.annotate("field1", scorer, best_match=False)
     # This variant passes the bare template rather than a (template, None) pair.
     extractor = InstanceBasedLearningExtractor([template_maker.get_template()])
     extracted = extractor.extract(self.PAGE)[0]
     wanted = [
         {
             u"field1": [
                 u"Some text to annotate here",
                 u"Another text to annotate there",
             ]
         }
     ]
     self.assertEqual(extracted, wanted)
開發者ID:netconstructor,項目名稱:scrapely,代碼行數:8,代碼來源:test_template.py

示例7: do_al

 def do_al(self, template_id):
     """al <template> - list annotations"""
     # A truthy result from assert_or_print is treated as "argument missing":
     # stop without loading anything.
     if assert_or_print(template_id, "missing template id"):
         return
     t = self._load_template(template_id)
     tm = TemplateMaker(t)
     # Each item of annotations() is unpacked as (annotation, index); the index
     # is passed to selected_data() to fetch the annotated markup.
     for n, (a, i) in enumerate(tm.annotations()):
         # A single parenthesised argument prints identically with the
         # Python 2 print statement and the Python 3 print() function.
         print("[%s-%d] (%s) %r" % (template_id, n, a['annotations']['content'],
             remove_annotation(tm.selected_data(i))))
開發者ID:esimionato,項目名稱:scrapely,代碼行數:9,代碼來源:tool.py

示例8: do_t

 def do_t(self, line):
     """ts <template> <text> - test selection text"""
     # NOTE(review): the help text above says "ts" while the method is do_t;
     # confirm the intended command name.
     # Split on the first space only. If there is no space the criteria are
     # missing: show the usage text instead of raising ValueError on unpacking.
     template_id, separator, criteria = line.partition(' ')
     if not separator:
         print(self.do_t.__doc__)
         return
     t = self._load_template(template_id)
     criteria = parse_criteria(criteria)
     tm = TemplateMaker(t)
     selection = apply_criteria(criteria, tm)
     for n, i in enumerate(selection):
         # A single parenthesised argument prints identically with the
         # Python 2 print statement and the Python 3 print() function.
         print("[%d] %r" % (n, remove_annotation(tm.selected_data(i))))
開發者ID:esimionato,項目名稱:scrapely,代碼行數:9,代碼來源:tool.py

示例9: train_from_htmlpage

 def train_from_htmlpage(self, htmlpage, data):
     """Annotate ``htmlpage`` with the values in ``data`` and add the template.

     ``data`` maps a field name to either one value or an iterable of
     values. A lone string/bytes object or any non-iterable value is
     treated as a single value. Every value is passed through
     ``str_to_unicode`` with the page encoding and annotated using
     ``best_match``; the finished template is handed to ``add_template``.

     Fails with ``AssertionError`` when ``data`` is empty.
     """
     assert data, "Cannot train with empty data"
     maker = TemplateMaker(htmlpage)
     for field, values in data.items():
         # Strings and bytes are iterable but must count as one value.
         is_single = (isinstance(values, (bytes, str))
                      or not hasattr(values, '__iter__'))
         candidates = [values] if is_single else values
         for raw in candidates:
             text = str_to_unicode(raw, htmlpage.encoding)
             maker.annotate(field, best_match(text))
     template = maker.get_template()
     self.add_template(template)
開發者ID:CodeOps,項目名稱:scrapely,代碼行數:11,代碼來源:__init__.py

示例10: train_from_htmlpage

 def train_from_htmlpage(self, htmlpage, data):
     """Annotate ``htmlpage`` with the values in ``data`` and add the template.

     ``data`` maps a field name to either one value or an iterable of
     values. Byte strings are decoded with ``htmlpage.encoding`` (falling
     back to UTF-8 when that is falsy) before being annotated using
     ``best_match``. The finished template is handed to ``add_template``.

     Fails with ``AssertionError`` when ``data`` is empty.
     """
     assert data, "Cannot train with empty data"
     tm = TemplateMaker(htmlpage)
     for field, values in data.items():
         # On Python 3 str and bytes define __iter__, so the hasattr test
         # alone would iterate a lone string character by character. Check
         # for them explicitly; on Python 2 this is equivalent to the
         # hasattr test because strings have no __iter__ there.
         if (isinstance(values, (bytes, str)) or
                 not hasattr(values, '__iter__')):
             values = [values]
         for value in values:
             # Only byte strings need decoding. ``bytes`` is ``str`` on
             # Python 2, and text ``str`` has no decode() on Python 3.
             if isinstance(value, bytes):
                 value = value.decode(htmlpage.encoding or 'utf-8')
             tm.annotate(field, best_match(value))
     self.add_template(tm.get_template())
開發者ID:1060460048,項目名稱:scrapely,代碼行數:11,代碼來源:__init__.py

示例11: train

 def train(self, url=None, data=None, html=None, encoding='utf-8'):
     """Build a template from a page annotated with ``data`` and store it.

     The page is obtained from ``self._get_page(url, encoding, html)``.
     ``data`` maps a field name to either one value or an iterable of
     values. Byte strings are decoded with ``encoding`` before being
     annotated using ``best_match``. The finished template is appended to
     ``self.templates``.

     Fails with ``AssertionError`` when ``data`` is empty.
     """
     assert data, "Cannot train with empty data"
     page = self._get_page(url, encoding, html)
     tm = TemplateMaker(page)
     for field, values in data.items():
         # On Python 3 str and bytes define __iter__, so the hasattr test
         # alone would iterate a lone string character by character. Check
         # for them explicitly; on Python 2 this is equivalent to the
         # hasattr test because strings have no __iter__ there.
         if (isinstance(values, (bytes, str)) or
                 not hasattr(values, '__iter__')):
             values = [values]
         for value in values:
             # Only byte strings need decoding. ``bytes`` is ``str`` on
             # Python 2, and text ``str`` has no decode() on Python 3.
             if isinstance(value, bytes):
                 value = value.decode(encoding)
             tm.annotate(field, best_match(value))
     self.templates.append(tm.get_template())
開發者ID:carriercomm,項目名稱:scraperwiki-scraper-vault,代碼行數:12,代碼來源:scrapely-hack.py

示例12: do_annotate

    def do_annotate(self, line):
        """annotate <template_id> <data> [-n number] [-f field]- add or test annotation (aliases: a, t)

        Add a new annotation (if -f is passed) or test what would be annotated
        otherwise
        """
        # Without a space there is no criteria part after the template id.
        if ' ' not in line:
            print("You must provide a valid template identifier (check output of ls_templates)")
            print(IblTool.do_annotate.__doc__)
            return
        template_id, criteria = line.split(' ', 1)
        t = self._load_template(template_id)
        if not t:
            return
        criteria = self._parse_criteria(criteria)
        tm = TemplateMaker(t)
        selection = apply_criteria(criteria, tm)
        if criteria.field:
            # Only the first selected fragment is ever annotated. Do it once
            # instead of re-annotating, re-saving and re-printing that same
            # fragment once per match. An empty selection is a no-op.
            # NOTE(review): confirm that annotating only the first match
            # (rather than every match) is the intended behaviour.
            for index in selection[:1]:
                tm.annotate_fragment(index, criteria.field)
                self._save_template(template_id, tm.get_template())
                print("[new] (%s) %r" % (criteria.field,
                    remove_annotation(tm.selected_data(index))))
        else:
            # Test mode: list what the criteria select without modifying the
            # template.
            for n, i in enumerate(selection):
                print("[%d] %r" % (n, remove_annotation(tm.selected_data(i))))
開發者ID:CodeOps,項目名稱:scrapely,代碼行數:27,代碼來源:tool.py

示例13: do_a

 def do_a(self, line):
     """a <template> <data> [-n number] [-f field]- add or test annotation
     
     Add a new annotation (if -f is passed) or test what would be annotated
     otherwise
     """
     # Split on the first space only. If there is no space the criteria are
     # missing: show the usage text instead of raising ValueError on unpacking.
     template_id, separator, criteria = line.partition(' ')
     if not separator:
         print(self.do_a.__doc__)
         return
     t = self._load_template(template_id)
     criteria = parse_criteria(criteria)
     tm = TemplateMaker(t)
     selection = apply_criteria(criteria, tm)
     if criteria.field:
         # Only the first selected fragment is ever annotated. Do it once
         # instead of re-annotating, re-saving and re-printing that same
         # fragment once per match. An empty selection is a no-op.
         # NOTE(review): confirm that annotating only the first match
         # (rather than every match) is the intended behaviour.
         for index in selection[:1]:
             tm.annotate_fragment(index, criteria.field)
             self._save_template(template_id, tm.get_template())
             # A single parenthesised argument prints identically with the
             # Python 2 print statement and the Python 3 print() function.
             print("[new] (%s) %r" % (criteria.field,
                 remove_annotation(tm.selected_data(index))))
     else:
         # Test mode: list what the criteria select without modifying the
         # template.
         for n, i in enumerate(selection):
             print("[%d] %r" % (n, remove_annotation(tm.selected_data(i))))
開發者ID:esimionato,項目名稱:scrapely,代碼行數:21,代碼來源:tool.py

示例14: _load_annotations

 def _load_annotations(self, template_id):
     """Return the first element of every annotation entry of a stored template.

     The template is loaded via ``self._load_template(template_id)`` and
     wrapped in a ``TemplateMaker`` to enumerate its annotations.
     """
     maker = TemplateMaker(self._load_template(template_id))
     collected = []
     for entry in maker.annotations():
         collected.append(entry[0])
     return collected
開發者ID:esimionato,項目名稱:scrapely,代碼行數:4,代碼來源:tool.py

示例15: test_selected_data

 def test_selected_data(self):
     # Select fragments by score function and compare the markup that
     # selected_data() returns for each selected index.
     template_maker = TemplateMaker(self.PAGE)
     wanted = [
         u"<p>Some text to annotate here</p>",
         u"<p>Another text to annotate there</p>",
     ]
     chosen = template_maker.select(best_match("text to annotate"))
     fragments = []
     for position in chosen:
         fragments.append(template_maker.selected_data(position))
     self.assertEqual(fragments, wanted)
開發者ID:netconstructor,項目名稱:scrapely,代碼行數:5,代碼來源:test_template.py


注:本文中的scrapely.template.TemplateMaker類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。