

Python scraper.Scraper Class Code Examples

This article collects typical usage examples of the scraper.Scraper class in Python. If you are wondering what the Scraper class does, how to use it, or what real-world Scraper code looks like, the curated class examples below may help.


A total of 15 code examples of the Scraper class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
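
Note that these examples come from different open-source projects, so the Scraper constructor signature and method names vary from example to example. The minimal sketch below only illustrates the shared construct-then-scrape pattern; the single-URL constructor and the scrape() method used here are assumptions for illustration and do not match any one project's API exactly.

    from scraper import Scraper

    # Hypothetical usage: build a scraper for one page and collect its result.
    scraper = Scraper('http://example.com/')  # constructor arguments differ per project below
    result = scraper.scrape()                 # other projects expose scrap(), run(), or find_docs() instead
    print(result)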

Example 1: execute

 def execute(self):
     myScraper = Scraper(self.url,self.matchingDict)
     result = myScraper.scrape()
     if self.target is None:
         return result
     else:
         self.target(result, self.url)
Developer: PhilipFraDIKU, Project: Projects-in-Stock, Lines: 7, Source: adapter.py

Example 2: test_find_docs

def test_find_docs():

    declare_test_start( 'follow_link' ) 

    url_data = {
        'url_id': 1,
        'target_url': 'http://timduffy.me/',
        'max_link_level': 6,
        'creation_date_time': str(datetime.datetime.now()),
        'doc_type': 'application/pdf',
        'dispatch_datetime': str(datetime.datetime.now()),
        'allowed_domains': [],
    }

    uid = str(uuid.uuid4())
    scraper = Scraper(uid)
    scraper.set_url_data(url_data)
    docs = scraper.find_docs( )

    print '[ TEST ] {0}'.format(json.dumps(scraper.status))
    print '[ TEST ] {0}'.format(json.dumps(docs))

    passed = False
    if len(docs) > 0:
        passed = True

    declare_test_end( passed )
Developer: reustonium, Project: BarkingOwl, Lines: 27, Source: tests.py

Example 3: testExtractTag

 def testExtractTag(self):
     pattern = "<a name='$name'></a>"
     _scraper = Scraper(pattern)
     exp = BeautifulSoup(pattern)
     
     # one attribute
     actual = BeautifulSoup("<a name='abc'></a>")
     self.assertEqual('abc', _scraper.extractTag(exp.contents[0], actual.contents[0])['name'])
     
     # extra attribute on the actual tag is ignored
     actual = BeautifulSoup("<a name='abc' age='27'></a>")
     self.assertEqual('abc', _scraper.extractTag(exp.contents[0], actual.contents[0])['name'])
     
     # two attributes
     pattern = "<a name='$name' age='$age'></a>"
     exp = BeautifulSoup(pattern)
     actual = BeautifulSoup("<a name='abc' age='27'></a>")
     ret =  _scraper.extractTag(exp.contents[0], actual.contents[0])
     self.assertEqual(2, len(ret))
     self.assertEqual('abc', ret['name'])
     self.assertEqual('27', ret['age'])
     
     # get attribute from sub tag
     pattern = "<a><b name='$name'></b></a>"
     exp = BeautifulSoup(pattern)
     
     # one attribute
     actual = BeautifulSoup("<a><b name='abc'></b></a>")
     self.assertEqual('abc', _scraper.extractTag(exp.contents[0], actual.contents[0])['name'])
Developer: Zacchy, Project: nickcheng-python, Lines: 29, Source: scraper_test.py

Example 4: main

def main(request):
	username = request.POST['username']
	password = request.POST['password']
	scraper = Scraper(username, password)
	data = scraper.scrap(True)

	return HttpResponse(obj2json(data), mimetype='application/json')
Developer: castelanjr, Project: uNotas, Lines: 7, Source: views.py

Example 5: create_job

def create_job():
    worker = Scraper(FREQUENCY, TAB, UNIT, FINAL_YEAR)
    while True:
        item = q.get()
        worker.do_work(item)
        print(item + ' is downloaded | ' + str(q.qsize()) + ' item(s) left')
        q.task_done()
Developer: nguyenminhquan, Project: BVS_Scraper, Lines: 7, Source: main.py

Example 6: scrape

def scrape():
    scraper = Scraper(**get_creds())

    #   Fetch usage info re: boosters.
    le = UsageDataPoint(
        time=datetime.datetime.utcnow(),
        **scraper.fetch_booster_usage()
    )

    db_session.add(le)
    yield le

    #   Fetch latest transactions and put these in the DB,
    #   but only if we don't already have them.
    for transaction in scraper.fetch_most_recent_transactions():
        existing = KoodoTransaction \
            .query \
            .filter_by(koodo_id=transaction['koodo_id']) \
            .first()
        if not existing:
            kt = KoodoTransaction(**transaction)
            db_session.add(kt)
            yield kt

    db_session.commit()
Developer: psobot, Project: koodo-prepaid-api, Lines: 25, Source: koodo.py

Example 7: scrape

def scrape(request, tvdb_id):
    """
    Takes a scrape request, constructs a Scraper object and performs a scrape for the show if it hasn't been scraped
    before or hasn't been scraped within the last :math:`x` days (where :math:`x` is the number of days specified by
    RESCRAPE_AFTER). Otherwise if the show exists and has been scraped within the last :math:`x` days redirect to the
    appropriate show page

    :param request: A Scrape request object.
    :param tvdb_id: The id of the tv show to be scraped (or shown)
    :return: A HttpResponse Object containing the page of the show requested.
    """

    # Determine if the show already exists in the datastore
    q = TVShow.get_by_key_name(tvdb_id)

    if users.is_current_user_admin() and 'force' in request.GET and request.GET['force'] == '1':
        Scraper(tvdb_id, rescrape=True, options=q.options)
        return HttpResponseRedirect('/show/{0}'.format(q.url_string))

    # Check if the show has been scraped before and if that scrape was in the last x days specified by RESCRAPE_AFTER
    if q and q.last_scraped > datetime.now() - timedelta(days=RESCRAPE_AFTER):
        url_slug = q.url_string
    else:
        # If scraping is switched on then scrape the show
        if settings.SCRAPING:
            s = Scraper(tvdb_id)
            url_slug = s.get_url_slug()
        else:
            url_slug = tvdb_id

    return HttpResponseRedirect('/show/{0}'.format(url_slug))
Developer: jako218, Project: comp3001, Lines: 32, Source: views.py

Example 8: save_info_from

def save_info_from(href, data_dir):

    # initialize child destination
    scrap = Scraper(href)
    dest = scrap.create_destination()
    dest.children_href = scrap.get_children()

    # check if we have already crawled this area
    OBJECT_OUTFILE = data_dir + dest.nickname + '.pickle'
    if os.path.exists(OBJECT_OUTFILE):
        print dest.nickname + ' has already been crawled'
        pass
    else:
        if not os.path.isdir(os.path.dirname(OBJECT_OUTFILE)):
            os.makedirs(os.path.dirname(OBJECT_OUTFILE))

        # traverse tree of areas-->routes
        all_dest = traverse(dest)
        # returns destination object

        # write out to file.. for viz??
        BIG_JSON = data_dir + dest.nickname + '.json'
        with open(BIG_JSON, 'w+') as dump:
            flat = json.dumps(all_dest, default=lambda o: o.__dict__)
            dump.write(flat)

        # save destination object as pickle
        BIG_PICKLE = data_dir + dest.nickname + '.pickle'
        with open(BIG_PICKLE, 'wb') as handle:
            pickle.dump(all_dest, handle)

        flourish = '<<<'+'-'*25
        print flourish + dest.nickname + flourish[::-1]
        print
Developer: TheAdamEvans, Project: RouteRobot, Lines: 34, Source: obj_scrape.py

Example 9: traverse

def traverse(node):
    """ Pre-order depth-first search of Mountain Project tree """

    children = []
    for href in node.children_href:
        # initialize Scraper for this page
        scrap = Scraper(href)
        if scrap.soup is None:
            pass
        else:
            # grab features from the soup
            dest = scrap.create_destination()
            # find children in the soup if any
            dest.children_href = scrap.get_children()
            # recursively deeper down the tree if this is an area
            if dest.children_href is not None:
                print
                print '**'+dest.nickname+'**'
                traverse(dest)
            # inner traverse function has returned with destination object
            print dest.nickname + ' | ' + dest.href
            children.append(dest)

    node.children = children
    return node
Developer: TheAdamEvans, Project: RouteRobot, Lines: 25, Source: obj_scrape.py

Example 10: __init__

	def __init__(self, base_url = awards_base_url, search_url = ""):
		Scraper.__init__(self, base_url, search_url)
		self.file = open('academy_awards.csv', 'wb')
		self.writer = csv.writer(self.file, delimiter='\t')
		self.writer.writerow(['Year', 'Category', 'Won', 'FilmName', 'ActorDirectorName'])
		self.soup = self.connect(base_url)
		self.next_record = '1'
Developer: jercoh, Project: project_luther, Lines: 7, Source: awards_scraper.py

Example 11: main

def main():
    uid = str(uuid.uuid4())

    print "Creating Scraper() instance ..."

    scraper = Scraper(uid)
    scraper.run()

    print "Running tests ..."

    # typelink()
    test_typelink(scraper)    

    # checkmatch()
    test_checkmatch(scraper)

    # getpagelinks
    test_getpagelinks(scraper)

    # followlinks()
    test_followlinks(scraper)

    # get scraper status
    text_getstatus(scraper)

    scraper.stop()

    print "Done."
Developer: citruspi, Project: BarkingOwl, Lines: 28, Source: testscraper.py

Example 12: scrape_all

def scrape_all(root_href, data_dir):
    """ Scrape Mountain Project and save Destination objects """
    
    scrap = Scraper(root_href)

    # iterate over children of the root (e.g. states in the US)
    for href in scrap.get_children():
        save_info_from(href, data_dir)
Developer: TheAdamEvans, Project: RouteRobot, Lines: 8, Source: obj_scrape.py

Example 13: testExtractText

 def testExtractText(self):
     pattern = "<a>$text</a>"
     _scraper = Scraper(pattern)
     exp = BeautifulSoup(pattern)
     
     # one text
     actual = BeautifulSoup("<a>hello world</a>")
     self.assertEqual('hello world', _scraper.extractText(exp.contents[0], actual.contents[0])['text'])
Developer: Zacchy, Project: nickcheng-python, Lines: 8, Source: scraper_test.py

Example 14: run

 def run(self):
     try:
         print "y"
         s = Scraper('en')
         #s.getCategory(self.cat)
         s.getGameList(self.cat, endPage=0)
     except:
         print "n"
Developer: mrbahrani, Project: scraper, Lines: 8, Source: update_db.py

Example 15: testExtract

 def testExtract(self):
     pattern = "<a name='$name'>$text</a>"
     _scraper = Scraper(pattern)
     exp = BeautifulSoup(pattern)
     
     # text in sub tag        
     actual = BeautifulSoup("<a name='abc'>hello world</a>")
     ret = _scraper.extract(actual.contents[0])
     self.assertEqual('hello world', ret['text'])
Developer: Zacchy, Project: nickcheng-python, Lines: 9, Source: scraper_test.py


Note: The scraper.Scraper class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use should follow the corresponding project licenses. Please do not reproduce without permission.