当前位置: 首页>>代码示例>>Python>>正文


Python PyQuery.items方法代码示例

本文整理汇总了Python中pyquery.PyQuery.items方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.items方法的具体用法?Python PyQuery.items怎么用?Python PyQuery.items使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyquery.PyQuery的用法示例。


在下文中一共展示了PyQuery.items方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_fund_mapping

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
    def get_fund_mapping(self):
        """Fetch ``self._url`` and build a dict from the table rows on the page.

        Every element matching ``.aa`` and then ``.dd`` is treated as one row;
        the text of its second ``td`` becomes the key and the text of its
        fourth ``td`` the value.  Later rows overwrite earlier ones that share
        a key.

        The original comment lists the row layout as
        ``sub_code, sub_name, main_code, main_name`` -- presumably the four
        ``td`` cells in order; verify against the live page.

        Raises:
            IndexError: if a matched row has fewer than four ``td`` cells.
        """
        raw_page = requests.get(self._url, timeout=30.0).content
        page = PyQuery(unicode(raw_page, 'utf-8'))

        mapping = {}
        for selector in ('.aa', '.dd'):
            for row in page.items(selector):
                cells = [cell.text() for cell in row.items('td')]
                mapping[cells[1]] = cells[3]
        return mapping
开发者ID:xutaoding,项目名称:csf_scraper,代码行数:14,代码来源:fund_mapping.py

示例2: getResTb

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
def getResTb():
    """Collect every ``.row.result`` element of the HTML from ``fileworker.getHTML()``.

    NOTE(review): the function ends after the collection loop -- it returns
    nothing and ``result`` is never filled -- so it looks unfinished.  Only
    the crash in the loop is fixed here; what should be returned is left for
    the author to decide.
    """
    html = fileworker.getHTML()
    pq = PyQuery(html)
    result = dict()
    blocks = list()
    for i in pq.items('.row.result'):
        # The original called ``list.append(i)`` on the builtin type, which
        # raises TypeError instead of storing the element in ``blocks``.
        blocks.append(i)
开发者ID:T10TheKinOfStars,项目名称:crawlerforfun,代码行数:9,代码来源:parser.py

示例3: extract_urls

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
def extract_urls(html):
    """Return the absolute links found in *html* that are not yet in ``seen_urls``.

    Each ``href`` of an ``<a>`` element that is non-empty, starts with
    ``"http"`` and is absent from the module-level ``seen_urls`` is added to
    the returned list and also appended to the module-level ``waitting_urls``.
    ``seen_urls`` itself is not modified here.
    """
    document = PyQuery(html)
    found = []
    for anchor in document.items("a"):
        href = anchor.attr("href")
        if not href:
            continue
        if not href.startswith("http"):
            continue
        if href in seen_urls:
            continue
        found.append(href)
        waitting_urls.append(href)
    return found
开发者ID:ajioy,项目名称:python-ex,代码行数:11,代码来源:aio_http_spider.py

示例4: getItemsByKeyword

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
def getItemsByKeyword(
    keyword,
    encoding='GBK',
    page=1,
    price=None,
):
    """Scrape Taobao search results for *keyword*, sorted by sales (``sale-desc``).

    Args:
        keyword: Search term as a byte string; ``keyword.decode(encoding)`` is
            called on it, so it must support ``.decode``.
        encoding: Encoding of *keyword*.  The term is re-encoded to GBK before
            being placed in the URL.
        page: Number of result pages to fetch.  Page ``i`` is requested with
            the offset ``s = i * 40``.
        price: Optional two-item sequence ``(low, high)`` added to the URL as
            a ``reserve_price`` filter.

    Returns:
        A list with one dict per ``div[nid]`` element that could be parsed,
        with the keys ``rank``, ``keyword``, ``itemName``, ``itemId``,
        ``itemPic``, ``wangwang``, ``userNumId``, ``price``, ``location``,
        ``tradeNum`` and ``rateNum``.  Elements that fail to parse are skipped
        after their traceback is printed.  An empty list is returned when
        ``PyQuery`` is falsy.
    """
    if not PyQuery:
        return []
    items = []
    domain = \
        'http://s.taobao.com/search?q=$Q$&style=list&bcoffset=1&tab=all&cd=false&v=auction&sort=sale-desc&s={0}'
    # NOTE(review): the query is percent-encoded by rewriting the ``\x``
    # escapes of repr(); this relies on the repr() format of byte strings and
    # upper-cases the whole term.  Kept as-is to preserve the generated URLs.
    domain = domain.replace('$Q$',
                            repr(keyword.decode(encoding).
                                 encode('GBK')).
                            replace('\\x', '%').upper()[1:-1])
    domain = domain.replace(' ', '%20')

    # Optional price interval filter.
    if price:
        domain = domain \
            + '&filter=reserve_price%5B{0}%2C{1}%5D'.format(price[0], price[1])

    for page_index in range(page):
        url = domain.format(page_index * 40)
        r = __import__('requests').get(url)
        py = PyQuery(r.content.decode(r.encoding))
        # The original reused ``i`` for both loops, so the rank restarted at 1
        # on every page.  With distinct names the rank continues across pages
        # (identical to the old value on the first page).
        for position, meta_item in enumerate(py.items("div[nid]")):
            try:
                info = {
                    'rank': page_index * 40 + position + 1,
                    'keyword': keyword,
                    'itemName': meta_item('h3 a').attr('title').encode('utf-8'),
                    'itemId': meta_item.attr('nid'),
                    'itemPic': meta_item('img').attr('src'),
                    'wangwang': meta_item('.seller a').html().encode('utf-8').strip(),
                    'userNumId': re.findall(r'id\=(\d+)', meta_item('.seller a').attr('href'))[0],
                    'price': re.findall(r'\d+\.\d*', meta_item('.price').html().encode('utf-8'))[0],
                    'location': (meta_item('.loc div').html() or '').encode('utf-8'),
                    'tradeNum': re.findall(r'\d+', meta_item('.dealing div').html() or '0')[-1],
                    'rateNum': re.findall(r'\d+', (meta_item('.count a').html() or '0').encode('utf-8'))[0],
                }
                items.append(info)
            except Exception:
                # Best effort: report the malformed item and keep going.
                # ``Exception`` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                traceback.print_exc()
        # NOTE: an early exit for pages with fewer than 40 items was disabled
        # in the original; every requested page is always fetched.
    return items
开发者ID:ruige123456,项目名称:dataMining,代码行数:56,代码来源:app.py

示例5: unpack_events

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
def unpack_events(html, args):
    """Parse every ``<tr>`` of *html* with ``__parse_row`` and return the results.

    Args:
        html: Markup handed to ``PyQuery``.
        args: Accepted but not used by this function.

    Returns:
        A list holding the ``__parse_row`` result of each table row, in
        document order.
    """
    document = PyQuery(html)
    logger.debug("Source encoding: {} ".format(document.encoding))
    events = [__parse_row(table_row) for table_row in document.items('tr')]
    logger.debug("Parsed {} entries".format(len(events)))
    return events
开发者ID:patrykgorniak,项目名称:runforfun-parser,代码行数:16,代码来源:eventlist.py

示例6: process_html_file

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
def process_html_file(fi):
    """Extract ``(year, date, title, text)`` tuples from an HTML file with a ``.toc`` index.

    The table of contents (``.toc a``) is read first: a link whose text is
    exactly four characters long starts a new year group, and every other
    link is filed under the most recent year (or under ``'NONE'`` when no
    year has been seen yet).  For each filed link, the elements following the
    ``<p>`` that encloses the link target are scanned until the next element
    containing an ``<a`` tag or the 'End of the Project Gutenberg' marker;
    ``<h2>`` supplies the title, ``<h3>`` the date and ``<p>`` the body text.

    Args:
        fi: Path of the HTML file to read.

    Returns:
        A list of ``(year, date, title, text)`` tuples, one per filed link.
    """
    # ``with`` closes the file even if reading or parsing raises.
    with open(fi, 'r') as f:
        d = PyQuery(f.read())

    year = None
    links = {}
    for a in d.items('.toc a'):
        label = a.text()
        href = a.attr('href')
        if len(label) == 4:
            year = label
            links[year] = []
        else:
            # ``links[year]`` already exists once a year was seen, so
            # setdefault only ever creates the 'NONE' bucket.
            links.setdefault(year if year else 'NONE', []).append((href, label))
        logger.debug('Retrieved data %s %s' % (label, href))

    data = []
    for year, entries in links.items():
        for link_id, link_name in entries:
            logger.info('Getting text at %s' % link_id)
            title = date = text = ''
            for x in d(link_id).parents('p').nextAll().items():
                outer = x.outerHtml()
                logger.debug('X: %s' % outer)
                if '<a' in outer:
                    break
                elif 'End of the Project Gutenberg' in x.text():
                    break
                elif '<h2' in outer:
                    title = x.text()
                elif '<h3' in outer:
                    date = x.text()
                elif '<p' in outer:
                    paragraph = x.text().replace('\n',' ').replace('&#13;','').replace('\r',' ')
                    # Pattern.sub takes (repl, string).  The original passed
                    # the paragraph as the replacement and ' ' as the string
                    # to search, so the pattern never ran over the text.
                    text += RE_CLEAN_TEXT.sub(' ', paragraph)
                else:
                    logger.error('Unrecognized tag: %s' % outer)

            if 'Gutenberg' in text:
                logger.error('%s\n%s' % (title,text))
            logger.debug('\nTitle: %s\nDate: %s\nText: %s' % (title, date, text))
            data.append((year, date, title, text))
    logger.info('Retrieved %d pieces' % len(data))
    return data
开发者ID:dangoldin,项目名称:lincoln-text-analysis,代码行数:48,代码来源:analyze.py

示例7: Start

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
async def Start():
	timestamp = time.time()

	parser = argparse.ArgumentParser(description="BioTC by Bioruebe (https://bioruebe.com), 2014-2019, Version 3.0.0, released under a BSD 3-clause style license.\n\nBioTC is a small application to simplify trading Steam Trading Cards with the SteamCardExchange bot by comparing the user's Steam inventory with the available cards on steamcardexchange.net")
	parser.add_argument("-n", "--name", action="store", type=str, default=None, help="Use specified Steam ID instead of reading it from " + STEAM_ID_FILE_NAME)
	parser.add_argument("-l", "--limit", action="store", type=int, default=-1, help="Stop searching after n sets have been found")
	args = parser.parse_args()

	parser.print_help()
	print("\n-----------------------------------------------------------------------------\n")

	if args.name is None:
		try:
			f = open(STEAM_ID_FILE_NAME)
			args.name = f.read()
		except:
			pass
	if args.name is None:
		sys.exit("Error: Could not read SteamID from file. Make sure the file '" + STEAM_ID_FILE_NAME + "' contains a valid SteamID.")

	result = {
		"sets": [],
		"steamID": args.name,
		"cardsCount": 0,
		"gameCount": 0,
		"completeSets": 0,
		"processingTime": 0,
		"time": 0
	}

	async with aiohttp.ClientSession() as session:
		print("Loading Steam inventory")
		url = "https://steamcommunity.com/id/" + args.name + "/inventory/json/753/6"
		raw_json = await fetch(session, url)
		cardData = json.loads(raw_json)
		# print(cardData)
		if cardData is None or not cardData["success"]:
			sys.exit("Invalid JSON data received. Aborting.")

		for key, card in cardData["rgDescriptions"].items():
			# Ignore emoticons, backgrounds
			if "Trading Card" not in card["type"]:
				# print(card["name"] + " is not a trading card.")
				continue
			# print(card)

			appid = card["market_fee_app"]
			try:
				game_cards = card_requests[appid]
				game_cards.append(card)
			except KeyError:
				card_requests[appid] = [card]

		i = 0
		result["gameCount"] = len(card_requests)
		for appid, inventory in card_requests.items():
			print("Processing " + appid)
			url = "https://www.steamcardexchange.net/index.php?inventorygame-appid-" + appid
			resp = await fetch(session, url)
			time.sleep(0.5)
			dom = PyQuery(resp)
			game_name = dom("h2").text()
			card_items = dom.items(".inventory-game-card-item")
			card_set = Set(appid, game_name)
			# print(inventory)
			for item in card_items:
				card = Card(item.find(".card-name").text().strip())
				if card.name == "":
					# print("[Warning] Invalid card name: " + card.name)
					continue

				# available = item.find(".green, .orange")
				# if not available:
				# 	continue
				stock = filter_card_stock_value(item.find(".card-amount").text())
				card.bot_inventory = stock[0]
				if len(stock) > 1:
					card.bot_inventory_pending = stock[1]

				try:
					card.price = int("".join(filter(str.isdigit, item.find(".card-price").eq(1).text())))
					if card_set.standard_price < 1 and card.bot_inventory > 1:
						card_set.standard_price = card.price
				except ValueError:
					pass

				card.trade_url = item.find(".button-blue").attr("href")
				card.user_inventory = get_card_amount_in_inventory(cardData, inventory, card.name)
				card_set.cards.append(card)

			card_set.update_complete_sets()
			card_set.calculate_total_cost()
			card_set.set_progress_class()
			card_set.set_card_classes()
			card_set.cards.sort(key=lambda c: (c.user_inventory, 10 - c.bot_inventory))

			result["completeSets"] += card_set.complete_sets
			if card_set.user_inventory_is_empty():
				print("User has " + str(card_set.complete_sets) + " complete sets, but no surplus cards in inventory")
				continue
#.........这里部分代码省略.........
开发者ID:Bioruebe,项目名称:CardAvailability,代码行数:103,代码来源:biotc.py

示例8: getItemsByKeyword

# Required import: from pyquery import PyQuery [as alias]
# Note: items() is a method of a PyQuery object (e.g. PyQuery(html).items('td')); it cannot be imported on its own.
def getItemsByKeyword(
    keyword,
    encoding='utf-8',
    page=1,
    price=None,
    sort='sale-desc'
):
    """Scrape Taobao search results for *keyword*.

    Args:
        keyword: Search term as a byte string; ``keyword.decode(encoding)`` is
            called on it, so it must support ``.decode``.
        encoding: Encoding of *keyword*.  The term is re-encoded to GBK before
            being placed in the URL.
        page: Number of result pages to fetch.  Page ``i`` is requested with
            the offset ``s = i * 40``.
        price: Optional two-item sequence ``(low, high)`` added to the URL as
            a ``reserve_price`` filter.
        sort: Sort order passed to the search page, one of
            renqi-desc/sale-desc/credit-desc/old_starts/price-asc/price-desc.

    Returns:
        A list with one dict per ``div[nid]`` element that could be parsed,
        with the keys ``rank``, ``keyword``, ``itemName``, ``itemId``,
        ``itemPic``, ``wangwang``, ``userNumId``, ``price``, ``location``,
        ``tradeNum`` and ``rateNum``.  ``rank`` continues across pages
        (``i * 40 + j + 1``).  Elements that fail to parse are skipped after
        their traceback is printed.  An empty list is returned when
        ``PyQuery`` is falsy.
    """
    if not PyQuery:
        return []
    items = []
    params = {
        'q': '$Q$',
        'style': 'grid',
        # 'bcoffset': 1,
        'tab': 'all',
        # 'cd': 'false',
        # 'v': 'auction',
        'sort': sort,
        's': '{0}',
        # 'p4poffset': '4',
        # 'bcoffset': '-4',
        'btab': 0
    }
    domain = 'http://s.taobao.com/search?' + \
        '&'.join(['%s=%s' % (k, v) for k, v in params.items()])
    # NOTE(review): the query is percent-encoded by rewriting the ``\x``
    # escapes of repr(); this relies on the repr() format of byte strings and
    # upper-cases the whole term.  Kept as-is to preserve the generated URLs.
    domain = domain.replace('$Q$',
                            repr(keyword.decode(encoding).
                                 encode('GBK')).
                            replace('\\x', '%').upper()[1:-1])
    domain = domain.replace(' ', '%20')

    # Optional price interval filter.
    if price:
        domain = domain \
            + '&filter=reserve_price%5B{0}%2C{1}%5D'.format(price[0], price[1])

    for i in range(page):
        url = domain.format(i * 40)
        r = requests.get(url, timeout=5, allow_redirects=True)
        # SECURITY(review): eval() is applied to text derived from the network
        # response.  The intent is only to drop a stray \x86 / \x90 byte that
        # precedes a double quote; replacing those byte pairs directly on
        # ``r.content`` would avoid eval entirely.  Left unchanged pending
        # confirmation that the two forms are equivalent for real responses.
        content = eval(repr(r.content).replace('\\x86"', '"').replace('\\x90"', '"'))
        py = PyQuery(content.decode(r.encoding))
        for j, meta_item in enumerate(py.items("div[nid]")):
            try:
                info = {
                    'rank': i * 40 + j + 1,
                    'keyword': keyword.decode(encoding).encode('utf-8'),
                    'itemName': meta_item('h3 a').attr('title').encode('utf-8'),
                    'itemId': meta_item.attr('nid').encode('utf-8'),
                    'itemPic': meta_item('img').attr('src').encode('utf-8'),
                    'wangwang': meta_item('.seller a').html().encode('utf-8').strip(),
                    'userNumId': re.findall(r'id\=(\d+)', meta_item('.seller a').attr('href'))[0].encode('utf-8'),
                    'price': re.findall(r'\d+\.\d*', meta_item('.price').html().encode('utf-8'))[0],
                    'location': (meta_item('.loc div').html() or '').encode('utf-8'),
                    'tradeNum': re.findall(r'\d+', meta_item('.dealing div').html() or '0')[-1].encode('utf-8'),
                    'rateNum': re.findall(r'\d+', (meta_item('.count a').html() or '0').encode('utf-8'))[0],
                }
                items.append(info)
            except Exception:
                # Best effort: report the malformed item and keep going.
                # ``Exception`` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                traceback.print_exc()
        # NOTE: an early exit for pages with fewer than 40 items was disabled
        # in the original; every requested page is always fetched.
    return items
开发者ID:ruige123456,项目名称:dataMining,代码行数:74,代码来源:app.py


注:本文中的pyquery.PyQuery.items方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。