当前位置: 首页>>代码示例>>Python>>正文


Python Field.value方法代码示例

本文整理汇总了Python中field.Field.value方法的典型用法代码示例。如果您正苦于以下问题:Python Field.value方法的具体用法?Python Field.value怎么用?Python Field.value使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在field.Field的用法示例。


在下文中一共展示了Field.value方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parseJsonPage

# 需要导入模块: from field import Field [as 别名]
# 注意: value 是 Field 实例上的属性,不能单独导入,请通过 field.value 读写
    def parseJsonPage(self, site, doc, listurl):
        """Parse a JSON list page and collect its entries into ``self.items``.

        The decoded document (or its ``listRule.getEntryItem()`` member, when
        that key is present) is treated as the entry data. If the data is a
        list and the list rule names a content-URL key, every entry carrying
        that key becomes an ``Item`` stored in ``self.items`` under its GUID.
        Otherwise the data is appended to ``self.items`` as is, replacing a
        dict ``self.items`` with a list first.

        Args:
            site: Object whose ``getCharset()`` result is passed to
                ``json.loads`` as the encoding.
            doc: Raw JSON text of the list page.
            listurl: URL of the list page; entry links are resolved
                against it.

        Raises:
            ValueError: If anything fails while decoding or processing the
                page. The message starts with "Cant parse json file" and
                carries the text of the underlying error.
        """
        try:
            doc = json.loads(doc, encoding=site.getCharset())
            item = self.listRule.getEntryItem()

            if item and item in doc:
                data = doc[item]
            else:
                data = doc

            urlParent = self.listRule.getContentUrl()
            extrarules = self.listRule.extrarules

            if isinstance(data, list) and urlParent:
                for _data in data:
                    if urlParent not in _data:
                        continue

                    link = urlparse.urljoin(listurl, _data[urlParent])

                    _item = Item({
                        "type": self.seed_type,
                        "images": [],
                    })

                    # Pull out the configured keys from the JSON entry.
                    for field_name, _rule, fetch_all, page_type in extrarules:
                        if _rule not in _data:
                            continue
                        field = Field(name=field_name, rule=_rule)
                        value = _data[_rule]
                        if is_image(value):
                            _item["images"].append(value)
                        # Every matched key is stored, not only image values;
                        # previously non-image values were silently dropped.
                        field.value = value
                        _item[field["name"]] = field

                    if link is not None:
                        _item['url'] = link

                    # Work out the item GUID.
                    if self.guid_rule:
                        guid = self.getItemGUID(_item)
                    else:
                        # No rule configured: install a temporary one for this
                        # item only and always clear it again, even when
                        # getItemGUID raises.
                        if self.seed["dont_craw_content"]:
                            temp_rule = [_item[f]["id"] for f in _item.fields]
                        else:
                            temp_rule = "url"
                        self.guid_rule = temp_rule
                        try:
                            guid = self.getItemGUID(_item)
                        finally:
                            self.guid_rule = None

                    self.items[guid] = _item
            else:
                # Data that is not a list of linked entries is kept verbatim.
                if isinstance(self.items, dict):
                    self.items = []

                self.items.append(data)
        except Exception as exc:
            # A string cannot be raised; use a real exception and keep the
            # cause visible in the message.
            raise ValueError("Cant parse json file: %s" % exc)
开发者ID:leonardleonard,项目名称:spyder,代码行数:61,代码来源:document.py

示例2: entry

# 需要导入模块: from field import Field [as 别名]
# 注意: value 是 Field 实例上的属性,不能单独导入,请通过 field.value 读写
            def entry(i, e):
                """Build an ``Item`` from one list element and store it by GUID.

                ``self``, ``listurl`` and ``extrarules`` are not parameters;
                they are read from the enclosing scope.

                Args:
                    i: Not used in the body.
                    e: Element describing the entry. An ``a`` element supplies
                        the link through its ``href``; for any other tag the
                        link is looked up with the list rule's content-URL
                        rule.
                """
                urlParent = self.listRule.getContentUrl()

                # Resolve the entry link.
                if e.tag == "a":
                    link = e.get("href")
                else:
                    link = getElementData(e, urlParent)

                if link is not None:
                    link = urlparse.urljoin(listurl, link)

                _item = Item({
                    "type": self.seed_type,
                    "images": [],
                })

                for field_name, _rule, fetch_all, page_type in extrarules:
                    field = Field(name=field_name, rule=_rule)
                    # TODO: filter HOOK
                    field.value = getElementData(e, _rule, _item["images"])
                    _item[field["name"]] = field

                if link is not None:
                    _item['url'] = link

                # Work out the item GUID.
                if self.guid_rule:
                    guid = self.getItemGUID(_item)
                else:
                    # `== True` is kept as written: switching to a plain
                    # truthiness test would change the outcome for non-bool
                    # settings such as non-empty strings.
                    if self.seed['dont_craw_content'] == True:
                        temp_rule = [_item[f]["id"] for f in _item.fields]
                    else:
                        temp_rule = "url"
                    # Install the temporary rule for this item only and always
                    # clear it again, even when getItemGUID raises.
                    self.guid_rule = temp_rule
                    try:
                        guid = self.getItemGUID(_item)
                    finally:
                        self.guid_rule = None

                self.items[guid] = _item
开发者ID:leonardleonard,项目名称:spyder,代码行数:45,代码来源:document.py

示例3: parseDocument

# 需要导入模块: from field import Field [as 别名]
# 注意: value 是 Field 实例上的属性,不能单独导入,请通过 field.value 读写
    def parseDocument(self, doc):
        """Extract the configured fields of an article page into ``self.data``.

        Every rule in ``self.articleRule.extrarules`` yields a ``Field`` that
        is stored in ``self.data`` under its name. The field recognised by
        ``is_article_content`` or ``is_gallery_content`` is treated as the
        main content: its value is extended with the matching part of each
        follow-up page and finally written to ``self.data['content'].value``.
        All other fields receive their extracted value directly.

        Args:
            doc: Page markup; it is wrapped with ``pq`` before querying.
        """
        doc = pq(doc)

        wrapparent = self.articleRule.wrapparent
        pageparent = self.articleRule.pageparent
        # Rule of the main-content field; stays empty until one is found.
        content_re = ""
        # URLs of the follow-up pages.
        urls = []
        # Main content collected so far (a list for gallery content).
        content = ""

        article = doc.find(wrapparent)
        # Pagination links.
        if pageparent:
            urls = self.parsePage(article, pageparent)

        # Extra fields such as title and tags.
        extrarules = self.articleRule.extrarules

        # Only articles carry a content field.
        # TODO: some features are still missing here.
        # `or ()` keeps a missing rule set from crashing the loop.
        for key, rule, fetch_all, page_type in extrarules or ():
            field = Field(name=key, rule=rule)
            value = getElementData(doc, rule, self.data["images"], fetch_all)

            self.data[field.get('name')] = field

            if self.is_article_content(field):
                content_re = field.get("rule")
                content = value
            elif self.is_gallery_content(field):
                content_re = field.get("rule")
                # Copy so later appends never touch the extracted list.
                content = list(value) if isinstance(value, list) else []
            else:
                field.value = value

        # Collect the content of the follow-up pages.
        if urls and content_re:
            for next_url in urls:
                next_page = Fetch(
                    next_url,
                    charset=self.seed["charset"],
                    timeout=self.seed["timeout"],
                ).read()
                if next_page is None:
                    continue

                next_page = self._getContent(next_page, wrapparent, content_re)
                if not next_page:
                    continue

                if isinstance(content, list):
                    # NOTE(review): if _getContent returns a list here, this
                    # nests it inside the gallery list - confirm intent.
                    content.append(next_page)
                else:
                    content += next_page

        if content and content_re:
            if isinstance(content, list):
                self.data['content'].value = content
                self.data['images'] += content
            else:
                readable = Readability(content, self.url, self.articleRule.filters)
                images = readable.getImages()

                self.data['content'].value = readable.getContent()
                self.data['images'] += images
开发者ID:leonardleonard,项目名称:spyder,代码行数:63,代码来源:document.py


注:本文中的field.Field.value方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。