本文整理汇总了 Python 中 field.Field.value 方法的典型用法代码示例。如果您正苦于以下问题:Python Field.value 方法的具体用法?Python Field.value 怎么用?Python Field.value 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 field.Field 的用法示例。
在下文中一共展示了 Field.value 方法的 3 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: parseJsonPage
# 需要导入模块: from field import Field [as 别名]
# 或者: from field.Field import value [as 别名]
def parseJsonPage(self, site, doc, listurl):
    """Parse a JSON list page and collect its entries into ``self.items``.

    The JSON text is decoded, the entry container is selected with
    ``self.listRule.getEntryItem()`` (falling back to the whole document),
    and, when the container is a list and a content-URL key is configured,
    one ``Item`` is built per entry that carries that key.  Each item is
    stored in ``self.items`` under the GUID returned by
    ``self.getItemGUID``.  Otherwise the decoded data is appended to
    ``self.items`` as-is (a dict ``self.items`` is replaced by a list first).

    :param site: object providing ``getCharset()`` for the JSON decoder.
    :param doc: raw JSON text of the list page.
    :param listurl: URL of the list page; entry links are joined against it.
    :raises ValueError: if anything goes wrong while decoding or walking
        the document.  The original code raised a plain string, which is
        not a valid exception object.
    """
    try:
        # NOTE: the ``encoding`` keyword belongs to the Python 2 era json
        # API that the rest of this code (``urlparse``) also targets.
        doc = json.loads(doc, encoding=site.getCharset())
        item = self.listRule.getEntryItem()
        if item and item in doc:
            data = doc[item]
        else:
            data = doc

        urlParent = self.listRule.getContentUrl()
        extrarules = self.listRule.extrarules

        if isinstance(data, list) and urlParent:
            for _data in data:
                if urlParent not in _data:
                    continue

                link = urlparse.urljoin(listurl, _data[urlParent])
                # The md5-of-link GUID the original computed here was
                # overwritten on every path below, so it is not computed.
                _item = Item({
                    "type": self.seed_type,
                    "images": [],
                })

                # Extract the configured keys from this entry.
                # NOTE(review): source indentation was lost; the two
                # assignments after the image check are assumed to sit
                # inside the ``if _rule in _data`` branch, since ``value``
                # is only bound there -- confirm against the original file.
                for field_name, _rule, fetch_all, page_type in extrarules:
                    field = Field(name=field_name, rule=_rule)
                    if _rule in _data:
                        value = _data[_rule]
                        if is_image(value):
                            _item["images"].append(value)
                        field.value = value
                        _item[field["name"]] = field

                if link is not None:
                    _item['url'] = link

                # Get the item GUID.
                if self.guid_rule:
                    guid = self.getItemGUID(_item)
                else:
                    # No rule configured: install a temporary one for the
                    # duration of the getItemGUID call (presumably it reads
                    # self.guid_rule) and always reset it afterwards, even
                    # if the call raises.
                    if self.seed["dont_craw_content"]:
                        self.guid_rule = [_item[f]["id"] for f in _item.fields]
                    else:
                        self.guid_rule = "url"
                    try:
                        guid = self.getItemGUID(_item)
                    finally:
                        self.guid_rule = None

                self.items[guid] = _item
        else:
            # Not a list of linkable entries: keep the decoded data itself.
            if isinstance(self.items, dict):
                self.items = []
            self.items.append(data)
    except Exception:
        raise ValueError("Cant parse json file")
示例2: entry
# 需要导入模块: from field import Field [as 别名]
# 或者: from field.Field import value [as 别名]
def entry(i, e):
    """Build one ``Item`` from a list-page element and store it by GUID.

    This is a closure: ``self``, ``listurl`` and ``extrarules`` come from
    the enclosing scope, which is not visible here.

    :param i: unused here (presumably the element index supplied by an
        each-style iterator -- confirm against the caller).
    :param e: the element to read; its ``href`` is used when it is an
        ``<a>`` tag, otherwise the link is looked up with the content-URL
        rule via ``getElementData``.
    """
    # link
    urlParent = self.listRule.getContentUrl()
    if e.tag == "a":
        link = e.get("href")
    else:
        link = getElementData(e, urlParent)
    if link is not None:
        link = urlparse.urljoin(listurl, link)

    _item = Item({
        "type": self.seed_type,
        "images": [],
    })

    for field_name, _rule, fetch_all, page_type in extrarules:
        field = Field(name=field_name, rule=_rule)
        value = getElementData(e, _rule, _item["images"])
        # TODO:
        # filter HOOK
        field.value = value
        _item[field["name"]] = field

    if link is not None:
        _item['url'] = link

    # Get the item GUID.
    if self.guid_rule:
        guid = self.getItemGUID(_item)
    else:
        # No rule configured: install a temporary one for the duration of
        # the getItemGUID call (presumably it reads self.guid_rule) and
        # always reset it afterwards, even if the call raises; otherwise a
        # failure would leave the temporary rule in place for later entries.
        # NOTE(review): the explicit ``== True`` comparison is kept as
        # written; parseJsonPage tests the same flag by truthiness.
        if self.seed['dont_craw_content'] == True:
            self.guid_rule = [_item[f]["id"] for f in _item.fields]
        else:
            self.guid_rule = "url"
        try:
            guid = self.getItemGUID(_item)
        finally:
            self.guid_rule = None

    self.items[guid] = _item
示例3: parseDocument
# 需要导入模块: from field import Field [as 别名]
# 或者: from field.Field import value [as 别名]
def parseDocument(self, doc):
    """Extract the configured fields of an article page into ``self.data``.

    Every rule in ``self.articleRule.extrarules`` yields a ``Field`` that
    is stored in ``self.data`` under its name.  The field recognised by
    ``is_article_content`` / ``is_gallery_content`` supplies the page
    content; when page URLs were found via ``parsePage``, each further page
    is fetched and its content added.  The result is written to
    ``self.data['content'].value`` and images are added to
    ``self.data['images']``.

    :param doc: page markup, wrapped with ``pq`` before querying.
    """
    doc = pq(doc)
    wrapparent = self.articleRule.wrapparent
    pageparent = self.articleRule.pageparent
    content_rule = ""
    # URLs of the sub-pages
    page_urls = []
    # text content
    body = ""
    article = doc.find(wrapparent)

    # pages
    if pageparent:
        page_urls = self.parsePage(article, pageparent)

    # need title, tags
    extrarules = self.articleRule.extrarules

    # Only articles have content.
    # TODO: some features are currently missing here.
    if len(extrarules):
        for key, rule, fetch_all, page_type in extrarules:
            field = Field(name=key, rule=rule)
            value = getElementData(doc, rule, self.data["images"], fetch_all)
            self.data[field.get('name')] = field

            if self.is_article_content(field):
                content_rule = field.get("rule")
                body = value
                continue

            if self.is_gallery_content(field):
                content_rule = field.get("rule")
                body = []
                if isinstance(value, list):
                    body += value
                continue

            # NOTE(review): source indentation was lost; this assignment is
            # read as the ``else`` of the article/gallery chain, so ordinary
            # fields receive their value -- confirm against the original.
            field.value = value

    # Collect the content of the remaining pages.
    if len(page_urls) > 0 and content_rule:
        for next_url in page_urls:
            fetched = Fetch(
                next_url,
                charset=self.seed["charset"],
                timeout=self.seed["timeout"],
            ).read()
            if fetched is None:
                continue
            extracted = self._getContent(fetched, wrapparent, content_rule)
            if not extracted:
                continue
            if isinstance(body, list):
                body.append(extracted)
            else:
                body += extracted

    if body and content_rule:
        if isinstance(body, list):
            # Gallery: the collected entries are both content and images.
            self.data['content'].value = body
            self.data['images'] += body
        else:
            readable = Readability(body, self.url, self.articleRule.filters)
            images = readable.getImages()
            self.data['content'].value = readable.getContent()
            self.data['images'] += images