本文整理汇总了PHP中Finder::addSelector方法的典型用法代码示例。如果您正苦于以下问题:PHP Finder::addSelector方法的具体用法?PHP Finder::addSelector怎么用?PHP Finder::addSelector使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Finder的用法示例。
在下文中一共展示了Finder::addSelector方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: getContents
/**
 * Collect the content of every entry URL in $this->urls.
 *
 * For each URL the page is fetched through getResult(). When the result
 * contains the main-content selector, the follow-up pages of a paginated
 * article are fetched with a dedicated Finder and their main content is
 * gathered page by page. Every selected value is then passed through
 * replaceString(), <img> tags in the main content are rewritten to point
 * at the completed (and, when downloading is enabled, the target) picture
 * URL, and the joined main content is stored under
 * $this->contentSelectorIDAlias.
 *
 * Events dispatched:
 *  - 'collect_paged_main_content_success' ($url, $content, $pagePictures) once per content page
 *  - 'collect_content_success' ($url, $result, $contentPictures) once per successfully fetched URL
 *  - 'collect_content_fail' ($url) when getResult() returns false
 *
 * After all URLs are processed, the queued pictures are downloaded when
 * $this->downloadPicture is truthy.
 *
 * @return void
 */
public function getContents()
{
    /* Check the initialization state */
    $this->checkIsInited();

    /* Walk over every entry link */
    foreach ($this->urls as $url) {
        /* NOTE(review): the return value is discarded; presumably this call only updates parser state — confirm */
        $this->collectorParser->simplifyUrl($url);

        /* Main content of each page of the (possibly paginated) article */
        $paged_main_content = [];
        /*
         * Pictures found in the main content, handed to the listeners.
         * Initialized per URL so the success event never receives an
         * undefined variable or the pictures of a previous URL.
         */
        $content_pictures = [];

        /* Fetch the entry page */
        if (($result = $this->getResult($url)) !== false) {
            /* Only handle pagination when main content was actually selected */
            if (isset($result[self::MAIN_CONTENT_SELECTOR_ID])) {
                /* Keep the content of the first page */
                $paged_main_content[] = $result[self::MAIN_CONTENT_SELECTOR_ID];

                /* Start from an empty list so a failed lookup cannot leave the variable undefined or stale */
                $page_urls = [];
                try {
                    $page_urls = $this->collectorParser->getContentPages($this->getHtml(), $this->getHtmlDom());
                } catch (Exception $e) {
                    /* The parser could not list the pages; fall back to the inline page list when configured */
                    if ($this->contentPageMode === self::PAGES_INLINE) {
                        $page_urls = $this->getContentInlinePages($this->getHtml(), $this->getHtmlDom(), $this->contentPagesSelector);
                    }
                }
                /* count()/array_shift() below require an array */
                if (!is_array($page_urls)) {
                    $page_urls = [];
                }

                /* Separate finder instance used to fetch the main content of the remaining pages */
                $mainContentFinder = new Finder();
                $mainContentFinder->addSelector(self::MAIN_CONTENT_SELECTOR_ID, $this->contentSelector);

                while (count($page_urls) > 0) {
                    $page_url = array_shift($page_urls);
                    $page_result = $mainContentFinder->getResult($this->collectorParser->changeUrl($page_url));

                    /* Store the content of THIS page (not the first page again); empty() also covers a failed fetch */
                    if (!empty($page_result[self::MAIN_CONTENT_SELECTOR_ID])) {
                        $paged_main_content[] = $page_result[self::MAIN_CONTENT_SELECTOR_ID];
                    }

                    /* In context mode the base parser discovers the next page one step at a time */
                    if (get_class($this->collectorParser) === __NAMESPACE__ . '\\CollectorParser' && $this->contentPageMode === self::PAGES_CONTEXT) {
                        /*
                         * NOTE(review): this inspects $this->getHtml()/$this->getHtmlDom(), not the page
                         * just fetched by $mainContentFinder — verify that this is the intended document,
                         * otherwise the same next URL may be queued repeatedly.
                         */
                        if ($next_url = $this->collectorParser->getContentContextPage($this->getHtml(), $this->getHtmlDom(), $this->contentPagesSelector)) {
                            $page_urls[] = $next_url;
                        }
                    }
                }

                /* The raw main content is replaced by the alias entry further below */
                unset($page_url, $next_url, $page_result, $page_urls, $result[self::MAIN_CONTENT_SELECTOR_ID]);
            }

            /* Apply the configured string replacements to every remaining selected value */
            foreach ($result as $key => &$item) {
                if ($selector = $this->getSelector($key)) {
                    $item = $this->replaceString($item, $selector);
                }
            }
            /* Break the reference left behind by the by-reference foreach */
            unset($key, $item);

            /* Apply the content selector's replacements to every content page */
            foreach ($paged_main_content as &$content) {
                if ($this->contentSelector) {
                    $content = $this->replaceString($content, $this->contentSelector);
                }
            }
            unset($content);

            if ($paged_main_content) {
                /* Local copies of the collaborators captured by the closure below */
                $collectorParser = $this->collectorParser;
                $pictureMaker = $this->pictureMaker;
                $downloadPicture = $this->downloadPicture;

                /* Rewrite the picture addresses in the main content */
                foreach ($paged_main_content as &$content) {
                    /* Pictures of the current page only, handed to the per-page listeners */
                    $paged_content_pictures = [];

                    $replaced = preg_replace_callback('/<img\\s[^>]*\\ssrc="([^>]+?)"\\s[^>]*\\/?>/i', function ($match) use ($collectorParser, $pictureMaker, &$content_pictures, &$paged_content_pictures, $downloadPicture) {
                        /* Complete the picture link */
                        $pic_url = $collectorParser->changeUrl($match[1]);
                        /* When pictures are downloaded, point at the target address instead */
                        if ($downloadPicture) {
                            $pic_url = $pictureMaker->getUrl($pic_url);
                        }
                        $content_pictures[] = $pic_url;
                        $paged_content_pictures[] = $pic_url;

                        /* The whole tag is replaced, so every attribute other than src is dropped */
                        return '<img src="' . $pic_url . '" />';
                    }, $content);

                    /* preg_replace_callback() yields null on a PCRE error; keep the untouched content then */
                    if ($replaced !== null) {
                        $content = $replaced;
                    }

                    $this->dispatch('collect_paged_main_content_success', $url, $content, $paged_content_pictures);
                }
                unset($collectorParser, $pictureMaker, $downloadPicture, $content, $replaced, $paged_content_pictures);

                /* Publish the joined main content under the alias key */
                $result[$this->contentSelectorIDAlias] = Helper::formatContent(implode('', $paged_main_content));
            }

            $this->dispatch('collect_content_success', $url, $result, $content_pictures);
        } else {
            $this->dispatch('collect_content_fail', $url);
        }
    }
    unset($url);

    if ($this->downloadPicture) {
        /* Start downloading the collected pictures */
        $this->pictureMaker->download();
    }
}