本文整理汇总了PHP中Snoopy::fetchlinks方法的典型用法代码示例。如果您正苦于以下问题:PHP Snoopy::fetchlinks方法的具体用法?PHP Snoopy::fetchlinks怎么用?PHP Snoopy::fetchlinks使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Snoopy的用法示例。
在下文中一共展示了Snoopy::fetchlinks方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: getDomainLinks
/**
 * Recursively crawl $url and collect every link that mentions $domain.
 *
 * Each newly discovered link is appended to the global $domainLinks list,
 * echoed followed by "\r\n", and then crawled in turn. A link is skipped when
 * it does not contain $domain (case-insensitive substring match) or when it
 * has already been recorded.
 *
 * NOTE(review): recursion is bounded only by the number of distinct matching
 * links, so a large site can produce a very deep call stack.
 *
 * @param string $url    Page whose links are fetched.
 * @param string $domain Substring a link must contain to be recorded/followed.
 * @return void
 */
function getDomainLinks($url, $domain)
{
    global $domainLinks;
    // The caller may never have initialised the global; in_array() below
    // requires a real array (PHP 8 throws a TypeError on null).
    if (!is_array($domainLinks)) {
        $domainLinks = array();
    }

    $snoopy = new Snoopy();
    // Present the request as a desktop Chrome browser.
    $snoopy->agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17";
    $snoopy->rawheaders['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
    $snoopy->rawheaders['Accept-Charset'] = 'GBK,utf-8;q=0.7,*;q=0.3';
    $snoopy->rawheaders['Connection'] = 'keep-alive';
    $snoopy->rawheaders['Accept-Language'] = 'zh-CN,zh;q=0.8';
    $snoopy->rawheaders['Cache-Control'] = 'max-age=0';

    // Nothing to do when the fetch failed or produced no link list.
    if (!$snoopy->fetchlinks($url) || !is_array($snoopy->results)) {
        return;
    }

    foreach ($snoopy->results as $link) {
        // Entries that are not plain strings cannot be matched or crawled.
        if (!is_string($link)) {
            continue;
        }
        if (stripos($link, $domain) === false) {
            continue;
        }
        // Already seen: skipping here is what stops the recursion from looping.
        if (in_array($link, $domainLinks, true)) {
            continue;
        }
        $domainLinks[] = $link;
        echo $link . "\r\n";
        getDomainLinks($link, $domain);
    }
}
示例2: fetchLinks
/**
 * Fetch the links on a page and keep those that match a pattern.
 *
 * A link is kept when preg_match($re, $link) succeeds and isSameLink()
 * (defined elsewhere) reports that it is not already in the result list.
 *
 * @param string $s_url Page to scan.
 * @param string $re    PCRE pattern (with delimiters) a link must match.
 * @return array Sequentially indexed list of matching links; empty when the
 *               fetch fails or yields no link list.
 */
function fetchLinks($s_url, $re)
{
    $o_snoopy = new Snoopy();
    // Do not iterate over whatever is left in ->results after a failed fetch.
    if (!$o_snoopy->fetchlinks($s_url) || !is_array($o_snoopy->results)) {
        return array();
    }

    $a_links = array();
    foreach ($o_snoopy->results as $s_link) {
        if (preg_match($re, $s_link) && !isSameLink($a_links, $s_link)) {
            $a_links[] = $s_link;
        }
    }
    return $a_links;
}
示例3: Newses
// News "gather" script fragment: reads four scraping rules from the POSTed
// form, fetches the links of the remote page and keeps those matching the
// URL rule. The fragment is truncated inside the final foreach loop.
$news = new Newses();
$newstype = new Newstypes();
$snoopy = new Snoopy();
$typeoption = new Typeoption();
$conditions = array();
$tpl_file = "news.gather";
if (isset($_POST['gather']) && !empty($_POST['rules'])) {
// Allow up to 180 seconds for the remote fetches.
set_time_limit(180);
$rules = stripslashes($_POST['rules']);
// One rule per CRLF-terminated line; exactly four lines are expected:
// remote URL, URL pattern, title pattern, content pattern.
$tmp_rules = explode("\r\n", $rules);
if (!empty($tmp_rules) && count($tmp_rules) == 4) {
list($remote_url, $remote_url_match, $remote_title_match, $remote_content_match) = $tmp_rules;
} else {
// NOTE(review): flash() is defined elsewhere; presumably it aborts the
// request - otherwise $remote_url is undefined below. Confirm.
flash();
}
$snoopy->fetchlinks($remote_url);
$urls = array();
$urls = $snoopy->results;
if (empty($urls)) {
flash();
}
foreach ($urls as $key => $value) {
// Drop every fetched URL that does not match the URL rule.
if (!preg_match($remote_url_match, $value)) {
unset($urls[$key]);
}
}
$urls = array_unique($urls);
$u = 0;
// Sets $content and $title to empty arrays and APPENDS an empty array to $sql.
// NOTE(review): `$sql[] =` looks like it was meant to be `$sql =` - confirm.
$sql[] = $title = $content = array();
foreach ($urls as $key => $value) {
示例4: get_page_link
/**
 * Collect the links found on a page.
 *
 * @param string $url Address of the page to scan.
 * @return mixed The links with duplicates removed (original keys preserved)
 *               when Snoopy yields an array; otherwise Snoopy's raw result,
 *               returned unchanged.
 */
function get_page_link($url)
{
    $fetcher = new Snoopy();
    $fetcher->fetchlinks($url);
    $found = $fetcher->results;

    // Anything that is not a list of links is handed back untouched.
    if (!is_array($found)) {
        return $found;
    }

    return array_unique($found);
}
示例5: stbv_main
/**
 * Validate an incoming trackback and flag it as spam when checks fail.
 *
 * Comments whose 'comment_type' is not 'trackback' are returned unchanged.
 * For trackbacks the function copies the author, author URL, post ID and
 * permalink (trailing slash removed) into the global $stbv_val, then runs:
 *   - a sanity check that the author URL starts with 'http',
 *   - Phase 1 (optional): the sender's REMOTE_ADDR must equal the IP that the
 *     author URL's host name resolves to,
 *   - Phase 2 (optional): the author's page is fetched with Snoopy and its
 *     links are compared against the blog URL(s).
 * Results are recorded in $stbv_val ('is_spam', 'snoopy_problem', 'log_info').
 *
 * The remainder of the function is not visible in this excerpt.
 *
 * @param array $incomingTB Comment data; the keys read here are 'comment_type',
 *                          'comment_author', 'comment_author_url' and
 *                          'comment_post_ID'.
 * @return array The unmodified $incomingTB for non-trackbacks (the return
 *               value for trackbacks lies in the omitted part).
 */
function stbv_main($incomingTB)
{
global $stbv_opt, $stbv_val;
####################################
# We only deal with trackbacks
####################################
if ($incomingTB['comment_type'] != 'trackback') {
return $incomingTB;
}
####################################
# Get trackback information
####################################
$stbv_val['comment_author'] = $incomingTB['comment_author'];
$stbv_val['comment_author_url'] = $incomingTB['comment_author_url'];
$stbv_val['comment_post_permalink'] = get_permalink($incomingTB['comment_post_ID']);
$stbv_val['comment_post_permalink'] = preg_replace('/\\/$/', '', $stbv_val['comment_post_permalink']);
// Remove trailing slash
$stbv_val['comment_post_ID'] = $incomingTB['comment_post_ID'];
####################################
# Get Plugin options
####################################
// In 'open' accuracy mode the space-separated 'stbv_blogurls' option is split
// into a list. $stbv_blogurlsArray stays undefined unless the mode is 'open'
// and the option is a string longer than 9 characters.
if ($stbv_opt['stbv_accuracy'] == 'open') {
if (is_string($stbv_opt['stbv_blogurls'])) {
if (strlen($stbv_opt['stbv_blogurls']) > 9) {
$stbv_blogurlsArray = explode(' ', $stbv_opt['stbv_blogurls']);
}
}
}
####################################
# 'Is Spam' flag is FALSE by default. Below we check several things
# and this flag will become true as soon as we have any doubts.
####################################
$stbv_val['is_spam'] = false;
####################################
# If a Snoopy problem occurs (Snoopy can't be loaded or a Snoopy error
# occurred), this variable will be set to TRUE
####################################
$stbv_val['snoopy_problem'] = false;
####################################
# If Author's URL is not correct, it will be considered as spam.
####################################
if (!$stbv_val['is_spam'] && substr($stbv_val['comment_author_url'], 0, 4) != 'http') {
$stbv_val['log_info'][]['warning'] = 'Author\'s URL was found not to be correct';
$stbv_val['is_spam'] = true;
}
####################################
# Phase 1 (IP) - Verify IP address
####################################
if (!$stbv_val['is_spam'] && $stbv_opt['stbv_validateIP'] == '1') {
// Both addresses are reduced to digits and dots before being compared.
$tmpSender_IP = preg_replace('/[^0-9.]/', '', $_SERVER['REMOTE_ADDR']);
$authDomainname = stbv_get_domainname_from_uri($stbv_val['comment_author_url']);
$tmpURL_IP = preg_replace('/[^0-9.]/', '', gethostbyname($authDomainname));
if ($tmpSender_IP != $tmpURL_IP) {
$stbv_val['log_info'][]['info'] = 'Sender\'s IP address (' . $tmpSender_IP . ') not equal to IP address of host (' . $tmpURL_IP . ').';
$stbv_val['is_spam'] = true;
} else {
$stbv_val['log_info'][]['info'] = 'IP address (' . $tmpSender_IP . ') was found to be valid.';
}
} elseif ($stbv_opt['stbv_validateIP'] != '1') {
$stbv_val['log_info'][]['info'] = 'IP address validation (Phase 1) skipped since it is not enabled in the plugin\'s options.';
}
####################################
# Phase 2 (URL) - Snoopy
####################################
if ($stbv_opt['stbv_validateURL'] == '1') {
# Loading snoopy and create snoopy object. In case of
# failure it is being considered as spam, just in case.
// NOTE(review): when 'is_spam' is already true the condition short-circuits,
// stbv_loadSnoopy() is never called and the else branch still runs
// `new Snoopy()` - confirm the class is guaranteed to be loaded in that case.
if (!$stbv_val['is_spam'] && !stbv_loadSnoopy()) {
// Loading snoopy failed
$stbv_val['log_info'][]['warning'] = 'Loading PHP Snoopy class failed. Phase 2 skipped.';
$stbv_val['snoopy_problem'] = true;
} else {
// Create new Snoopy object
$stbvSnoopy = new Snoopy();
}
# Fetch all URLs of the author's web page
// NOTE(review): the else branch below also runs when 'is_spam' or
// 'snoopy_problem' is true; if loading Snoopy failed above, $stbvSnoopy was
// never assigned and `$stbvSnoopy->results` reads an undefined variable.
// NOTE(review): the warning text contains `couldn\\t`, which in a
// single-quoted string yields a literal backslash ("couldn\t"); it was
// probably meant to be `couldn\'t`.
if (!$stbv_val['is_spam'] && !$stbv_val['snoopy_problem'] && !@$stbvSnoopy->fetchlinks($stbv_val['comment_author_url'])) {
// Snoopy couldn't reach the target website, Snoopy error occurred, or something else...
$stbv_val['log_info'][]['warning'] = 'Snoopy couldn\\t find something on the source website or Snoopy error occurred. Phase 2 skipped.';
$stbv_val['snoopy_problem'] = true;
} else {
$stbvAuthorUrlArray = $stbvSnoopy->results;
}
# Check if URL array contains link to website
if (!$stbv_val['is_spam'] && !$stbv_val['snoopy_problem'] && is_array($stbvAuthorUrlArray)) {
$loopSuccess = false;
foreach ($stbvAuthorUrlArray as $loopUrl) {
// Remove trailing slash, "/trackback" and "/trackback/"
$loopUrl = preg_replace('/(\\/|\\/trackback|\\/trackback\\/)$/', '', $loopUrl);
// NOTE(review): $stbv_blogurlsArray may be undefined here (see the options
// section above); is_array() on it then raises an undefined-variable notice.
if ($stbv_opt['stbv_accuracy'] == 'open' && is_array($stbv_blogurlsArray)) {
// We have more than one URL to be checked
$loopInnerSuccess = false;
foreach ($stbv_blogurlsArray as $loopOptionsURL) {
// Check if the first chars of the URL of remote page contain URL of the options
if (substr($loopUrl, 0, strlen($loopOptionsURL)) == $loopOptionsURL) {
$loopInnerSuccess = true;
break;
}
}
if ($loopInnerSuccess) {
//......... part of the code is omitted here .........
示例6: add_links_insite
/**
 * Fetch the links on $link with Snoopy and record them for a site.
 *
 * Every link returned by check_wai() is inserted into `ve123_links_temp` if
 * not yet present. Links that also pass check_include() are inserted into
 * `ve123_links` (level '1') when new and valid, and are echoed as an HTML
 * anchor with a "fetched" / "already exists" marker.
 *
 * is_url(), GetSiteUrl(), check_wai(), check_include() and the global $db
 * object are defined elsewhere.
 *
 * NOTE(review): the SQL statements below are built by concatenating scraped
 * URLs, which is injectable. The $db API is not visible here, so the queries
 * are left as they were; switch them to the driver's parameter binding or
 * escaping once that API is confirmed.
 *
 * @param string $link             Page to crawl.
 * @param string $old              Original page; only used for the status line.
 * @param mixed  $numm             Passed through to check_wai().
 * @param mixed  $ooo              Unused by the active code.
 * @param mixed  $site_id          Site id stored with every inserted link.
 * @param string $include_word     Passed to check_include().
 * @param string $not_include_word Passed to check_include().
 * @return false|null false when $link is not a valid URL, otherwise nothing.
 */
function add_links_insite($link, $old, $numm, $ooo, $site_id, $include_word, $not_include_word)
{
    if (!is_url($link)) {
        return false;
    }
    global $db, $config;

    // Status line for the operator.
    $url_old = GetSiteUrl($old);
    echo "原始页=" . $url_old . " - - <";
    echo "首层 id=" . $site_id . "> - - <";
    echo "包含字段=" . $include_word . ">";
    echo "<br>";

    // Third-party Snoopy fetcher.
    $snoopy = new Snoopy();
    $snoopy->fetchlinks($link);
    $links = check_wai($snoopy->results, $numm, $link);
    // check_wai() may hand back a non-array; normalise before array_unique(),
    // which requires an array.
    $links = array_values(array_unique((array) $links));

    foreach ($links as $value) {
        $row = $db->get_one("select * from ve123_links_temp where url='" . $value . "'");
        if (empty($row)) {
            $arral = array('url' => $value, 'site_id' => $site_id);
            $db->insert("ve123_links_temp", $arral);
        }

        // The permanent table stores URLs without a trailing slash.
        $value = rtrim($value, "/");
        $row = $db->get_one("select * from ve123_links where url='" . $value . "'");
        if (!check_include($value, $include_word, $not_include_word)) {
            continue;
        }

        if (empty($row) && is_url($value)) {
            echo "<font color=#C60A00><b>抓取到:</b></font>";
            $array = array('url' => $value, 'site_id' => $site_id, 'level' => '1');
            $db->insert("ve123_links", $array);
        } else {
            echo "<b>已存在了:</b>";
        }

        // The URL comes from a remote page, so escape it before echoing it into
        // HTML. A single-byte charset makes the escaping work byte by byte:
        // multibyte text in any ASCII-compatible encoding passes through intact
        // instead of being rejected as invalid.
        $safe = htmlspecialchars($value, ENT_QUOTES, 'ISO-8859-1');
        echo '<a href="' . $safe . '" target=_blank>' . $safe . '</a><br>';

        // Push progress to the browser; ob_flush() raises a notice when no
        // output buffer is active.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }
}
示例7: wphc_check_hidden_tag
/**
 * WP Hashcash comment filter: decide whether an incoming comment is spam.
 *
 * Trackbacks/pingbacks are optionally validated by (1) comparing the sender's
 * IP with the IP of the host in the author URL and (2) fetching the author
 * URL with Snoopy and looking for a link back to the post's permalink.
 * All other comments must carry a `wphc_value` POST field that equals one of
 * the keys in $options['key'].
 *
 * Spam/ham counters are updated through wphc_option(), and for spam a filter
 * implementing the configured moderation mode is registered.
 *
 * @param array $comment Comment data ('comment_type', 'comment_author_url',
 *                       'comment_post_ID', 'comment_content' are used).
 * @return array The comment, possibly with a log note appended to its content.
 */
function wphc_check_hidden_tag($comment)
{
    // admins can do what they like
    if (is_admin()) {
        return $comment;
    }

    // get our options
    $type = $comment['comment_type'];
    $options = wphc_option();
    $spam = false;

    if ($type == "trackback" || $type == "pingback") {
        // check the website's IP against the url it's sending as a trackback
        if ($options['validate-ip']) {
            // NOTE(review): X-Forwarded-For is supplied by the client and may
            // hold a comma-separated list; it should only be trusted behind a
            // known proxy.
            $server_ip = isset($_SERVER['HTTP_X_FORWARDED_FOR']) ? $_SERVER['HTTP_X_FORWARDED_FOR'] : $_SERVER['REMOTE_ADDR'];
            // parse_url() yields null/false when the URL has no host; cast so
            // gethostbyname() always receives a string.
            $web_ip = gethostbyname((string) parse_url($comment['comment_author_url'], PHP_URL_HOST));
            $ipv = $server_ip != $web_ip;
            $spam = $spam || $ipv;
            if ($options['logging'] && $ipv) {
                $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] The comment's server IP (" . $server_ip . ") doesn't match the" . " comment's URL host IP (" . $web_ip . ") and so is spam.";
            }
        }

        // look for our link in the page itself
        if (!$spam && $options['validate-url']) {
            if (!class_exists('Snoopy')) {
                require_once ABSPATH . WPINC . '/class-snoopy.php';
            }
            $permalink = get_permalink($comment['comment_post_ID']);
            $permalink = preg_replace('/\\/$/', '', $permalink);
            $snoop = new Snoopy();
            if (@$snoop->fetchlinks($comment['comment_author_url'])) {
                $found = false;
                if (!empty($snoop->results)) {
                    foreach ($snoop->results as $url) {
                        // Remove trailing slash, "/trackback" and "/trackback/"
                        $url = preg_replace('/(\\/|\\/trackback|\\/trackback\\/)$/', '', $url);
                        if ($url == $permalink) {
                            $found = true;
                            break; // one match is enough
                        }
                    }
                }
                if ($options['logging'] && !$found) {
                    $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] The comment's actual post text did not contain your blog url (" . $permalink . ") and so is spam.";
                }
                $spam = $spam || !$found;
            } else {
                $spam = true;
                if ($options['logging']) {
                    $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] Snoopy failed to fetch results for the comment blog url (" . $comment['comment_author_url'] . ") with error '" . $snoop->error . "' and so is spam.";
                }
            }
        }
    } else {
        // Check the wphc value against the stored keys. The field may be
        // missing or an array, so normalise it to a string first.
        $submitted = '';
        if (isset($_POST["wphc_value"]) && is_scalar($_POST["wphc_value"])) {
            $submitted = (string) $_POST["wphc_value"];
        }
        // Compare as strings, strictly: a loose in_array() lets non-numeric
        // input match numeric keys on PHP < 8.
        $spam = !in_array($submitted, array_map('strval', (array) $options['key']), true);
        if ($options['logging'] && $spam) {
            $comment['comment_content'] .= "\n\n[WORDPRESS HASHCASH] The poster sent us '" . intval($submitted) . "' which is not a hashcash value.";
        }
    }

    if ($spam) {
        $options['comments-spam'] = (int) $options['comments-spam'] + 1;
        wphc_option($options);
        // Closures replace create_function(), which was removed in PHP 8.
        switch ($options['moderation']) {
            case 'delete':
                add_filter('comment_post', function ($id) {
                    wp_delete_comment($id);
                    die('This comment has been deleted by WP Hashcash');
                });
                break;
            case 'akismet':
                add_filter('pre_comment_approved', function ($a) {
                    return 'spam';
                });
                break;
            case 'moderate':
            default:
                add_filter('pre_comment_approved', function ($a) {
                    return 0;
                });
                break;
        }
    } else {
        $options['comments-ham'] = (int) $options['comments-ham'] + 1;
        wphc_option($options);
    }

    return $comment;
}
示例8: jd_item
/**
 * Harvest item.jd.com product links from a page and store their data.
 *
 * Links are URL-decoded; those whose host is exactly 'item.jd.com' and that
 * carry no fragment are de-duplicated and passed to $this->jd_data(). A
 * non-empty result of that call is handed to $this->jd_inserts().
 *
 * @param string $url Page to scan for product links.
 * @return bool|array true when at least one product link was found,
 *                    otherwise an empty array.
 */
public function jd_item($url)
{
    $snoopy = new Snoopy();
    $snoopy->fetchlinks($url);
    $link_all = $snoopy->results;
    if (empty($link_all) || !is_array($link_all)) {
        return array();
    }

    $link_item = array();
    foreach ($link_all as $value) {
        if (!is_string($value)) {
            continue;
        }
        $link_decode = urldecode($value);
        $link_info = parse_url($link_decode);
        // parse_url() returns false for seriously malformed URLs and omits
        // 'host' for relative ones; neither can be a product link.
        if (!is_array($link_info) || !isset($link_info['host'])) {
            continue;
        }
        if ($link_info['host'] === 'item.jd.com' && empty($link_info['fragment'])) {
            $link_item[] = $link_decode;
        }
    }

    if (empty($link_item)) {
        return array();
    }

    $link_item = array_unique($link_item);
    $data = $this->jd_data($link_item);
    if (!empty($data)) {
        $this->jd_inserts($data);
    }
    return true;
}