當前位置: 首頁>>代碼示例>>PHP>>正文


PHP SiteConfig::add_to_cache方法代碼示例

本文整理匯總了PHP中SiteConfig::add_to_cache方法的典型用法代碼示例。如果您正苦於以下問題:PHP SiteConfig::add_to_cache方法的具體用法?PHP SiteConfig::add_to_cache怎麽用?PHP SiteConfig::add_to_cache使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在SiteConfig的用法示例。


在下文中一共展示了SiteConfig::add_to_cache方法的4個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的PHP代碼示例。

示例1: getSinglePage

/**
 * Try to fetch the "single page" version of an article.
 *
 * Looks up the site config for the URL's host, evaluates the configured
 * single-page-link XPath expressions (against the page HTML, or against the
 * feed item description when only single_page_link_in_feed is set) and, if
 * they yield a URL different from $url, fetches that URL through the global
 * $http client.
 *
 * @param object $item Feed item; only get_description() is called on it.
 * @param string $html HTML of the page that was already fetched.
 * @param string $url  URL that $html was fetched from.
 *
 * @return mixed The value returned by $http->get() when its status_code is
 *               below 300; false in every other case.
 */
function getSinglePage($item, $html, $url)
{
    global $http, $extractor;
    $host = @parse_url($url, PHP_URL_HOST);
    $site_config = SiteConfig::build($host);
    if ($site_config === false) {
        // No config for this host: fall back to fingerprint detection.
        if (!empty($extractor->fingerprints) && ($_fphost = $extractor->findHostUsingFingerprints($html))) {
            $site_config = SiteConfig::build($_fphost);
        }
        if ($site_config === false) {
            $site_config = new SiteConfig();
        }
        SiteConfig::add_to_cache($host, $site_config);
        // NOTE(review): the config resolved above is only cached, not used for
        // this call; the function gives up here. Kept as-is to preserve the
        // behaviour existing callers see - confirm whether this is intended.
        return false;
    }
    SiteConfig::add_to_cache($host, $site_config);

    $splink = null;
    if (!empty($site_config->single_page_link)) {
        $splink = $site_config->single_page_link;
    } elseif (!empty($site_config->single_page_link_in_feed)) {
        // The XPath targets the feed, so evaluate it against the feed item
        // description instead of the fetched page.
        $splink = $site_config->single_page_link_in_feed;
        $html = $item->get_description();
    }
    if (!isset($splink)) {
        return false;
    }

    // Build DOM tree from HTML
    $readability = new Readability($html, $url);
    $xpath = new DOMXPath($readability->dom);

    // Loop through the XPath expressions and stop at the FIRST one that
    // produces a usable URL.
    $single_page_url = null;
    foreach ($splink as $pattern) {
        $elems = @$xpath->evaluate($pattern, $readability->dom);
        if (is_string($elems)) {
            $single_page_url = trim($elems);
            break;
        } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
            // $node (not $item) so the feed-item parameter is not clobbered.
            foreach ($elems as $node) {
                if ($node instanceof DOMElement && $node->hasAttribute('href')) {
                    $single_page_url = $node->getAttribute('href');
                    // break 2: leave the pattern loop as well, otherwise a
                    // later pattern could overwrite the match found here.
                    break 2;
                } elseif ($node instanceof DOMAttr && $node->value) {
                    $single_page_url = $node->value;
                    break 2;
                }
            }
        }
    }

    // If we've got a URL, resolve it against $url
    if (isset($single_page_url) && ($single_page_url = makeAbsoluteStr($url, $single_page_url))) {
        // Only fetch when it is not the page we already have.
        if ($single_page_url != $url) {
            // Temporarily use the target as referer, and always restore the
            // previous value regardless of the outcome.
            $_prev_ref = $http->referer;
            $http->referer = $single_page_url;
            $response = $http->get($single_page_url, true);
            $http->referer = $_prev_ref;
            if ($response && $response['status_code'] < 300) {
                return $response;
            }
        }
    }
    return false;
}
開發者ID:oxmcvusd,項目名稱:full-text-rss-1,代碼行數:69,代碼來源:makefulltextfeed.php

示例2: process

 public function process($html, $url, $smart_tidy = true)
 {
     $this->reset();
     // extract host name
     $host = @parse_url($url, PHP_URL_HOST);
     if (!($this->config = SiteConfig::build($host))) {
         // no match, so use defaults
         $this->config = new SiteConfig();
     }
     // store copy of config in our static cache array in case we need to process another URL
     SiteConfig::add_to_cache($host, $this->config);
     // use tidy (if it exists)?
     // This fixes problems with some sites which would otherwise
     // trouble DOMDocument's HTML parsing. (Although sometimes it
     // makes matters worse, which is why you can override it in site config files.)
     $tidied = false;
     if ($this->config->tidy && function_exists('tidy_parse_string') && $smart_tidy) {
         $this->debug('Using Tidy');
         $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
         if (tidy_clean_repair($tidy)) {
             $original_html = $html;
             $tidied = true;
             $html = $tidy->value;
         }
         unset($tidy);
     }
     // load and parse html
     $this->readability = new Readability($html, $url);
     // we use xpath to find elements in the given HTML document
     // see http://en.wikipedia.org/wiki/XPath_1.0
     $xpath = new DOMXPath($this->readability->dom);
     // strip elements (using xpath expressions)
     foreach ($this->config->strip as $pattern) {
         $elems = @$xpath->query($pattern, $this->readability->dom);
         // check for matches
         if ($elems && $elems->length > 0) {
             $this->debug('Stripping ' . $elems->length . ' elements (strip)');
             for ($i = $elems->length - 1; $i >= 0; $i--) {
                 $elems->item($i)->parentNode->removeChild($elems->item($i));
             }
         }
     }
     // strip elements (using id and class attribute values)
     foreach ($this->config->strip_id_or_class as $string) {
         $string = strtr($string, array("'" => '', '"' => ''));
         $elems = @$xpath->query("//*[contains(@class, '{$string}') or contains(@id, '{$string}')]", $this->readability->dom);
         // check for matches
         if ($elems && $elems->length > 0) {
             $this->debug('Stripping ' . $elems->length . ' elements (strip_id_or_class)');
             for ($i = $elems->length - 1; $i >= 0; $i--) {
                 $elems->item($i)->parentNode->removeChild($elems->item($i));
             }
         }
     }
     // strip images (using src attribute values)
     foreach ($this->config->strip_image_src as $string) {
         $string = strtr($string, array("'" => '', '"' => ''));
         $elems = @$xpath->query("//img[contains(@src, '{$string}')]", $this->readability->dom);
         // check for matches
         if ($elems && $elems->length > 0) {
             $this->debug('Stripping ' . $elems->length . ' image elements');
             for ($i = $elems->length - 1; $i >= 0; $i--) {
                 $elems->item($i)->parentNode->removeChild($elems->item($i));
             }
         }
     }
     // strip elements using Readability.com and Instapaper.com ignore class names
     // .entry-unrelated and .instapaper_ignore
     // See https://www.readability.com/publishers/guidelines/#view-plainGuidelines
     // and http://blog.instapaper.com/post/730281947
     $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom);
     // check for matches
     if ($elems && $elems->length > 0) {
         $this->debug('Stripping ' . $elems->length . ' .entry-unrelated,.instapaper_ignore elements');
         for ($i = $elems->length - 1; $i >= 0; $i--) {
             $elems->item($i)->parentNode->removeChild($elems->item($i));
         }
     }
     // strip elements that contain style="display: none;"
     $elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom);
     // check for matches
     if ($elems && $elems->length > 0) {
         $this->debug('Stripping ' . $elems->length . ' elements with inline display:none style');
         for ($i = $elems->length - 1; $i >= 0; $i--) {
             $elems->item($i)->parentNode->removeChild($elems->item($i));
         }
     }
     // try to get title
     foreach ($this->config->title as $pattern) {
         $elems = @$xpath->evaluate($pattern, $this->readability->dom);
         if (is_string($elems)) {
             $this->debug('Title expression evaluated as string');
             $this->title = trim($elems);
             break;
         } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
             $this->debug('Title matched');
             $this->title = $elems->item(0)->textContent;
             break;
         }
     }
//.........這裏部分代碼省略.........
開發者ID:jaimejorge,項目名稱:full-text-rss,代碼行數:101,代碼來源:ContentExtractor.php

示例3: buildSiteConfig

 /**
  * Build the site config for a URL, optionally caching the result.
  *
  * The host is lower-cased and stripped of a leading "www.". A cached
  * "<host>.merged" entry is returned directly when present. Otherwise the
  * host config (or a default SiteConfig) is extended, while
  * autodetect_on_failure() allows it, with a fingerprint-matched config
  * and then with the 'global' config.
  *
  * @param string $url          URL whose host selects the config.
  * @param string $html         HTML passed to fingerprint detection.
  * @param bool   $add_to_cache Whether to store the configs that were built.
  *
  * @return SiteConfig|mixed The merged config (or whatever SiteConfig::build()
  *                          returns for the cached "<host>.merged" key).
  */
 public function buildSiteConfig($url, $html = '', $add_to_cache = true)
 {
     // Normalise the host: lower-case, without a leading "www."
     $host = strtolower(@parse_url($url, PHP_URL_HOST));
     if (strpos($host, 'www.') === 0) {
         $host = substr($host, 4);
     }
     $mergedKey = "{$host}.merged";

     // A merged version from an earlier call wins.
     if (SiteConfig::is_cached($mergedKey)) {
         $this->debug("Returning cached and merged site config for {$host}");
         return SiteConfig::build($mergedKey);
     }

     // Build from the site config files; fall back to defaults on no match.
     $hostConfig = SiteConfig::build($host);
     if ($hostConfig) {
         if ($add_to_cache && !SiteConfig::is_cached($host)) {
             SiteConfig::add_to_cache($host, $hostConfig);
         }
         $config = $hostConfig;
     } else {
         $config = new SiteConfig();
     }

     // Fingerprint config: only when allowed and the HTML matches a fingerprint.
     if ($config->autodetect_on_failure()
         && !empty($this->fingerprints)
         && ($fingerprintHost = $this->findHostUsingFingerprints($html))
     ) {
         $fingerprintConfig = SiteConfig::build($fingerprintHost);
         if ($fingerprintConfig) {
             $this->debug("Appending site config settings from {$fingerprintHost} (fingerprint match)");
             $config->append($fingerprintConfig);
             if ($add_to_cache && !SiteConfig::is_cached($fingerprintHost)) {
                 SiteConfig::add_to_cache($fingerprintHost, $fingerprintConfig);
             }
         }
     }

     // Global config: checked again because the append above may have
     // changed what autodetect_on_failure() reports.
     if ($config->autodetect_on_failure()) {
         $globalConfig = SiteConfig::build('global', true);
         if ($globalConfig) {
             $this->debug('Appending site config settings from global.txt');
             $config->append($globalConfig);
             if ($add_to_cache && !SiteConfig::is_cached('global')) {
                 SiteConfig::add_to_cache('global', $globalConfig);
             }
         }
     }

     if (!$add_to_cache) {
         return $config;
     }

     // Store a copy of the merged config. The third argument is true only
     // when the config's cache_key equals the host (i.e. not a wildcard match).
     $hostIsCacheKey = $host == $config->cache_key;
     $config->cache_key = null;
     SiteConfig::add_to_cache($mergedKey, $config, $hostIsCacheKey);

     return $config;
 }
開發者ID:oxmcvusd,項目名稱:full-text-rss-3.4,代碼行數:56,代碼來源:ContentExtractor.php

示例4: process

 public function process($html, $url, $smart_tidy = true)
 {
     $this->reset();
     // extract host name
     $host = @parse_url($url, PHP_URL_HOST);
     if (!($this->config = SiteConfig::build($host))) {
         // no match, check HTML for fingerprints
         if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
             $this->config = SiteConfig::build($_fphost);
         }
         unset($_fphost);
         if (!$this->config) {
             // no match, so use defaults
             $this->config = new SiteConfig();
         }
     }
     //echo count($this->config->body);
     // store copy of config in our static cache array in case we need to process another URL
     SiteConfig::add_to_cache($host, $this->config);
     // do string replacements
     foreach ($this->config->replace_string as $_repl) {
         $html = str_replace($_repl[0], $_repl[1], $html);
     }
     unset($_repl);
     // use tidy (if it exists)?
     // This fixes problems with some sites which would otherwise
     // trouble DOMDocument's HTML parsing. (Although sometimes it
     // makes matters worse, which is why you can override it in site config files.)
     $tidied = false;
     if ($this->config->tidy && function_exists('tidy_parse_string') && $smart_tidy) {
         $this->debug('Using Tidy');
         $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
         if (tidy_clean_repair($tidy)) {
             $original_html = $html;
             $tidied = true;
             $html = $tidy->value;
         }
         unset($tidy);
     }
     // load and parse html
     $this->readability = new Readability($html, $url);
     // we use xpath to find elements in the given HTML document
     // see http://en.wikipedia.org/wiki/XPath_1.0
     $xpath = new DOMXPath($this->readability->dom);
     // try to get title
     foreach ($this->config->title as $pattern) {
         $elems = @$xpath->evaluate($pattern, $this->readability->dom);
         if (is_string($elems)) {
             $this->debug('Title expression evaluated as string');
             $this->title = trim($elems);
             break;
         } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
             $this->debug('Title matched');
             $this->title = $elems->item(0)->textContent;
             // remove title from document
             try {
                 $elems->item(0)->parentNode->removeChild($elems->item(0));
             } catch (DOMException $e) {
                 // do nothing
             }
             break;
         }
     }
     // try to get author (if it hasn't already been set)
     if (empty($this->author)) {
         foreach ($this->config->author as $pattern) {
             $elems = @$xpath->evaluate($pattern, $this->readability->dom);
             if (is_string($elems)) {
                 $this->debug('Author expression evaluated as string');
                 if (trim($elems) != '') {
                     $this->author[] = trim($elems);
                     break;
                 }
             } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
                 foreach ($elems as $elem) {
                     if (!isset($elem->parentNode)) {
                         continue;
                     }
                     $this->author[] = trim($elem->textContent);
                 }
                 if (!empty($this->author)) {
                     break;
                 }
             }
         }
     }
     // try to get language
     $_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
     foreach ($_lang_xpath as $pattern) {
         $elems = @$xpath->evaluate($pattern, $this->readability->dom);
         if (is_string($elems)) {
             if (trim($elems) != '') {
                 $this->language = trim($elems);
                 break;
             }
         } elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
             foreach ($elems as $elem) {
                 if (!isset($elem->parentNode)) {
                     continue;
                 }
//.........這裏部分代碼省略.........
開發者ID:oxmcvusd,項目名稱:full-text-rss-1,代碼行數:101,代碼來源:ContentExtractor.php


注:本文中的SiteConfig::add_to_cache方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。