本文整理汇总了PHP中simple_html_dom::clear方法的典型用法代码示例。如果您正苦于以下问题:PHP simple_html_dom::clear方法的具体用法?PHP simple_html_dom::clear怎么用?PHP simple_html_dom::clear使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类simple_html_dom的用法示例。
在下文中一共展示了simple_html_dom::clear方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: clubURL
/**
 * Scrape one club page and save a record per season found on it.
 *
 * The club name is taken from the third row of the first table on the page;
 * season rows are taken from the third table. Each season row whose first
 * cell is numeric is stored through scraperwiki::save(), keyed on club + year.
 * The club name is also appended to the global 'clubs' list.
 *
 * @param string $url Address of the club page to scrape.
 * @return void
 */
function clubURL($url)
{
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    // NOTE(review): the ' ' search strings below may originally have been a
    // non-breaking space / &nbsp; that was flattened in transit - confirm.
    $clubName = trim(str_replace(' ', '', $dom->find('table', 0)->find('tr', 2)->plaintext));
    $formatClubName = trim(preg_replace('/\\s+/', ' ', $clubName));

    // $GLOBALS is the real superglobal. The former "$_GLOBAL" was merely an
    // undefined local array, so the collected club names were thrown away
    // as soon as the function returned.
    $GLOBALS['clubs'][] = $formatClubName;
    echo 'running ' . $formatClubName . "\n";

    foreach ($dom->find('table', 2)->find('tr') as $row) {
        $yearCell = $row->find('td', 0);
        // Rows without a <td> (e.g. header rows) or with a non-numeric first
        // cell are not season rows.
        if (!$yearCell || !is_numeric($yearCell->plaintext)) {
            continue;
        }

        $year = trim($yearCell->plaintext);
        $position = trim(str_replace(' ', '', $row->find('td', 1)->plaintext));
        if ($position === 'Champion') {
            $position = 1;
        }
        $leagueLevel = trim($row->find('td', 2)->plaintext);
        $overallPosition = trim($row->find('td', 3)->plaintext);
        // Dots are stripped from the attendance figures before storing.
        $avgAttendance = trim(str_replace('.', '', $row->find('td', 4)->plaintext));
        $totalAttendance = trim(str_replace('.', '', $row->find('td', 12)->plaintext));

        $dataset = array(
            'club' => $formatClubName,
            'year' => $year,
            'finishedPosition' => $position,
            'league' => $leagueLevel,
            'overallPosition' => $overallPosition,
            'avgAttendance' => $avgAttendance,
            'totalAttendance' => $totalAttendance,
        );
        scraperwiki::save(array('club', 'year'), $dataset);
    }

    /*
     * The next two lines stop a memory leak in Simple HTML DOM as per
     * http://simplehtmldom.sourceforge.net/manual_faq.htm#memory_leak
     */
    $dom->clear();
    unset($dom);
}
示例2: save
/**
 * Convert an HTML fragment to a .docx file under "<dir>wordtemp/".
 *
 * The fragment is wrapped in dummy <html><body> elements, parsed with
 * simple_html_dom, fed to htmltodocx_insert_html() and written with the
 * PHPWord 'Word2007' writer. The output file is named after the current
 * Unix timestamp.
 *
 * @param string $html HTML fragment to convert.
 * @param string $dir  Base directory; it is concatenated directly with
 *                     "wordtemp/", so it is presumably expected to end with
 *                     a directory separator - verify against callers.
 * @return string Path of the generated .docx file.
 */
public function save($html, $dir)
{
    import("@.ORG.htmltodocx.documentation.support_functions");
    $phpword_object = new PHPWord();
    $section = $phpword_object->createSection();

    // Nest the fragment in a couple of dummy elements so that there is
    // always an <html> root to start from.
    $html_dom = new simple_html_dom();
    $html_dom->load('<html><body>' . $html . '</body></html>');
    // The array of DOM elements we are going to work on:
    $html_dom_array = $html_dom->find('html', 0)->children();

    // Needed for base_root and base_path in the initial state below.
    $paths = htmltodocx_paths();

    // Initial converter settings:
    $initial_state = array(
        'phpword_object' => &$phpword_object,
        'base_root' => $paths['base_root'],
        'base_path' => $paths['base_path'],
        'current_style' => array('size' => '11'),
        'parents' => array(0 => 'body'),
        'list_depth' => 0,
        'context' => 'section',
        'pseudo_list' => TRUE,
        'pseudo_list_indicator_font_name' => 'Wingdings',
        'pseudo_list_indicator_font_size' => '7',
        'pseudo_list_indicator_character' => 'l ',
        'table_allowed' => TRUE,
        'treat_div_as_paragraph' => TRUE,
        'style_sheet' => htmltodocx_styles_example(),
    );

    // Convert the HTML and put it into the PHPWord object.
    htmltodocx_insert_html($section, $html_dom_array[0]->nodes, $initial_state);

    // Release the parser's internal references to avoid leaking memory.
    $html_dom->clear();
    unset($html_dom);

    // The previous code called explode() on $h2d_file_uri before that
    // variable existed and never used the result; that dead statement has
    // been dropped.
    $target_dir = $dir . "wordtemp/";
    $h2d_file_uri = $target_dir . time() . ".docx";
    // Create the target folder if it does not exist yet.
    if (!file_exists($target_dir)) {
        $this->createFolders($target_dir);
    }

    $objWriter = PHPWord_IOFactory::createWriter($phpword_object, 'Word2007');
    $objWriter->save($h2d_file_uri);
    return $h2d_file_uri;
}
示例3: parse
/**
 * Parse the form's print template, cache the extracted elements and store
 * the regenerated template via FlowFormType.
 *
 * @param bool $isUpdate When true, data-id attributes are stripped from the
 *                       template and the item counter starts from 0;
 *                       otherwise the template is used as-is and the counter
 *                       starts from the current itemmax.
 * @return void
 */
public function parse($isUpdate = false)
{
    Ibos::import("application.extensions.simple_html_dom", true);

    if (!$isUpdate) {
        $model = $this->printmodel;
        $max = intval($this->itemmax);
    } else {
        $model = preg_replace("/\\s+data-id\\s?=\\s?\"?\\d+\"?/i", "", $this->printmodel);
        $max = 0;
    }

    $elements = array();
    $doc = new simple_html_dom();
    $doc->load($model, true, true, CHARSET);

    $items = $doc->find("ic");
    $config = $this->getItemConfig();
    // Only refactor when there are both <ic> items and an item config.
    // NOTE(review): refactor() presumably updates $max and $elements by
    // reference - confirm against its signature.
    if (!(empty($items) || empty($config))) {
        $this->refactor($items, $config, $max, $elements);
    }
    $html = $doc->save();

    $this->_cache = $elements;
    CacheUtil::set("form_" . $this->ID, $elements);

    $form = array("printmodelshort" => $html);
    if ($max != $this->itemmax) {
        $form["itemmax"] = $max;
    }

    $doc->clear();
    FlowFormType::model()->modify($this->ID, $form);
}
示例4: str_get_html
/**
 * Build a simple_html_dom object from an HTML string.
 *
 * @return simple_html_dom|false The loaded DOM, or false when $str is empty
 *                               or longer than MAX_FILE_SIZE.
 */
function str_get_html($str, $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT) {
    $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

    $isLoadable = !empty($str) && strlen($str) <= MAX_FILE_SIZE;
    if ($isLoadable) {
        $dom->load($str, $lowercase, $stripRN);
        return $dom;
    }

    // Nothing usable to parse: release the freshly created object.
    $dom->clear();
    return false;
}
示例5: generate_docx
/**
 * Render an HTML document into a .docx file at $file_path.
 *
 * @param string $html                   Full HTML document; its <html>
 *                                       element is looked up directly.
 * @param string $file_path              Destination path of the .docx file.
 * @param mixed  $file_takeout_tmp_files Passed by reference into the
 *                                       converter state as 'download_img_tmp'.
 * @return void
 */
function generate_docx($html, $file_path, &$file_takeout_tmp_files)
{
    $word = new PHPWord();
    $section = $word->createSection();

    $dom = new simple_html_dom();
    $dom->load($html);
    $rootChildren = $dom->find('html', 0)->children();

    $paths = htmltodocx_paths();
    $state = array(
        'phpword_object' => &$word,
        'base_root' => $paths['base_root'],
        'base_path' => $paths['base_path'],
        'current_style' => array('size' => '11'),
        'parents' => array(0 => 'body'),
        'list_depth' => 0,
        'context' => 'section',
        'pseudo_list' => TRUE,
        'pseudo_list_indicator_font_name' => 'Wingdings',
        'pseudo_list_indicator_font_size' => '7',
        'pseudo_list_indicator_character' => 'l ',
        'table_allowed' => TRUE,
        'treat_div_as_paragraph' => FALSE,
        'style_sheet' => htmltodocx_styles(),
        'download_img_path' => elgg_get_data_path(),
        'download_img_tmp' => &$file_takeout_tmp_files,
    );
    htmltodocx_insert_html($section, $rootChildren[0]->nodes, $state);

    // Free the parser before writing the document.
    $dom->clear();
    unset($dom);

    // Word2007 is the only writer option here.
    $writer = PHPWord_IOFactory::createWriter($word, 'Word2007');
    $writer->save($file_path);
}
示例6: clean_children
/**
 * Return the text of an HTML fragment with the markup of every direct child
 * element removed.
 *
 * The fragment is wrapped in a temporary <crawler> element so that it has a
 * single root; each direct child's string form is then cut out of the
 * wrapper's inner text.
 *
 * @param string $a_sHTML HTML fragment (taken by reference, not modified here).
 * @return string Inner text with the direct child elements removed.
 */
private function clean_children(&$a_sHTML)
{
    $dom = new simple_html_dom();
    $dom->load('<crawler>' . $a_sHTML . '</crawler>');

    $wrapper = $dom->find('crawler', 0);
    $remaining = (string) $wrapper->innertext;
    foreach ($wrapper->children() as $child) {
        // The node is stringified by str_replace() itself.
        $remaining = str_replace($child, '', $remaining);
    }

    $dom->clear();
    unset($wrapper);
    unset($dom);
    return $remaining;
}
示例7: add_h_filter
/**
 * Append a sub-menu of in-page anchors to every "?page_id=N" link.
 *
 * For each <a> in the given HTML whose href ends in "?page_id=<digits>", the
 * referenced page is loaded with get_page(), its <h1 id="..."> headings are
 * collected, and a <ul class='submenu'> linking to each heading is inserted
 * directly after the link.
 *
 * @param string $foo HTML fragment to process.
 * @return string The processed HTML, still wrapped in the <html><body>
 *                elements added for parsing.
 */
function add_h_filter($foo)
{
    $p = new simple_html_dom();
    // The wrapper is now closed properly; it used to be "closed" with a
    // second '<html><body>'.
    $p->load('<html><body>' . $foo . '</body></html>');

    foreach ($p->find("a") as $elm) {
        error_log($elm->href);

        $match = array();
        if (!preg_match("/\\?page_id\\=(\\d+?)\$/", $elm->href, $match)) {
            continue;
        }

        $page_id = $match[1];
        $page = get_page($page_id);
        if (!$page) {
            // Unknown page id: nothing to build a sub-menu from.
            continue;
        }

        $p2 = new simple_html_dom();
        $p2->load('<html><body>' . $page->post_content . '</body></html>');
        $submenu_array = array();
        foreach ($p2->find('h1[id]') as $idh1) {
            error_log($idh1->id);
            $submenu_array[] = array($idh1->id, $idh1->plaintext);
        }
        // Only plain strings were copied out, so the inner DOM can go now.
        $p2->clear();
        unset($p2);

        if (sizeof($submenu_array) !== 0) {
            $submenu = "<ul class='submenu'>\n";
            $blogurl = get_bloginfo('url');
            foreach ($submenu_array as $sub) {
                // Each item is closed with </li>; it used to be closed with </h1>.
                $submenu .= '<li><a href="' . $blogurl . '?page_id=' . $page_id . '#' . $sub[0] . '">' . $sub[1] . '</a></li>' . "\n";
            }
            $submenu .= '</ul>';
            $elm->outertext = $elm->outertext . $submenu;
        }
    }

    $foo = $p->outertext;
    $p->clear();
    unset($p);
    return $foo;
}
示例8: addToTable
/**
 * Insert a share button into the share table identified by $position,
 * creating the table and/or its button row when they do not exist yet.
 *
 * The markup is saved and re-loaded after each structural change so that
 * later lookups see the elements just added.
 *
 * @param string $text     HTML to modify.
 * @param string $position CSS class of the target table; when the table has
 *                         to be created, 'myApiShareTop' puts it before the
 *                         text and any other value puts it after.
 * @param string $button   Button markup to place in a new <td>.
 * @return string The modified HTML.
 */
function addToTable($text, $position, $button)
{
    $selector = '.' . $position;
    $buttonCell = '<td>' . $button . '</td>';

    $dom = new simple_html_dom();
    $dom->load($text);

    // Step 1: make sure the target table exists.
    if (!$dom->find($selector, 0)) {
        $table = '<table class="' . $position . ' myApiShareTable"></table>';
        if ($position == 'myApiShareTop') {
            $text = $table . $text;
        } else {
            $text = $text . $table;
        }
        $dom->load($text);
    }
    $text = $dom->save();
    $dom->load($text);

    // Step 2: add the button, creating the button row if necessary.
    $buttonsRow = $dom->find($selector, 0)->find('.myApiButtons', 0);
    if ($buttonsRow) {
        $innerRow = $buttonsRow->find('table', 0)->find('tr', 0);
        $innerRow->innertext = $buttonCell . $innerRow->innertext;
    } else {
        $newRow = '<tr class="myApiButtons"><td><table><tr>' . $buttonCell . '</tr></table></td></tr>';
        $target = $dom->find($selector, 0);
        $target->innertext = $newRow . $target->innertext;
    }
    $text = $dom->save();
    $dom->load($text);

    // Step 3: stretch the comments cell of the bottom table over all cells
    // found in its button row.
    $bottomTable = $dom->find('.myApiShareBottom', 0);
    $commentsCell = $bottomTable ? $bottomTable->find('.myApiCommentsCell', 0) : null;
    $bottomButtons = $commentsCell ? $bottomTable->find('.myApiButtons', 0) : null;
    if ($bottomButtons) {
        $commentsCell->colspan = sizeof($bottomButtons->find('td'));
        $text = $dom->save();
    }

    $dom->clear();
    unset($dom);
    return $text;
}
示例9: _Process_Recieved_Content
public static function _Process_Recieved_Content($_HTML_CONTENT, $_Cung1, $_Cung2, $_Summary, $_SourceUri, $_LinkId, $_ImageLink)
{
if ($_HTML_CONTENT != '') {
// Create a DOM object
require_once Kohana::find_file('classes', 'vendor/simple_html_dom');
$html = new simple_html_dom();
// Load HTML from a string
$html->load($_HTML_CONTENT);
unset($_HTML_CONTENT);
if ($html) {
$story = new Model_Horoscope_XungHop();
$ktra = true;
if ($_Cung1 == '-' || $_Cung2 == '-') {
$ktra = FALSE;
}
$story->cung_1 = $_Cung1;
$story->cung_2 = $_Cung2;
$story->alias = $_Cung1 . '_' . $_Cung2;
if (self::CheckRecordByAlias($story->alias)) {
$story->alias = $_Cung1 . '__' . $_Cung2;
}
$story->tom_tat = $_Summary;
$story->ngay_tao = date("Y-m-d");
$story->url_nguon = $_SourceUri;
$story->auto_get = true;
//begin find elements
#find date post
$date = $html->find('div[class="datetime"]', 0);
if ($date) {
$d = explode(',', $date->plaintext);
if (isset($d[1])) {
//var_dump($d);
//exit;
$d1 = explode(' ', trim($d[1]));
list($ngay, $thang, $nam) = explode('/', $d1[0]);
$story->source_date = date("Y-m-d h:i:s", strtotime($nam . '-' . $thang . '-' . $ngay . ' ' . $d1[1] . ':00'));
} else {
$story->source_date = date("Y-m-d h:i:s");
}
} else {
$story->source_date = date("Y-m-d h:i:s");
}
//find content
$content = $html->find('div[id="content_document"]', 0);
if ($content) {
$string = $content->innertext;
# remove white space
$string = str_replace(array("\r\n", "\r", "\n", "\t"), '', $string);
$string = preg_replace('/(<!--.+?-->)/s', '', $string);
$string = preg_replace('@<a[^>]*>(.*)</a>@ismUx', '$1', $string);
$string = preg_replace('/<p[ ]class="pAuthor">.*<\\/p>/ismxU', '', $string);
$string = preg_replace('/<p[ ]class="pSource">.*<\\/p>/ismxU', '', $string);
$story->noi_dung = $string;
$story->kiem_tra = $ktra;
$story->save();
if ($story->identifier()) {
if ($ktra) {
//get image thumb => save to disk => update record in db
$path = 'assets/horoscope/xung-hop/' . $story->alias . '/';
$img = Vendor_Crawler::get_file_from_url_by_curl($_ImageLink, $save_to_path = $path, $file_name_to_set = $story->alias . '-thumb');
if ($img) {
//check file size, if = 0 -> mean file can't get
if (filesize($img) == 0) {
@copy('assets/horoscope/thumb_140.jpg', $img);
}
$story->hinh_anh = '/' . $img;
} else {
$story->hinh_anh = $_ImageLink;
}
} else {
$story->hinh_anh = $_ImageLink;
}
if ($ktra != FALSE) {
//print_r($img);
$html2 = new simple_html_dom();
$html2->load($story->noi_dung);
$images = $html2->find('img');
if (count($images) > 0) {
for ($i = 0; $i < count($images); $i++) {
unset($images[$i]->onclick);
$file_name = 'anh_' . $i + 1;
$get_file = Vendor_Crawler::get_file_from_url_by_curl($images[$i]->src, $save_to_path = $path, $file_name_to_set = $file_name);
if (filesize(ltrim($get_file, '/')) == 0) {
unset($images[$i]);
} else {
$images[$i]->src = '/' . $get_file;
}
}
}
$story->noi_dung = $html2->save();
$html2->clear();
unset($html2);
} else {
$story->hinh_anh = $_ImageLink;
}
$story->save();
//insert done => update from tmp table
Model_Horoscope_XungHopLinkBLL::UpdateRecordStatus($_LinkId);
self::_print_to_console('Done: ' . $_SourceUri);
} else {
//.........这里部分代码省略.........
示例10: scrapeHTML
/**
 * Scrape the flight table for one listing type and save every flight row.
 *
 * @param string $param Value for the "type" query parameter of BASE_URL.
 * @param string $type  Label stored as "flight_type" on every record.
 * @return void
 */
function scrapeHTML($param, $type)
{
    $page = scraperWiki::scrape(BASE_URL . "?type={$param}");
    $dom = new simple_html_dom();
    $dom->load($page);

    // Every table row with HEIGHT='25' is treated as a flight row.
    foreach ($dom->find("TR[@HEIGHT='25']") as $rowNode) {
        $cells = $rowNode->find("td");

        $airline = removeSpaces($cells[0]->plaintext);
        $flight_num = removeSpaces($cells[1]->plaintext);
        $destination = removeSpaces($cells[2]->plaintext);
        $time = removeSpaces($cells[3]->plaintext);
        $gate = removeSpaces($cells[4]->plaintext);
        $remarks = removeSpaces($cells[5]->plaintext);

        // Everything except the header row gets stored.
        if ($airline != "Airline") {
            $record = array(
                "date" => date("m.d.y"),
                "airline" => $airline,
                "flight_type" => $type,
                "flight_num" => $flight_num,
                "destination" => $destination,
                "time" => $time,
                "gate" => $gate,
                "remarks" => $remarks,
            );
            saveData(array("date", "airline", "flight_type", "flight_num"), $record);
        }
    }

    $dom->clear();
}
示例11: lightboxPlusReplace
//.........这里部分代码省略.........
}
break;
}
}
break;
default:
/**
* find all links with image only else if (do not autolightbox textlinks) then
*/
foreach ($html->find('a[href*=jpg$] img, a[href*=gif$] img, a[href*=png$] img, a[href*=jpeg$] img, a[href*=bmp$] img') as $e) {
/**
* Generate HTML5 yes/no
*/
switch ($lightboxPlusOptions['output_htmlv']) {
case 1:
$htmlv_prop = 'data-' . $lightboxPlusOptions['data_name'];
switch ($lightboxPlusOptions['use_class_method']) {
/**
* Use Class Method is selected - yes/no
*/
case 1:
if ($e->parent()->class && $e->parent()->class != $lightboxPlusOptions['class_name']) {
$e->parent()->class .= ' ' . $lightboxPlusOptions['class_name'];
if (!$e->parent()->{$htmlv_prop}) {
$e->parent()->{$htmlv_prop} = 'lightbox[' . $postGroupID . $unq_id . ']';
}
} else {
$e->parent()->class = $lightboxPlusOptions['class_name'];
if (!$e->parent()->{$htmlv_prop}) {
$e->parent()->{$htmlv_prop} = 'lightbox[' . $postGroupID . $unq_id . ']';
}
}
break;
default:
if (!$e->parent()->{$htmlv_prop}) {
$e->parent()->{$htmlv_prop} = 'lightbox[' . $postGroupID . $unq_id . ']';
}
break;
}
break;
default:
switch ($lightboxPlusOptions['use_class_method']) {
/**
* Use Class Method is selected - yes/no
*/
case 1:
if ($e->parent()->class && $e->parent()->class != $lightboxPlusOptions['class_name']) {
$e->parent()->class .= ' ' . $lightboxPlusOptions['class_name'];
if (!$e->parent()->rel) {
$e->parent()->rel = 'lightbox[' . $postGroupID . $unq_id . ']';
}
} else {
$e->parent()->class = $lightboxPlusOptions['class_name'];
if (!$e->parent()->rel) {
$e->parent()->rel = 'lightbox[' . $postGroupID . $unq_id . ']';
}
}
break;
default:
if (!$e->parent()->rel) {
$e->parent()->rel = 'lightbox[' . $postGroupID . $unq_id . ']';
}
break;
}
break;
}
/**
* Do Not Display Title is select - yes/no
*/
switch ($lightboxPlusOptions['no_display_title']) {
case 1:
$e->parent()->title = null;
break;
default:
if (!$e->parent()->title) {
if ($e->title) {
$e->parent()->title = $e->title;
} else {
$e->parent()->title = $postGroupTitle;
}
}
if ($lightboxPlusOptions['use_caption_title']) {
//if ($e->parent()->next_sibling()->innertext) { $e->parent()->title = $e->parent()->next_sibling()->innertext; }
//if ($e->parent()->next_sibling()->innertext) { $e->title = $e->parent()->next_sibling()->innertext; }
if ($e->find('img[src*=jpg$], img[src*=gif$], img[src*=png$], img[src*=jpeg$], img[src*=bmp$]') && ($e->next_sibling()->class = 'wp-caption-text')) {
$e->title = $e->next_sibling()->innertext;
} elseif ($e->find('img[src*=jpg$], img[src*=gif$], img[src*=png$], img[src*=jpeg$], img[src*=bmp$]') && ($e->parent()->next_sibling()->class = 'gallery-caption')) {
$e->title = $e->parent()->next_sibling()->innertext;
}
}
break;
}
}
break;
}
$content = $html->save();
$html->clear();
unset($html);
return $content;
}
示例12: action_sua
/**
 * Controller action that edits an existing story.
 *
 * On a valid POST the story fields are updated and saved. When the alias
 * changed, the images referenced in the story content are fetched again into
 * the directory of the new alias and the content is saved a second time.
 * The request is then redirected to the index page. On an invalid POST the
 * validation errors are handed to the edit view. When the story cannot be
 * found, the request is redirected to the index page.
 *
 * @param mixed $story_id Identifier passed to the story finder.
 * @return void
 */
public function action_sua($story_id)
{
$this->template->title = __('Sửa bài viết: xung - hợp cung');
$this->template->section_title = __('Sửa bài viết: xung - hợp cung');
$data = array();
$story = Model_Horoscope_XungHopBLL::getInstance()->find($story_id);
if ($story) {
if (Request::$method == 'POST') {
// print_r($_POST);
// die();
$post = $story->validate_update($_POST);
if ($post->check()) {
//begin save
$post_values = $post->as_array();
// Remember the previous alias so a rename can be detected below.
$old_alias = $story->alias;
//
//alias changed => image changed => directory changed => images in content not get :(
$story->hinh_anh = $post_values['hinh_anh'];
$story->alias = $post_values['alias'];
$story->cung_1 = $post_values['cung_1'];
$story->cung_2 = $post_values['cung_2'];
$story->tom_tat = trim($post_values['tom_tat']);
$story->noi_dung = $post_values['noi_dung'];
$story->kiem_tra = true;
$story->save();
//print_r($img);
// Create a DOM object
if ($old_alias != $post_values['alias']) {
//remove old folder (if existed when update)
// NOTE(review): rmdir() only removes empty directories and the error is
// suppressed with @, so a non-empty old folder is silently left behind.
@rmdir('assets/horoscope/xung-hop/' . $old_alias . '/');
require_once Kohana::find_file('classes', 'vendor/simple_html_dom');
$html2 = new simple_html_dom();
$html2->load($story->noi_dung);
$images = $html2->find('img');
if ($images) {
// Counter used to build the file names anh_1, anh_2, ...
$i = 1;
foreach ($images as $image) {
// Drop any inline onclick handler from the <img>.
unset($image->onclick);
$path = 'assets/horoscope/xung-hop/' . $story->alias . '/';
$file_name = 'anh_' . $i;
$get_file = Vendor_Crawler::get_file_from_url_by_curl($image->src, $save_to_path = $path, $file_name_to_set = $file_name);
if (filesize($get_file) == 0) {
// NOTE(review): this only unsets the local loop variable; the <img> element
// stays in the DOM with its old src. Confirm whether the element was meant
// to be removed from the content.
unset($image);
} else {
$image->src = '/' . $get_file;
}
$i++;
}
}
// Write the rewritten markup back and release the parser.
$story->noi_dung = $html2->save();
$html2->clear();
unset($html2);
$story->save();
}
Request::instance()->redirect('admin/horoscope_xunghop/index');
} else {
$_POST = $post->as_array();
#Affects errors for further display
$data['errors'] = $post->errors();
}
}
$data['story'] = $story->toArray();
$this->template->content = View::factory('horoscope/admin/xung-hop/sua', $data);
} else {
Request::instance()->redirect('admin/horoscope_xunghop/index');
}
}
示例13: collect
public function collect()
{
$url = trim($this->_post('url'));
//返回结果
$res = array('title' => '', 'content' => '');
//分析网页是否包含视频
$video = $this->uVideoUpload($url);
if ($video != '10' && $video != '11') {
//获取标题
$htm = file_get_html($url);
$title = $htm->find('title', 0)->plaintext;
$htm->clear();
$res['title'] = $title;
$res['content'] = $video;
echo json_encode($res);
exit;
}
//不含视频,则按文章处理
$collect = D('collect');
$domin = '';
$match = "/http:\\/\\/([^\\/]*).*/i";
if (!substr_count($url, "http")) {
$url = "http://" . $url;
}
preg_match($match, $url, $out);
$domin = $out[1];
if (!empty($domin)) {
//分析是不是音乐网站
$music_websites = C('MUSIC_WEBSITES');
if (in_array($domin, $music_websites)) {
$htm = file_get_html($url);
$p = preg_match('/var\\s*?_xiamitoken\\s*?=\\s*?[\'\\"](.*?)[\'\\"]/i', $htm, $out);
$token = $out[1];
//onclick="playalbum(682938274, '', '时间的歌', '');
$xid = '';
if (preg_match('/playalbum\\((\\d+),\\s*?\'*?\',\\s*?\'(.*?)\',\\s*?\'*?\'\\)/i', $htm, $out)) {
//xid
$xid = $out[1];
//title
$title = $out[2];
} else {
if (preg_match('/\\/album\\/(\\d{1,})/', $htm, $out)) {
$xid = $out[1];
$title = $htm->find('div#title', 0)->plaintext;
} else {
if (preg_match('/var\\s*?cid\\s*?=\\s*?[\'\\"](.*?)[\'\\"]/i', $htm, $out)) {
#var cid = '22454617';
$xid = $out[1];
$title = $htm->find('title', 0)->plaintext;
}
}
}
if ($xid) {
//http://www.xiami.com/ajax/getquote/type/2/id/682938274?_xiamitoken=0802020a13ba3df687e7ca4ef45cf1a8
$zurl = "http://www.xiami.com/ajax/getquote/type/2/id/{$xid}?_xiamitoken={$token}";
$htm = file_get_html($zurl);
$content = $htm->find('textarea.tarea', 1)->innertext;
$res['title'] = trim($title);
$res['content'] = $content;
//清除内存消耗
$htm->clear();
} else {
$res['title'] = '';
$res['content'] = '没有找到音乐';
}
echo json_encode($res);
exit;
}
//查看数据库中是否已经有该域名的记录
$c = $collect->where('domain="' . $domin . '"')->find();
if (!$c) {
//没有数据库记录,则title为页面title,content为body正文
$collect->data(array('alias' => $domin, 'domain' => $domin, 'match' => '123'))->add();
//查找body
$htm = file_get_html($url);
$title = $htm->find('title', 0)->plaintext;
$content = $htm->find('body', 0)->innertext;
//title取正文的10个左右字符
$res['title'] = $title;
$res['content'] = $content;
} else {
//找到了匹配规则
//新浪博客URL特殊处理,去掉结尾的 ?tj=...
if ($domin == 'blog.sina.com.cn') {
$url = preg_replace('/\\?tj=.*/i', '', $url);
}
$htm = file_get_html($url);
//获取title
$matchlist = $this->collect_match->get_matchlist_by_collect_type($c['id'], self::TYPE_TITLE);
if (!empty($matchlist)) {
$exec = '$htm';
foreach ($matchlist as $match) {
$exec .= "->find( '{$match['match']}', {$match['pos']} )";
}
$exec = $exec . '->plaintext;';
eval("\$str = {$exec};");
$res['title'] = $str;
} else {
$title = $htm->find('title', 0)->plaintext;
$res['title'] = $title;
//.........这里部分代码省略.........
示例14: convertImpl
//.........这里部分代码省略.........
} else {
$credit = "<h2>Реквизиты переводчиков</h2>";
if ($this->command) {
$credit .= "<p>Перевод команды {$this->command}</p>";
}
foreach ($this->workers as $activity => $workers) {
$credit .= '<p>' . $activity . ': <b>' . implode('</b>, <b>', $workers) . "</b></p>\n";
}
$credit .= '<p>Версия от ' . date('d.m.Y', $this->touched) . '</p>
<p><b>Любое коммерческое использование данного текста или его фрагментов запрещено</b></p>';
}
if ($this->height == 0) {
$text = preg_replace('/(<p[^>]*>)?<img[^>]*>(<\\/p>)?/u', '', $text);
} else {
for ($i = 1; $i < count($this->covers); ++$i) {
$image = $this->images[$this->covers[$i]];
$text = "<img src=\"" . $image['thumbnail'] . "\" width=\"" . $image['convert_width'] . "\" height=\"" . $image['convert_height'] . "\" />" . $text;
}
$text = preg_replace_callback('/(<a[^>]*>)?<img[^>]*data-resource-id="(-?\\d*)"[^>]*>(<\\/a>)?/u', function ($match) use(&$images) {
if ($match[2] < 0) {
return '';
}
$image = $this->images[$match[2]];
/* Width and height are unimportant. Actual resizing is done not in this class. We must save aspect ratio though. */
return "<img src=\"" . $image['thumbnail'] . "\" width=\"" . $image['convert_width'] . "\" height=\"" . $image['convert_height'] . "\" />";
}, $text);
}
$footnotes = array();
$footnotes_temp = explode(',;,', $this->footnotes);
for ($i = 0; $i < sizeof($footnotes_temp); $i++) {
if (is_numeric($footnotes_temp[$i])) {
$footnotes[$footnotes_temp[$i]] = $footnotes_temp[$i + 1];
$i++;
}
}
$text = trim($text);
$epubText = "<html>\n\t<body>\n\t\t{$descr['coverpage']}\n\t\t{$descr['author']}\n\t\t{$descr['sequence']}\n\t {$descr['annotation']}\n\t\t{$credit}\n\t\t{$text}\n\t</body>\n\t</html>";
$epubText = preg_replace_callback('@(<span[^>]*><a href="#cite_note-(\\d*)"[^>]*>.{0,15}</span>)@', function ($match) use(&$footnotes) {
$footnote = $footnotes[$match[2]];
$footnote = preg_replace('@</p>\\s*<p[^>]*>@', '<br/>', $footnote);
if ($footnote) {
return '<footnote>' . $footnote . '</footnote>';
} else {
return $match[1];
}
}, $epubText);
//preg_replace('@cite_note-(\d*)@',"<footnote></footnote>", $epubText);
//echo '<xmp>'.$epubText;
//echo $footnotes[137603266];
//exit;
//echo '<xmp>'.$epubText;
//exit;
$epubText = preg_replace('@section@', "div", $epubText);
/* Delete extra <br/> tag before images */
$epubText = preg_replace('@<div>(.){0,20}<br\\/>(.){0,20}<img src@', '<div><img src', $epubText);
/* Eliminate caret return before <h1> (Each div starts with caret return in h2d_htmlconverter.php) */
$epubText = preg_replace('@\\s*<div>(.{0,40})(<h1>.*?<\\/h1>)@', '\\1\\2<div>', $epubText);
/* NGNL Specific names */
//$text=str_replace('<span style="position: relative; text-indent: 0;"><span style="display: inline-block; font-style: normal">『   』</span><span style="position: absolute; font-size: .7em; top: -11px; left: 50%"><span style="position: relative; left: -50%;">','『<sup>',$text);
//$text=str_replace('</span></span></span>','</sup>』',$text);
// Styles of elements in which footnote is nested should not count. Thus close them
$epubText = preg_replace('@pb@', "br", $epubText);
//echo '<xmp>'.$epubText;
//exit;
//PHPWord doesn't support tags nested in link element. Unnest images from them
$epubText = preg_replace('@<a[^>]*>(<img[^>]*>)<\\/a>@', "\\1", $epubText);
// Delete extra page breaks related to images.
$epubText = preg_replace('@<div[^>]*>(.){0,20}(<img[^>]*>)(.){0,20}<\\/div>@', "\\1\\2\\3", $epubText);
$epubText = preg_replace('@<p[^>]*>(.){0,20}(<img[^>]*>)(.){0,20}<\\/p>@', "\\1\\2\\3", $epubText);
/* Swap h2 and img tags if img follows h2. (It gave a bad look in docx). */
$epubText = preg_replace('@(<h2>.{0,100}<\\/h2>)(<img[^>]*>)@', '\\2\\1', $epubText);
/* After swap we often needs to further lift img tag in previous <div> or <p> tag */
$epubText = preg_replace('@<\\/div>(<img[^>]*>)<h2@', '\\1</div><h2', $epubText);
$epubText = preg_replace('@<\\/p>(<img[^>]*>)<h2@', '\\1</p><h2', $epubText);
//echo '<xmp>'.$epubText;
//exit;
$phpword_object = new \PhpOffice\PhpWord\PhpWord();
\PhpOffice\PhpWord\Settings::setCompatibility(false);
$html_dom = new \simple_html_dom();
$html_dom->load($epubText);
$html_dom_array = $html_dom->find('html', 0)->children();
$paths = htmltodocx_paths();
$initial_state = ['phpword_object' => &$phpword_object, 'base_root' => $paths['base_root'], 'base_path' => $paths['base_path'], 'current_style' => ['size' => '11'], 'parents' => [0 => 'body'], 'list_depth' => 0, 'context' => 'section', 'pseudo_list' => true, 'pseudo_list_indicator_font_name' => 'Wingdings', 'pseudo_list_indicator_font_size' => '7', 'pseudo_list_indicator_character' => 'l ', 'table_allowed' => true, 'treat_div_as_paragraph' => true, 'structure_headings' => true, 'structure_document' => true, 'style_sheet' => htmltodocx_styles_example()];
htmltodocx_insert_html($phpword_object, $html_dom_array[0]->nodes, $initial_state);
//var_dump($html_dom_array[0]->nodes);
// exit;
$html_dom->clear();
unset($html_dom);
$h2d_file_uri = tempnam(sys_get_temp_dir(), 'htd');
/*if ($h2d_file_uri === false) {
var_dump(sys_get_temp_dir());
}*/
$objWriter = \PhpOffice\PhpWord\IOFactory::createWriter($phpword_object, 'Word2007');
$objWriter->save($h2d_file_uri);
$bin = file_get_contents($h2d_file_uri);
unlink($h2d_file_uri);
//echo 'sdfjnsdlkvjn';
//exit;
return $bin;
}
示例15: process_page
/**
 * Extract planning applications from a results page.
 *
 * Every row of the 'AppDetailsTable' table whose markup contains one of the
 * known status keywords is treated as an application. For each one the
 * detail page and the site-location page are scraped as well. Only
 * applications that have site-location details are returned.
 *
 * @param string $html Markup of the results page.
 * @return array Map of application reference => application data
 *               (url, appref, date, applicant, address, details, signif,
 *               size, category, protected, latitude, longitude).
 */
function process_page($html)
{
    global $authority_code;
    global $nearby_api_key; // declared as before; not used inside this function

    $rowSelector = "table[class='AppDetailsTable'] tr";
    $statusWords = array('FINALISED', 'CONDITIONAL', 'APPEALED', 'WITHDRAWN', 'NEW<', 'APPROVED', 'REFUSED');
    # mobile,council,gov etc
    $businessWords = array("retail", "Hotel", "Ltd", "Limited", " shop");

    $dom = new simple_html_dom();
    $dom->load($html);
    $apps = array();

    foreach ($dom->find($rowSelector) as $row) {
        # Hacky: the raw row markup is searched instead of the DOM, in case
        # 'td' shows up in the plain text.
        $rowHtml = (string) $row;

        $isApplication = false;
        foreach ($statusWords as $statusWord) {
            if (stripos($rowHtml, $statusWord) !== false) {
                $isApplication = true;
                break;
            }
        }
        if (!$isApplication) {
            continue;
        }

        $appref = $authority_code . substr($row->children[0]->plaintext, 0, 2) . "/" . substr($row->children[0]->plaintext, 2);
        $rawappref = trim($row->children[0]->plaintext);
        $url = "http://planning.corkcity.ie/InternetEnquiry/rpt_ViewApplicDetails.asp?validFileNum=1&app_num_file=" . $rawappref;
        // The date is rearranged from its first ten characters into YYYY-MM-DD.
        $rawdate = substr($row->children[4]->plaintext, 0, 10);
        $date = substr($rawdate, -4) . "-" . substr($rawdate, 3, 2) . "-" . substr($rawdate, 0, 2);
        $applicant = trim($row->children[5]->plaintext);
        $address = str_replace("<br>", ",", str_replace("<BR>", ",", $row->children[6]->innertext));

        # Part one: description, significance and size from the detail page.
        # The page used to be downloaded and parsed twice from the same URL,
        # and neither DOM was ever cleared; it is now fetched once and freed.
        $detaildom = new simple_html_dom();
        $detaildom->load(scraperwiki::scrape($url));
        $fullappdetails = $detaildom->find($rowSelector, 15)->children(1)->plaintext;
        $sizeRow = $detaildom->find($rowSelector, 23);
        $signifdetail = $sizeRow->children(1)->plaintext;
        $sizedetail = $sizeRow->children(4)->plaintext;
        unset($sizeRow);
        $detaildom->clear();
        unset($detaildom);

        $protected = (strpos($fullappdetails, "Protected Structure") !== false) ? "Protected Structure" : "";

        // A business keyword anywhere in the row wins over "dwelling".
        // strpos() !== false also catches a match at offset 0, which the
        // former truthiness test on strrpos() would have missed.
        $category = "";
        foreach ($businessWords as $businessword) {
            if (strpos($rowHtml, $businessword) !== false) {
                $category = "Business";
                break;
            }
        }
        if ($category === "" && strpos($fullappdetails, "dwelling") !== false) {
            $category = "residential";
        }

        # Part two: location of the application.
        $lochtml = scraperwiki::scrape('http://planning.corkcity.ie/InternetEnquiry/rpt_ViewSiteLocDetails.asp?page_num=0&file_number=' . $rawappref);
        if (stristr($lochtml, "No Site Location Details Found")) {
            unset($lochtml);
            continue;
        }

        $locdom = new simple_html_dom();
        $locdom->load($lochtml);
        $locnorthing = round(floatval($locdom->find($rowSelector, 1)->children(1)->plaintext));
        $loceasting = round(floatval($locdom->find($rowSelector, 1)->children(4)->plaintext));
        $locdom->clear();
        unset($locdom);
        unset($lochtml);

        # Part three: convert easting/northing to WGS84 using the geograph class.
        $c = new ConversionsLatLong();
        $res = $c->irish_to_wgs84($loceasting, $locnorthing);
        $lat = $res[0];
        $long = $res[1];

        $apps[$appref] = array(
            'url' => $url,
            'appref' => $appref,
            'date' => $date,
            'applicant' => $applicant,
            'address' => $address,
            'details' => $fullappdetails,
            'signif' => $signifdetail,
            'size' => $sizedetail,
            'category' => $category,
            'protected' => $protected,
            'latitude' => $lat,
            'longitude' => $long,
        );
    }

    $dom->clear();
    unset($dom);
    return $apps;
}