当前位置: 首页>>代码示例>>Scala>>正文


Scala Element类代码示例

本文整理汇总了Scala中org.jsoup.nodes.Element的典型用法代码示例。如果您正苦于以下问题:Scala Element类的具体用法?Scala Element怎么用?Scala Element使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Element类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: HtmlConcatCompiler

//设置package包名称以及导入依赖的类
package com.karasiq.scalajsbundler.compilers

import com.karasiq.scalajsbundler.ScalaJSBundler.PageTypedContent
import org.jsoup.Jsoup
import org.jsoup.nodes.Element

import scala.collection.JavaConversions._

/**
  * Asset compiler that merges several HTML documents into a single one by
  * concatenating their `<head>` and `<body>` elements.
  */
object HtmlConcatCompiler extends AssetCompiler {
  private implicit class ElementOps(val e: Element) extends AnyVal {
    /**
      * Merges `src` into this element.
      *
      * `class` and `style` attributes are joined (space / semicolon separated),
      * every other attribute of `src` overwrites the value on this element, and
      * the inner HTML of `src` is appended to this element.
      */
    def concatWith(src: Element): Unit = {
      // Joins two attribute values. No delimiter is inserted when the existing
      // value is empty (avoids results like " foo" or ";color:red") or already
      // ends with the delimiter.
      @inline
      def delimit(delimiter: String, s1: String, s2: String): String = {
        if (s1.isEmpty || s1.endsWith(delimiter)) s1 + s2
        else s1 + delimiter + s2
      }

      src.attributes().foreach {
        case a if a.getKey == "class" =>
          e.attr(a.getKey, delimit(" ", e.attr(a.getKey), a.getValue))

        case a if a.getKey == "style" =>
          e.attr(a.getKey, delimit(";", e.attr(a.getKey), a.getValue))

        case a => // Replaces attribute value
          e.attr(a.getKey, a.getValue)
      }
      e.append(src.html())
    }
  }

  /**
    * Concatenates the given HTML documents, in order, into the first one.
    *
    * @param htmlList HTML sources to merge
    * @return the merged document as HTML, or an empty string when `htmlList` is empty
    */
  def concat(htmlList: Seq[String]): String = htmlList match {
    case Seq() =>
      // Nothing to merge; previously this case failed on `htmlList.head`.
      ""

    case first +: rest =>
      val result = Jsoup.parse(first)
      rest.foreach { h =>
        val html = Jsoup.parse(h)
        result.head().concatWith(html.head())
        result.body().concatWith(html.body())
      }
      result.outerHtml()
  }

  /** Compiles the page contents by concatenating their string representations. */
  override def compile(contents: Seq[PageTypedContent]): String = {
    concat(contents.map(_.asset.asString))
  }
}
开发者ID:Karasiq,项目名称:sbt-scalajs-bundler,代码行数:46,代码来源:HtmlConcatCompiler.scala

示例2: CScraper

//设置package包名称以及导入依赖的类
import com.mashape.unirest.http.Unirest
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import purecsv.safe._


    // NOTE(review): the enclosing definition is cut off in this excerpt;
    // `pagesize` and `year` are defined outside the visible code.
    // POSTs to the ranking page with `pagesize` and `y` as query-string
    // parameters and reads the response body as a string.
    val results = Unirest.post("http://nturanking.lis.ntu.edu.tw/DataPage/OverallRanking.aspx")
      .queryString("pagesize", pagesize)
      .queryString("y", year)
      .asString.getBody

    val jsoup = Jsoup.parse(results)
    // All <tr> rows of the grid's <tbody>, minus the first one
    // (presumably the header row — TODO confirm against the page).
    val jsoupResults = jsoup.body.select("#MainContain_GridView1 > tbody").select("tr").toArray.tail
    // Pairs each row with its index, casts the pair so the row is usable as an
    // Element, then builds (1-based rank, html of the first child of cells 1, 2 and 3).
    val csvResults = (0 until jsoupResults.size)
      .map(idx => (idx, jsoupResults(idx))).map(_.asInstanceOf[(Int, Element)])
      .map(t =>
        (t._1 + 1, t._2.child(1).child(0).html, t._2.child(2).child(0).html, t._2.child(3).child(0).html))

    // Prints one comma-separated line per row.
    println(csvResults.map(t => s"${t._1},${t._2},${t._3},${t._4}").mkString("\n"))
  }
} 
开发者ID:sguzman,项目名称:UniversityRankingWebScraper,代码行数:22,代码来源:CScraper.scala

示例3: Convert

//设置package包名称以及导入依赖的类
package com.javaconverter.model

import org.jsoup.Jsoup
import org.jsoup.select.NodeVisitor
import org.jsoup.nodes.Node
import org.jsoup.nodes.TextNode
import org.jsoup.nodes.Comment
import scala.collection.JavaConversions._
import org.jsoup.nodes.Element
import org.jsoup.nodes.DataNode

/**
  * Converts an HTML string either into a nested "tag(...)" call listing
  * ([[toJavaTags]]) or into an escaped form suitable for display ([[toHtmlFormat]]).
  *
  * @param html the HTML source to convert
  */
class Convert(html: String) {

  // Two spaces of indentation per element of the range.
  private def pad(r: Range): String = r.map(_ => "  ").mkString

  // Renders the opening part of a node on a new line, indented by `depth`.
  // The closing ")" is emitted later by the visitor's `tail` callback.
  private def render(node: Node, depth: Int): String = {
    val opening = node match {
      case _: Element  => node.nodeName + "("
      case t: TextNode => s"""text("${t.text()}""""
      case c: Comment  => s"""text("<!--${c.getData}-->""""
      case d: DataNode => s"""text("${d.getWholeData}""""
    }
    "\n" + pad(0 until depth) + opening
  }

  /**
    * Walks the first child of the parsed document depth-first and renders every
    * node as a nested call, with element attributes emitted as `attr("key -> value", ...)`.
    *
    * @return the rendered text
    */
  def toJavaTags(): String = {
    val doc = Jsoup.parse(html)
    // A builder avoids re-copying the whole output on every append.
    val result = new StringBuilder
    doc.child(0).traverse(new NodeVisitor() {
      override def head(node: Node, depth: Int): Unit = {
        result.append(render(node, depth))
        node match {
          case element: Element =>
            val attributes = element.attributes().asList().map { attr =>
              s""""${attr.getKey} -> ${attr.getValue}""""
            }
            if (attributes.nonEmpty) {
              result.append(s"""attr(${attributes.mkString(",")})""")
              // A separator is needed only when children follow the attr(...) call.
              if (!element.childNodes().isEmpty()) {
                result.append(",")
              }
            }
          case _ => // only elements carry attributes to render
        }
      }

      override def tail(node: Node, depth: Int): Unit = {
        if (node.childNodes().isEmpty()) {
          result.append(")")
        } else {
          // Nodes with children close on their own line, at their own depth.
          result.append("\n").append(pad(depth until 0 by -1)).append(")")
        }
        if (node.nextSibling() != null) {
          result.append(",")
        }
      }
    })
    result.toString
  }

  /**
    * Returns the parsed document's HTML with `<` and `>` replaced by `&lt;` and `&gt;`.
    */
  def toHtmlFormat(): String = {
    // Literal replacement; no regular expression is needed here.
    Jsoup.parse(html).toString()
      .replace("<", "&lt;")
      .replace(">", "&gt;")
  }
}
开发者ID:manlioGit,项目名称:javatagsconverter,代码行数:62,代码来源:Convert.scala

示例4: Link

//设置package包名称以及导入依赖的类
package utils
import java.net.{MalformedURLException, URL}

import models.{MessageButton, Photo, Message}
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.select.Elements

//import org.jsoup.nodes.Element

import scala.collection.JavaConversions._
import scala.util.control.Exception._

/** A hyperlink with its title, target, image source and description text. */
sealed case class Link(title: String, href: String, imageSrc:String, desc:String) {
    /** Renders every field as `name : value`, separated by commas. */
    override def toString(): String =
        Seq("title" -> title, "href" -> href, "imageSrc" -> imageSrc, "desc" -> desc)
            .map { case (label, value) => s"$label : $value" }
            .mkString(", ")

    /** Builds a [[Message]] carrying the title and description, a 300x250 photo and a link button. */
    def toMessage: Message = {
        val text = s"[$title]\n$desc"
        val photo = Option(Photo(imageSrc, 300, 250))
        // NOTE(review): the button label looks mis-encoded in this copy of the source — confirm the intended text.
        val button = Option(MessageButton("?????", href))
        Message(text, photo, button)
    }
}


/** Small helpers around jsoup for fetching pages and extracting text and links. */
object HtmlParser {

    type JDoc = org.jsoup.nodes.Document

    /** Fetches and parses the document at `url`. */
    def get(url: String): JDoc = {
        val connection = Jsoup.connect(url)
        connection.get()
    }

    // Combined text of everything matching `selector`.
    private def selectText(doc: JDoc, selector: String): String = doc.select(selector).text

    /** Text of the document's `title` element(s). */
    def titleText(doc: JDoc): String = selectText(doc, "title")

    /** Text of the document's `body` element(s). */
    def bodyText(doc: JDoc): String = selectText(doc, "body")

    /**
      * Collects every anchor whose `href` contains `containStr` as a [[Link]],
      * taking the image source from a nested `img[src]` and the description
      * from nested elements whose class contains "desc".
      */
    def linkSequence(doc: JDoc, containStr : String): Seq[Link] = {
        val anchors = doc.select(s"a[href*=$containStr]").iterator.toList
        for (anchor <- anchors) yield {
            val image = anchor.select("img[src]").attr("src")
            val description = anchor.select("[class*=desc]").text
            Link(anchor.text, anchor.attr("href"), image, description)
        }
    }


    /** Returns the URL in normalized string form, or `None` when it is malformed. */
    def safeURL(url: String): Option[String] = {
        val parsed = catching(classOf[MalformedURLException]) opt new URL(url)
        parsed.map(_.toString)
    }
}
开发者ID:suya55,项目名称:kakaoYellowIdBot,代码行数:48,代码来源:HtmlParser.scala

示例5: LinksSelector

//设置package包名称以及导入依赖的类
package haishu.crawler.selector

import java.nio.charset.Charset

import org.jsoup.helper.StringUtil
import org.jsoup.nodes.Element

import scala.collection.JavaConverters._

/**
  * Selector that extracts the link targets of all `a` elements below an element.
  * Only [[selectSeq]] is supported; the other selection methods throw
  * `UnsupportedOperationException`.
  */
class LinksSelector extends BaseElementSelector {

  override def select(element: Element): String =
    throw new UnsupportedOperationException

  /**
    * Returns the `href` of every anchor under `element`.
    *
    * When the anchor has a base URI the link is resolved to an absolute URL via
    * jsoup's `abs:` attribute prefix; otherwise the raw `href` value is returned.
    */
  override def selectSeq(element: Element): Seq[String] = {
    val elements = element.select("a")
    elements.asScala.map { elem =>
      // `abs:href` can only resolve against a non-blank base URI. The condition
      // was previously inverted, so absolute resolution was attempted exactly
      // when it could not work.
      if (!StringUtil.isBlank(elem.baseUri())) {
        elem.attr("abs:href")
      } else {
        elem.attr("href")
      }
    }
  }

  override def selectElement(element: Element): Element =
    throw new UnsupportedOperationException

  override def selectElements(element: Element): Seq[Element] =
    throw new UnsupportedOperationException

  override def hasAttribute = true
}

object LinksSelector {
  /** Creates a new [[LinksSelector]]. */
  def apply(): LinksSelector = new LinksSelector()
}
开发者ID:hualongdata,项目名称:hl-crawler,代码行数:38,代码来源:LinksSelector.scala

示例6: checkElementAndConvert

//设置package包名称以及导入依赖的类
package haishu.crawler.selector

import org.jsoup.nodes.{Document, Element}


  /**
    * Returns `element` unchanged when it already is a [[Document]]; otherwise
    * wraps a clone of it in a new document that keeps the original base URI.
    */
  private def checkElementAndConvert(element: Element): Element = element match {
    case d: Document => d
    case _ =>
      // ownerDocument() is null for an element that is not attached to a
      // document; fall back to the element's own base URI instead of failing.
      val baseUri = Option(element.ownerDocument()).fold(element.baseUri())(_.baseUri())
      val root = new Document(baseUri)
      root.appendChild(element.clone())
      root
  }

  /** Selects the elements matching the CSS `selector`. */
  override def css(selector: String): Selectable =
    selectElements(Selectors.css(selector))

  /** Selects with a CSS selector built from `selector` and `attrName`. */
  override def css(selector: String, attrName: String): Selectable =
    selectElements(Selectors.css(selector, attrName))
} 
开发者ID:hualongdata,项目名称:hl-crawler,代码行数:24,代码来源:HtmlNode.scala

示例7: CssSelector

//设置package包名称以及导入依赖的类
package haishu.crawler.selector

import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
import scala.collection.JavaConverters._


/**
  * Selects elements with a CSS query and optionally extracts a value from them.
  *
  * @param selectorText the CSS query passed to jsoup
  * @param attrName     what to extract from each match: `null` for the outer HTML,
  *                     "innerHtml", "text" (own text nodes only), "allText", or
  *                     any other value for the attribute of that name
  */
class CssSelector(selectorText: String, attrName: String) extends BaseElementSelector {

  // Extracts the configured value from a matched element.
  private def getValue(element: Element): String = attrName match {
    case null                                       => element.outerHtml
    case name if "innerHtml".equalsIgnoreCase(name) => element.html
    case name if "text".equalsIgnoreCase(name)      => getText(element)
    case name if "allText".equalsIgnoreCase(name)   => element.text
    case name                                       => element.attr(name)
  }

  /** Concatenates the text of the element's direct text-node children only. */
  protected def getText(element: Element): String =
    element.childNodes.asScala.collect { case textNode: TextNode => textNode.text() }.mkString

  /** Value of the first match, or `null` when nothing matches. */
  override def select(element: Element): String =
    selectElements(element).headOption.map(getValue).orNull

  /** Values of all matches, with `null` values dropped. */
  override def selectSeq(doc: Element): Seq[String] = {
    val matched = selectElements(doc)
    if (matched.nonEmpty) matched.map(getValue).filterNot(_ == null)
    else Seq()
  }

  /** First matching element, or `null` when nothing matches. */
  override def selectElement(element: Element): Element =
    selectElements(element).headOption.orNull

  /** All elements matching the CSS query. */
  override def selectElements(element: Element): Seq[Element] = {
    val matches = element.select(selectorText)
    matches.asScala
  }

  override def hasAttribute: Boolean = attrName != null
}

object CssSelector {
  /** Selector that extracts `attrName` from every match. */
  def apply(selectorText: String, attrName: String): CssSelector =
    new CssSelector(selectorText, attrName)

  /** Selector without an attribute name; matches yield their outer HTML. */
  def apply(selectorText: String): CssSelector =
    new CssSelector(selectorText, null)
}
开发者ID:hualongdata,项目名称:hl-crawler,代码行数:54,代码来源:CssSelector.scala

示例8: StyleguideSpider

//设置package包名称以及导入依赖的类
package com.themillhousegroup.witchhunt

import org.jsoup.nodes.{ Element, Document }
import scala.concurrent.Future
import com.themillhousegroup.scoup.{ ScoupImplicits, Scoup }
import scala.concurrent.ExecutionContext.Implicits.global
import java.net.URL


/**
  * Crawls a page and, unless told otherwise, the local pages it links to,
  * collecting the parsed documents.
  */
object StyleguideSpider extends ScoupImplicits {

  /**
    * Visits `url` and returns the documents found.
    *
    * @param url          the page to start from
    * @param thisPageOnly when true, only the start page is fetched
    * @return the start document plus, unless `thisPageOnly`, the documents
    *         reached by following local links
    */
  def visit(url: URL, thisPageOnly: Boolean = false): Future[Set[Document]] = {
    // The start page counts as visited so a link back to it is not fetched again.
    visitLink(url, Set(url), thisPageOnly)
  }

  private def visitLink(url: URL, alreadyVisited: Set[URL], thisPageOnly: Boolean): Future[Set[Document]] = {
    Scoup.parse(url.toString).flatMap { doc =>

      if (thisPageOnly) {
        Future.successful(Set(doc))
      } else {
        visitLinks(url, doc, alreadyVisited)
      }
    }
  }

  // Follows every not-yet-visited local link of `doc`, one after another.
  private def visitLinks(url: URL, doc: Document, alreadyVisited: Set[URL]): Future[Set[Document]] = {
    val links = doc.select("a").filter(isLocal).map(_.attr("href"))
    // De-duplicated so the same target is not fetched twice from one page.
    val pending = links.map(createFullLocalUrl(url)).filter(!alreadyVisited.contains(_)).toSet
    // Every pending link is handled at this level, so each child sees all of
    // them as visited and does not crawl its siblings again.
    val visitedFromHere = alreadyVisited ++ pending
    pending.foldLeft(Future.successful(Set(doc))) {
      case (acc, link) =>
        for {
          existingDocs <- acc
          newDocs <- visitLink(link, visitedFromHere, false)
        } yield (existingDocs ++ newDocs)
    }
  }

  // A link is local when its href is a site-relative path. Protocol-relative
  // hrefs ("//host/...") also start with "/" but point at another host.
  private def isLocal(link: Element): Boolean = {
    val href = link.attr("href")
    href.startsWith("/") && !href.startsWith("//")
  }

  /** Resolves `link` against `base`. */
  def createFullLocalUrl(base: URL)(link: String): URL = {
    new URL(base, link)
  }
}
开发者ID:themillhousegroup,项目名称:witchhunt,代码行数:47,代码来源:StyleguideSpider.scala

示例9: Article

//设置package包名称以及导入依赖的类
package gander

import gander.images.Image
import gander.opengraph.OpenGraphData
import org.joda.time.DateTime
import org.jsoup.nodes.{Document, Element}


/**
  * Immutable record holding the data kept for one article: textual metadata,
  * the parsed documents, and optional extracted parts.
  *
  * NOTE(review): this excerpt shows only the declaration, so the meaning of
  * each field below is inferred from its name and type — confirm against the
  * code that populates it.
  *
  * @param cleanedArticleText article text, when available
  * @param topNode            optional element (presumably the main content node)
  * @param topImage           optional image (presumably the article's lead image)
  * @param movies             elements collected for the article (presumably embedded videos)
  * @param doc                a parsed document
  * @param rawDoc             a second parsed document (presumably the unmodified one)
  * @param publishDate        publication date, when available
  * @param additionalData     free-form string key/value data
  */
final case class Article(title: String,
                         cleanedArticleText: Option[String],
                         metaDescription: String,
                         metaKeywords: String,
                         canonicalLink: String,
                         domain: String,
                         topNode: Option[Element],
                         topImage: Option[Image],
                         tags: Set[String],
                         movies: List[Element],
                         finalUrl: String,
                         linkHash: String,
                         rawHtml: String,
                         doc: Document,
                         rawDoc: Document,
                         publishDate: Option[DateTime],
                         additionalData: Map[String, String],
                         openGraphData: OpenGraphData) 
开发者ID:lloydmeta,项目名称:gander,代码行数:27,代码来源:Article.scala

示例10: HtmlLifter

//设置package包名称以及导入依赖的类
package com.twitter.diffy.lifter

import org.jsoup.Jsoup
import org.jsoup.nodes.{Document, Element}
import org.jsoup.select.Elements

import scala.collection.JavaConversions._

/** Lifts jsoup elements into nested [[FieldMap]] structures. */
object HtmlLifter {
  /**
    * Converts `node` into a field map.
    *
    * A document becomes a map of its lifted `head` and `body`; any other
    * element becomes a map of its tag name, own text, attributes and
    * recursively lifted children.
    */
  def lift(node: Element): FieldMap[Any] = node match {
    case document: Document =>
      val sections = Map(
        "head" -> lift(document.head),
        "body" -> lift(document.body)
      )
      FieldMap(sections)

    case element: Element =>
      val children: Elements = element.children
      val attributePairs = element.attributes.asList.map { attribute =>
        attribute.getKey -> attribute.getValue
      }
      val attributes = FieldMap[String](attributePairs.toMap)

      FieldMap(
        Map(
          "tag"         -> element.tagName,
          "text"        -> element.ownText,
          "attributes"  -> attributes,
          "children"    -> children.map(child => lift(child))
        )
      )
  }

  /** Parses an HTML string into a jsoup document. */
  def decode(html: String): Document = Jsoup.parse(html)
}
开发者ID:sachinmanchanda,项目名称:diffy_unicast,代码行数:40,代码来源:HtmlLifter.scala

示例11: SosachPngSource

//设置package包名称以及导入依赖的类
package com.karasiq.nanoboard.sources.png

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import com.karasiq.nanoboard.encoding.DataEncodingStage
import org.jsoup.nodes.Element


/**
  * PNG source for 2ch.hk pages that rewrites image and page URLs to the
  * m2-ch.ru host.
  */
final class SosachPngSource(encoding: DataEncodingStage)(implicit as: ActorSystem, am: ActorMaterializer) extends BoardPngSource(encoding) {
  // Matches a full 2ch.hk PNG URL and captures the path after the host.
  private val regex = """https?://2ch\.hk/(\w+/src/\d+/\d+\.png)""".r

  /**
    * Returns the m2-ch.ru URL for the element's attribute value when that value
    * is a 2ch.hk PNG URL, otherwise `None`.
    */
  override protected def getUrl(e: Element, attr: String): Option[String] = {
    e.attr(attr) match {
      case regex(path) =>
        Some(s"http://m2-ch.ru/$path")

      case _ =>
        None
    }
  }

  /** Loads the page from m2-ch.ru instead of https://2ch.hk/. */
  override def imagesFromPage(url: String): Source[String, akka.NotUsed] = {
    super.imagesFromPage(url.replace("https://2ch.hk/", "http://m2-ch.ru/"))
  }
}
开发者ID:Karasiq,项目名称:nanoboard,代码行数:27,代码来源:SosachPngSource.scala

示例12: BoardPngSource

//设置package包名称以及导入依赖的类
package com.karasiq.nanoboard.sources.png

import java.net.URL

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.HttpRequest
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import akka.util.ByteString
import com.karasiq.nanoboard.NanoboardMessage
import com.karasiq.nanoboard.encoding.DataEncodingStage
import org.jsoup.Jsoup
import org.jsoup.nodes.{Document, Element}

import scala.collection.JavaConversions._
import scala.util.Try


/**
  * PNG source that downloads board pages and images over HTTP and decodes
  * messages from the image data with the given encoding stage.
  */
class BoardPngSource(encoding: DataEncodingStage)(implicit as: ActorSystem, am: ActorMaterializer) extends UrlPngSource {
  protected final val http = Http()

  /**
    * Downloads `url`, decodes the complete response body with `encoding` and
    * emits the messages parsed from it. Any failure yields an empty source.
    */
  def messagesFromImage(url: String): Source[NanoboardMessage, akka.NotUsed] = {
    Source.fromFuture(http.singleRequest(HttpRequest(uri = url)))
      // Collect the whole entity into a single ByteString before decoding.
      .flatMapConcat(_.entity.dataBytes.fold(ByteString.empty)(_ ++ _))
      .mapConcat { data =>
        NanoboardMessage.parseMessages(encoding.decode(data))
      }
      .recoverWith { case _ => Source.empty }
  }

  /**
    * Downloads the page at `url`, parses it as UTF-8 HTML and emits the image
    * URLs found in it. Any failure yields an empty source.
    */
  def imagesFromPage(url: String): Source[String, akka.NotUsed] = {
    Source.fromFuture(http.singleRequest(HttpRequest(uri = url)))
      .flatMapConcat(_.entity.dataBytes.fold(ByteString.empty)(_ ++ _))
      .flatMapConcat(data => imagesFromPage(Jsoup.parse(data.utf8String, url)))
      .recoverWith { case _ => Source.empty }
  }

  /** Emits the distinct image URLs taken from the `href` of every anchor on the page. */
  protected def imagesFromPage(page: Document): Source[String, akka.NotUsed] = {
    val urls = page.select("a").flatMap(getUrl(_, "href"))
    Source(urls.distinct.toVector)
  }

  /**
    * Resolves the attribute to an absolute URL and keeps it only when its path
    * contains a `/src/` segment and ends in `.png`.
    */
  protected def getUrl(e: Element, attr: String): Option[String] = {
    Try(new URL(e.absUrl(attr)))
      .toOption
      .filter(_.getPath.matches("([^\\?\\s]+)?/src/([^\\?\\s]+)?\\.png"))
      .map(_.toString)
  }
}
开发者ID:Karasiq,项目名称:nanoboard,代码行数:51,代码来源:BoardPngSource.scala

示例13: FloggerPageSpec

//设置package包名称以及导入依赖的类
package com.themillhousegroup.flogger

import org.specs2.mutable.Specification
import com.themillhousegroup.flogger.test.TestFixtures
import com.themillhousegroup.scoup.ScoupImplicits
import org.jsoup.nodes.Element

// Specification for the page API: listing pages and reading a page's content
// as a parsed document.
// NOTE(review): `testFlogger`, `waitFor` and `getTestPage` are not defined in
// this class — presumably they come from TestFixtures; confirm.
class FloggerPageSpec extends Specification with TestFixtures with ScoupImplicits {
  "Page API" should {
    // The list of blog pages must contain at least one entry.
    "list all pages" in {
      waitFor(testFlogger.blogPages) must not beEmpty
    }

    // The test page's content must be non-null and contain an h4 heading
    // with the expected text.
    "be able to parse HTML as a JSoup Document from a page" in {
      val testPage = getTestPage
      val html = testPage.documentContent
      html must not beNull

      // All h4 elements of the page content.
      val h4s: Iterable[Element] = html.select("h4")
      h4s must not beEmpty

      h4s.head.text must beEqualTo("This is an H4")
    }
  }
}
开发者ID:themillhousegroup,项目名称:flogger,代码行数:26,代码来源:FloggerPageSpec.scala

示例14: JsoupHelper

//设置package包名称以及导入依赖的类
package util

import java.util.stream.Collectors

import org.jsoup.nodes.Element
import org.jsoup.select.Elements


/** Implicit enrichment that makes [[Elements4Scala]] methods available on jsoup `Elements`. */
object JsoupHelper {
  /** Wraps `elements` in an [[Elements4Scala]]. */
  implicit def elementsToElements4Scala(elements: Elements): Elements4Scala =
    new Elements4Scala(elements)
}

/** Wrapper adding Scala-friendly conversions to a jsoup `Elements` collection. */
class Elements4Scala(that: Elements) {
  /** Copies the wrapped elements, in order, into a new array. */
  def toElementArray: Array[Element] = {
    val collected: java.util.List[Element] = that.stream.collect(Collectors.toList[Element])
    val target = new Array[Element](collected.size())
    collected.toArray(target)
  }
}
开发者ID:ktr-skmt,项目名称:FelisCatusZero,代码行数:21,代码来源:JsoupHelper.scala

示例15: TorrentSearchResult

//设置package包名称以及导入依赖的类
package providers

import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import play.api.libs.json.Json
import play.api.libs.ws.WSClient


import scala.collection.JavaConversions._
import scala.concurrent.ExecutionContext.Implicits.global





/**
  * One torrent search hit. All values are kept as the strings they were
  * scraped as.
  *
  * @param name       torrent name
  * @param magnetLink magnet link of the torrent
  * @param provider   name of the site the result came from
  */
case class TorrentSearchResult(name:String,magnetLink:String,
                               size:String,age:String,
                               seeders:String, leechers:String,
                               provider:String) extends SearchResult
object TorrentSearchResult{
  // JSON writer derived from the case class fields by Play's Json.writes macro.
  implicit val torrentSearchResultWrites = Json.writes[TorrentSearchResult]
}

/** Search provider that scrapes torrent results from kat.cr search pages. */
class KatcrProvider extends SearchProvider {
  /**
    * Requests the search page for `searchString` and extracts up to
    * `numberofResulstToReturn` results from the response body.
    */
  override def search(searchString:String, ws: WSClient, numberofResulstToReturn:Int) = {
    val searchProviderURL = "https://kat.cr/usearch/"
    ws.url(searchProviderURL + searchString).get().map { response =>
      extractTopTorrents(response.body, numberofResulstToReturn)
    }
  }

  /**
    * Builds a result from the cells of one table row.
    *
    * @param tds the row's `td` cells; the first five are read, in order, as
    *            name, size, age, seeders and leechers
    */
  def processKatcrTableRow(tds: Elements):TorrentSearchResult =
    TorrentSearchResult(tds(0).text,
      // Elements.attr yields "" when nothing matches, whereas first() would
      // return null for a row without a magnet link.
      tds.select("a[title=\"Torrent magnet link\"]").attr("href"),
      tds(1).text,
      tds(2).text,tds(3).text,tds(4).text, "kat.cr")


  /**
    * Parses the page and returns the first `numberOfResults` rows of its
    * second table, in page order, skipping the table's first row.
    *
    * @return the results, or an empty list when the page has no second table
    */
  def extractTopTorrents(htmlString:String, numberOfResults:Int): List[SearchResult] = {
    val tables:Elements = Jsoup.parse(htmlString).select("table")
    if (tables.size < 2) {
      // No results table on the page (e.g. an error page or changed layout).
      Nil
    } else {
      tables.get(1).select("tr")
        .slice(1, numberOfResults + 1)
        .map(_.select("td"))
        // Rows without all five data cells cannot be turned into a result.
        .filter(_.size >= 5)
        .map(processKatcrTableRow)
        .toList
    }
  }
}
开发者ID:aashiks,项目名称:jIgor,代码行数:53,代码来源:KatcrProvider.scala


注:本文中的org.jsoup.nodes.Element类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。