当前位置: 首页>>代码示例>>Scala>>正文


Scala Jsoup类代码示例

本文整理汇总了Scala中org.jsoup.Jsoup的典型用法代码示例。如果您正苦于以下问题:Scala Jsoup类的具体用法?Scala Jsoup怎么用?Scala Jsoup使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Jsoup类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: HtmlConcatCompiler

//设置package包名称以及导入依赖的类
package com.karasiq.scalajsbundler.compilers

import com.karasiq.scalajsbundler.ScalaJSBundler.PageTypedContent
import org.jsoup.Jsoup
import org.jsoup.nodes.Element

import scala.collection.JavaConversions._

/**
  * Asset compiler that merges several HTML pages into one document.
  *
  * The first page acts as the base document; the `<head>` and `<body>` of every following page
  * are folded into the corresponding element of the base.
  */
object HtmlConcatCompiler extends AssetCompiler {
  private implicit class ElementOps(val e: Element) extends AnyVal {
    /**
      * Merges `src` into the wrapped element.
      *
      * `class` and `style` values are appended to the existing value (separated by a space and a
      * semicolon respectively), any other attribute overwrites the existing one, and the inner
      * HTML of `src` is appended to the element.
      */
    def concatWith(src: Element): Unit = {
      // Joins two attribute values. No delimiter is inserted when the left side is empty or
      // already ends with it, so an attribute that was absent does not get a leading " " or ";".
      @inline
      def delimit(delimiter: String, s1: String, s2: String): String = {
        if (s1.isEmpty || s1.endsWith(delimiter)) s1 + s2
        else s1 + delimiter + s2
      }

      src.attributes().foreach {
        case a if a.getKey == "class" =>
          e.attr(a.getKey, delimit(" ", e.attr(a.getKey), a.getValue))

        case a if a.getKey == "style" =>
          e.attr(a.getKey, delimit(";", e.attr(a.getKey), a.getValue))

        case a => // Replaces attribute value
          e.attr(a.getKey, a.getValue)
      }
      e.append(src.html())
    }
  }

  /**
    * Concatenates the given HTML pages into one page.
    *
    * @param htmlList pages to merge, in order; the first one is the base document
    * @return the outer HTML of the merged document, or an empty string when `htmlList` is empty
    */
  def concat(htmlList: Seq[String]): String = {
    if (htmlList.isEmpty) {
      ""
    } else {
      val result = Jsoup.parse(htmlList.head)
      htmlList.tail.foreach { h =>
        val html = Jsoup.parse(h)
        result.head().concatWith(html.head())
        result.body().concatWith(html.body())
      }
      result.outerHtml()
    }
  }

  /** Merges the string form of every asset in `contents` into one HTML page. */
  override def compile(contents: Seq[PageTypedContent]): String = {
    concat(contents.map(_.asset.asString))
  }
}
开发者ID:Karasiq,项目名称:sbt-scalajs-bundler,代码行数:46,代码来源:HtmlConcatCompiler.scala

示例2: ScheduleDownloadActor

//设置package包名称以及导入依赖的类
package logic.actors.schedule

import java.nio.charset.StandardCharsets
import javax.inject._

import akka.actor.{Actor, ActorRef}
import helpers.SpiritHelper
import logic.actors.schedule.ScheduleDownloadActor.DownloadSchedule
import logic.actors.schedule.ScheduleParseActor._
import org.fhs.spirit.scheduleparser.enumerations.EScheduleKind
import org.jsoup.Jsoup
import play.api.libs.ws.WSClient

import scala.collection.JavaConversions._
import scala.concurrent.Await
import scala.concurrent.duration._


/**
  * Actor that downloads the regular and block schedule pages and forwards them to the parse actor.
  *
  * On [[DownloadSchedule]] it fetches one page per course name plus every page linked from
  * `bindex.html`, normalizes each page through Jsoup and sends the collected result as a single
  * `ParseSchedule` message to `parseActor`.
  *
  * NOTE(review): every request is awaited synchronously, so the actor thread is blocked while
  * downloads are running.
  */
@Singleton
class ScheduleDownloadActor @Inject()(ws: WSClient, @Named("parseActor") parseActor: ActorRef) extends Actor with SpiritHelper {


  override def receive: Receive = {
    case DownloadSchedule =>

      val baseUrl = configuration.underlying.getString("schedule.baseUrl")
      val requestTimeout = 10.seconds
      val pageCharset = StandardCharsets.ISO_8859_1.toString

      // Blocking GET of `baseUrl + path`.
      def request(path: String) = Await.result(ws.url(baseUrl + path).get(), requestTimeout)

      // Fetches a page and returns its Jsoup-normalized HTML; None when the server answers 404.
      def downloadPage(path: String): Option[String] = {
        val httpResult = request(path)
        if (httpResult.status != 404) {
          Some(Jsoup.parse(httpResult.bodyAsBytes.decodeString(pageCharset)).toString)
        } else {
          None
        }
      }

      val lectureResults = uncachedCourseNames.flatMap { courseName =>
        downloadPage("s_" + courseName + ".html")
          .map(html => (EScheduleKind.REGULAR, (html, courseName)))
      }

      // The block index is read without a status check, exactly as before.
      val bindex = Jsoup.parse(request("bindex.html").bodyAsBytes.decodeString(pageCharset))
      val blockRefs = bindex.select("a").map(_.attr("href")).toSet

      val blockResult = blockRefs.flatMap { block =>
        downloadPage(block).map(html => (EScheduleKind.BLOCK, (html, block)))
      }

      parseActor ! ParseSchedule(lectureResults ++ blockResult)
  }
}
开发者ID:P1tt187,项目名称:spirit-play,代码行数:56,代码来源:ScheduleDownloadActor.scala

示例3: MainPageResponseParser

//设置package包名称以及导入依赖的类
package bridgeapp.crawler.parsers

import java.net.URL

import akka.actor.{Props, ActorSystem, Actor, ActorRef}
import bridgeapp.crawler.Config
import bridgeapp.crawler.execution.{Response, ResponseParser}
import bridgeapp.crawler.storage.{DiskForumsStorage, ForumsStorage}
import com.typesafe.scalalogging.LazyLogging
import org.jsoup.Jsoup

import scala.collection.JavaConverters._


/** [[ResponseParser]] that hands every received response over to the given actor. */
class MainPageResponseParser(parser: ActorRef) extends ResponseParser {
  // Fire-and-forget: the response is sent to the actor as a message.
  override def ->(response: Response): Unit = parser ! response
}

object MainPageResponseParser {

  /**
    * Builds a parser backed by a freshly created [[MainPageParser]] actor.
    *
    * @param actorSystem system in which the backing actor is started
    */
  def apply()(implicit actorSystem: ActorSystem): MainPageResponseParser = {
    val parserProps = Props(new MainPageParser(ForumsStorage()))
    new MainPageResponseParser(actorSystem.actorOf(parserProps))
  }
}

/**
  * Actor that extracts forum ids from a main-page response and writes them to the storage.
  *
  * For every [[Response]] it decodes the body (falling back to UTF-8 when the response has no
  * charset), selects all links whose `href` starts with `viewforum.php`, reads the `f` query
  * parameter of each link and passes the resulting ids to `forumsListStorage`.
  */
class MainPageParser(forumsListStorage: ForumsStorage) extends Actor with LazyLogging {
  override def receive: Receive = {
    case response: Response =>

      val charset = response.charset.getOrElse("utf-8")
      val body = new String(response.body, charset)
      val document = Jsoup.parse(body, response.uri.toString)

      val forumLink = document.select("[href^=viewforum.php]").asScala.toArray

      logger.trace(s" Total url: ${forumLink.length}")

      // Links that cannot be parsed are skipped instead of failing the whole message.
      val forumsIds: Array[Int] = forumLink
        .map(_.attr("abs:href"))
        .flatMap(href => forumIdOf(href).toList)

      logger.trace(s" Extracted forums ids: ${forumsIds.length}")

      forumsListStorage.write(forumsIds, Config.forumsStorageURI)(context.dispatcher)

  }

  /**
    * Reads the forum id from the `f` query parameter of `href`.
    *
    * @return the id, `0` when the link has no `f` parameter, or `None` when the link is not a
    *         valid URL or the parameter value is not an integer
    */
  private def forumIdOf(href: String): Option[Int] = {
    val parsed = scala.util.Try {
      // getQuery is null for a URL without a query part.
      val query = Option(new URL(href).getQuery).getOrElse("")
      val params = query.split("&").toList.flatMap { part =>
        part.split("=") match {
          case Array(key, value, _*) => Some(key -> value)
          case _ => None // parameter without a value, e.g. "f="
        }
      }.toMap
      params.getOrElse("f", "0").toInt
    }
    if (parsed.isFailure) logger.debug(s" Skipping unparsable forum link: $href")
    parsed.toOption
  }
}
开发者ID:bridge-app,项目名称:crawler,代码行数:54,代码来源:MainPageParser.scala

示例4: MALImage

//设置package包名称以及导入依赖的类
package me.abarrow.ScalaSubNet.mal

import java.io.File
import java.io.FileOutputStream
import java.net.URL
import org.jsoup.Jsoup
import org.jsoup.parser.Parser
import java.nio.channels.Channels

/** Downloads the main picture of an anime page. */
object MALImage {
  /**
    * Saves the main image of the anime page to `imagePath`.
    *
    * The page is fetched with a 60 second timeout and the first `img.ac` element is taken as the
    * main image. Its `src` is resolved against the page URL, so a relative address works too.
    *
    * @param animeID   id appended to `MALURLs.MAL_ANIME_PAGE_PREFIX` to build the page URL
    * @param imagePath file the image bytes are written to
    * @return `true` when an image was written, `false` when the page has no usable main image
    */
  def saveMainImage(animeID:Int, imagePath:File):Boolean = {
    val doc = Jsoup.parse(new URL(MALURLs.MAL_ANIME_PAGE_PREFIX + animeID.toString()), 60000)
    // first() yields null when nothing matches; "abs:src" yields "" when it cannot be resolved.
    val imgSrc = Option(doc.select("img.ac").first()).map(_.attr("abs:src")).filter(_.nonEmpty)
    imgSrc match {
      case None => false
      case Some(src) =>
        val rbc = Channels.newChannel(new URL(src).openStream())
        try {
          val fos = new FileOutputStream(imagePath)
          try {
            fos.getChannel().transferFrom(rbc, 0, Long.MaxValue)
          } finally {
            fos.close()
          }
        } finally {
          // Runs even when opening or closing the output file fails.
          rbc.close()
        }
        true
    }
  }
}
开发者ID:Abarrowman,项目名称:ScalaSubNet,代码行数:29,代码来源:MALImage.scala

示例5: MALList

//设置package包名称以及导入依赖的类
package me.abarrow.ScalaSubNet.mal

import org.jsoup.Jsoup
import org.jsoup.parser.Parser 
import collection.JavaConverters._


/** Holder for the entries of one anime list. */
class MALList(val entries: Array[MALEntry])

object MALList {
  private val MAL_LIST_SUFFIX = "&status=all&type=anime"
  private val xmlParser = Parser.xmlParser()

  /**
    * Fetches the list of the given user and converts every `anime` element into a [[MALEntry]].
    *
    * @param userId value inserted between `MALURLs.MAL_LIST_PREFIX` and the fixed query suffix
    * @return the parsed list, one entry per `anime` element in document order
    */
  def getListByUser(userId:String):MALList = {
    val listUrl = MALURLs.MAL_LIST_PREFIX + userId + MAL_LIST_SUFFIX
    val listXML = Jsoup.connect(listUrl).parser(xmlParser).get()

    val entries = for (anime <- listXML.getElementsByTag("anime").asScala) yield {
      // Inner markup of the first child element with the given tag name.
      def field(tag: String): String = anime.getElementsByTag(tag).first().html()

      val id = field("series_animedb_id").toInt
      val name = field("series_title")
      val score = field("my_score").toInt
      val status = field("my_status").toInt
      new MALEntry(id, name, score, status)
    }
    new MALList(entries.toArray)
  }
}
开发者ID:Abarrowman,项目名称:ScalaSubNet,代码行数:27,代码来源:MALList.scala

示例6: NoticeServiceObjects

//设置package包名称以及导入依赖的类
package com.zhranklin.homepage.notice

import org.json4s._
import org.jsoup.Jsoup
import org.jsoup.nodes.Document

/**
  * Registry of the notice services known to the application.
  *
  * NOTE(review): the runs of `?` inside the string literals below are present in the source as
  * shown; they look like text lost to an encoding problem and are kept unchanged here — restore
  * them from the original project if available.
  */
object NoticeServiceObjects {

  /** Service whose content extractor, date extractor, URL pattern and template come as one tuple. */
  trait ServiceBase extends IndexService with FunNoticeFetcher with SelectorUrlService {
    val initVal: ((Document) => String, (Document) => String, String, String)
    lazy val (getContent, getDateStr, urlPattern, template) = initVal
  }

  /**
    * Notice service for law.scu.edu.cn; the list pages are JSON, so entries are read from the
    * `data` array instead of being selected from HTML.
    *
    * @param title  text appended to the source name
    * @param listId value of the `nextcid` parameter in generated detail URLs
    */
  class LawService(title: String, listId: String) extends NoticeService(s"??? - $title") with UrlService with IndexService with FunNoticeFetcher {
    val getContent = contentF("div.text")
    val getDateStr = dateF("span:contains(????)")
    val template = "http://law.scu.edu.cn/xjax?arg=8573&arg=<index>&arg=20&arg=list&clazz=PortalArticleAction&method=list"

    /** Detail page URL for the article with the given id. */
    def getUrl(id: String) = s"http://law.scu.edu.cn/detail.jsp?portalId=725&cid=8385&nextcid=$listId&aid=$id"

    override def noticeUrlsFromUrl(url: String): Iterable[NoticeEntry] = {
      val jsonStr = Jsoup.connect(url).execute().body()
      val json = jackson.parseJson(jsonStr)
      // Fails with a ClassCastException when `data` is not a JSON array.
      json.\("data").asInstanceOf[JArray].arr.map(
        jo => NoticeEntry(getUrl(jo.\("id").values.toString), Some(jo.\("subject").values.toString)))
    }
  }

  /** All configured services: the generic template-driven ones followed by the specialised ones. */
  val serviceList = List(
    "???? - ???? - test" ->
      "http://www.sculj.cn/Special_News.asp?SpecialID=40&SpecialName=%D1%A7%D4%BA%B6%AF%CC%AC&page=<index>",
    "???? - ???? - test" -> "http://sesu.scu.edu.cn/news/list_1_<index>.html",
    "???? - ????" -> "http://sesu.scu.edu.cn/gonggao/list_2_<index>.html",
    "????? - ???? - test" -> "http://cs.scu.edu.cn/cs/xsky/xskb/H951901index_<index>.htm",
    "????? - ???? - test" -> "http://cs.scu.edu.cn/cs/xytz/H9502index_<index>.htm",
    "????? - ???? - test" -> "http://cs.scu.edu.cn/cs/xyxw/H9501index_<index>.htm",
    "????? - ??? - test" -> "http://cs.scu.edu.cn/cs/fwzy/ftl/H951204index_<index>.htm",
    "???? - test" -> "http://news.scu.edu.cn/news2012/cdzx/I0201index_<index>.htm",
    "???? - ????" -> "http://math.scu.edu.cn/news.asp?PAGE=<index>",
    "?????? - ????" -> "http://seei.scu.edu.cn/student,p<index>,index.jsp",
    "????? - ????" -> "http://flc2.scu.edu.cn/foreign/a/xueyuangonggao/list_27_<index>.html"
  ).map { tp =>
    new NoticeService(tp._1) with UniversalUrlService with UniversalNoticeFetcher with IndexService {
      val template = tp._2
    }
  } ++ List(
    new NoticeService("??? - ??") with ServiceBase {
      val initVal =(selectorF("input[name=news.content]")(_.first.attr("value")), dateF("table[width=900] td:contains(????)"),
        "newsShow.*", "http://jwc.scu.edu.cn/jwc/moreNotice.action?url=moreNotice.action&type=2&keyWord=&pager.pageNow=<index>")},
    new LawService("????", "8572"),
    new LawService("????", "8573")
  )
}
开发者ID:zhranklin,项目名称:Private_Blog,代码行数:54,代码来源:NoticeServiceObjects.scala

示例7: IsapReader

//设置package包名称以及导入依赖的类
package pl.mojepanstwo.sap.toakoma.readers

import org.slf4j.LoggerFactory
import org.springframework.batch.item.ItemReader

import org.jsoup.Jsoup

import org.jsoup.nodes.Document
import com.gargoylesoftware.htmlunit.WebClient
import pl.mojepanstwo.sap.toakoma._

object IsapReader {
  /** Root address that the other URLs in this object are built from. */
  val BASE_URL = "http://isap.sejm.gov.pl"

  /** Prefix of the details-page URL; the document id is appended to it. */
  val URL = s"$BASE_URL/DetailsServlet?id="
}

/**
  * Item reader that yields the details page of one document exactly once.
  *
  * The first call to `read` fetches the page for `id`; every later call returns `null`.
  *
  * @param id document id appended to `IsapReader.URL`
  */
class IsapReader(val id: String) extends ItemReader[Document] {

  val logger = LoggerFactory.getLogger(this.getClass())

  /** Set to true once the single item has been requested. */
  var last = false

  /**
    * @return the fetched page on the first call, `null` afterwards
    * @throws NoSuchDocumentException when the page body contains the "no such act" message
    */
  def read : Document = {
    logger.trace("read")

    if (last) {
      null
    } else {
      // Flag is raised before the request, so a failed fetch is not retried on the next call.
      last = true
      val page = Jsoup.connect(IsapReader.URL + id).get
      val missing = page.body.text.contains("Brak aktu prawnego o podanym adresie publikacyjnym !")
      if (missing) throw new NoSuchDocumentException
      page
    }
  }
}
开发者ID:PrawoPolskie,项目名称:toakoma,代码行数:36,代码来源:IsapReader.scala

示例8: Scraper 与 DefaultScraperService

//设置package包名称以及导入依赖的类
package pl.mojepanstwo.sap.toakoma.services

import java.net.URL
import java.io.File
import org.apache.commons.io.FileUtils
import org.jsoup.nodes.Document
import com.gargoylesoftware.htmlunit._
import org.jsoup.Jsoup

/** Abstraction over fetching documents and files addressed by URL. */
trait Scraper {
  /** Returns the document found at `url`, parsed into a Jsoup [[Document]]. */
  def get(url: String) : Document
  /**
    * Stores the file addressed by `fileUrl` at `filePath`.
    *
    * @return a path string for the stored file (the implementation in this file returns the
    *         absolute path)
    */
  def dowloadFile(fileUrl:String, filePath:String) : String
}

/** [[Scraper]] that loads pages through an HtmlUnit [[WebClient]] and parses them with Jsoup. */
class DefaultScraperService extends Scraper {

  val webClient = new WebClient

  /**
    * Loads `url` with the web client and parses the response content.
    *
    * A refresh handler is installed first that loads the refresh target directly.
    *
    * @param url address of the page to load
    * @return the response content parsed by Jsoup
    */
  def get(url: String) : Document = {
      webClient.setRefreshHandler(new RefreshHandler {
        // The explicit type argument matters: getPage is generic in its result type, and in a
        // Unit position scalac may otherwise infer Nothing, which fails with a
        // ClassCastException once a page is actually returned.
        override def handleRefresh(page: Page, url: URL, i: Int): Unit = {
          webClient.getPage[Page](url)
          ()
        }
      })
      val apPage: Page = webClient.getPage[Page](url)
      Jsoup.parse(apPage.getWebResponse.getContentAsString)
  }

  /**
    * Copies the content behind `fileUrl` into the file at `filePath`.
    *
    * @return the absolute path of the written file
    */
  def dowloadFile(fileUrl:String, filePath:String) : String = {
    val url = new URL(fileUrl)
    val tmp = new File(filePath)
    FileUtils.copyURLToFile(url, tmp)
    tmp.getAbsolutePath()
  }

}
开发者ID:PrawoPolskie,项目名称:toakoma,代码行数:35,代码来源:Scraper.scala

示例9: ResourceScraperService

//设置package包名称以及导入依赖的类
package pl.mojepanstwo.sap.toakoma

import pl.mojepanstwo.sap.toakoma.services.Scraper
import org.jsoup.nodes.Document
import org.jsoup.Jsoup
import scala.io.Source
import java.io.File
import java.nio.file.Files
import org.apache.commons.io.IOUtils
import java.io.FileOutputStream

/**
  * [[Scraper]] that serves documents and files from classpath resources instead of the network.
  *
  * The `id` and `type` query parameters of the requested URL select the resource
  * `isap/<id>/<type>.html` (documents) or `isap/<id>/<type>.pdf` (files).
  */
class ResourceScraperService extends Scraper {

  // Captures the id and the numeric type parameter of a request URL.
  private val requestPattern = ".*id=(.*)&type=([0-9]+).*".r

  // Resource path without extension; a URL that does not match fails with a MatchError.
  private def resourceBase(url: String): String = {
    val requestPattern(id, docType) = url
    "isap/" + id + "/" + docType
  }

  /**
    * Parses the HTML resource that corresponds to `url`.
    *
    * @throws MatchError when `url` carries no `id=...&type=<digits>` pair
    */
  def get(url: String) : Document = {
    val source = Source.fromResource(resourceBase(url) + ".html")
    try {
      Jsoup.parse(source.mkString)
    } finally {
      source.close()
    }
  }

  /**
    * Copies the PDF resource that corresponds to `fileUrl` to `filePath`.
    *
    * @return the absolute path of the written file
    * @throws java.io.FileNotFoundException when the resource does not exist; nothing is written
    * @throws MatchError when `fileUrl` carries no `id=...&type=<digits>` pair
    */
  def dowloadFile(fileUrl:String, filePath:String) : String = {
    val resource = "/" + resourceBase(fileUrl) + ".pdf"
    // getResourceAsStream returns null for a missing resource; report that before creating the file.
    val src = Option(getClass.getResourceAsStream(resource))
      .getOrElse(throw new java.io.FileNotFoundException("Classpath resource not found: " + resource))
    try {
      val dest = new File(filePath)
      val out = new FileOutputStream(dest)
      try {
        IOUtils.copy(src, out)
      } finally {
        out.close()
      }
      dest.getAbsolutePath
    } finally {
      src.close()
    }
  }

}
开发者ID:PrawoPolskie,项目名称:toakoma,代码行数:33,代码来源:ResourceScraperService.scala

示例10: IndexService、Notice 与 NoticeService

//设置package包名称以及导入依赖的类
package com.zhranklin.notice.service

import java.util.Date

import org.jsoup.Jsoup

import scala.collection.JavaConverters._
import scala.util._

/**
  * Produces the sequence of index-page URLs for a paginated source.
  *
  * Every occurrence of the literal `<index>` in `template` is substituted with the successive
  * values of `indexNums`, which by default counts upwards from `firstIndex` without end.
  */
trait IndexService {
  /** URL template containing the `<index>` placeholder. */
  val template: String
  /** URL of the first index page. */
  def first = rawIndices.head
  protected def firstIndex: Int = 1
  // Endless lazy sequence i, i + 1, i + 2, ...
  protected def valueStream(i: Int): Stream[Int] = i #:: valueStream(i + 1)
  protected def indexNums: Iterable[Any] = valueStream(firstIndex)
  // Literal replacement: with replaceAll a value containing `$` or `\` would be read as regex
  // replacement syntax and fail or be altered.
  protected def interpolate(value: Any): String = template.replace("<index>", value.toString)
  /** One URL per value of `indexNums`, in order. */
  def rawIndices: Iterable[String] = indexNums map interpolate
  /** `first` followed by all but the first of `rawIndices`. */
  def indexUrls: Iterable[String] = Stream(first) ++ rawIndices.drop(1)
}

/**
  * A fetched notice.
  *
  * @param url   address the notice was fetched from
  * @param title notice title
  * @param html  notice body as HTML
  * @param date  notice date
  */
case class Notice(url: String, title: String, html: String, date: Date) {
  // Parses `html`, drops each named attribute from every element that carries it (one attribute
  // name at a time, in the given order) and serializes the document again.
  private def withoutAttributes(names: String*): String = {
    val doc = Jsoup.parse(html)
    for {
      name <- names
      element <- doc.select(s"*[$name]").asScala
    } element.removeAttr(name)
    doc.toString
  }

  /** The notice HTML with all `width` and `height` attributes removed. */
  def widthlessHtml = withoutAttributes("width", "height")

  /** The notice HTML with all `width`, `height` and `style` attributes removed. */
  def stylelessHtml = withoutAttributes("width", "height", "style")

  /** The `src` value of every `img` element that has one. */
  def imgs = Jsoup.parse(html).select("img[src]").asScala.map(_.attr("src"))
}
/** Reference to a notice that has not been fetched yet: its URL and, when known, its title. */
case class NoticeEntry(url: String, title: Option[String] = None)

/**
  * Base class of a notice source: walks the index pages, collects notice entries and fetches them.
  *
  * Failures are kept as values so that one broken page or notice does not stop the traversal.
  *
  * @param source name of the source
  */
abstract class NoticeService(val source: String) extends UrlService with IndexService with NoticeFetcher {
  /** Entries of all index pages in order; an index page that fails yields a single `Failure`. */
  def getUrls: Iterable[Try[NoticeEntry]] = indexUrls.map(i => Try(noticeUrlsFromUrl(i))).flatMap {
    case Success(urls) => urls map Success.apply
    case Failure(t) => Iterable(Failure(t))
  }

  /** The fetched notice, or the failure, for every element of `getUrls`. */
  def notices: Iterable[Try[Notice]] = getUrls.map(_.flatMap(u => Try(fetch(u))))

  /**
    * Fetches one page of notices.
    *
    * @param limit  maximum number of elements of `notices` to evaluate
    * @param offset number of leading elements of `notices` to skip
    * @return the successfully fetched notices and the errors raised within the requested slice
    */
  def noticesWithErr(limit: Int, offset: Int): (List[Notice], List[Throwable]) = {
    val page = notices.slice(offset, offset + limit).toList
    val successes = page.collect { case Success(notice) => notice }
    val failures = page.collect { case Failure(error) => error }
    // Log only one representative error per exception class.
    failures.groupBy(_.getClass.getSimpleName).map(_._2.head).foreach(t => log.i(s"error when fetching news", t))
    (successes, failures)
  }
}
开发者ID:zhranklin,项目名称:notice_crawler,代码行数:53,代码来源:NoticeService.scala

示例11: SearchControllerTest

//设置package包名称以及导入依赖的类
package controllers

import model.{Runway, Airport, Country, SearchResult}
import org.jsoup.Jsoup
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.mock.MockitoSugar
import org.scalatest.{Matchers, FunSpec}
import play.api.test.FakeRequest
import services.SearchService
import org.mockito.Mockito._
import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global

/** Tests of the search controller against a mocked [[SearchService]]. */
class SearchControllerTest extends FunSpec with Matchers with MockitoSugar with ScalaFutures{

  describe("Search Controller"){

    it("should generate search results page for given search term"){
      new Setup {
        // The stubbed value is already computed, so no task needs to be scheduled for it.
        when(mockSearchService.searchCountriesByNameOrCountryCode("aus")).thenReturn(Future.successful(expectedSearchResult))

        val response = searchController.searchByCountry("aus")(FakeRequest()).futureValue

        response.header.status should be(200)

        // NOTE(review): this row is rendered from the view template directly, not read from
        // `response`; only the status code above is checked against the controller result.
        expectedFirstRow should be("Australia AUS Melbourne Airport small CONCRETE 1")
      }
    }
  }

  /** Fixture: controller wired to a mocked service plus one expected search result. */
  trait Setup{
    val mockSearchService = mock[SearchService]
    val searchController = new SearchController(mockSearchService)
    val expectedSearchResult: Vector[SearchResult] = Vector(SearchResult(Country("Australia","AUS"),Airport("Melbourne Airport","small"),Runway("CONCRETE",1)))
    val expectedFirstRow = Jsoup.parse(views.html.search_results(expectedSearchResult.toList).body).select("table > tbody > tr:nth-child(1) td").text()
  }

}
开发者ID:atiqsayyed,项目名称:airport,代码行数:39,代码来源:SearchControllerTest.scala

示例12: ReportsControllerTest

//设置package包名称以及导入依赖的类
package controllers

import model._
import org.jsoup.Jsoup
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.mock.MockitoSugar
import org.scalatest.{FunSpec, Matchers}
import play.api.test.FakeRequest
import services.ReportService
import org.mockito.Mockito._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

/** Tests of the reports controller against a mocked [[ReportService]]. */
class ReportsControllerTest extends FunSpec with Matchers with MockitoSugar with ScalaFutures{

  describe("Reports Controller"){
    it("should display country name and count of airports in country"){
      new Setup {
        val response = reportController.getCountriesWithHighestNoOfAirports(FakeRequest()).futureValue

        response.header.status should be(200)

        // NOTE(review): this row is rendered from the view template directly, not read from
        // `response`; only the status code above is checked against the controller result.
        val expectedFirstRow = Jsoup.parse(views.html.report("Some Title",expectedSearchResult.toList).body).select("table > tbody > tr:nth-child(1) td").text()
        expectedFirstRow should be("Australia 100")
      }
    }
  }

  /** Fixture: controller wired to a mocked service that reports one country. */
  trait Setup{
    val mockReportService = mock[ReportService]
    val reportController = new ReportsController(mockReportService)
    val expectedSearchResult: Vector[CountryReport] = Vector(CountryReport(Country("Australia","AUS"),100))

    // The stubbed value is already computed, so no task needs to be scheduled for it.
    when(mockReportService.findCountriesWithHighestNoOfAirports).thenReturn(Future.successful(expectedSearchResult))
  }

}
开发者ID:atiqsayyed,项目名称:airport,代码行数:38,代码来源:ReportsControllerTest.scala

示例13: Crawler

//设置package包名称以及导入依赖的类
package pl.krix.scalacrawl

import java.net.URI
import org.jsoup.Jsoup
import scala.collection.JavaConversions._


/** Minimal same-domain web crawler that prints every link it follows. */
object Crawler {

  /**
    * Extracts the host of `URL` without a leading `www.`.
    *
    * @return the host, or `None` when the URL has no host or is not a valid URI
    */
  def getDomain(URL: String): Option[String] = {
    scala.util.Try(new URI(URL).getHost).toOption   // malformed links yield None instead of throwing
      .flatMap(Option(_))                           // getHost is null for host-less URIs
      .map(_.stripPrefix("www."))
  }

  /**
    * Crawls `URL` and, depth-first, every not yet visited link on the same domain.
    *
    * Each followed link is printed as `source --> target`. A page is requested at most once:
    * pages discovered in one branch are remembered for all later branches.
    *
    * @param URL      page to start from
    * @param visited  pages that must not be crawled
    * @param interval pause before every request, in milliseconds
    */
  def crawl(URL: String, visited: Set[String], interval: Int): Unit = {
    crawlFrom(URL, visited + URL, interval)
    ()
  }

  // Crawls `url` and returns `visited` extended by every page reached from it, so that sibling
  // branches do not fetch the same page again.
  private def crawlFrom(url: String, visited: Set[String], interval: Int): Set[String] = {
    Thread.sleep(interval)                                        // sleep before launching a request
    val domain = getDomain(url)
    val links = Jsoup.connect(url)
      .get()
      .select("a[href]")                                          // link elements of the page
      .map(_.attr("abs:href"))                                    // their absolute addresses
      .filter(link => link.nonEmpty && getDomain(link) == domain) // non-empty, same domain only
      .toList

    links.foldLeft(visited) { (seen, link) =>
      if (seen.contains(link)) {
        seen
      } else {
        println(url + " --> " + link)
        crawlFrom(link, seen + link, interval)
      }
    }
  }

  def printHelp(): Unit = {
    println("USAGE: sbt \"run [URL] [TIME INTERVAL BETWEEN REQUESTS]\"")
  }

  /** Entry point; arguments are `[URL] [TIME INTERVAL BETWEEN REQUESTS IN SECS]`. */
  def main(args: Array[String]): Unit = {
    // The interval must be a non-negative integer; anything else prints the usage text.
    val seconds =
      if (args.length < 2) None
      else scala.util.Try(args(1).toInt).toOption.filter(_ >= 0)

    seconds match {
      case Some(secs) => crawl(args(0), Set[String](args(0)), secs * 1000)
      case None => printHelp()
    }
  }
}
开发者ID:krix38,项目名称:scalaCrawl,代码行数:46,代码来源:Crawler.scala

示例14: LinkExtractor

//设置package包名称以及导入依赖的类
package wipro.crawler.util

import org.jsoup.Jsoup

import scala.collection.JavaConverters._

/** Collects the links of a site by following pages whose URL contains a base URL. */
class LinkExtractor {

  /** Every URL handed to `crawlDomainLinks` so far, most recent first. */
  var crawledLinks : List[String] = List.empty[String]

  /** Fetches `url` (no request timeout) and returns the distinct raw `href` values of its anchors. */
  def getAllPageLinks(url : String) = {
    val anchors = Jsoup.connect(url).timeout(0).get().select("a[href]")
    anchors.iterator().asScala.map(_.attr("href")).toSeq.distinct
  }

  /** Keeps the non-null, non-empty links that contain `baseUrl`. */
  def filterLinks(links : Seq[String],baseUrl : String) = {
      links
        .filter(candidate => candidate != null && candidate.length > 0)
        .filter(_.contains(baseUrl))
  }

  /**
    * Records `url` and, when it contains `baseUrl`, recursively visits the links of its page.
    *
    * Nothing happens when `url` was recorded before or `depth` has reached `maxDepth`.
    */
  def crawlDomainLinks(url : String,depth : Int,maxDepth : Int,baseUrl : String) : Unit = {
    val skip = crawledLinks.contains(url) || depth >= maxDepth
    if (!skip) {
      crawledLinks = url :: crawledLinks
      if (url.contains(baseUrl)) {
        getAllPageLinks(url).foreach { next =>
          crawlDomainLinks(next, depth + 1, maxDepth, baseUrl)
        }
      }
    }
  }
}
开发者ID:adityahalabe,项目名称:webCrawler,代码行数:33,代码来源:LinkExtractor.scala

示例15: CScraper

//设置package包名称以及导入依赖的类
import com.mashape.unirest.http.Unirest
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import purecsv.safe._


    val results = Unirest.post("http://nturanking.lis.ntu.edu.tw/DataPage/OverallRanking.aspx")
      .queryString("pagesize", pagesize)
      .queryString("y", year)
      .asString.getBody

    val jsoup = Jsoup.parse(results)
    val jsoupResults = jsoup.body.select("#MainContain_GridView1 > tbody").select("tr").toArray.tail
    val csvResults = (0 until jsoupResults.size)
      .map(idx => (idx, jsoupResults(idx))).map(_.asInstanceOf[(Int, Element)])
      .map(t =>
        (t._1 + 1, t._2.child(1).child(0).html, t._2.child(2).child(0).html, t._2.child(3).child(0).html))

    println(csvResults.map(t => s"${t._1},${t._2},${t._3},${t._4}").mkString("\n"))
  }
} 
开发者ID:sguzman,项目名称:UniversityRankingWebScraper,代码行数:22,代码来源:CScraper.scala


注:本文中的org.jsoup.Jsoup类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。