This article collects typical usage examples of the Scala class org.apache.spark.streaming.twitter.TwitterUtils. If you are wondering what TwitterUtils is for and how to use it in practice, the curated class examples below should help.
Eight code examples of the TwitterUtils class are shown, ordered roughly by popularity.
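Before the individual examples, here is a minimal sketch of the pattern they all share: build a StreamingContext, call TwitterUtils.createStream to obtain a DStream[twitter4j.Status], transform it, then start the context. The object and application names are placeholders, and it assumes the twitter4j.oauth.* credentials are already set as system properties.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Minimal sketch (names are placeholders). Passing None for the authorization makes the
// receiver read the twitter4j.oauth.* system properties.
object TwitterUtilsSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("twitter-utils-sketch").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(10))
    val stream = TwitterUtils.createStream(ssc, None, Seq("spark"))
    stream.map(_.getText).print()
    ssc.start()
    ssc.awaitTermination()
  }
}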
Example 1: TwitterStreaming
// Package declaration and imports for this example
package local.riverside

import org.apache.lucene.analysis.ja.JapaneseAnalyzer
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
import org.apache.spark.SparkConf
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Durations, StreamingContext}

object TwitterStreaming {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Twitter Streaming")
    val ssc = new StreamingContext(conf, Durations.minutes(1L))

    // Track the given keywords, or sample the public stream when no arguments are passed
    val filter = if (args.isEmpty) Nil else args.toList
    val stream = TwitterUtils.createStream(ssc, None, filter)

    stream
      .flatMap { status =>
        // Tokenize the tweet text with Lucene's Japanese (Kuromoji) analyzer
        val text = status.getText
        val analyzer = new JapaneseAnalyzer
        val tokenStream = analyzer.tokenStream("", text)
        val charAttr = tokenStream.addAttribute(classOf[CharTermAttribute])
        tokenStream.reset()
        try {
          Iterator.continually(tokenStream.incrementToken())
            .takeWhile(identity)
            .map(_ => charAttr.toString)
            .toVector
        } finally {
          tokenStream.end()
          tokenStream.close()
        }
      }
      // Count word occurrences per batch and write each batch under output/tweet-<timestamp>
      .map(word => (word, 1))
      .reduceByKey((a, b) => a + b)
      .saveAsTextFiles("output/tweet")

    ssc.start()
    ssc.awaitTermination()
  }
}
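Example 1 needs the Spark Twitter connector and Lucene's Kuromoji analyzer on the classpath. A hedged build.sbt sketch; the version numbers are assumptions and should be adjusted to your Spark and Scala versions (the connector ships under org.apache.spark for Spark 1.x and under org.apache.bahir for Spark 2.x):

// build.sbt sketch; group/artifact names are real, the versions are assumptions
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming" % "1.6.3" % "provided",
  "org.apache.spark" %% "spark-streaming-twitter" % "1.6.3", // use "org.apache.bahir" %% "spark-streaming-twitter" on Spark 2.x
  "org.apache.lucene" % "lucene-analyzers-kuromoji" % "5.5.0"
)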
Example 2: VeChallengeRealTime
// Package declaration and imports for this example
package io.github.adrianulbona.ve

import java.text.SimpleDateFormat
import java.util.Date

import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Minutes, StreamingContext}
import twitter4j.Place

object VeChallengeRealTime {

  def main(args: Array[String]) {
    val spark = SparkSession.builder
      .master("local[*]")
      .appName("ve-challenge")
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Minutes(2))
    val stream = TwitterUtils.createStream(ssc, None, Seq("challenge"))

    val places: DStream[Place] = stream.map(status => Option(status.getPlace))
      .filter(optionPlace => optionPlace.isDefined)
      .map(place => place.get)

    places.map(place => place.getCountryCode)
      .countByValue()
      .foreachRDD(batch => printStats(batch.sortBy({ case (_, count) => count }, ascending = false).take(5)))

    ssc.start()
    ssc.awaitTermination()
    spark.stop()
  }

  def printStats(top5Countries: Array[(String, Long)]) {
    println()
    println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()))
    top5Countries.foreach(println)
  }
}
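Example 2 passes None as the authorization, so the receiver falls back to the standard twitter4j.oauth.* system properties (examples 4 and 6 below set them the same way). A sketch of supplying them before the stream is created; the values shown are placeholders:

// Placeholder values; set these before TwitterUtils.createStream(ssc, None, ...) is called
System.setProperty("twitter4j.oauth.consumerKey", "<consumerKey>")
System.setProperty("twitter4j.oauth.consumerSecret", "<consumerSecret>")
System.setProperty("twitter4j.oauth.accessToken", "<accessToken>")
System.setProperty("twitter4j.oauth.accessTokenSecret", "<accessTokenSecret>")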
Example 3: TwitterStream
// Package declaration and imports for this example
package io.gzet.timeseries

import com.google.gson.GsonBuilder
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Minutes, StreamingContext}
import org.apache.spark.{Logging, SparkConf, SparkContext}
import twitter4j.auth.OAuthAuthorization
import twitter4j.conf.ConfigurationBuilder

import scala.util.Try

object TwitterStream extends SimpleConfig with Logging {

  // Build an explicit OAuthAuthorization from the credentials provided by SimpleConfig
  def getTwitterStream(ssc: StreamingContext, filters: Seq[String] = Nil) = {
    val builder = new ConfigurationBuilder()
    builder.setOAuthConsumerKey(twitterApiKey)
    builder.setOAuthConsumerSecret(twitterApiSecret)
    builder.setOAuthAccessToken(twitterTokenKey)
    builder.setOAuthAccessTokenSecret(twitterTokenSecret)
    val configuration = builder.build()
    TwitterUtils.createStream(
      ssc,
      Some(new OAuthAuthorization(configuration)),
      filters,
      StorageLevel.MEMORY_ONLY
    )
  }

  def main(args: Array[String]) = {
    val sparkConf = new SparkConf().setAppName("Twitter Extractor")
    val sc = new SparkContext(sparkConf)
    val ssc = new StreamingContext(sc, Minutes(5))

    // Serialize each status to JSON; Try keeps a bad record from failing the whole partition
    val twitterStream = getTwitterStream(ssc, args).mapPartitions({ it =>
      val gson = new GsonBuilder().create()
      it map { s =>
        Try(gson.toJson(s))
      }
    })

    twitterStream
      .filter(_.isSuccess)
      .map(_.get)
      .saveAsTextFiles("twitter")

    // Start streaming context
    ssc.start()
    ssc.awaitTermination()
  }
}
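Example 3 mixes in a SimpleConfig trait that supplies twitterApiKey, twitterApiSecret, twitterTokenKey and twitterTokenSecret, but its definition is not shown. A minimal sketch of what such a trait could look like, assuming Typesafe Config; the configuration key names are assumptions:

import com.typesafe.config.ConfigFactory

// Hypothetical sketch of the trait the example mixes in; key names are assumptions
trait SimpleConfig {
  private lazy val conf = ConfigFactory.load()
  lazy val twitterApiKey = conf.getString("twitter.apiKey")
  lazy val twitterApiSecret = conf.getString("twitter.apiSecret")
  lazy val twitterTokenKey = conf.getString("twitter.tokenKey")
  lazy val twitterTokenSecret = conf.getString("twitter.tokenSecret")
}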
Example 4: Tweet sample (spark-shell snippet)
// Imports for this snippet (run inside spark-shell, which provides sc)
println("")
println("*************************")
println("Tweet sample")
println("*************************")
println("")

import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.log4j.{Level, Logger}

Logger.getRootLogger.setLevel(Level.ERROR)

// Load Twitter credentials from config.properties on the classpath
val config = new java.util.Properties
config.load(this.getClass().getClassLoader().getResourceAsStream("config.properties"))
System.setProperty("twitter4j.oauth.consumerKey", config.get("twitter_consumerKey").toString)
System.setProperty("twitter4j.oauth.consumerSecret", config.get("twitter_consumerSecret").toString)
System.setProperty("twitter4j.oauth.accessToken", config.get("twitter_accessToken").toString)
System.setProperty("twitter4j.oauth.accessTokenSecret", config.get("twitter_accessTokenSecret").toString)

println("")
println("Create stream")
println("=============")
println("")

// Reuse the spark-shell SparkContext (sc) and track the #spark hashtag
val filters = Array("#spark")
val ssc = new StreamingContext(sc, Seconds(3))
val stream = TwitterUtils.createStream(ssc, None, filters)

println("")
println("Extract texts and print")
println("=======================")
println("")

stream.map(status => status.getText()).print()

println("")
println("Start")
println("=====")
println("")

ssc.start()
ssc.awaitTermination()
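The snippet reads config.properties from the classpath; the property keys below are the ones the code looks up, and the values are placeholders:

# config.properties (placeholder values)
twitter_consumerKey=<consumerKey>
twitter_consumerSecret=<consumerSecret>
twitter_accessToken=<accessToken>
twitter_accessTokenSecret=<accessTokenSecret>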
Example 5: TwitterSentiment
// Imports for this example
import com.databricks.spark.corenlp.functions._
import org.apache.spark._
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{current_timestamp, explode}
import org.apache.spark.streaming._
import org.apache.spark.streaming.twitter.TwitterUtils
import twitter4j.auth.OAuthAuthorization
import twitter4j.conf.ConfigurationBuilder

object TwitterSentiment {

  System.setProperty("spark.cassandra.connection.host", "127.0.0.1")

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TwitterSentiment").setMaster("local[*]")
    val spark = SparkSession.builder.config(conf).getOrCreate()
    val ssc = new StreamingContext(spark.sparkContext, Seconds(1))

    // Create a Twitter Stream for the input source. The default ConfigurationBuilder picks up
    // OAuth credentials from twitter4j.properties or the twitter4j.oauth.* system properties.
    val auth = Some(new OAuthAuthorization(new ConfigurationBuilder().build()))
    val twitterStream = TwitterUtils.createStream(ssc, auth, Array("trump", "donald"))

    // Basic text cleanup. Note that the first two replaceAll calls use JavaScript-style "/.../"
    // delimiters, which a Java regex treats as literal slashes, so they only strip "/x/" fragments.
    val tweets = twitterStream
      .filter(tweet => tweet.getLang.equals("en") || tweet.getLang.equals(""))
      .map(_.getText())
      .map(_.replaceAll("/[^A-Za-z0-9 ]/", ""))
      .map(_.replaceAll("/", ""))
      .map(_.replaceAll("RT.+?(?=\\s)\\s", ""))
      .map(_.replaceAll("https([^\\s]+).*", ""))

    tweets.foreachRDD(rdd => {
      import spark.implicits._
      val df = rdd.toDF()

      // Run CoreNLP: split sentences, tokenize, tag named entities, and score sentiment
      val output = df.select(cleanxml('value).as('doc))
        .select(explode(ssplit('doc)).as('sen))
        .select('sen, tokenize('sen).as('words), ner('sen).as('nerTags), sentiment('sen).as('sentiment))

      val formatted = output
        .drop("words")
        .drop("nerTags")
        .withColumnRenamed("sen", "tweet")
        .withColumn("processed_time", current_timestamp())

      formatted
        .write
        .mode("append")
        .format("org.apache.spark.sql.cassandra")
        .options(Map("table" -> "storm_doris", "keyspace" -> "sentiment"))
        .save()
    })

    ssc.start()
    // Let the stream run until terminated
    ssc.awaitTermination()
  }
}
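Example 5 relies on two external connectors: databricks/spark-corenlp for the cleanxml/ssplit/tokenize/ner/sentiment columns (which also needs the Stanford CoreNLP models jar) and the DataStax Cassandra connector for the org.apache.spark.sql.cassandra data source. A hedged dependency sketch; the versions and the resolver URL are assumptions and should be checked against the projects' documentation:

// build.sbt sketch; versions are assumptions, adjust to your Spark/Scala version
resolvers += "spark-packages" at "https://repos.spark-packages.org/" // hosts spark-corenlp; URL may differ for your setup
libraryDependencies ++= Seq(
  "databricks" % "spark-corenlp" % "0.2.0-s_2.11",
  "edu.stanford.nlp" % "stanford-corenlp" % "3.6.0" classifier "models",
  "com.datastax.spark" %% "spark-cassandra-connector" % "2.0.0"
)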
Example 6: Setup
// Package declaration and imports for this example
package twitter

import com.typesafe.config.ConfigFactory
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory

object Setup {
  private val log = LoggerFactory.getLogger(getClass)

  val config = ConfigFactory.load()

  log.info("Setting up Twitter credentials")
  setupTwitter(config.getString("oauth.consumerKey"), config.getString("oauth.consumerSecret"),
    config.getString("oauth.accessToken"), config.getString("oauth.accessTokenSecret"))

  log.info("Starting Spark")
  val conf = new SparkConf()
    .setMaster("local[2]")
    .setAppName("Twitter pipeline")
    .set("spark.executor.memory", "1g")
    .set("spark.rdd.compress", "true")
    .set("spark.storage.memoryFraction", "1")
    .set("spark.streaming.unpersist", "true")
    .set("spark.streaming.receiver.writeAheadLog.enable", "false")

  // Configure Kryo before the SparkContext is created; settings applied afterwards are ignored
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  conf.registerKryoClasses(Array(classOf[Tweet]))

  val sc = new SparkContext(conf)
  val ssc = new StreamingContext(sc, Seconds(1))

  def setupTwitter(consumerKey: String, consumerSecret: String, accessToken: String, accessTokenSecret: String) = {
    // Set up the system properties for twitter
    System.setProperty("twitter4j.oauth.consumerKey", consumerKey)
    System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret)
    System.setProperty("twitter4j.oauth.accessToken", accessToken)
    System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret)

    // https: all kinds of fun
    System.setProperty("twitter4j.restBaseURL", "https://api.twitter.com/1.1/")
    System.setProperty("twitter4j.streamBaseURL", "https://stream.twitter.com/1.1/")
    System.setProperty("twitter4j.siteStreamBaseURL", "https://sitestream.twitter.com/1.1/")
    System.setProperty("twitter4j.userStreamBaseURL", "https://userstream.twitter.com/1.1/")
    System.setProperty("twitter4j.oauth.requestTokenURL", "https://api.twitter.com/oauth/request_token")
    System.setProperty("twitter4j.oauth.accessTokenURL", "https://api.twitter.com/oauth/access_token")
    System.setProperty("twitter4j.oauth.authorizationURL", "https://api.twitter.com/oauth/authorize")
    System.setProperty("twitter4j.oauth.authenticationURL", "https://api.twitter.com/oauth/authenticate")
    System.setProperty("sync.numThreads", "4")
    System.setProperty("jsonStoreEnabled", "true")
  }

  def createStream = {
    log.info("Creating Twitter stream")
    TwitterUtils.createStream(ssc, None)
  }
}
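Setup wires everything up in its object body, so a driver only needs to reference its members. A hypothetical driver sketch using the Setup.createStream and Setup.ssc members shown above (the object name Main and the printing logic are assumptions):

package twitter

// Hypothetical driver; Setup.createStream and Setup.ssc come from the Setup object above
object Main {
  def main(args: Array[String]): Unit = {
    val tweets = Setup.createStream
    tweets.map(_.getText).print()
    Setup.ssc.start()
    Setup.ssc.awaitTermination()
  }
}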
Example 7: StoreToHdfs
// Package declaration and imports for this example
package com.godatadriven.twitter_classifier

import com.google.gson.Gson
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

object StoreToHdfs {
  private var numTweetsCollected = 0L
  private var partNum = 0
  private var gson = new Gson()

  def main(args: Array[String]) {
    // Process program arguments and set properties
    println("Initializing Streaming Spark Context...")
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(60))

    val tweetStream = TwitterUtils.createStream(ssc, None)
      .map(gson.toJson(_))

    tweetStream.foreachRDD((rdd, time) => {
      val count = rdd.count()
      if (count > 0) {
        val outputRDD = rdd.repartition(1)
        outputRDD.saveAsTextFile("/tweets/tweets_" + time.milliseconds.toString)
        numTweetsCollected += count
      }
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
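The package name suggests the JSON files written under /tweets feed a downstream classifier. A sketch of how a follow-up batch job might read them back with Spark SQL; the path pattern and object name are assumptions:

import org.apache.spark.sql.SparkSession

// Hypothetical follow-up batch job: read the JSON written by StoreToHdfs
object ReadStoredTweets {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.appName("read-stored-tweets").getOrCreate()
    val tweets = spark.read.json("/tweets/tweets_*") // one directory per streaming batch
    tweets.printSchema() // inspect the schema that Gson's serialization produced
    println(s"stored tweets: ${tweets.count()}")
  }
}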
Example 8: LiveTweetResourcesSparkController
// Package declaration and imports for this example
package controllers.spark.live

import javax.inject.Inject

import bigdata.engines.spark.SparkStreaming
import com.google.gson.Gson
import org.apache.spark.sql.DataFrame
import org.apache.spark.streaming.twitter.TwitterUtils
import play.api.mvc.{Action, Controller}
import services.twitter.Twitter4JConfiguration

class LiveTweetResourcesSparkController @Inject() (config: play.api.Configuration) extends Controller {

  def listSampleTweets = Action {
    val twitterInstance = new Twitter4JConfiguration(config).getTwitter4JAccess()
    val tweetStream = TwitterUtils.createStream(SparkStreaming.streamingContext,
      Option(twitterInstance.getAuthorization)).map(new Gson().toJson(_))

    var numTweetsCollected: Long = 0
    tweetStream.foreachRDD((rdd, time) => {
      val outputRDD = rdd.repartition(4)
      outputRDD.saveAsTextFile(config.getString("hadoop-tweets-url").get + "tweet_" + time.milliseconds.toString)
    })
    tweetStream.print()
    tweetStream.glom()

    SparkStreaming.streamingContext.start()
    Ok("started streaming")
  }

  def stopStreaming = Action {
    SparkStreaming.streamingContext.stop(true, true)
    Ok("stopped streaming")
  }

  private def parse(rdd: DataFrame): String = rdd.toJSON.collect.toList.mkString(",\n")
}
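The controller depends on a SparkStreaming helper that exposes a shared streamingContext; its source is not part of this example. A minimal sketch of what such a helper might look like; the master, application name and batch interval are assumptions:

package bigdata.engines.spark

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Hypothetical helper assumed by the controller above; configuration values are placeholders
object SparkStreaming {
  private val conf = new SparkConf().setMaster("local[2]").setAppName("live-tweets")
  lazy val streamingContext = new StreamingContext(conf, Seconds(10))
}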