0% found this document useful (0 votes)

163 views · 4 pages

Spark Streaming Twitter Example

This document describes a program that calculates popular hashtags from a Twitter stream over sliding 10 and 60 second windows. It takes Twitter credentials as arguments to connect to the Twitter stream, extracts hashtags from tweets, counts the hashtags in each time window, and prints the top 10 hashtags for each window period. It also configures logging levels for the streaming job.

Uploaded by

anon_158103504

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

163 views · 4 pages

Spark Streaming Twitter Example

Uploaded by

anon_158103504

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 4

// scalastyle:off println

package org.apache.spark.examples.streaming

import org.apache.spark.streaming.{Seconds, StreamingContext}

import org.apache.spark.SparkContext._

import org.apache.spark.streaming.twitter._

import org.apache.spark.SparkConf

/**
 * Calculates popular hashtags (topics) over sliding 10 and 60 second windows from a Twitter
 * stream. The stream is instantiated with credentials and optionally filters supplied by the
 * command line arguments.
 *
 * Run this on your local machine with the arguments
 *   `<consumer key> <consumer secret> <access token> <access token secret> [<filters>]`
 */
object TwitterPopularTags {

  /**
   * Entry point.
   *
   * @param args the four Twitter OAuth credentials (consumer key, consumer secret, access token,
   *             access token secret) followed by zero or more stream filters. If fewer than four
   *             arguments are given, a usage message is printed to stderr and the JVM exits
   *             with status 1.
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 4) {
      System.err.println("Usage: TwitterPopularTags <consumer key> <consumer secret> " +
        "<access token> <access token secret> [<filters>]")
      System.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    val Array(consumerKey, consumerSecret, accessToken, accessTokenSecret) = args.take(4)
    // Everything after the four credentials is treated as a stream filter.
    val filters = args.drop(4)

    // Set the system properties so that Twitter4j library used by twitter stream
    // can use them to generate OAuth credentials
    System.setProperty("twitter4j.oauth.consumerKey", consumerKey)
    System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret)
    System.setProperty("twitter4j.oauth.accessToken", accessToken)
    System.setProperty("twitter4j.oauth.accessTokenSecret", accessTokenSecret)

    val sparkConf = new SparkConf().setAppName("TwitterPopularTags")
    // Batch interval of 2 seconds; both window lengths below are multiples of it.
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    val stream = TwitterUtils.createStream(ssc, None, filters)

    // A hashtag is any space-separated token of the tweet text that starts with '#'.
    val hashTags = stream.flatMap(status => status.getText.split(" ").filter(_.startsWith("#")))

    // Build and print the ranking for each window length. The 60 second report is registered
    // before the 10 second one, so the output order matches the original listing.
    Seq(60, 10).foreach { windowSeconds =>
      val topCounts = hashTags.map((_, 1))
        .reduceByKeyAndWindow(_ + _, Seconds(windowSeconds))
        // Swap to (count, topic) so the pairs can be sorted by count.
        .map { case (topic, count) => (count, topic) }
        // `false` sorts in descending order, i.e. most frequent hashtag first.
        .transform(_.sortByKey(false))

      // Print popular hashtags
      topCounts.foreachRDD { rdd =>
        val topList = rdd.take(10)
        println("\nPopular topics in last %s seconds (%s total):"
          .format(windowSeconds, rdd.count()))
        topList.foreach { case (count, tag) => println("%s (%s tweets)".format(tag, count)) }
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}

// scalastyle:on println

package org.apache.spark.examples.streaming

import org.apache.log4j.{Level, Logger}

import org.apache.spark.Logging
/** Utility functions for Spark Streaming examples. */

object StreamingExamples extends Logging {

  /**
   * Set reasonable logging levels for streaming if the user has not configured log4j.
   *
   * When the log4j root logger has no appenders, this logs one informational message and then
   * sets the root logger level to WARN. Otherwise it does nothing.
   */
  def setStreamingLogLevels(): Unit = {
    // No appenders on the root logger is taken to mean log4j has not been configured.
    val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
    if (!log4jInitialized) {
      // We first log something to initialize Spark's default logging, then we override the
      // logging level.
      logInfo("Setting log level to [WARN] for streaming example." +
        " To override add a custom log4j.properties to the classpath.")
      Logger.getRootLogger.setLevel(Level.WARN)
    }
  }
}

Create An Spark Streaming App: 1. Architecture and Abstraction
No ratings yet
Create An Spark Streaming App: 1. Architecture and Abstraction
8 pages
Snowflake Fundamentals Anand Jha
No ratings yet
Snowflake Fundamentals Anand Jha
50 pages
Problem Description: Sensitivity: Internal & Restricted
No ratings yet
Problem Description: Sensitivity: Internal & Restricted
2 pages
BD - Spark - Baladasu A - SightSpectrum
No ratings yet
BD - Spark - Baladasu A - SightSpectrum
3 pages
Snowflake Setup - MD
No ratings yet
Snowflake Setup - MD
2 pages
Elite SQL Query Practice Guide
0% (1)
Elite SQL Query Practice Guide
20 pages
Python Data Pipeline Guide
No ratings yet
Python Data Pipeline Guide
38 pages
TF On Spark
No ratings yet
TF On Spark
35 pages
Data Warehouse - What Is It
No ratings yet
Data Warehouse - What Is It
5 pages
Hadoop Data Transfer with Sqoop
No ratings yet
Hadoop Data Transfer with Sqoop
21 pages
Day 4-01-Spark
No ratings yet
Day 4-01-Spark
43 pages
AWS Athena Knowledgebase
No ratings yet
AWS Athena Knowledgebase
4 pages
Python Advanced - Pipes in Python
No ratings yet
Python Advanced - Pipes in Python
7 pages
Apache Hive
No ratings yet
Apache Hive
3 pages
Spark NLP Training-Public-April 2020
No ratings yet
Spark NLP Training-Public-April 2020
39 pages
05.azure Data Lake Authentication
No ratings yet
05.azure Data Lake Authentication
16 pages
Installing Jenkins On Windows
100% (1)
Installing Jenkins On Windows
8 pages
Hive Cheat Sheet - Quick Reference
No ratings yet
Hive Cheat Sheet - Quick Reference
19 pages
Learning Apache Spark With Python
No ratings yet
Learning Apache Spark With Python
10 pages
Databricks Question
No ratings yet
Databricks Question
7 pages
Snowflake - Billing Components
No ratings yet
Snowflake - Billing Components
9 pages
17.views and MaterializedViews
No ratings yet
17.views and MaterializedViews
13 pages
Hive Lab
No ratings yet
Hive Lab
33 pages
Pandas Cheatsheet 1743309413
No ratings yet
Pandas Cheatsheet 1743309413
11 pages
The Hadoop Distributed File System
No ratings yet
The Hadoop Distributed File System
44 pages
Data Stream Processing Insights
No ratings yet
Data Stream Processing Insights
67 pages
Spark Scala Interview Question
No ratings yet
Spark Scala Interview Question
3 pages
Midhun BIGDATA Curicullum
No ratings yet
Midhun BIGDATA Curicullum
17 pages
Spark A To Z
No ratings yet
Spark A To Z
63 pages
CopyCommand Options
No ratings yet
CopyCommand Options
12 pages
PySpark Interview Questions
No ratings yet
PySpark Interview Questions
3 pages
Deepshikha Agrawal Pushp B.Sc. (IT), MBA (IT) Certification-Hadoop, Spark, Scala, Python, Tableau, ML (Assistant Professor JLBS)
No ratings yet
Deepshikha Agrawal Pushp B.Sc. (IT), MBA (IT) Certification-Hadoop, Spark, Scala, Python, Tableau, ML (Assistant Professor JLBS)
74 pages
De Mod 2 Transform Data With Spark
No ratings yet
De Mod 2 Transform Data With Spark
32 pages
SS1123 - D2T - Apache Cassandra Overview PDF
100% (1)
SS1123 - D2T - Apache Cassandra Overview PDF
45 pages
Machine Learning with Spark Guide
No ratings yet
Machine Learning with Spark Guide
26 pages
Building Data Pipelines - 3
No ratings yet
Building Data Pipelines - 3
29 pages
Mining Data Streams (Part 2)
No ratings yet
Mining Data Streams (Part 2)
56 pages
Spark
No ratings yet
Spark
13 pages
Spark Big Data Tuning Guide
100% (1)
Spark Big Data Tuning Guide
20 pages
Apache Spark: Fast Cluster Computing
No ratings yet
Apache Spark: Fast Cluster Computing
6 pages
Python Virtual Environment
No ratings yet
Python Virtual Environment
23 pages
Database Setup for E-commerce
No ratings yet
Database Setup for E-commerce
4 pages
CCA175 Demo Examenes
No ratings yet
CCA175 Demo Examenes
19 pages
Scala PDF
No ratings yet
Scala PDF
29 pages
Amazon Redshift-Lab
100% (1)
Amazon Redshift-Lab
14 pages
Spark DataFrames Project Exercise - Jupyter Notebook
No ratings yet
Spark DataFrames Project Exercise - Jupyter Notebook
7 pages
Spark Training in Bangalore
No ratings yet
Spark Training in Bangalore
36 pages
Apache Spark for Developers
No ratings yet
Apache Spark for Developers
3 pages
Spark
No ratings yet
Spark
96 pages
Stream Processing at Lyft
No ratings yet
Stream Processing at Lyft
20 pages
Visual Guide to Spark API Transformations
No ratings yet
Visual Guide to Spark API Transformations
122 pages
AWS Lab Requirements for Training
No ratings yet
AWS Lab Requirements for Training
1 page
Pair RDD Operations: Flat Map
No ratings yet
Pair RDD Operations: Flat Map
4 pages
3 Lecture 3-ETL
100% (1)
3 Lecture 3-ETL
42 pages
Hadoop for Data Engineers
No ratings yet
Hadoop for Data Engineers
44 pages
Cloudurable Kafka Tutorial v1 PDF
No ratings yet
Cloudurable Kafka Tutorial v1 PDF
79 pages
Bài Giảng Spark Streaming
No ratings yet
Bài Giảng Spark Streaming
75 pages
Databricks - Spark Streaming
No ratings yet
Databricks - Spark Streaming
55 pages
Spark Streaming: Tathagata "TD" Das
No ratings yet
Spark Streaming: Tathagata "TD" Das
28 pages
Spark Streaming for Developers
100% (1)
Spark Streaming for Developers
28 pages
Liquid Crystal Display Term Paper
100% (1)
Liquid Crystal Display Term Paper
7 pages
201 64-OS-2 Hindi
No ratings yet
201 64-OS-2 Hindi
24 pages
Air Regulations RK Bali PDF
38% (8)
Air Regulations RK Bali PDF
1 page
Advamced Math Solved Problems
No ratings yet
Advamced Math Solved Problems
2 pages
AUTOSAR CP SRS SPALGeneral
No ratings yet
AUTOSAR CP SRS SPALGeneral
23 pages
What Is Configure, Price, Quote (CPQ)
No ratings yet
What Is Configure, Price, Quote (CPQ)
6 pages
Project Report - 2 On CreditCard Fraud Detection
No ratings yet
Project Report - 2 On CreditCard Fraud Detection
42 pages
ANSI-SPARC Architecture
No ratings yet
ANSI-SPARC Architecture
16 pages
Demo Imperial
No ratings yet
Demo Imperial
11 pages
Dji Phantom 4 RTK Book F-2
100% (1)
Dji Phantom 4 RTK Book F-2
18 pages
Tamron Lenses for Canon & Nikon
No ratings yet
Tamron Lenses for Canon & Nikon
18 pages
Professional Practices in Information Technology: Hand Book
No ratings yet
Professional Practices in Information Technology: Hand Book
131 pages
Fybcom Sem 1 Commerce 1
No ratings yet
Fybcom Sem 1 Commerce 1
20 pages
Workflow Management Training
No ratings yet
Workflow Management Training
628 pages
(Ebooks PDF) Download Discrete Mathematics and Applications 2nd Edition Ferland Full Chapters
100% (11)
(Ebooks PDF) Download Discrete Mathematics and Applications 2nd Edition Ferland Full Chapters
55 pages
ZXA10 C320 Product Introduction
100% (2)
ZXA10 C320 Product Introduction
9 pages
Witsml
100% (2)
Witsml
37 pages
Sensitivity On Wheatstone Bridge Report
No ratings yet
Sensitivity On Wheatstone Bridge Report
4 pages
Technological University (Meiktila) Department of Electronic Engineering
No ratings yet
Technological University (Meiktila) Department of Electronic Engineering
62 pages
KIRAN's Resume
No ratings yet
KIRAN's Resume
1 page
SDLC
100% (3)
SDLC
85 pages
Am-Stick-Wb: Part.-No. 349081
No ratings yet
Am-Stick-Wb: Part.-No. 349081
19 pages
Internship Report
No ratings yet
Internship Report
20 pages
CNG Owners Manual V1.0.2
No ratings yet
CNG Owners Manual V1.0.2
35 pages
Codebook Swo3
No ratings yet
Codebook Swo3
144 pages
Aqa A Level Computer Science Paper 2 2023 75172
No ratings yet
Aqa A Level Computer Science Paper 2 2023 75172
40 pages
Serial Communication With ET 200S 1SI Module Via PROFIBUS-PROFINET CP
No ratings yet
Serial Communication With ET 200S 1SI Module Via PROFIBUS-PROFINET CP
57 pages
Indian Mobile Brands & Ambassadors
No ratings yet
Indian Mobile Brands & Ambassadors
9 pages
Dr. Anjan Krishnamurthy Associate Professor Dept. of CSE, BMSIT&M
No ratings yet
Dr. Anjan Krishnamurthy Associate Professor Dept. of CSE, BMSIT&M
129 pages
Days of Innocence and Wonder Lucy Treloar Official Test Bank
No ratings yet
Days of Innocence and Wonder Lucy Treloar Official Test Bank
406 pages

Spark Streaming Twitter Example

Uploaded by

Spark Streaming Twitter Example

Uploaded by

// scalastyle:off println

import org.apache.spark.streaming.{Seconds, StreamingContext}

* command line arguments.

* Run this on your local machine as

def main(args: Array[String]) {

System.err.println("Usage: TwitterPopularTags <consumer key> <consumer secret> " +

"<access token> <access token secret> [<filters>]")

val filters = args.takeRight(args.length - 4)

// can use them to generate OAuth credentials

val sparkConf = new SparkConf().setAppName("TwitterPopularTags")

val ssc = new StreamingContext(sparkConf, Seconds(2))

val stream = TwitterUtils.createStream(ssc, None, filters)

val hashTags = stream.flatMap(status => status.getText.split(" ").filter(_.startsWith("#")))

val topCounts60 = hashTags.map((_, 1)).reduceByKeyAndWindow(_ + _, Seconds(60))

.map{case (topic, count) => (count, topic)}

val topCounts10 = hashTags.map((_, 1)).reduceByKeyAndWindow(_ + _, Seconds(10))

.map{case (topic, count) => (count, topic)}

val topList = rdd.take(10)

println("\nPopular topics in last 60 seconds (%s total):".format(rdd.count()))

topList.foreach{case (count, tag) => println("%s (%s tweets)".format(tag, count))}

val topList = rdd.take(10)

println("\nPopular topics in last 10 seconds (%s total):".format(rdd.count()))

topList.foreach{case (count, tag) => println("%s (%s tweets)".format(tag, count))}

import org.apache.log4j.{Level, Logger}

object StreamingExamples extends Logging {

val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements

logInfo("Setting log level to [WARN] for streaming example." +

" To override add a custom log4j.properties to the classpath.")

You might also like