Scala Sample Code
Function Description
Collect statistics on female netizens who spend more than 2 hours shopping online over the weekend.
Sample Code
The following code snippet is used as an example. For the complete code, see the com.huawei.bigdata.spark.examples.FemaleInfoCollection class.
object CollectFemaleInfo {
// Row schema for one input record. Each comma-separated text line is mapped to a
// FemaleInfo, and the field names (name, gender, stayTime) become the DataFrame
// column names referenced by the SQL query in main.
// NOTE(review): stayTime is presumably a duration in minutes (the filter in main
// compares it against 120 for a "2 hours" threshold) - confirm against the input data.
case class FemaleInfo(name: String, gender: String, stayTime: Int)
/**
  * Entry point: reads comma-separated records from the path in `args(0)`, sums the
  * stay time per female netizen via Spark SQL, and prints every name whose total
  * stay time is at least 120.
  *
  * @param args command-line arguments; `args(0)` is the input text file path
  * @throws IllegalArgumentException if no input path is supplied
  */
def main(args: Array[String]): Unit = {
  // Fail early with a usage hint instead of an ArrayIndexOutOfBoundsException.
  require(args.nonEmpty, "Usage: CollectFemaleInfo <inputPath>")

  // Configure the Spark application name.
  val sparkConf = new SparkConf().setAppName("FemaleInfo")
  val sc = new SparkContext(sparkConf)
  try {
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    // Brings the implicit RDD-to-DataFrame conversion (toDF) into scope.
    import sqlContext.implicits._

    // Convert RDD to DataFrame through the implicit conversion, then register a table.
    sc.textFile(args(0)).map(_.split(","))
      .map(p => FemaleInfo(p(0), p(1), p(2).trim.toInt))
      .toDF.registerTempTable("FemaleInfoTable")

    // Use SQL to keep only female netizens' records and aggregate the stay time
    // of records sharing the same name. The query is one logical string; it is
    // split across two literals only to keep the line length readable.
    val femaleTimeInfo = sqlContext.sql(
      "select name,sum(stayTime) as stayTime from FemaleInfoTable " +
        "where gender = 'female' group by name")

    // Keep the netizens whose total stay time reaches the 2-hour threshold and
    // output the result. NOTE(review): the comparison is inclusive (>= 120) while
    // the sample description says "more than 2 hours" - confirm the intended boundary.
    val c = femaleTimeInfo.filter("stayTime >= 120").collect()
    c.foreach(println)
  } finally {
    // Always release the SparkContext, even when the job fails.
    sc.stop()
  }
}
}
For details about the code of other Spark SQL features, see http://spark.apache.org/docs/latest/sql-programming-guide.html#running-sql-queries-programmatically.
Last Article: Java Sample Code
Next Article: Spark Streaming Application
Did this article solve your problem?
Thank you for your score! Your feedback would help us improve the website.