Updated on 2022-09-14 GMT+08:00

Scala Sample Code

Function Description

Collect statistics on female netizens who dwell on online shopping for more than 2 hours on the weekend.

Sample Code

The following code snippets are used as an example. For complete codes, see the com.huawei.bigdata.spark.examples.FemaleInfoCollection class.

object CollectFemaleInfo {
  // Table structure, used for mapping the text data to df
  case class FemaleInfo(name: String, gender: String, stayTime: Int)
  def main(args: Array[String]) {
    // Configure the Spark application name.
    val sparkConf = new SparkConf().setAppName("FemaleInfo")
    val sc = new SparkContext(sparkConf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._
   // Convert RDD to DataFrame through the implicit conversion, then register a table.
    sc.textFile(args(0)).map(_.split(","))
      .map(p => FemaleInfo(p(0), p(1), p(2).trim.toInt))
      .toDF.registerTempTable("FemaleInfoTable")
    // Use SQL statements to filter female netizens' dwell duration data and aggregate data of the same name.
    val femaleTimeInfo = sqlContext.sql("select name,sum(stayTime) as stayTime from FemaleInfoTable where 
?gender = 'female' group by name")
    // Filter the information of female netizens who spend more than 2 hours online and output the result.
    val c = femaleTimeInfo.filter("stayTime >= 120").collect()
    c.foreach(println)
    sc.stop()
  }
}

For details about the code of other Spark SQL features, visit http://spark.apache.org/docs/latest/sql-programming-guide.html#running-sql-queries-programmatically.