Esta página aún no está disponible en su idioma local. Estamos trabajando arduamente para agregar más versiones de idiomas. Gracias por tu apoyo.

On this page

Java Sample Code

Updated on 2022-09-14 GMT+08:00

Function Description

Collect statistics on female netizens who dwell on online shopping for more than 2 hours on the weekend.

Sample Code

The following code snippet is provided as an example. For the complete code, see the com.huawei.bigdata.spark.examples.FemaleInfoCollection class.

    // Create a configuration class SparkConf, and then create a SparkContext.
    SparkConf conf = new SparkConf().setAppName("CollectFemaleInfo");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Read the original file data, and transfer each row of records to an element of the RDD.
    JavaRDD<String> data = jsc.textFile(args[0]);

    // Parse every comma-separated record into a three-field Tuple.
    JavaRDD<Tuple3<String,String,Integer>> person = data.map(new Function<String,Tuple3<String,String,Integer>>()
    {
        private static final long serialVersionUID = -2381522520231963249L;

        @Override
        public Tuple3<String, String, Integer> call(String line) throws Exception
        {
            // Break the row apart at each comma (,).
            String[] fields = line.split(",");

            // Read the fields left to right; only the third one is converted to an integer.
            String first = fields[0];
            String second = fields[1];
            int third = Integer.parseInt(fields[2]);

            // Pack the three values into a ternary Tuple.
            return new Tuple3<String, String, Integer>(first, second, third);
        }
    });

    // Keep only the records that belong to female netizens.
    JavaRDD<Tuple3<String,String,Integer>> female = person.filter(new Function<Tuple3<String,String,Integer>, Boolean>()
    {
        private static final long serialVersionUID = -4210609503909770492L;

        @Override
        public Boolean call(Tuple3<String, String, Integer> record) throws Exception
        {
            // The second tuple field holds the gender; a record passes only when it is exactly "female".
            String gender = record._2();
            return gender.equals("female");
        }
    });

    // Summarize the total time that each female netizen spends online.
    // Step 1: reduce every record to a (name, dwell duration) pair so it can be summed by name.
    JavaPairRDD<String, Integer> females = female.mapToPair(new PairFunction<Tuple3<String, String, Integer>, String, Integer>()
    {
        private static final long serialVersionUID = 8313245377656164868L;

        @Override
        public Tuple2<String, Integer> call(Tuple3<String, String, Integer> record) throws Exception
        {
            // The first tuple field becomes the key and the third becomes the value.
            String name = record._1();
            Integer duration = record._3();
            return new Tuple2<String, Integer>(name, duration);
        }
    });

    // Step 2: add up the dwell durations that share the same name.
    JavaPairRDD<String, Integer> femaleTime = females.reduceByKey(new Function2<Integer, Integer, Integer>()
    {
        private static final long serialVersionUID = -3271456048413349559L;

        @Override
        public Integer call(Integer left, Integer right) throws Exception
        {
            // Combine two dwell durations of the same female netizen.
            int total = left + right;
            return total;
        }
    });

    // Filter the information of female netizens who spend more than 2 hours online.
    // The filter runs on femaleTime (the per-name totals produced by reduceByKey), not on
    // females: filtering the un-summed pairs would compare each individual record against
    // the threshold and discard the aggregation entirely.
    JavaPairRDD<String, Integer> rightFemales = femaleTime.filter(new Function<Tuple2<String, Integer>, Boolean>()
    {
        private static final long serialVersionUID = -3178168214712105171L;

        @Override
        public Boolean call(Tuple2<String, Integer> s) throws Exception
        {
            // Keep the entry only when the summed time is strictly greater than 2 * 60
            // (2 hours, assuming the durations are recorded in minutes).
            return s._2() > (2 * 60);
        }
    });

    // Print the information about female netizens who meet the requirements.
    for(Tuple2<String, Integer> d: rightFemales.collect())
    {
        System.out.println(d._1() + "," + d._2());
    }
Feedback

Feedback

Feedback

0/500

Selected Content

Submit selected content with the feedback