// Chương trình Word Count với spark-submit và spark-shell

package

spark.main

;

import

java.util.Arrays

;

import

org.apache.spark.SparkConf

;

import

org.apache.spark.api.java.JavaPairRDD

;

import

org.apache.spark.api.java.JavaRDD

;

import

org.apache.spark.api.java.JavaSparkContext

;

import

scala.Tuple2

;

/**
 * Spark word-count job.
 *
 * <p>Reads a text file, splits every line into whitespace-separated words,
 * counts the occurrences of each word and writes the {@code (word, count)}
 * pairs out as text.
 *
 * <p>Usage: {@code Main [inputPath [outputPath]]}. Both arguments are optional
 * and fall back to the defaults declared below.
 */
public class Main {

    /** Master URL used only when none was supplied externally (e.g. {@code spark-submit --master}). */
    private static final String DEFAULT_MASTER = "spark://PC0628:7077";

    /** Input file read when no first argument is given. */
    private static final String DEFAULT_INPUT = "hdfs://localhost:9000/input/input-1.txt";

    /** Output directory written when no second argument is given. */
    private static final String DEFAULT_OUTPUT = "hdfs://localhost:9000/output/result";

    /**
     * Runs the word count.
     *
     * @param args optional {@code [inputPath, outputPath]}; missing entries use the defaults
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        // Values set directly on SparkConf take precedence over spark-submit flags,
        // so only supply the master when the launcher has not already provided one.
        SparkConf conf = new SparkConf()
                .setIfMissing("spark.master", DEFAULT_MASTER)
                .setAppName("Spark Word Count");

        // try-with-resources stops the context even when the job fails.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> lines = sc.textFile(inputPath);

            JavaPairRDD<String, Integer> counts = lines
                    // Split on any whitespace run and drop the empty tokens that
                    // blank lines or leading whitespace would otherwise produce.
                    .flatMap(line -> Arrays.stream(line.split("\\s+"))
                            .filter(word -> !word.isEmpty())
                            .iterator())
                    .mapToPair(word -> new Tuple2<>(word, 1))
                    .reduceByKey(Integer::sum);

            counts.saveAsTextFile(outputPath);
        }
    }
}