[root@master big_data]# cd spark
[root@master spark]# sbin/start-all.sh
( 也可以分别启动
[root@master spark]$ sbin/start-master.sh
可以通过 http://master:8080/ 看到对应界面
[root@master spark]$ sbin/start-slaves.sh spark://master:7077
可以通过 http://master:8081/ 看到对应界面
)
[root@master spark]# jps
[root@master ~]# jps
4629 NameNode (hadoop的)
5007 Master (spark的)
6150 Jps
4832 SecondaryNameNode (hadoop的)
5107 Worker (spark的)
4734 DataNode (hadoop的)
可以通过 http://192.168.80.100:8080/ 看到对应界面
[root@master big_data]# spark-shell
Spark assembly has been built with Hive, including Datanucleus jars on classpath
14/07/20 21:41:04 INFO spark.SecurityManager: Changing view acls to: root
14/07/20 21:41:04 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root)
14/07/20 21:41:04 INFO spark.HttpServer: Starting HTTP Server
14/07/20 21:41:05 INFO server.Server: jetty-8.y.z-SNAPSHOT
14/07/20 21:41:05 INFO server.AbstractConnector: Started SocketConnector@0.0.0.0:43343
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 1.0.0
/_/
Using Scala version 2.10.4 (Java HotSpot(TM) Client VM, Java 1.7.0_60)
。。。
scala>
可以通过 http://192.168.80.100:4040/ 看到对应界面
(先随便上传一个包含一些英文单词的文本文件到 HDFS 上,这里的路径是 /input)
scala> val file=sc.textFile("hdfs://master:9000/input")
14/07/20 21:51:05 INFO storage.MemoryStore: ensureFreeSpace(608) called with curMem=31527, maxMem=311387750
14/07/20 21:51:05 INFO storage.MemoryStore: Block broadcast_1 stored as values to memory (estimated size 608.0 B, free 296.9 MB)
file: org.apache.spark.rdd.RDD[String] = MappedRDD[5] at textFile at <console>:12
scala> val count=file.flatMap(line=>line.split(" ")).map(word=>(word,1)).reduceByKey(_+_)
14/07/20 21:51:14 INFO mapred.FileInputFormat: Total input paths to process : 1
count: org.apache.spark.rdd.RDD[(String, Int)] = MapPartitionsRDD[10] at reduceByKey at <console>:14
scala> count.collect()
14/07/20 21:51:48 INFO spark.SparkContext: Job finished: collect at <console>:17, took 2.482381535 s
res0: Array[(String, Int)] = Array((previously-registered,1), (this,3), (Spark,1), (it,3), (original,1), (than,1), (its,1), (previously,1), (have,2), (upon,1), (order,2), (whenever,1), (it’s,1), (could,3), (Configuration,1), (Master's,1), (SPARK_DAEMON_JAVA_OPTS,1), (This,2), (which,2), (applications,2), (register,,1), (doing,1), (for,3), (just,2), (used,1), (any,1), (go,1), ((equivalent,1), (Master,4), (killing,1), (time,1), (availability,,1), (stop-master.sh,1), (process.,1), (Future,1), (node,1), (the,9), (Workers,1), (however,,1), (up,2), (Details,1), (not,3), (recovered,1), (process,1), (enable,3), (spark-env,1), (enough,1), (can,4), (if,3), (While,2), (provided,1), (be,5), (mode.,1), (minute,1), (When,1), (all,2), (written,1), (store,1), (enter,1), (then,1), (as,1), (officially,1)...
scala>
scala> count.saveAsTextFile("hdfs://master:9000/output") (结果保存到hdfs上的/output文件夹下)
scala> :q
Stopping spark context.
[root@master ~]# hadoop fs -ls /
Found 3 items
drwxr-xr-x - root supergroup 0 2014-07-18 21:10 /home
-rw-r--r-- 1 root supergroup 1722 2014-07-18 06:18 /input
drwxr-xr-x - root supergroup 0 2014-07-20 21:53 /output
[root@master ~]#
[root@master ~]# hadoop fs -cat /output/p*
。。。
(mount,1)
(production-level,1)
(recovery).,1)
(Workers/applications,1)
(perspective.,1)
(so,2)
(and,1)
(ZooKeeper,2)
(System,1)
(needs,1)
(property Meaning,1)
(solution,1)
(seems,1)