Executing Scala scripts from shell
Create a helloworld.sh like this:
- #!/bin/sh
- exec scala "$0" "$@"   # re-run this file through the Scala script runner, forwarding all arguments
- !#
-
- case class Person(name: String)
-
- object HelloWorld {
-   def main(args: Array[String]) {
-     require(args.length == 1)
-     val al = Person(args(0))
-     println(al)
-   }
- }
-
- HelloWorld.main(args)   // in script mode, `args: Array[String]` is already in scope
After chmod 777 helloworld.sh, it can be run as ./helloworld.sh xiaojun, and the output is: Person(xiaojun)
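The header is the whole trick: sh executes the first two lines, and exec scala "$0" "$@" replaces the shell process with the Scala script runner, feeding it this very file plus all the original arguments; the !# line marks where the shell header ends and the Scala code begins. In script mode the runner also puts the command-line arguments in scope as args, which is what the final HelloWorld.main(args) call relies on. A small variant worth knowing (a minimal sketch, assuming the Scala 2.x script runner): the -savecompiled flag caches the compiled script next to the source file, so repeated runs skip recompilation:

- #!/bin/sh
- # -savecompiled (Scala 2.x script runner) keeps the compiled script on disk,
- # so subsequent runs of this file start without recompiling
- exec scala -savecompiled "$0" "$@"
- !#
- println(s"Hello, ${args.mkString(" ")}")   // `args` is predefined in script mode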
An upgraded version of this shell-runs-Scala trick:
- #!/bin/sh
- xmlpath="/data/hupu/dace/streaming/$1"
- export HADOOP_USER_NAME=hdfs
- xmlString=$(hadoop fs -cat "$xmlpath")    # read the XML config out of HDFS
- exec scala "$0" "$xmlString" "$xmlpath"   # pass both the content and the path on to the Scala part
- !#
-
- import scala.xml.XML
- import scala.sys.process._
-
- object StreamingSubmit {
-   def main(args: Array[String]) {
-     require(args.length == 2)
-     val config = XML.loadString(args(0))
-     val configFile = args(1)
-     // the spark-submit parameters live as attributes on the <submit-args> element
-     val submitArgs = config \ "submit-args"
-     val numExecutors = (submitArgs \ "@num-executors").text
-     val driverMemory = (submitArgs \ "@driver-memory").text
-     val executorMemory = (submitArgs \ "@executor-memory").text
-     val executorCores = (submitArgs \ "@executor-cores").text
-     val memoryFraction = (submitArgs \ "@memoryFraction").text
-     println(numExecutors, driverMemory, executorMemory, executorCores, memoryFraction)
-     val assemblyJar = "spark-assembly-1.4.0-hadoop2.5.0-cdh5.3.0.jar"
-     val submitShell = s"""/opt/spark/bin/spark-submit --driver-class-path /opt/spark/lib/$assemblyJar:/opt/spark/lib/dace-streaming.jar --jars /opt/spark/lib/dace-streaming.jar,/opt/spark/lib/HiveUDF-1.0-jar-with-dependencies.jar --class com.hupu.dace.spark.streaming.framework.StreamingRunner --master yarn-cluster --num-executors $numExecutors --driver-memory $driverMemory --executor-memory $executorMemory --executor-cores $executorCores --conf spark.storage.memoryFraction=$memoryFraction --conf spark.sql.hive.metastore.jars=/opt/spark/lib/$assemblyJar /opt/spark/lib/dace-streaming.jar $configFile"""
-     println(submitShell)
-     submitShell.!!   // run the assembled spark-submit command
-   }
- }
-
- StreamingSubmit.main(args)
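For reference, here is a minimal sketch of a config in the shape StreamingSubmit expects: a <submit-args> child of the root, carrying the spark-submit parameters as attributes. The <streaming> root element name is invented for illustration; XML.loadString only requires a well-formed document:

- import scala.xml.XML
-
- // hypothetical config; the element and attribute names match what the script reads
- val sample = XML.loadString(
-   """<streaming>
-     |  <submit-args num-executors="4" driver-memory="2g"
-     |               executor-memory="4g" executor-cores="2"
-     |               memoryFraction="0.4"/>
-     |</streaming>""".stripMargin)
- println((sample \ "submit-args" \ "@num-executors").text)   // prints: 4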
Originally I wanted to parse the XML file in shell and splice the pieces into a complete spark-submit command, but a quick Baidu search for parsing XML in shell made me give up on the spot. Then I remembered this old trick of running Scala scripts from shell, and it was indeed far more convenient. The code above is rather fun: a shell script bootstraps a Scala script, and the Scala code in turn imports scala.sys.process._ and runs a shell command through .!!. Even I was amused to have written it. One problem stayed unsolved, though: I could not splice in the --driver-java-options parameter. If I try to set --driver-java-options "-XX:PermSize=128M -XX:MaxPermSize=256M", the space before the second -XX cuts the string there and the rest is treated as a separate argument, even though passing Java options this way works fine on an interactive shell command line. I tried for a long time without finding a way to write it. Luckily the job runs in yarn-cluster mode, so I moved the setting into spark-defaults.conf instead. That escape hatch does not exist in yarn-client mode: there the driver has already started, so its JVM options only take effect when given as --driver-java-options on the command line; anything later is too late.
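One workaround worth trying for that splitting problem (an untested sketch, with the jar list shortened for readability): scala.sys.process also accepts a Seq[String], and there each element is handed to the child process as exactly one argument, so a value containing a space is never re-tokenized:

- import scala.sys.process._
-
- // each Seq element becomes a single argv entry of spark-submit,
- // so the space inside the JVM options string survives intact
- val cmd = Seq(
-   "/opt/spark/bin/spark-submit",
-   "--driver-java-options", "-XX:PermSize=128M -XX:MaxPermSize=256M",
-   "--class", "com.hupu.dace.spark.streaming.framework.StreamingRunner",
-   "--master", "yarn-cluster",
-   "/opt/spark/lib/dace-streaming.jar")
- println(cmd.!!)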
demo2:
- #!/bin/sh
- hdfs_path="$1"
- export HADOOP_USER_NAME=hdfs
- interval="$2"
- exec scala "$0" "$hdfs_path" "$interval"
- !#
-
- import java.util.Date
- import scala.sys.process._
-
- object MyApp {
-   def main(args: Array[String]) {
-     require(args.length == 2)
-     val Array(path, interval) = args
-     println(path)
-     var last_time = ""
-     val shell = s"""hadoop fs -stat $path"""   // prints the file's modification time
-     val subshell = """./subshell.sh"""
-     println("exec shell :" + shell)
-     while (true) {
-       val current_time = shell.!!.trim
-       if (current_time != last_time) {
-         // the file changed since the last poll: run the sync script
-         println(s"======exec sync shell,current time:${new Date()},last file time=$last_time,current file time:$current_time========")
-         val result = subshell.!!
-         println(result)
-         last_time = current_time
-       } else {
-         println(s"======no change,current time:${new Date()},last file time=$last_time,current file time:$current_time========")
-       }
-       Thread.sleep(interval.toLong)   // interval is in milliseconds
-     }
-   }
- }
-
- MyApp.main(args)
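A plausible invocation (the script file name and path are just examples): hadoop fs -stat with no format string prints the file's modification time, so comparing its trimmed output between polls detects any update and triggers ./subshell.sh.

- chmod +x watch.sh
- ./watch.sh /data/hupu/dace/streaming/app.xml 60000   # poll every 60 seconds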