CassandraCQLTest
来源:互联网 发布:李荣浩和周杰伦知乎 编辑:程序博客网 时间:2024/05/23 17:30
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// scalastyle:off println
// scalastyle:off jobcontext
package org.apache.spark.examples

import java.nio.ByteBuffer
import java.util.Collections

import org.apache.cassandra.hadoop.ConfigHelper
import org.apache.cassandra.hadoop.cql3.CqlPagingInputFormat
import org.apache.cassandra.hadoop.cql3.CqlConfigHelper
import org.apache.cassandra.hadoop.cql3.CqlOutputFormat
import org.apache.cassandra.utils.ByteBufferUtil
import org.apache.hadoop.mapreduce.Job

import org.apache.spark.{SparkConf, SparkContext}

/*
  Need to create the following keyspace and column families in cassandra
  before running this example. Start CQL shell using ./bin/cqlsh and
  execute the following commands:

  CREATE KEYSPACE retail WITH replication =
    {'class': 'SimpleStrategy', 'replication_factor': 1};
  use retail;
  CREATE TABLE salecount (prod_id text, sale_count int, PRIMARY KEY (prod_id));
  CREATE TABLE ordercf (user_id text, time timestamp, prod_id text,
    quantity int, PRIMARY KEY (user_id, time));
  INSERT INTO ordercf (user_id, time, prod_id, quantity)
    VALUES ('bob', 1385983646000, 'iphone', 1);
  INSERT INTO ordercf (user_id, time, prod_id, quantity)
    VALUES ('tom', 1385983647000, 'samsung', 4);
  INSERT INTO ordercf (user_id, time, prod_id, quantity)
    VALUES ('dora', 1385983648000, 'nokia', 2);
  INSERT INTO ordercf (user_id, time, prod_id, quantity)
    VALUES ('charlie', 1385983649000, 'iphone', 2);
*/

/**
 * This example demonstrates how to read and write to a cassandra column family
 * created using CQL3, using Spark.
 *
 * Parameters : <cassandra_node> <cassandra_port>
 * Usage: ./bin/spark-submit examples.jar \
 *   --class org.apache.spark.examples.CassandraCQLTest localhost 9160
 */
object CassandraCQLTest {

  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message rather than an
    // ArrayIndexOutOfBoundsException when arguments are missing.
    if (args.length < 2) {
      System.err.println("Usage: CassandraCQLTest <cassandra_node> <cassandra_port>")
      System.exit(1)
    }

    val sparkConf = new SparkConf().setAppName("CQLTestApp")
    val sc = new SparkContext(sparkConf)
    val cHost: String = args(0)
    val cPort: String = args(1)
    val KeySpace = "retail"
    val InputColumnFamily = "ordercf"
    val OutputColumnFamily = "salecount"

    // Job.getInstance() replaces the deprecated `new Job()` constructor.
    val job = Job.getInstance()
    job.setInputFormatClass(classOf[CqlPagingInputFormat])
    // Obtain the configuration once and reuse it, instead of calling
    // job.getConfiguration() before every Helper call.
    val configuration = job.getConfiguration
    ConfigHelper.setInputInitialAddress(configuration, cHost)
    ConfigHelper.setInputRpcPort(configuration, cPort)
    ConfigHelper.setInputColumnFamily(configuration, KeySpace, InputColumnFamily)
    ConfigHelper.setInputPartitioner(configuration, "Murmur3Partitioner")
    CqlConfigHelper.setInputCQLPageRowSize(configuration, "3")

    /** CqlConfigHelper.setInputWhereClauses(configuration, "user_id='bob'") */

    /** An UPDATE writes one or more columns to a record in a Cassandra column family */
    val query = "UPDATE " + KeySpace + "." + OutputColumnFamily + " SET sale_count = ? "
    CqlConfigHelper.setOutputCql(configuration, query)

    job.setOutputFormatClass(classOf[CqlOutputFormat])
    ConfigHelper.setOutputColumnFamily(configuration, KeySpace, OutputColumnFamily)
    ConfigHelper.setOutputInitialAddress(configuration, cHost)
    ConfigHelper.setOutputRpcPort(configuration, cPort)
    ConfigHelper.setOutputPartitioner(configuration, "Murmur3Partitioner")

    // Read the input column family as an RDD of
    // (partition-key map -> column map) pairs, both keyed by column name.
    val casRdd = sc.newAPIHadoopRDD(configuration,
      classOf[CqlPagingInputFormat],
      classOf[java.util.Map[String, ByteBuffer]],
      classOf[java.util.Map[String, ByteBuffer]])

    println("Count: " + casRdd.count)

    // Extract (prod_id, quantity) from each row's column map.
    val productSaleRDD = casRdd.map {
      case (key, value) =>
        (ByteBufferUtil.string(value.get("prod_id")),
          ByteBufferUtil.toInt(value.get("quantity")))
    }

    // Total units sold per product.
    val aggregatedRDD = productSaleRDD.reduceByKey(_ + _)
    aggregatedRDD.collect().foreach {
      case (productId, saleCount) => println(productId + ":" + saleCount)
    }

    // Shape each aggregate into the (key map, bound-variable list) pair
    // that CqlOutputFormat expects for the parameterized UPDATE above.
    val casoutputCF = aggregatedRDD.map {
      case (productId, saleCount) =>
        val outKey = Collections.singletonMap("prod_id", ByteBufferUtil.bytes(productId))
        val outVal = Collections.singletonList(ByteBufferUtil.bytes(saleCount))
        (outKey, outVal)
    }

    casoutputCF.saveAsNewAPIHadoopFile(
      KeySpace,
      classOf[java.util.Map[String, ByteBuffer]],
      classOf[java.util.List[ByteBuffer]],
      classOf[CqlOutputFormat],
      configuration)

    sc.stop()
  }
}
// scalastyle:on println
// scalastyle:on jobcontext
0 0
- CassandraCQLTest
- windows进程间通信之文件映射
- web项目使用Tomcat作为服务器,GET请求参数中文乱码
- PowerDesigner怎么批量更新某个字段
- Android+Smack+Openfire
- java中处理异常
- CassandraCQLTest
- 正则表达式简单应用3
- Noi题库 矩阵归零消减序列和&&变幻的矩阵
- selinux 权限设置
- android4.0中HttpURLConnection抛异常java.io.FileNotFoundException
- 正则表达式简单应用4
- uva 1648 Business Center 数学+二分
- 欧几里得和扩展欧几里得算法
- gcc编译器