用Supersonic实现query的查询

来源:互联网 发布:奥卡姆剃刀通信知乎 编辑:程序博客网 时间:2024/04/30 22:44

    因为Supersonic本身不提供Query(SQL)解析器,所以要执行一条Query,必须自己调用Supersonic的操作函数编程实现。下面是我实现的一个简单查询:

Query:



select
l_returnflag,
sum(l_quantity) as sum_qty
from
lineitem
where
l_shipdate <= date '1998-12-01'
group by
l_returnflag
order by
l_returnflag;


Table:

Column Name     Datatype 
L_ORDERKEY INT32
L_PARTKEY INT32
L_SUPPKEY INT32
L_LINENUMBER INT32
L_QUANTITY FLOAT
L_EXTENDEDPRICE FLOAT
L_DISCOUNT FLOAT
L_TAX FLOAT
L_RETURNFLAG STRING
L_LINESTATUS STRING
L_SHIPDATE DATE
L_COMMITDATE DATE
L_RECEIPTDATE DATE
L_SHIPINSTRUCT STRING
L_SHIPMODE STRING
L_COMMENT STRING


实现代码如下:

#include <cstdlib>
#include <iostream>
#include <map>

using std::map;
using std::multimap;
#include <set>
using std::multiset;
using std::set;
#include <utility>
using std::make_pair;
using std::pair;


#include "gtest/gtest.h"


#include "supersonic/supersonic.h"
#include "supersonic/cursor/core/sort.h"
#include "supersonic/cursor/infrastructure/ordering.h"
#include "supersonic/utils/strings/stringpiece.h"


// Include some map utilities to use for result verification.
#include "supersonic/utils/map-util.h"


using supersonic::Attribute;
using supersonic::Block;
using supersonic::Cursor;
using supersonic::Operation;
using supersonic::FailureOr;
using supersonic::FailureOrOwned;
using supersonic::GetConstantExpressionValue;
using supersonic::TupleSchema;
using supersonic::Table;
using supersonic::TableRowWriter;
using supersonic::View;
using supersonic::ViewCopier;
using supersonic::HashJoinOperation;
using supersonic::HeapBufferAllocator;
using supersonic::JoinType;
using supersonic::ProjectNamedAttribute;
using supersonic::ProjectNamedAttributeAs;
using supersonic::rowid_t;
using supersonic::SingleSourceProjector;
using supersonic::MultiSourceProjector;
using supersonic::CompoundSingleSourceProjector;
using supersonic::CompoundMultiSourceProjector;
using supersonic::ResultView;
using supersonic::ScanView;
using supersonic::SucceedOrDie;
using supersonic::NamedAttribute;
using supersonic::AggregationSpecification;
using supersonic::SortOrder;
using supersonic::Sort;
using supersonic::Filter;
using supersonic::ProjectAllAttributes;




using supersonic::If;
using supersonic::IfNull;
using supersonic::Less;
using supersonic::CompoundExpression;
using supersonic::Expression;
using supersonic::Compute;
using supersonic::Generate;
using supersonic::ParseStringNulling;
using supersonic::ConstBool;
using supersonic::ConstString;
using supersonic::ConstInt32;
using supersonic::ConstDate;
using supersonic::Null;
using supersonic::BoundExpressionTree;
using supersonic::EvaluationResult;
using supersonic::ParseStringNulling;
using supersonic::GetConstantExpressionValue;


using supersonic::INNER;
using supersonic::UNIQUE;
using supersonic::SUM;
using supersonic::ASCENDING;


using supersonic::INT32;
using supersonic::NOT_NULLABLE;
using supersonic::NULLABLE;
using supersonic::STRING;
using supersonic::DATE;
using supersonic::BOOL;
using supersonic::FLOAT;




using supersonic::rowcount_t;


class QueryOneTest {
 public:
   void SetUp() {




  lineitem_schema.add_attribute(Attribute("L_ORDERKEY", INT32, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_PARTKEY", INT32, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_SUPPKEY", INT32, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_LINENUMBER", INT32, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_QUANTITY", FLOAT, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_EXTENDEDPRICE", FLOAT, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_DISCOUNT", FLOAT, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_TAX", FLOAT, NOT_NULLABLE));


    lineitem_schema.add_attribute(Attribute("L_RETURNFLAG", STRING, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_LINESTATUS", STRING, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_SHIPDATE", DATE, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_COMMITDATE", DATE, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_RECEIPTDATE", DATE, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_SHIPINSTRUCT", STRING, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_SHIPMODE", STRING, NOT_NULLABLE));
    lineitem_schema.add_attribute(Attribute("L_COMMENT", STRING, NOT_NULLABLE));




    lineitem_table.reset(new Table(lineitem_schema,
                                 HeapBufferAllocator::Get()));




    lineitem_table_writer.reset(new TableRowWriter(lineitem_table.get()));




  }






  int32 AddData(int32 L_ORDERKEY,int32 L_PARTKEY,int32 L_SUPPKEY,int32 L_LINENUMBER,
    float L_QUANTITY,float L_EXTENDEDPRICE,float L_DISCOUNT,float L_TAX,
    const StringPiece& L_RETURNFLAG,const StringPiece& L_LINESTATUS,
    const StringPiece& L_SHIPDATE,const StringPiece& L_COMMITDATE,const StringPiece& L_RECEIPTDATE,
    const StringPiece& L_SHIPINSTRUCT,const StringPiece& L_SHIPMODE,
    const StringPiece& L_COMMENT) {


   scoped_ptr<const Expression> date_or_null1(
       ParseStringNulling(DATE, ConstString(L_SHIPDATE)));
   bool L_SHIPDATE_is_null = false;
   FailureOr<int32> L_SHIPDATE_as_int32 =
       GetConstantExpressionValue<DATE>(*date_or_null1,
                                        &L_SHIPDATE_is_null);
   scoped_ptr<const Expression> date_or_null2(
         ParseStringNulling(DATE, ConstString(L_COMMITDATE)));
     bool L_COMMITDATE_is_null = false;
     FailureOr<int32> L_COMMITDATE_as_int32 =
         GetConstantExpressionValue<DATE>(*date_or_null2,
                                          &L_COMMITDATE_is_null);
   scoped_ptr<const Expression> date_or_null3(
         ParseStringNulling(DATE, ConstString(L_RECEIPTDATE)));
     bool L_RECEIPTDATE_is_null = false;
     FailureOr<int32> L_RECEIPTDATE_as_int32 =
         GetConstantExpressionValue<DATE>(*date_or_null3,
                                          &L_RECEIPTDATE_is_null);
    lineitem_table_writer
        ->AddRow().Int32(L_ORDERKEY).Int32(L_PARTKEY).Int32(L_SUPPKEY).Int32(L_LINENUMBER)
         .Float(L_QUANTITY).Float(L_EXTENDEDPRICE).Float(L_DISCOUNT).Float(L_TAX)
         .String(L_RETURNFLAG).String(L_LINESTATUS)
         .Date(L_SHIPDATE_as_int32.get()).Date(L_COMMITDATE_as_int32.get()).Date(L_RECEIPTDATE_as_int32.get())
         .String(L_SHIPINSTRUCT).String(L_SHIPMODE).String(L_COMMENT)
         .CheckSuccess();
    return L_LINENUMBER;
  }




  void TestResults() {
     //检查结果是否满足需求,首先,我们必须把轮询rows,将它们放到一个内存块里。


Operation * scan = ScanView(lineitem_table->view());
result_cursor.reset(SucceedOrDie(scan->CreateCursor()));


    /*Filter Start*/
    const Expression * LOE = LessOrEqual( NamedAttribute("L_SHIPDATE"),ConstDate(10561));
    scoped_ptr<Operation> filter(
         Filter(LOE,ProjectAllAttributes(), scan));
    FailureOrOwned<Cursor> cursor = filter.release()->CreateCursor();
    ResultView FiterResult(cursor->Next(-1));//数据量大之后不确定是否有问题。
    std::cout<<"The Result of Filter is :"<<std::endl;
       std::cout<<"column count is:"<<FiterResult.view().column_count()<<std::endl;
       std::cout<<"row count is:"<<FiterResult.view().row_count()<<std::endl;
      // std::cout<<result.view().column_count()<<"\t";
       std::cout<<std::endl;


       /*Filter End*/




       /*Group函数*/
    Operation * ScanforGroup = ScanView(FiterResult.view());
    scoped_ptr<AggregationSpecification> specification(
        new AggregationSpecification());
    specification-> AddAggregation(SUM, "L_QUANTITY", "data_sums");//data是要做相加运算的列的名称,data_sums是输出名称
    scoped_ptr< const SingleSourceProjector >
        key_projector(ProjectNamedAttribute( "L_RETURNFLAG"));
  //对表input_view以key为关键字分组,做specification运算。NULL是为分组聚集的选项,可具体看参数。
    scoped_ptr< Operation> aggregation(GroupAggregate(key_projector.release(),
                                                     specification.release(),
                                                     NULL,
                                                     ScanforGroup));
    scoped_ptr< Cursor> bound_aggregation(
        SucceedOrDie(aggregation->CreateCursor()));
    ResultView result(bound_aggregation->Next(-1));
    std::cout<<"The Result of Group is :"<<std::endl;
    std::cout<<"column count is:"<<result.view().column_count()<<std::endl;
    std::cout<<"row count is:"<<result.view().row_count()<<std::endl;
    for (int32 k=0;k<result.view().column_count();k++)
         {
           std::cout<<result.view().schema().attribute(k).name()<<"\t";
         }
    std::cout<<std::endl;
    for(int j=0; j<result.view().row_count();j++)
          {


             std::cout<<result.view().column(0).typed_data<STRING>()[j]<<"\t";
             std::cout<<result.view().column(1).typed_data<FLOAT>()[j]<<"\t";
             std::cout<<std::endl;
          }


    /*Sort函数*/
    Operation * ScanforSort = ScanView(result.view());
    scoped_ptr< const SingleSourceProjector >projector(ProjectNamedAttribute( "L_RETURNFLAG"));//选出排序属性
       scoped_ptr< SortOrder> sort_order(new SortOrder());
        sort_order-> add(projector.release(), ASCENDING);//指定排序顺序
       const size_t mem_limit = 128;//限定内存大小,
       scoped_ptr< Operation> sort(Sort(sort_order.release(),
                                     NULL,
                                     mem_limit,
                                     ScanforSort));//调用排序算法,其中的input_view是scoped_ptr<View>input_view。
       scoped_ptr<Cursor> Tresult_cursor;
       Tresult_cursor.reset(SucceedOrDie(sort->CreateCursor()));//scoped_ptr<Cursor> result_cursor;
       ResultView Tresult(Tresult_cursor->Next(-1));
       std::cout<<"The Result of Sort is :"<<std::endl;
       std::cout<<"column count is:"<<Tresult.view().column_count()<<std::endl;
       std::cout<<"row count is:"<<Tresult.view().row_count()<<std::endl;
       for (int32 k=0;k<Tresult.view().column_count();k++)
               {
                 std::cout<<Tresult.view().schema().attribute(k).name()<<"\t";
               }
          std::cout<<std::endl;
          for(int j=0; j<Tresult.view().row_count();j++)
                {


                   std::cout<<Tresult.view().column(0).typed_data<STRING>()[j]<<"\t";
                   std::cout<<Tresult.view().column(1).typed_data<FLOAT>()[j]<<"\t";
                   std::cout<<std::endl;
                }




  }


  // Supersonic objects.
  scoped_ptr<Cursor> result_cursor;


  TupleSchema lineitem_schema;




  scoped_ptr<Table> lineitem_table;
  scoped_ptr<TableRowWriter> lineitem_table_writer;




};


int main(void) {


  QueryOneTest test;
  test.SetUp();
  test.AddData(1,155190,7706,1,17,21168.23,0.04,0.02,"N","O","1996/03/13","1996/03/13","1996/03/13","DELIVER IN PERSON","TRUCK","egular courts above the");
  test.AddData(1,67310,7311,5,36,45983.16,0.09,0.06,"Y","O","1996/04/12","1996/03/13","1996/03/13","TAKE BACK RETURN","MAIL","ly final dependencies: slyly bold");
  test.AddData(1,67310,7311,1,8,13309.60,0.10,0.02,"N","O","1998/12/01","1996/03/13","1996/03/13","TAKE BACK RETURN","MAIL","ly final dependencies: slyly bold");
  test.AddData(7,67310,7311,3,17,21168.23,0.04,0.02,"Y","O","2001/04/13","1996/03/13","1996/03/13","TAKE BACK RETURN","MAIL","ly final dependencies: slyly bold");
  test.AddData(9,67310,7311,1,36,45983.16,0.09,0.06,"Z","O","2001/04/15","1996/03/13","1996/03/13","TAKE BACK RETURN","MAIL","ly final dependencies: slyly bold");
  test.AddData(5,67310,7311,1,8,13309.60,0.10,0.02,"A","O","2002/01/01","1996/03/13","1996/03/13","TAKE BACK RETURN","MAIL","ly final dependencies: slyly bold");
  //test.PrepareJoin();


  test.TestResults();
  return 0;
}


执行结果:

bash-4.1# ./tmp 
The Result of Filter is :
column count is:16
row count is:3


The Result of Group is :
column count is:2
row count is:2
L_RETURNFLAG    data_sums
N       25
Y       36
The Result of Sort is :
column count is:2
row count is:2
L_RETURNFLAG    data_sums
N       25
Y       36






0 0