如何使用大数据 SQL 语句

来源:互联网 发布:手表品牌排行榜 知乎 编辑:程序博客网 时间:2024/06/05 14:22

如何使用大数据 SQL 语句

标签: SQL, 大数据
从一个庞大的数据库中众多的表格和视图中 query 出所需的数据,是一件熟能生巧的工作。

SELECT itemid, itembrand, itemunitprice,  RANK() OVER ( ORDER BY itemunitprice ) AS rankunitprice  FROM ssitem;

RANK() OVER

SELECT custname, SUM(salesdollar) AS sumsales,  RANK() OVER ( ORDER BY SUM(salesdollar) DESC ) salesrank  FROM sssales, sscustomer  WHERE sssales.custid = sscustomer.custid  GROUP BY custname;

RANK() OVER

SELECT custstate, custname, sum(salesdollar) AS sumsales,  RANK() OVER ( PARTITION BY custstate                ORDER BY SUM(salesdollar) DESC ) salesrank  FROM sssales, sscustomer  WHERE sssales.custid = sscustomer.custid  GROUP BY custstate, custname  ORDER BY custstate;

PARTITION BY

SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,  SUM(SUM(salesdollar)) OVER    (ORDER BY storezip, timeyear     ROWS UNBOUNDED PRECEDING) AS cumsumsales  FROM ssstore, sstimedim, sssales  WHERE sssales.storeid = ssstore.storeid  AND sssales.timeno = sstimedim.timeno  GROUP BY storezip, timeyear;

SUM() OVER

SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,  SUM(SUM(salesdollar)) OVER    (PARTITION BY storezip     ORDER BY storezip, timeyear     ROWS UNBOUNDED PRECEDING) AS cumsumsales  FROM ssstore, sstimedim, sssales  WHERE sssales.storeid = ssstore.storeid  AND sssales.timeno = sstimedim.timeno  GROUP BY storezip, timeyear;

PARTITION BY

SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,  ROUND(AVG(SUM(salesdollar)) OVER       (ORDER BY storezip, timeyear        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING), 2) AS centermovavgsumsales  FROM ssstore, sstimedim, sssales  WHERE sssales.storeid = ssstore.storeid  AND sssales.timeno = sstimedim.timeno  GROUP BY storezip, timeyear;

ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING

SELECT timeyear, custcity, SUM(salesdollar) AS sumsales,  ROUND(RATIO_TO_REPORT(SUM(salesdollar))  OVER (PARTITION BY timeyear), 2) AS sumsalesratio  FROM sscustomer, sssales, sstimedim  WHERE sssales.custid = sscustomer.custid  AND sssales.timeno = sstimedim.timeno  GROUP BY timeyear, custcity  ORDER BY timeyear, SUM(salesdollar) DESC;

RATIO_TO_REPORT

SELECT itemname, itembrand, itemunitprice, cumdistunitprice  FROM (SELECT itemid, itemname, itembrand, itemunitprice,     CUME_DIST() OVER (ORDER BY itemunitprice DESC) AS cumdistunitprice    FROM ssitem)  WHERE cumdistunitprice <= 0.8;

CUME_DIST() OVER

SELECT custname, SUM(salesunits) AS sumsalesunits,  RANK() OVER (ORDER BY SUM(salesunits)) AS ranksalesunits,  PERCENT_RANK() OVER (ORDER BY SUM(salesunits)) AS perranksalesunits,  ROW_NUMBER() OVER (ORDER BY SUM(salesunits)) AS rownumsalesunits,  ROUND(CUME_DIST() OVER  (ORDER BY SUM(salesunits)), 2) AS cumdistsalesunits  FROM sssales, sscustomer  WHERE sssales.custid = sscustomer.custid  GROUP BY custname;

cumulative distribution function

SELECT itemname, itemunitprice,  RANK() OVER (ORDER BY itemunitprice) AS rankunitprice,  PERCENT_RANK() OVER (ORDER BY itemunitprice) AS perrankunitprice,  ROW_NUMBER() OVER (ORDER BY itemunitprice) AS rownumunitprice,  CUME_DIST() OVER (ORDER BY itemunitprice) AS cumdistunitprice,  FROM ssitem;

cumulative distribution function

CREATE VIEW connex_sales_view AS   SELECT ssitem.itemid, itemname, itemcategory,itemunitprice, salesno, salesunits,  salesdollar, salescost, timeyear, timemonth, timeday  FROM ssitem, sssales, sstimedim  WHERE itembrand = 'connex'  AND timeyear BETWEEN 2010 AND 2012  AND ssitem.itemid = sssales.itemid  AND sstimedim.timeno = sssales.timeno;
SELECT itemname, itemcategory, itemunitprice, salesno, salesunits,  salesdollar, salescost, timeyear, timemonth, timeday  FROM connex_sales_view  WHERE itemunitprice < 100 AND timeyear BETWEEN 2011 AND 2012;
CREATE MATERIALIZED VIEW MV1  BUILD IMMEDIATE  REFRESH COMPLETE ON DEMAND  ENABLE QUERY REWRITE AS   SELECT storestate, timeyear, SUM(salesdollar) AS sumdollar  FROM sssales, ssstore, sstimedim  WHERE sssales.storeid = ssstore.storeid  AND sssales.timeno = sstimedim.timeno  AND timeyear > 2010  GROUP BY storestate, timeyear;
0 0
原创粉丝点击