Java表格仿mysql实现基本操作(连接,分组排序,统计等)
来源:互联网 发布:linux cp 合并文件夹 编辑:程序博客网 时间:2024/06/05 04:13
以下是数据矩阵操作类
package cn.cgh.table;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.List;import java.util.Map;import java.util.Set;/** * * @author xiegonghai * */public class TableOperation{ /** * the separator of multiKey */ public static String separator = "@&"; /** * Join A and B according to one column * * @param indexA * @param indexB * @return New table after innerJoin */ public static List<List<String>> innerJoin(List<List<String>> A, List<List<String>> B, int indexA, int indexB) {List<List<String>> res = new ArrayList<List<String>>();// colMap保存B表的连接列的索引Map<String, ArrayList<Integer>> colMap = new HashMap<String, ArrayList<Integer>>();int i = 0;for (List<String> row : B){ String key = row.get(indexB); if (colMap.containsKey(key)) {colMap.get(key).add(i); } else {ArrayList<Integer> arrTmp = new ArrayList<Integer>();arrTmp.add(i);colMap.put(key, arrTmp); } i = i + 1;}// 遍历A表进行连接for (List<String> row : A){ String tarStr = row.get(indexA); if (colMap.containsKey(tarStr)) {ArrayList<Integer> idList = colMap.get(tarStr);for (Integer id : idList){ // deep copy List<String> newRow = new ArrayList<String>(row); Set<Integer> sList = new HashSet<Integer>(); sList.add(indexB); List<String> rightB = getRowExcept(B.get(i), sList); newRow.addAll(rightB); res.add(newRow);} }}System.out.println(A);System.out.println(B);return res; } /** * Join A and B according to multiColumn * * @param indexAList id list of table A * @param indexBList id list of table B * @return New table after innerJoin */ public static List<List<String>> innerJoin(List<List<String>> A, List<List<String>> B, List<Integer> indexAList, List<Integer> indexBList) {List<List<String>> res = new ArrayList<List<String>>();// colMap保存B表的连接列的索引Map<Long, ArrayList<Integer>> colMap = new HashMap<Long, ArrayList<Integer>>();int i = 0;for (List<String> row : B){ long murmurHashKeyB = getLineMultiKeyHash(row, indexBList); if (colMap.containsKey(murmurHashKeyB)) {colMap.get(murmurHashKeyB).add(i); } else {ArrayList<Integer> arrTmp = new ArrayList<Integer>();arrTmp.add(i);colMap.put(murmurHashKeyB, arrTmp); } i = i + 1;}// 遍历A表进行连接for (List<String> row : A){ long murmurHashValA = getLineMultiKeyHash(row, indexAList); if (colMap.containsKey(murmurHashValA)) {ArrayList<Integer> idList = colMap.get(murmurHashValA);for (Integer id : idList){ List<String> newRow = new ArrayList<String>(row); Set<Integer> sList = new HashSet<Integer>(); sList.addAll(indexBList); List<String> rightB = getRowExcept(B.get(id), sList); newRow.addAll(rightB); res.add(newRow);} }}System.out.println(A);System.out.println(B);return res; } /** * * @param row One line data * @param idList Clear the data that belong to idList * @return */ public static List<String> getRowExcept(List<String> row, Set<Integer> idList) {List<String> newRow = new ArrayList<String>();int size = row.size();for (int i = 0; i < size; ++i){ if (!idList.contains(i)) {newRow.add(row.get(i)); }}return newRow; } /** * Join A and B according to multiColumn * * @param indexAList id list of table A * @param indexBList id list of table B * @return New table after innerJoin */ public static List<List<String>> leftJoin(List<List<String>> A, List<List<String>> B, List<Integer> indexAList, List<Integer> indexBList) {List<List<String>> res = new ArrayList<List<String>>();// colMap保存B表的连接列的索引Map<Long, ArrayList<Integer>> colMap = new HashMap<Long, ArrayList<Integer>>();int i = 0;for (List<String> row : B){ long murmurHashKeyB = getLineMultiKeyHash(row, indexBList); if (colMap.containsKey(murmurHashKeyB)) {colMap.get(murmurHashKeyB).add(i); } else {ArrayList<Integer> arrTmp = new ArrayList<Integer>();arrTmp.add(i);colMap.put(murmurHashKeyB, arrTmp); } i = i + 1;}// 遍历A表进行连接for (List<String> row : A){ long murmurHashValA = getLineMultiKeyHash(row, indexAList); if (colMap.containsKey(murmurHashValA)) {ArrayList<Integer> idList = colMap.get(murmurHashValA);for (Integer id : idList){ List<String> newRow = new ArrayList<String>(row); Set<Integer> sList = new HashSet<Integer>(); sList.addAll(indexBList); List<String> rightB = getRowExcept(B.get(id), sList); newRow.addAll(rightB); res.add(newRow);} } else {List<String> newRow = new ArrayList<String>(row);List<String> rightB = new ArrayList<String>();int bSize = B == null ? 0 : B.size();int rolCols = bSize == 0 ? 0 : B.get(0).size();int indexbSize = indexBList == null ? 0 : indexBList.size();for (int k = 0; k < rolCols - indexbSize; ++k){ rightB.add("-");}newRow.addAll(rightB);res.add(newRow); }}System.out.println(A);System.out.println(B);return res; } /** * 根据前几列进行排序 * * @param table * @param orderList */ public static void groupByIdList(List<List<String>> table, int val) {SortedTable sort = new SortedTable(table, val); } /** * 根据前几列进行排序 * @param table * @param orderList */ public static List<List<String>> countGroupBy(List<List<String>> table,int sortCols,int colIndex) {List<List<String>> newTable = new ArrayList<List<String>>(table);SortedTable sort = new SortedTable(newTable,sortCols);int rowSize = newTable.size();for(int i = 0;i< rowSize;++i){ }return newTable; } /** * 64 bit MurmurHash value * * @param row String of list * @param idList multiKey of one line * @return the 64 bit of hash value */ public static long getKeyHash(String key) {return MurmurHash.hash64(key); } /** * 64 bit MurmurHash value * * @param row String of list * @param idList multiKey of one line * @return the 64 bit of hash value */ public static long getLineMultiKeyHash(List<String> row, List<Integer> idList) {StringBuilder sb = new StringBuilder();for (int i : idList){ sb.append(row.get(i)); sb.append(separator);}return MurmurHash.hash64(sb.toString()); }}
以上是数据矩阵实现mysql的自然连接和左连接的基本操作,尚未完成, 持续更新
下面是对数据矩阵实现多级排序,用于实现group by
package cn.cgh.table;import java.util.ArrayList;import java.util.Collections;import java.util.Comparator;import java.util.List;/** * 实现table的多级(<=3)排序 * * @author xiegonghai * */public class SortedTable{ public List<List<String>> mList; public List<Comparator<List<String>>> mCmpList = new ArrayList<Comparator<List<String>>>(); public SortedTable(List<List<String>> list,int val){ mList = list; if(val>=1)mCmpList.add(compareFirASC); if(val>=2)mCmpList.add(compareSecASC); if(val>=3)mCmpList.add(compareThirASC); sort(mList, mCmpList); } public void sort(List<List<String>> list, final List<Comparator<List<String>>> comList) { if (comList == null) return; Comparator<List<String>> cmp = new Comparator<List<String>>() { public int compare(List<String> o1, List<String> o2) { for (Comparator<List<String>> comparator : comList) { if (comparator.compare(o1, o2) > 0) { return 1; } else if (comparator.compare(o1, o2) < 0) { return -1; } } return 0; } }; Collections.sort(list, cmp); } private Comparator<List<String>> compareFirASC = new Comparator<List<String>>() { public int compare(List<String> o1, List<String> o2) { return o1.get(0).compareTo(o2.get(0)); } }; private Comparator<List<String>> compareSecASC = new Comparator<List<String>>() { public int compare(List<String> o1, List<String> o2) { return o1.get(1).compareTo(o2.get(1)); } }; private Comparator<List<String>> compareThirASC = new Comparator<List<String>>() { public int compare(List<String> o1, List<String> o2) { return o1.get(2).compareTo(o2.get(2)); } }; }
以下是murmurhash实现类(hadoop,redis,nginx均有应用),实现任意长字符串到唯一32,64位整型的映射
package cn.cgh.table;/** * * @author reference to someone * */public final class MurmurHash{ /** * Generates 32 bit hash from byte array of the given length and seed. * * @param data byte array to hash * @param length length of the array to hash * @param seed initial seed value * @return 32 bit hash of the given array */ public static int hash32(final byte[] data, int length, int seed) {// 'm' and 'r' are mixing constants generated offline.// They're not really 'magic', they just happen to work well.final int m = 0x5bd1e995;final int r = 24;// Initialize the hash to a random valueint h = seed ^ length;int length4 = length / 4;for (int i = 0; i < length4; i++){ final int i4 = i * 4; int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) + ((data[i4 + 2] & 0xff) << 16) + ((data[i4 + 3] & 0xff) << 24); k *= m; k ^= k >>> r; k *= m; h *= m; h ^= k;}// Handle the last few bytes of the input arrayswitch (length % 4){case 3: h ^= (data[(length & ~3) + 2] & 0xff) << 16;case 2: h ^= (data[(length & ~3) + 1] & 0xff) << 8;case 1: h ^= (data[length & ~3] & 0xff); h *= m;}h ^= h >>> 13;h *= m;h ^= h >>> 15;return h; } /** * Generates 32 bit hash from byte array with default seed value. * * @param data byte array to hash * @param length length of the array to hash * @return 32 bit hash of the given array */ public static int hash32(final byte[] data, int length) {return hash32(data, length, 0x9747b28c); } /** * Generates 32 bit hash from a string. * * @param text string to hash * @return 32 bit hash of the given string */ public static int hash32(final String text) {final byte[] bytes = text.getBytes();return hash32(bytes, bytes.length); } /** * Generates 32 bit hash from a substring. * * @param text string to hash * @param from starting index * @param length length of the substring to hash * @return 32 bit hash of the given string */ public static int hash32(final String text, int from, int length) {return hash32(text.substring(from, from + length)); } /** * Generates 64 bit hash from byte array of the given length and seed. * * @param data byte array to hash * @param length length of the array to hash * @param seed initial seed value * @return 64 bit hash of the given array */ public static long hash64(final byte[] data, int length, int seed) {final long m = 0xc6a4a7935bd1e995L;final int r = 47;long h = (seed & 0xffffffffl) ^ (length * m);int length8 = length / 8;for (int i = 0; i < length8; i++){ final int i8 = i * 8; long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8) + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24) + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40) + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56); k *= m; k ^= k >>> r; k *= m; h ^= k; h *= m;}switch (length % 8){case 7: h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;case 6: h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;case 5: h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;case 4: h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;case 3: h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;case 2: h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;case 1: h ^= (long) (data[length & ~7] & 0xff); h *= m;};h ^= h >>> r;h *= m;h ^= h >>> r;return h; } /** * Generates 64 bit hash from byte array with default seed value. * * @param data byte array to hash * @param length length of the array to hash * @return 64 bit hash of the given string */ public static long hash64(final byte[] data, int length) {return hash64(data, length, 0xe17a1465); } /** * Generates 64 bit hash from a string. * * @param text string to hash * @return 64 bit hash of the given string */ public static long hash64(final String text) {final byte[] bytes = text.getBytes();return hash64(bytes, bytes.length); } /** * Generates 64 bit hash from a substring. * * @param text string to hash * @param from starting index * @param length length of the substring to hash * @return 64 bit hash of the given array */ public static long hash64(final String text, int from, int length) {return hash64(text.substring(from, from + length)); }}
阅读全文
0 0
- Java表格仿mysql实现基本操作(连接,分组排序,统计等)
- mysql实现分组排序
- Linq对DataTable或者集合的排序,Where筛选,分组,统计总数sum等操作
- Java连接Mysql数据库的基本操作
- Java 连接MYSQL基本操作 查询更新
- JSP连接MySQL及插入等基本操作
- js实现表格操作-排序
- MySql CRUD等基本操作
- DB2递归实现字符串分组连接操作
- Mysql分组统计、排序、取前N条记录解决方案
- mysql 区间分组统计
- MYSQL-分组统计
- mysql 分组统计
- Java实现单链表插入删除等基本操作
- mysql关于表格的一些基本操作
- JAVA连接到MySQL数据库的基本操作
- PHP 连接 mysql 基本操作
- java8特性:Collectors.groupingBy进行分组、排序等操作
- 杭电acm 1171Big Event in HDU(0-1背包)
- 【HDU 1016】Prime Ring Problem(DFS、素数筛)
- 2017云哥性能测试实战带你走出迷茫
- Java并发集合操作中对锁的应用
- Python学习之文件处理详解
- Java表格仿mysql实现基本操作(连接,分组排序,统计等)
- Linux C 多线程编程条件变量
- [第五季]14.属性选择器和伪类伪元素
- 美团js面试题
- 下一阶段的一些想法
- Mysql中的MVCC
- miui8.5开发者选项USB调试模式
- MySQL删除普通用户
- 242. Valid Anagram