布尔运算--java位图搜索实现

来源:互联网 发布:如何关闭4g网络 编辑:程序博客网 时间:2024/04/18 14:07

前言

布尔运算是伟大的布尔发明的代数运算,只有简单的逻辑与、或、非。一开始人们并没有发现它有什么用,后来它对计算机的影响却非常大,从基础设施到搜索引擎,无处不在。


场景

身为码农,在日常工作中,我也遇到了涉及它的需求。场景是这样的,我们的后台服务有一个复杂的配置,涉及到对用户多个维度的匹配,因为变化不会很频繁,

  每次都查询数据库显然不划算;数据量也不算多,不到万的级别,毕竟是人工配置的。这样很自然地想到把配置缓存到服务器内存中。但总不能蛮力地逐条匹配:那样太啰嗦,效率很低,逻辑判断也比较复杂。配置主要有四个维度:机型、渠道、国家、版本,每个维度都是一个数组,有的值是["ALL"],意思是全部匹配。

引言

于是想到了布尔运算,思路比较简单:每个维度的每个取值都对应一个位向量(一长串二进制位),位向量的位数最多等于配置的条数。

  如总共10000条配置,则“中国”对应的位向量最多10000位,对于“中国”这个值,第100位是1就表示第100条配置包含这个"中国"维度值。
  查询国家是“中国”的就是从map中取“中国”对应的位向量和"ALL"对应的位向量做 或 运算。Java自带了大整数的实现:BigInteger;可以给构造方法传递一个代表二进制位的byte数组,byte数组的长度显然是: (配置条数/8)+1,BigInteger内部还会做些处理,主要是去除左边连续的0;

马上开始

由于我们是用的MongoDB数据库,操作是用的spring-data对mongoDB的封装。有Criteria和Query的API,这也是spring一贯的风格。

 于是,实现了一个通用的位图查询,和Criteria的API相似,这样代码改动可以很小(把import spring的包换成自己的就差不多完事了),不啰嗦了,上代码:

  
   先上测试case(例子有点low,将就看吧):

import java.util.Arrays;import java.util.List;import java.util.concurrent.atomic.AtomicInteger;import org.junit.Assert;import org.junit.Test;import bitmapSearch.BitmapSearcher;import bitmapSearch.Criteria;public class BitmapSearchTest {@Testpublic void testQuery() {List<Car> cars = Arrays.asList(new Car(), //new Car("大众").setToAreas("ALL"), //new Car("耗子车", 3).setToAreas("某山区"), //new Car("东方红", 6).setToAreas("中国"));BitmapSearcher searcher = new BitmapSearcher(cars, new BitmapSearcher.IndexCreator<Car, String>() {@Overridepublic String[] indexKeys() {return new String[] { "id", "brand", "legs", "toAreas", "desc" };}@Overridepublic Object[] fieldValue(Car bean, String indexKey) {if ("id".equals(indexKey)) {return new Object[] { bean.id };} else if ("brand".equals(indexKey)) {return new Object[] { bean.getBrand() };} else if ("legs".equals(indexKey)) {return new Object[] { bean.getLegs() };} else if ("toAreas".equals(indexKey)) {return bean.getToAreas();}else if ("desc".equals(indexKey)) {return new Object[] { bean.getDesc() };}return null;}});Car rs1 = searcher.findOne(Criteria.where("legs").is(6)//.andOperator(new Criteria().orOperator(//Criteria.where("id").is(4), //Criteria.where("desc").is("MadeInChina")))//, null);//Assert.assertTrue(rs1.brand.equals("东方红"));List<Car> rs2 = searcher.find(Criteria.where("toAreas").in("中国", "ALL"));Assert.assertTrue(rs2 != null && rs2.size() == 2);}private static class Car {final int id;static AtomicInteger ID_GEN = new AtomicInteger();String brand = "QQ";int legs = 4;String[] toAreas;String desc = "";public Car() {super();id = ID_GEN.incrementAndGet();}public Car(String brand) {this();this.brand = brand;}public Car(String brand, int legs) {this();this.brand = brand;this.legs = legs;}public String getBrand() {return brand;}public void setBrand(String brand) {this.brand = brand;}public int getLegs() {return legs;}public void setLegs(int legs) {this.legs = legs;}public Car setToAreas(String... 
toAreas) {this.toAreas = toAreas;return this;}public String[] getToAreas() {return toAreas;}public String getDesc() {return desc;}public void setDesc(String desc) {this.desc = desc;}}}

其中,抽象出一个内部接口IndexCreator,把声明索引字段以及如何获取索引字段值的工作交给调用方。
/**
 *
 */
package bitmapSearch;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Generic bitmap search utility.
 * <p>
 * For every index key and every distinct field value, the searcher keeps a bit
 * vector (stored as a {@link BigInteger}) in which bit {@code i} is set when the
 * bean at position {@code i} of the list carries that value. A {@link Criteria}
 * combines these vectors with AND / OR / NOT, and the set bits of the result
 * identify the matching beans.
 *
 * @author HouKangxi
 */
public final class BitmapSearcher {

    /** Read-only snapshot of the searched beans; a bean's position is its bit index. */
    private final List<?> beansList;

    /** Search index: index key -> (field value -> bitmap of the beans carrying that value). */
    private final Map<Object, Map<Object, BigInteger>> indexMap;

    /**
     * Index builder supplied by the caller: declares which indexes exist and
     * how to read the indexed values from a bean.
     *
     * @param <T>     bean type
     * @param <INDEX> type of the index keys
     */
    public static interface IndexCreator<T, INDEX> {

        /**
         * Returns the keys of all indexes to build.
         *
         * @return the index keys
         */
        INDEX[] indexKeys();

        /**
         * Returns the value(s) of the field behind the given index key.
         *
         * @param bean     an element of the searched list
         * @param indexKey the index key
         * @return the field values; {@code null} (or {@code null} elements) are not indexed
         */
        Object[] fieldValue(T bean, INDEX indexKey);
    }

    /**
     * Builds the searcher and all of its indexes.
     * <p>
     * The list is copied, so later changes to {@code objList} cannot
     * desynchronise the bitmaps from the bean positions.
     *
     * @param objList the beans to search; {@code null} or empty yields a searcher that never matches
     * @param ic      the index builder; must not be {@code null} when {@code objList} is non-empty
     * @throws NullPointerException if {@code ic} is {@code null} and {@code objList} is non-empty
     */
    public <T, INDEX> BitmapSearcher(List<T> objList, IndexCreator<T, INDEX> ic) {
        if (objList != null && !objList.isEmpty()) {
            if (ic == null) {
                throw new NullPointerException("ic");
            }
            List<T> snapshot = Collections.unmodifiableList(new ArrayList<T>(objList));
            beansList = snapshot;
            indexMap = buildIndex(snapshot, ic);
        } else {
            beansList = Collections.emptyList();
            // Never leave the index null, so getBitmap() is safe on an empty searcher.
            indexMap = Collections.<Object, Map<Object, BigInteger>>emptyMap();
        }
    }

    /**
     * Returns the value-to-bitmap map of one index.
     *
     * @param key the index key
     * @return the bitmaps of that index, or {@code null} if the key has no indexed values
     */
    Map<Object, BigInteger> getBitmap(Object key) {
        return indexMap.get(key);
    }

    /**
     * Finds a single result.
     *
     * @param <T>      bean type
     * @param criteria the query
     * @param sorter   optional ordering; when given, the smallest match is returned,
     *                 otherwise the match with the lowest list position
     * @return the selected match, or {@code null} if nothing matches
     */
    public <T> T findOne(Criteria criteria, Comparator<T> sorter) {
        List<T> matches = find(criteria);
        if (matches == null || matches.isEmpty()) {
            return null;
        }
        if (sorter == null) {
            return matches.get(0);
        }
        // Same element a stable sort would put first (the earliest of equal minima),
        // without sorting the whole list.
        return Collections.min(matches, sorter);
    }

    /**
     * Finds all beans matching the criteria, in list order.
     *
     * @param <T>      bean type
     * @param criteria the query
     * @return a new mutable list of matches, or {@code null} if nothing matches
     *         ({@code null} rather than an empty list is kept for existing callers)
     */
    @SuppressWarnings("unchecked")
    public <T> List<T> find(Criteria criteria) {
        if (beansList.isEmpty()) {
            return null;
        }
        BigInteger bits = criteria.proc(this, null);
        if (bits == null) {
            return null;
        }
        // Only bit positions below the bean count are meaningful. Bounding the scan
        // matters: a NOT query yields a negative BigInteger whose two's-complement
        // form has infinitely many set bits, so "clear the lowest set bit until
        // zero" would never terminate on it.
        final int size = beansList.size();
        List<T> matches = new ArrayList<T>();
        for (int i = 0; i < size; i++) {
            if (bits.testBit(i)) {
                Object bean = beansList.get(i);
                // null beans are never indexed; a NOT can still set their bit.
                if (bean != null) {
                    matches.add((T) bean);
                }
            }
        }
        return matches.isEmpty() ? null : matches;
    }

    /**
     * Builds every index in one pass over the beans.
     * Bitmaps are accumulated in byte arrays and converted to BigInteger at the end.
     */
    private static <T, INDEX> Map<Object, Map<Object, BigInteger>> buildIndex(List<T> beans,
            IndexCreator<T, INDEX> ic) {
        Map<Object, Map<Object, BigInteger>> index = new HashMap<Object, Map<Object, BigInteger>>();
        INDEX[] keys = ic.indexKeys();
        if (keys == null) {
            return index;
        }

        Map<Object, Map<Object, byte[]>> rawIndex = new HashMap<Object, Map<Object, byte[]>>();
        for (INDEX key : keys) {
            rawIndex.put(key, new HashMap<Object, byte[]>());
        }

        // One spare byte beyond size/8 guarantees the top (sign) bit is never used.
        final int bytesLen = (beans.size() >> 3) + 1;
        int position = 0;
        for (T bean : beans) {
            if (bean != null) {
                // BigInteger reads the array big-endian: the last byte holds bits 0..7.
                final int byteIndex = bytesLen - 1 - (position >> 3);
                final int mask = 1 << (position & 7);
                for (INDEX key : keys) {
                    Object[] values = ic.fieldValue(bean, key);
                    if (values == null) {
                        continue;
                    }
                    Map<Object, byte[]> bitmaps = rawIndex.get(key);
                    for (Object value : values) {
                        if (value == null) {
                            continue;
                        }
                        byte[] bits = bitmaps.get(value);
                        if (bits == null) {
                            bits = new byte[bytesLen];
                            bitmaps.put(value, bits);
                        }
                        bits[byteIndex] |= mask;
                    }
                }
            }
            position++;
        }

        for (Map.Entry<Object, Map<Object, byte[]>> entry : rawIndex.entrySet()) {
            Map<Object, byte[]> bitmaps = entry.getValue();
            if (bitmaps.isEmpty()) {
                // Keys without any indexed value are left out, so getBitmap() returns null for them.
                continue;
            }
            Map<Object, BigInteger> converted = new HashMap<Object, BigInteger>();
            for (Map.Entry<Object, byte[]> bitmap : bitmaps.entrySet()) {
                // Explicit positive signum: the bytes are a magnitude, never a negative number.
                converted.put(bitmap.getKey(), new BigInteger(1, bitmap.getValue()));
            }
            index.put(entry.getKey(), converted);
        }
        return index;
    }
}


下面是Criteria的一些实现类:

/**
 *
 */
package bitmapSearch;

import java.math.BigInteger;
import java.util.LinkedList;
import java.util.List;

/**
 * Generic query constraint with a fluent API.
 * <p>
 * A criteria owns a chain of operator nodes. Evaluating it runs the nodes in
 * insertion order, handing each node the bitmap produced by the previous one.
 *
 * @author houkangxi
 */
public class Criteria {

    /** Index key this node refers to; may be {@code null}. */
    protected Object key;

    /** Operator nodes evaluated in order by {@link #proc}. */
    protected List<Criteria> chain;

    /**
     * Node that receives the next comparison operator ({@code is}/{@code ne}/{@code in}):
     * this criteria itself at first, afterwards the node most recently added to this chain.
     */
    private Criteria prev = this;

    /** Creates a criteria without a key and with an empty, growable chain. */
    public Criteria() {
        chain = new LinkedList<Criteria>();
    }

    /**
     * Creates a criteria bound to an index key.
     *
     * @param key the index key
     */
    public Criteria(Object key) {
        this();
        this.key = key;
    }

    /**
     * Creates a criteria whose chain is left unset, for subclasses that assign their own.
     *
     * @param noInitChain unused marker that only selects this constructor
     */
    Criteria(int noInitChain) {
    }

    /**
     * Starts a query on the given index key.
     *
     * @param key the index key
     * @return a new criteria for that key
     */
    public static Criteria where(Object key) {
        return new Criteria(key);
    }

    /** Makes the node the current one, appends it to this chain and returns this criteria. */
    private Criteria addToChain(Criteria c) {
        prev = c;
        chain.add(c);
        return this;
    }

    /** Appends a comparison node to the current node's chain and returns this criteria. */
    private Criteria attachToCurrent(Criteria comparison) {
        prev.addToChain(comparison);
        return this;
    }

    /**
     * Adds an "equals" comparison on the current node's key.
     *
     * @param val the value to match
     * @return this criteria
     */
    public Criteria is(Object val) {
        return attachToCurrent(new CriteriaOpIs(prev.key, val));
    }

    /**
     * Adds a "not equals" comparison on the current node's key.
     *
     * @param val the value to exclude
     * @return this criteria
     */
    public Criteria ne(Object val) {
        return attachToCurrent(new CriteriaOpNot(prev.key, val));
    }

    /**
     * Adds an "in" comparison on the current node's key.
     *
     * @param val the accepted values
     * @return this criteria
     */
    public Criteria in(Object... val) {
        return attachToCurrent(new CriteriaOpIn(prev.key, val));
    }

    /**
     * Starts an AND group on another key; following comparisons are added to that group.
     *
     * @param key the index key of the group
     * @return this criteria
     */
    public Criteria and(String key) {
        CriteriaOpAnd group = new CriteriaOpAnd(key);
        return addToChain(group);
    }

    /**
     * Starts an OR group on another key; following comparisons are added to that group.
     *
     * @param key the index key of the group
     * @return this criteria
     */
    public Criteria or(String key) {
        CriteriaOpOr group = new CriteriaOpOr(key);
        return addToChain(group);
    }

    /**
     * Appends an AND node over the given criteria.
     *
     * @param o the operands
     * @return this criteria
     */
    public Criteria andOperator(Criteria... o) {
        CriteriaOpAnd group = new CriteriaOpAnd(o);
        return addToChain(group);
    }

    /**
     * Appends an OR node over the given criteria.
     *
     * @param o the operands
     * @return this criteria
     */
    public Criteria orOperator(Criteria... o) {
        CriteriaOpOr group = new CriteriaOpOr(o);
        return addToChain(group);
    }

    /**
     * Evaluates the chain against the searcher's index.
     *
     * @param sea  the searcher providing the bitmaps
     * @param prev bitmap handed to the first node; may be {@code null}
     * @return the bitmap returned by the last node, {@code prev} if the chain is
     *         empty, or {@code null} if there is no chain
     */
    BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        if (chain == null) {
            return null;
        }
        BigInteger result = prev;
        for (Criteria node : chain) {
            result = node.proc(sea, result);
        }
        return result;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(getClass().getSimpleName());
        text.append("@key=").append(key);
        return text.toString();
    }
}

/**
 * Operator nodes used by Criteria: the AND/OR chain base class and the
 * IN / IS / NOT leaf comparisons. All of them are package-private classes of
 * {@code bitmapSearch}.
 * <p>
 * Result convention shared by all nodes: a leaf that matches nothing returns
 * {@link BigInteger#ZERO}; {@code null} means "no constraint" (no previous
 * result yet, or an operand that does not restrict anything) and is skipped
 * when results are combined.
 */
package bitmapSearch;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.Map;

/**
 * Base class of the AND / OR nodes: combines the results of its operands with
 * {@link #op} and then combines that with the result of the preceding node.
 *
 * @author houkangxi
 */
abstract class CriteriaChain extends Criteria {

    /**
     * Creates a keyed group whose operands are added later through the fluent API.
     *
     * @param key the index key of the group
     */
    CriteriaChain(String key) {
        super(key);
    }

    /**
     * Creates a group over a fixed set of operands.
     *
     * @param list the operands
     */
    CriteriaChain(Criteria[] list) {
        super(0);
        this.chain = Arrays.asList(list);
    }

    /**
     * Combines two bitmaps.
     *
     * @param o1 left bitmap, or {@code null} for "no constraint"
     * @param o2 right bitmap, or {@code null} for "no constraint"
     * @return the combination; when one side is {@code null} the other side is returned
     */
    protected abstract BigInteger op(BigInteger o1, BigInteger o2);

    /**
     * Evaluates all operands and folds them with {@link #op}.
     * <p>
     * Every operand is evaluated on its own (with no previous result). Passing
     * the running result into an operand would leak it into nested groups, e.g.
     * {@code A AND (B OR C)} would become {@code A AND (A OR B OR C)}. All
     * operands are evaluated even if an earlier one matched nothing, so an OR
     * still sees its later operands.
     *
     * @param sea  the searcher providing the bitmaps
     * @param prev result of the preceding node, or {@code null} if there is none
     * @return {@code prev} combined with the operands; just {@code prev} if there are no operands
     */
    @Override
    protected final BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        if (chain == null || chain.isEmpty()) {
            return prev;
        }
        BigInteger combined = null;
        for (Criteria operand : chain) {
            combined = op(combined, operand.proc(sea, null));
        }
        return op(prev, combined);
    }
}

/**
 * AND node.
 *
 * @author houkangxi
 */
class CriteriaOpAnd extends CriteriaChain {

    CriteriaOpAnd(String key) {
        super(key);
    }

    CriteriaOpAnd(Criteria[] list) {
        super(list);
    }

    /**
     * Intersects two bitmaps; a {@code null} side imposes no constraint, so a
     * leading {@code andOperator(...)} is not wiped out by the missing previous result.
     */
    @Override
    protected BigInteger op(BigInteger o1, BigInteger o2) {
        if (o1 == null) {
            return o2;
        }
        if (o2 == null) {
            return o1;
        }
        return o1.and(o2);
    }
}

/**
 * IN comparison: matches beans carrying at least one of the given values.
 *
 * @author houkangxi
 */
class CriteriaOpIn extends Criteria {

    /** Accepted values. */
    Object[] colls;

    CriteriaOpIn(Object key, Object[] colls) {
        super(key);
        this.colls = colls;
    }

    /**
     * @return the union of the bitmaps of all accepted values;
     *         {@link BigInteger#ZERO} if the key is not indexed or no value is known
     */
    @Override
    protected BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        Map<Object, BigInteger> bitmap = sea.getBitmap(key);
        BigInteger union = BigInteger.ZERO;
        if (bitmap == null || colls == null) {
            return union;
        }
        for (Object val : colls) {
            BigInteger bits = bitmap.get(val);
            if (bits != null) {
                union = union.or(bits);
            }
        }
        return union;
    }
}

/**
 * IS comparison: matches beans carrying exactly the given value.
 *
 * @author houkangxi
 */
class CriteriaOpIs extends Criteria {

    /** Value to match. */
    private Object ov;

    CriteriaOpIs(Object k, Object ov) {
        super(k);
        this.ov = ov;
    }

    /**
     * @return the bitmap of the value; {@link BigInteger#ZERO} if the key is not
     *         indexed or no bean carries the value
     */
    @Override
    protected BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        Map<Object, BigInteger> bimap = sea.getBitmap(key);
        if (bimap == null) {
            return BigInteger.ZERO;
        }
        BigInteger bits = bimap.get(ov);
        return bits == null ? BigInteger.ZERO : bits;
    }
}

/**
 * NOT comparison: matches beans that do not carry the given value.
 *
 * @author houkangxi
 */
class CriteriaOpNot extends Criteria {

    /** Value to exclude. */
    private Object ov;

    CriteriaOpNot(Object k, Object ov) {
        super(k);
        this.ov = ov;
    }

    /**
     * @return the bitwise complement of the value's bitmap, or {@code null}
     *         ("no constraint") if the key is not indexed or no bean carries the value.
     *         NOTE(review): the complement is a negative BigInteger, i.e. it also
     *         has every bit above the bean count set; the consumer of the final
     *         result must only look at positions below the bean count.
     */
    @Override
    protected BigInteger proc(BitmapSearcher sea, BigInteger prev) {
        Map<Object, BigInteger> bimap = sea.getBitmap(key);
        if (bimap == null) {
            return null;
        }
        BigInteger bits = bimap.get(ov);
        if (bits == null) {
            return null;
        }
        return bits.not();
    }
}

/**
 * OR node.
 *
 * @author houkangxi
 */
class CriteriaOpOr extends CriteriaChain {

    CriteriaOpOr(String k) {
        super(k);
    }

    CriteriaOpOr(Criteria[] list) {
        super(list);
    }

    /** Unites two bitmaps; a {@code null} side is ignored. */
    @Override
    protected BigInteger op(BigInteger o1, BigInteger o2) {
        if (o2 == null) {
            return o1;
        }
        if (o1 == null) {
            return o2;
        }
        return o1.or(o2);
    }
}

1 0
原创粉丝点击