使用POI数据挖掘区域功能并在网页端显示

来源:互联网 发布:iphone电视直播软件 编辑:程序博客网 时间:2024/06/03 18:41

使用POI数据挖掘区域功能并在网页端显示

    最近在做一个创新项目,其中包含区域功能挖掘的部分。前期我们使用了路网数据对上海市进行了区域的划分,并为每个POI加上了所属的区域标签。之后便使用此数据进行区域功能挖掘部分的展示。

一、使用 TF_IDF 算法挖掘出每个区域对应的功能。此处我们分了六大功能,分别是住宅、工作、教育、商业、公共服务、景点。

   有如下几个子步骤
  (1)根据poi的三级目录将poi数据划分到六种poi类别
  (2)统计TF_IDF算法使用到的中间结果
  (3)使用TF_IDF挖掘出每个区域的功能,并记录相关结果

代码如下
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.Arrays;

/**
 * Mines the dominant function of every region from POI data using TF-IDF.
 *
 * <p>Input {@code poi.csv} holds pre-processed POIs that already carry a region id, for example
 * {@code "241076","永生餐饮","121.603927","31.235058","餐饮服务;中餐厅;中餐厅","275"}.
 * Input {@code class5.txt} holds the category tables (see {@link #CC} and {@link #CC2}).
 *
 * <p>Files written by {@link #main(String[])}:
 * <ul>
 *   <li>{@code poiRange.csv} - name, longitude, latitude, POI class, region id for every POI;</li>
 *   <li>{@code result.txt} - line 1: POI count per class, line 2: POI count per region;</li>
 *   <li>{@code TF_IDF1.txt} / {@code TF_IDF2.txt} - TF-IDF scores of the two IDF variants,
 *       one region per line;</li>
 *   <li>{@code function1.txt} / {@code function2.txt} - number of regions per function;</li>
 *   <li>{@code poi_label1.txt} / {@code poi_label2.txt} - region ids per function,
 *       one function per line.</li>
 * </ul>
 */
public class POI_Func {
    /**
     * Class table: row {@code i} lists the top-level POI categories mapped to class {@code i}.
     * Unused trailing slots are {@code null}.
     */
    public static String[][] CC;

    /**
     * Second-level table: column 0 of a row is the class index as text, the remaining columns are
     * {@code "first;second"} category keys. Unused trailing slots are {@code null}.
     */
    public static String[][] CC2;

    /** Number of lookups that were resolved through {@link #CC2}. */
    public static int ccc = 0;

    /** Number of POI classes (functions). */
    private static final int CLASS_COUNT = 6;
    /** Number of second-level rows that follow the class rows in class5.txt. */
    private static final int SECOND_ROWS = 3;
    /** Minimum row width of {@link #CC}; rows grow when a line holds more categories. */
    private static final int MIN_CLASS_COLUMNS = 19;
    /** Minimum row width of {@link #CC2}; rows grow when a line holds more entries. */
    private static final int MIN_SECOND_COLUMNS = 11;
    /** Number of regions; region ids are expected in {@code [0, REGION_COUNT)} or -1. */
    private static final int REGION_COUNT = 542;
    private static final String ENCODING = "utf-8";
    private static final String RULE = "----------------------------------------------";

    /**
     * Looks up the class of a top-level POI category in {@link #CC}.
     *
     * @param type top-level category name
     * @return index of the first row containing {@code type}, or -1 when no row contains it
     */
    public static int getPoiClass(String type) {
        for (int i = 0; i < CC.length; i++) {
            for (int j = 0; j < CC[i].length; j++) {
                String candidate = CC[i][j];
                if (candidate == null) {
                    break; // rows are null-padded; nothing follows the first null
                }
                if (candidate.equals(type)) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Looks up the class of a {@code "first;second"} category key in {@link #CC2} and counts the
     * hit in {@link #ccc}.
     *
     * @param type category key made of the first two category levels joined by {@code ';'}
     * @return class index stored in column 0 of the matching row, or -1 when nothing matches
     */
    public static int getSecondPoi(String type) {
        for (int i = 0; i < CC2.length; i++) {
            // Column 0 is the class index, so the keys start at column 1.
            for (int j = 1; j < CC2[i].length; j++) {
                String candidate = CC2[i][j];
                if (candidate == null) {
                    break;
                }
                if (candidate.equals(type)) {
                    ccc++;
                    return Integer.parseInt(CC2[i][0]);
                }
            }
        }
        return -1;
    }

    /**
     * Maps an unquoted category field such as {@code 餐饮服务;中餐厅;中餐厅} to a POI class.
     * The top-level category is tried first; if it is unknown, the first two levels are tried
     * against the second-level table. Categories with fewer than two levels simply skip the
     * second lookup instead of failing.
     *
     * @param category category field with the surrounding quotes already removed
     * @return class index, or -1 when the category cannot be classified
     */
    static int classify(String category) {
        String[] levels = category.split(";");
        if (levels.length == 0) {
            return -1;
        }
        int poiClass = getPoiClass(levels[0]);
        if (poiClass == -1 && levels.length > 1 && !category.equals("NULL")) {
            poiClass = getSecondPoi(levels[0] + ";" + levels[1]);
        }
        return poiClass;
    }

    /**
     * Removes one pair of surrounding double quotes, if present.
     *
     * @param field raw CSV field
     * @return the field without its enclosing quotes
     */
    static String stripQuotes(String field) {
        int length = field.length();
        if (length >= 2 && field.charAt(0) == '"' && field.charAt(length - 1) == '"') {
            return field.substring(1, length - 1);
        }
        return field;
    }

    /**
     * Second IDF variant: {@code 100 / ln(classTotal / inRegion) * ln(regionCount / (regionsWithClass + 1))},
     * clamped to be non-negative. A region without POIs of the class gets 0 (this also avoids the
     * {@code 0/0 = NaN} case when the class has no POIs at all).
     *
     * <p>NOTE: when a single region holds every POI of a class the first logarithm is 0 and the
     * result is {@code Infinity}; this singularity of the formula is kept as is.
     *
     * @param classTotal number of POIs of the class over all regions
     * @param inRegion number of POIs of the class in the region
     * @param regionsWithClass number of regions that contain the class
     * @param regionCount total number of regions
     * @return the IDF weight, never negative
     */
    static float secondIdf(int classTotal, int inRegion, int regionsWithClass, int regionCount) {
        if (inRegion == 0) {
            return 0f;
        }
        float value = (float) (100f / Math.log(classTotal * 1.0 / inRegion)
                * Math.log(regionCount * 1.0 / (regionsWithClass + 1)));
        return value < 0 ? 0f : value;
    }

    /**
     * Runs the whole pipeline: classify POIs, write the statistics, then compute both TF-IDF
     * variants and the resulting function of every region.
     *
     * @param args unused
     * @throws IOException if an input file is missing or malformed, or an output cannot be written
     */
    public static void main(String[] args) throws IOException {
        /* Step 1: attach a POI class to every row of poi.csv and write poiRange.csv. */
        loadClassTables("class5.txt");

        int[] countP = new int[CLASS_COUNT];   // POIs per class
        int[] countR = new int[REGION_COUNT];  // classified POIs per region
        int[][] tf = new int[REGION_COUNT][CLASS_COUNT]; // POIs per region and class
        int unclassified = 0;
        int countPoi = 0; // POIs that have both a class and a region

        try (BufferedReader in = openReader("poi.csv");
             BufferedWriter out = openWriter("poiRange.csv")) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split(",");
                int n = fields.length;
                if (n < 6) {
                    throw new IOException("Malformed poi.csv line (expected 6 fields): " + line);
                }
                // Fields are addressed from the end so that a comma inside the POI name
                // does not shift the coordinates, category and region.
                int range = Integer.parseInt(stripQuotes(fields[n - 1]));
                int poiClass = classify(stripQuotes(fields[n - 2]));
                if (poiClass == -1) {
                    unclassified++;
                } else if (range != -1) {
                    countR[range]++;
                    countP[poiClass]++;
                    tf[range][poiClass]++;
                    countPoi++;
                }
                String name = String.join(",", Arrays.copyOfRange(fields, 1, n - 4));
                // name + longitude + latitude + POI class (one of six) + region
                out.write(name + "," + fields[n - 4] + "," + fields[n - 3] + "," + poiClass + "," + range);
                out.newLine();
            }
        }
        CC = null; // the class table is not needed after step 1

        System.out.println("在区域外的poi数据:" + unclassified);
        System.out.println("通过第二级目录得到的数据:" + ccc);
        System.out.println(RULE);
        System.out.print("poi:");
        for (int value : countP) {
            System.out.print(value + " ");
        }
        System.out.println();
        System.out.print("Range:");
        for (int value : countR) {
            System.out.print(value + ",");
        }
        System.out.println();
        try (BufferedWriter stats = openWriter("result.txt")) {
            stats.write(joinInts(countP));
            stats.newLine();
            stats.write(joinInts(countR));
        }
        System.out.println(RULE);

        /* Step 2: TF-IDF from the counts gathered above. */
        float[][] tfFreq = new float[REGION_COUNT][CLASS_COUNT]; // share of a class within a region
        int[] regionsWithClass = new int[CLASS_COUNT];           // regions that contain a class
        for (int i = 0; i < REGION_COUNT; i++) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                if (countR[i] != 0) {
                    tfFreq[i][j] = tf[i][j] * 1.0f / countR[i];
                }
                if (tf[i][j] != 0) {
                    regionsWithClass[j]++;
                }
            }
        }

        // First variant: classic IDF, identical for every region.
        float[][] idf = new float[REGION_COUNT][CLASS_COUNT];
        for (int j = 0; j < CLASS_COUNT; j++) {
            float value = (float) Math.log(REGION_COUNT * 1.0 / (regionsWithClass[j] + 1));
            if (value < 0) {
                value = 0;
            }
            for (int i = 0; i < REGION_COUNT; i++) {
                idf[i][j] = value;
            }
        }
        report(tfFreq, idf, "TF_IDF1.txt", "function1.txt", "poi_label1.txt",
                "各个功能区含有的区域数(第一种):");

        // Second variant: IDF additionally weighted by the region's share of the class.
        for (int i = 0; i < REGION_COUNT; i++) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                idf[i][j] = secondIdf(countP[j], tf[i][j], regionsWithClass[j], REGION_COUNT);
            }
        }
        report(tfFreq, idf, "TF_IDF2.txt", "function2.txt", "poi_label2.txt",
                "各个功能区含有的区域数(第二种):");

        System.out.println(RULE);
        System.out.println("使用到poi数:" + countPoi);
    }

    /** Reads the class rows and the second-level rows of the category file into CC and CC2. */
    private static void loadClassTables(String path) throws IOException {
        CC = new String[CLASS_COUNT][];
        CC2 = new String[SECOND_ROWS][];
        try (BufferedReader in = openReader(path)) {
            for (int i = 0; i < CLASS_COUNT; i++) {
                String[] tokens = nextTableLine(in, path).split(" ");
                // The first token of a class row is skipped; the rest are category names.
                String[] row = new String[Math.max(MIN_CLASS_COLUMNS, tokens.length)];
                for (int j = 1; j < tokens.length; j++) {
                    row[j - 1] = tokens[j];
                }
                CC[i] = row;
            }
            for (int i = 0; i < SECOND_ROWS; i++) {
                String[] tokens = nextTableLine(in, path).split(" ");
                CC2[i] = Arrays.copyOf(tokens, Math.max(MIN_SECOND_COLUMNS, tokens.length));
            }
        }
    }

    /** Returns the next line of the category file or fails with a clear message when it ends early. */
    private static String nextTableLine(BufferedReader in, String path) throws IOException {
        String line = in.readLine();
        if (line == null) {
            throw new IOException(path + " must contain " + (CLASS_COUNT + SECOND_ROWS) + " lines");
        }
        return line;
    }

    /**
     * Scores every region with {@code tfFreq * idf}, assigns it the best-scoring function and
     * writes the score matrix, the per-function region counts and the per-function region ids.
     * Regions whose scores are all zero are counted round-robin over the functions but are not
     * listed in the label file.
     */
    private static void report(float[][] tfFreq, float[][] idf, String scorePath, String countPath,
            String labelPath, String heading) throws IOException {
        int[] numFunc = new int[CLASS_COUNT];
        StringBuilder[] labels = new StringBuilder[CLASS_COUNT];
        for (int j = 0; j < CLASS_COUNT; j++) {
            labels[j] = new StringBuilder();
        }
        int fallback = 0; // next function to receive a region without any score

        try (BufferedWriter out = openWriter(scorePath)) {
            for (int i = 0; i < tfFreq.length; i++) {
                double max = 0;
                int best = 0;
                StringBuilder row = new StringBuilder();
                for (int j = 0; j < CLASS_COUNT; j++) {
                    float score = tfFreq[i][j] * idf[i][j];
                    if (score > max) {
                        best = j;
                        max = score;
                    }
                    if (j > 0) {
                        row.append(' ');
                    }
                    row.append(score);
                }
                if (max == 0) {
                    fallback %= CLASS_COUNT;
                    numFunc[fallback++]++;
                } else {
                    numFunc[best]++;
                    if (labels[best].length() > 0) {
                        labels[best].append(' ');
                    }
                    labels[best].append(i);
                }
                out.write(row.toString());
                out.newLine();
            }
        }

        System.out.print(heading);
        try (BufferedWriter out = openWriter(countPath)) {
            for (int value : numFunc) {
                System.out.print(value + " ");
                out.write(value + "");
                out.newLine();
            }
        }
        System.out.println();

        try (BufferedWriter out = openWriter(labelPath)) {
            for (int j = 0; j < CLASS_COUNT; j++) {
                if (j > 0) {
                    out.newLine();
                }
                out.write(labels[j].toString());
            }
        }
    }

    /** Joins the values with single spaces. */
    private static String joinInts(int[] values) {
        StringBuilder joined = new StringBuilder();
        for (int value : values) {
            if (joined.length() > 0) {
                joined.append(' ');
            }
            joined.append(value);
        }
        return joined.toString();
    }

    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), ENCODING));
    }

    private static BufferedWriter openWriter(String path) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), ENCODING));
    }
}

二、将每个 poi 的有用信息提取出来,只包含经纬度和所属功能区编号(0-5)

代码如下
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

/**
 * Converts {@code poiRange.csv} into two JSON arrays of points for display on a map.
 *
 * <p>Every point is {@code {"lng":..,"lat":..,"fun":..}} where {@code fun} is the function
 * (0-5) of the region the POI lies in, taken from {@code poi_label1.txt}.
 * <ul>
 *   <li>{@code poiF1.json} - only the POIs whose own class equals the function of their region;</li>
 *   <li>{@code poiF2.json} - every POI that has both a class and a region, tagged with the
 *       function of its region.</li>
 * </ul>
 */
public class Func_Json {
    /**
     * Row {@code i} lists the region ids whose function is {@code i}. A value of -1 marks the end
     * of the used part of a row.
     */
    public static int CC[][];

    /**
     * Finds the function a region was assigned to.
     *
     * @param label region id
     * @return index of the first row of {@link #CC} that contains {@code label}, or -1 when the
     *     region is not listed (or {@code CC} has not been loaded)
     */
    public static int getFunction(int label) {
        if (CC == null) {
            return -1;
        }
        for (int i = 0; i < CC.length; i++) {
            int[] regions = CC[i];
            if (regions == null) {
                continue;
            }
            for (int j = 0; j < regions.length; j++) {
                if (regions[j] == -1) {
                    break; // -1 pads the unused tail of a row
                }
                if (regions[j] == label) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Parses one line of the label file into region ids. A missing or blank line, which the
     * label file contains for a function without regions, yields an empty array.
     *
     * @param line whitespace-separated region ids, may be {@code null}
     * @return the region ids in file order
     */
    static int[] parseLabelLine(String line) {
        if (line == null) {
            return new int[0];
        }
        String trimmed = line.trim();
        if (trimmed.isEmpty()) {
            return new int[0];
        }
        String[] tokens = trimmed.split("\\s+");
        int[] regions = new int[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            regions[i] = Integer.parseInt(tokens[i]);
        }
        return regions;
    }

    /**
     * Formats one POI as a JSON object.
     *
     * @param lngField longitude field as stored in the CSV, optionally quoted
     * @param latField latitude field as stored in the CSV, optionally quoted
     * @param fun function index of the POI's region
     * @return the JSON object without any separator
     */
    static String toJsonPoint(String lngField, String latField, int fun) {
        return "{\"lng\":" + stripQuotes(lngField) + ",\"lat\":" + stripQuotes(latField)
                + ",\"fun\":" + fun + "}";
    }

    /**
     * Reads the label file and the POI file and writes both JSON arrays.
     *
     * @param args unused
     * @throws IOException if an input file is missing or malformed, or an output cannot be written
     */
    public static void main(String[] args) throws IOException {
        int classPoi = 6;

        // Rows are sized by their line, so a function may own any number of regions.
        CC = new int[classPoi][];
        try (BufferedReader labels = openReader("poi_label1.txt")) {
            for (int i = 0; i < classPoi; i++) {
                CC[i] = parseLabelLine(labels.readLine());
            }
        }

        try (BufferedReader pois = openReader("poiRange.csv");
             BufferedWriter matching = openWriter("poiF1.json");
             BufferedWriter all = openWriter("poiF2.json")) {
            matching.write("[");
            all.write("[");
            boolean firstMatching = true;
            boolean firstAll = true;

            String line;
            while ((line = pois.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split(",");
                int n = fields.length;
                if (n < 5) {
                    throw new IOException("Malformed poiRange.csv line (expected 5 fields): " + line);
                }
                // Fields are addressed from the end so that a comma inside the POI name
                // does not shift the coordinates, class and region.
                int poi = Integer.parseInt(fields[n - 2]);
                int label = Integer.parseInt(fields[n - 1]);
                if (poi == -1 || label == -1) {
                    continue;
                }
                int fun = getFunction(label);
                String point = toJsonPoint(fields[n - 4], fields[n - 3], fun);
                if (fun == poi) {
                    appendElement(matching, point, firstMatching);
                    firstMatching = false;
                }
                appendElement(all, point, firstAll);
                firstAll = false;
            }

            matching.newLine();
            matching.write("]");
            all.newLine();
            all.write("]");
        }
        System.out.println("Tranform end");
    }

    /** Writes one array element on its own line, preceded by a comma unless it is the first. */
    private static void appendElement(BufferedWriter out, String element, boolean first)
            throws IOException {
        if (!first) {
            out.write(",");
        }
        out.newLine();
        out.write(element);
    }

    /** Removes one pair of surrounding double quotes, if present. */
    private static String stripQuotes(String field) {
        int length = field.length();
        if (length >= 2 && field.charAt(0) == '"' && field.charAt(length - 1) == '"') {
            return field.substring(1, length - 1);
        }
        return field;
    }

    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf-8"));
    }

    private static BufferedWriter openWriter(String path) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), "utf-8"));
    }
}

三、在网页中展示效果

利用上一步得到的 json 文件(如果文件还不是合法的 JSON 数组,需要先稍微处理一下,例如把最后一项 {} 之后的“,”去掉),再使用 leaflet.js 和 d3.js 库,便能清晰地在地图上展示每个区域的功能区分布。
效果如下:



四、代码链接

  (1) java代码和源文件
  (2)网页端显示地址


1 0