海量IP地址排序统计出现次数最多的K个地址

来源:互联网 发布:长沙岳麓网络花店 编辑:程序博客网 时间:2024/05/16 06:09

本文假设海量IP地址无法一次性装入内存进行排序,因此采用如下步骤:(1)对每个IP做hash,按hash值把数据分割成N个小文件(这里N取1000),相同的IP必然落入同一个小文件,因此每个小文件内的计数就是该IP的全局计数;(2)分别统计每个小文件中出现次数最多的K个地址;(3)对得到的至多N*K个候选地址再做一次排序统计(最小堆/归并/快排),得到全局出现次数最多的K个地址。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;

/**
 * Finds the most frequent IP addresses in a file that is too large to be
 * counted in memory in one pass.
 *
 * <p>The work is done in three steps:
 * <ol>
 *   <li>{@link #generateIp(int, String)} writes a whitespace-separated file of
 *       random addresses (test data);</li>
 *   <li>{@link #hash(String, int)} distributes the addresses over
 *       {@code fileNum} bucket files by hash, so that equal addresses always
 *       end up in the same bucket;</li>
 *   <li>{@link #hashAndSort(String, int, int)} counts each bucket separately,
 *       keeps the {@code top} most frequent addresses of every bucket and then
 *       selects the overall {@code top} from those candidates.</li>
 * </ol>
 *
 * <p>Bucket files live in a {@code hash} directory next to the source file and
 * the final result is written to {@code result.txt} next to the source file.
 */
public class IP {

    /** Separator written between addresses in the source and bucket files. */
    private static final char SEPARATOR = ' ';

    /**
     * Writes {@code num} random addresses of the form {@code 192.x.y.z}
     * (each followed by a single space) to {@code path}, replacing any
     * existing file. Missing parent directories are created.
     *
     * <p>I/O failures are reported on standard error and otherwise ignored.
     *
     * @param num  number of addresses to generate
     * @param path file to write
     */
    public static void generateIp(int num, String path) {
        Random random = new Random();
        File file = new File(path);
        try {
            ensureDirectory(parentDirectory(file));
            try (BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(file)))) {
                StringBuilder address = new StringBuilder(16);
                for (int i = 0; i < num; i++) {
                    address.setLength(0);
                    address.append("192.")
                            .append(random.nextInt(256)).append('.')
                            .append(random.nextInt(256)).append('.')
                            .append(random.nextInt(256)).append(SEPARATOR);
                    writer.append(address);
                }
            }
            System.out.println("ip生成完毕");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Splits the addresses in {@code path} into {@code fileNum} bucket files
     * named {@code hash/0.txt .. hash/(fileNum-1).txt} next to the source file.
     *
     * <p>Existing bucket files are truncated, so running this method again
     * does not duplicate data. Empty tokens (for example the one after the
     * trailing separator) are skipped.
     *
     * <p>I/O failures are reported on standard error and otherwise ignored.
     *
     * @param path    source file containing whitespace-separated addresses
     * @param fileNum number of bucket files; must be positive
     * @throws IllegalArgumentException if {@code fileNum} is not positive
     */
    public static void hash(String path, int fileNum) {
        if (fileNum <= 0) {
            throw new IllegalArgumentException("fileNum must be positive: " + fileNum);
        }
        File source = new File(path);
        OutputStreamWriter[] writers = new OutputStreamWriter[fileNum];
        try {
            try {
                ensureDirectory(bucketDirectory(source));
                for (int i = 0; i < fileNum; i++) {
                    // Truncate rather than append: stale data from an earlier
                    // run would otherwise inflate the counts.
                    writers[i] = new OutputStreamWriter(
                            new FileOutputStream(bucketFile(source, i)));
                }
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(source)))) {
                    String address;
                    while ((address = nextToken(reader)) != null) {
                        OutputStreamWriter target = writers[bucketIndex(address, fileNum)];
                        target.write(address);
                        target.write(SEPARATOR);
                    }
                }
            } finally {
                closeAll(writers);
            }
            System.out.println("hash完毕");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Counts the addresses of every bucket file produced by
     * {@link #hash(String, int)}, selects the {@code top} most frequent
     * addresses overall, writes them to {@code result.txt} next to the source
     * file (most frequent first) and prints them to standard output.
     *
     * <p>I/O failures are reported on standard error and otherwise ignored.
     *
     * @param path    the source file that was passed to {@code hash}
     * @param fileNum number of bucket files to read
     * @param top     number of addresses to report
     */
    public static void hashAndSort(String path, int fileNum, int top) {
        File source = new File(path);
        try {
            // Per-bucket winners. Buckets are disjoint, so no key is
            // overwritten when they are merged here.
            Map<String, Integer> candidates = new HashMap<>();
            for (int i = 0; i < fileNum; i++) {
                sortMap(countAddresses(bucketFile(source, i)), top, candidates);
            }

            // LinkedHashMap keeps the descending order produced by sortMap.
            Map<String, Integer> resultMap = new LinkedHashMap<>();
            sortMap(candidates, top, resultMap);

            File result = new File(parentDirectory(source), "result.txt");
            try (OutputStreamWriter writer = new OutputStreamWriter(
                    new FileOutputStream(result))) {
                for (Entry<String, Integer> entry : resultMap.entrySet()) {
                    writer.write(entry.getKey() + " 出现次数:" + entry.getValue());
                    writer.write("\r\n");
                }
            }
            for (Entry<String, Integer> entry : resultMap.entrySet()) {
                System.out.println(entry.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the {@code top} entries with the highest values from {@code map}
     * into {@code resultMap}, inserting them in descending order of value
     * (ties are ordered by key so the selection is deterministic).
     *
     * <p>The insertion order is only observable if {@code resultMap} preserves
     * it (for example a {@link LinkedHashMap}).
     *
     * @param map       entries to rank; not modified
     * @param top       maximum number of entries to copy
     * @param resultMap receives the selected entries
     */
    public static void sortMap(Map<String, Integer> map, int top, Map<String, Integer> resultMap) {
        List<Entry<String, Integer>> ranked = new ArrayList<>(map.entrySet());
        Collections.sort(ranked, new Comparator<Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                int byCount = Integer.compare(o2.getValue(), o1.getValue());
                return byCount != 0 ? byCount : o1.getKey().compareTo(o2.getKey());
            }
        });
        for (int j = 0; j < ranked.size() && j < top; j++) {
            Entry<String, Integer> entry = ranked.get(j);
            resultMap.put(entry.getKey(), entry.getValue());
        }
    }

    /** Generates the sample data, buckets it and reports the top addresses. */
    public static void main(String[] args) {
        String path = "F:/ip/source2.txt";
        int top = 10;
        int filenum = 1000;
        generateIp((int) Math.pow(10, 9), path);
        hash(path, filenum);
        hashAndSort(path, filenum, top);
    }

    /** Returns the occurrence count of every address in one bucket file. */
    private static Map<String, Integer> countAddresses(File bucket) throws IOException {
        Map<String, Integer> counts = new HashMap<>(1024);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(bucket)))) {
            String address;
            while ((address = nextToken(reader)) != null) {
                Integer seen = counts.get(address);
                counts.put(address, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }

    /**
     * Reads the next non-empty whitespace-delimited token, or returns
     * {@code null} once the input is exhausted.
     */
    private static String nextToken(BufferedReader reader) throws IOException {
        StringBuilder token = new StringBuilder();
        int ch;
        while ((ch = reader.read()) != -1) {
            if (!Character.isWhitespace(ch)) {
                token.append((char) ch);
            } else if (token.length() > 0) {
                return token.toString();
            }
            // Otherwise: leading or repeated separator, keep scanning.
        }
        return token.length() > 0 ? token.toString() : null;
    }

    /**
     * Maps an address to a bucket in {@code [0, fileNum)}. The sign bit is
     * masked off instead of using {@code Math.abs}, which stays negative for
     * {@code Integer.MIN_VALUE}.
     */
    private static int bucketIndex(String address, int fileNum) {
        int h = address.hashCode();
        return ((h ^ (h >>> 16)) & Integer.MAX_VALUE) % fileNum;
    }

    /** Directory containing {@code file}; resolved so a bare file name works too. */
    private static File parentDirectory(File file) {
        return file.getAbsoluteFile().getParentFile();
    }

    /** The {@code hash} directory next to the source file. */
    private static File bucketDirectory(File source) {
        return new File(parentDirectory(source), "hash");
    }

    /** The bucket file with the given index. */
    private static File bucketFile(File source, int index) {
        return new File(bucketDirectory(source), index + ".txt");
    }

    /** Creates {@code dir} (and its parents) if it does not exist yet. */
    private static void ensureDirectory(File dir) throws IOException {
        if (dir != null && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create directory " + dir);
        }
    }

    /** Closes every non-null writer; rethrows the first failure after trying all. */
    private static void closeAll(OutputStreamWriter[] writers) throws IOException {
        IOException first = null;
        for (OutputStreamWriter writer : writers) {
            if (writer == null) {
                continue;
            }
            try {
                writer.close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                }
            }
        }
        if (first != null) {
            throw first;
        }
    }
}


阅读全文
0 0
原创粉丝点击