OpenRTMFP/Cumulus开发笔记(7) Cumulus大数据处理实例(续)
来源:互联网 发布:影子写手 知乎 编辑:程序博客网 时间:2024/05/16 14:51
前一篇啰啰嗦嗦,已经把相关原理叙述了,这里把源码附上,如下:
一.HashMpq.h头文件如下:
#ifndef HASHMPQ_H_
#define HASHMPQ_H_
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <string>
#define MAXFILENAME 25
#define MAXTABLELEN 1024
/// One slot of the open-addressing table managed by HashMpq.
struct MPQHASHTABLE
{
    /// First verification hash of the stored key (hash type 1); -1 after reset().
    long nHashA;
    /// Second verification hash of the stored key (hash type 2); -1 after reset().
    long nHashB;
    /// True once the slot holds an entry, false after reset().
    bool bExists;
    /// NUL-terminated copy of the key string kept for printing.
    char test_filename[MAXFILENAME];
    /// Occurrence counter: reset() sets it to 0, inserting an entry sets it to 1.
    int count;
};
class HashMpq
{
public :
HashMpq( const long nTableLength = MAXTABLELEN )
{
prepareCryptTable();
m_tablelength = nTableLength;
m_HashIndexTable = new MPQHASHTABLE[nTableLength];
reset(nTableLength);
}
void reset(const long nTableLength);
void prepareCryptTable();
unsigned long HashString(std::string lpszFileName, unsigned long dwHashType);
long GetHashTablePos(std::string lpszString);
bool SetHashTable(std::string lpszString);
unsigned long GetTableLength( void );
void SetTableLength(const unsigned long nLength );
~HashMpq()
{
if ( NULL != m_HashIndexTable )
{
delete []m_HashIndexTable;
m_HashIndexTable = NULL;
m_tablelength = 0;
}
}
public :
MPQHASHTABLE *m_HashIndexTable;
private :
unsigned long cryptTable[0x500];
unsigned long m_tablelength;
};
#endif /* HASHMPQ_H_ */
转载请注明出处:山水间博客,http://blog.csdn.net/linyanwen99/article/details/8183120
二.HashMpq.cpp实现文件,如下:
#include "windows.h"
#include "HashMpq.h"
#include <ctype.h>
void HashMpq::reset(const long nTableLength) {
for (int i = 0; i < nTableLength; i++) {
m_HashIndexTable[i].nHashA = -1;
m_HashIndexTable[i].nHashB = -1;
m_HashIndexTable[i].bExists = false;
m_HashIndexTable[i].test_filename[0] = '\0';
m_HashIndexTable[i].count = 0;
}
}
/**
 * Fills cryptTable (0x500 entries) from a fixed-seed linear congruential
 * sequence. Entries are written column-wise: for each of the 0x100 rows the
 * five values go to row, row + 0x100, ..., row + 0x400.
 */
void HashMpq::prepareCryptTable()
{
    unsigned long state = 0x00100001;

    for (unsigned long row = 0; row < 0x100; ++row)
    {
        unsigned long slot = row;
        for (unsigned long bank = 0; bank < 5; ++bank)
        {
            // Two generator steps per entry: the first supplies the upper
            // 16 bits, the second the lower 16 bits.
            state = (state * 125 + 3) % 0x2AAAAB;
            const unsigned long upperHalf = (state & 0xFFFF) << 0x10;

            state = (state * 125 + 3) % 0x2AAAAB;
            const unsigned long lowerHalf = (state & 0xFFFF);

            cryptTable[slot] = ( upperHalf | lowerHalf );
            slot += 0x100;
        }
    }
}
/**
 * Computes the hash of a string, treating letters case-insensitively.
 *
 * @param lpszFileName string to hash; hashing stops at the first NUL byte.
 * @param dwHashType   selects the 0x100-entry bank of cryptTable that is
 *                     mixed in (the callers in this file pass 0, 1 or 2).
 * @return the first accumulator after all characters have been processed.
 */
unsigned long HashMpq::HashString(std::string lpszFileName, unsigned long dwHashType)
{
    unsigned long hash = 0x7FED7FED;
    unsigned long mixer = 0xEEEEEEEE;
    const unsigned long bankBase = dwHashType << 8;

    const unsigned char *cursor = (unsigned char *)lpszFileName.c_str();
    for (; *cursor != 0; ++cursor)
    {
        // Bytes are read as unsigned char, so toupper() always receives a
        // value in its valid domain.
        const int upper = toupper(*cursor);
        hash = cryptTable[bankBase + upper] ^ (hash + mixer);
        mixer = upper + hash + mixer + (mixer << 5) + 3;
    }
    return hash;
}
long HashMpq::GetHashTablePos(std::string lpszString)
{
const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
unsigned long nHash = HashString(lpszString, HASH_OFFSET);
unsigned long nHashA = HashString(lpszString, HASH_A);
unsigned long nHashB = HashString(lpszString, HASH_B);
unsigned long nHashStart = nHash % m_tablelength;
unsigned long nHashPos = nHashStart;
while ( m_HashIndexTable[nHashPos].bExists)
{
if (m_HashIndexTable[nHashPos].nHashA == nHashA && m_HashIndexTable[nHashPos].nHashB == nHashB)
{
return nHashPos;
}
else
nHashPos = (nHashPos + 1) % m_tablelength;
if (nHashPos == nHashStart)
break;
}
return -1;
}
bool HashMpq::SetHashTable(std::string lpszString )
{
const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
unsigned long nHash = HashString(lpszString, HASH_OFFSET);
unsigned long nHashA = HashString(lpszString, HASH_A);
unsigned long nHashB = HashString(lpszString, HASH_B);
unsigned long nHashStart = nHash % m_tablelength, nHashPos = nHashStart;
while (m_HashIndexTable[nHashPos].bExists) {
/*
TODO,判断该IP地址是否已经存在,存在的话,只需在原来的基础上加1即可,这里略去
*/
nHashPos = (nHashPos + 1) % m_tablelength;if (nHashPos == nHashStart) {
return false;
}
}
m_HashIndexTable[nHashPos].bExists = true;
m_HashIndexTable[nHashPos].nHashA = nHashA;
m_HashIndexTable[nHashPos].nHashB = nHashB;
strcpy( m_HashIndexTable[nHashPos].test_filename, lpszString.c_str());
m_HashIndexTable[nHashPos].count = 1;
return true;
}
unsigned long HashMpq::GetTableLength(void)
{
return m_tablelength;
}
void HashMpq::SetTableLength( const unsigned long nLength )
{
m_tablelength = nLength;
return;
}
转载请注明出处:山水间博客,http://blog.csdn.net/linyanwen99/article/details/8183120
三.ConstructBigData.h头文件,如下:
#ifndef CONSTRUCTBIGDATA_H_
#define CONSTRUCTBIGDATA_H_
#include <string>
#include <map>
#include "HashMpq.h"
class ConstructBigData{
public:
ConstructBigData(){}
ConstructBigData(int hashlen):mpq(hashlen),hashMpqLen(hashlen){}
~ConstructBigData(){}
public:
void constructIps(char* fileName);
void constructIps(std::string fileName);
void filePartition(std::string fileName);
void printMpq();
void findMax();
void Max();
public:
HashMpq mpq;
private:
int hashMpqLen;
};
#endif /* CONSTRUCTBIGDATA_H_ */
四.ConstructBigData.cpp实现文件,如下:
#include "ConstructBigData.h"
#include <fstream>
#include <sstream>
#include <stdlib.h>
#include <time.h>
#include <stdio.h>
#include <iostream>
#include <map>
/**
 * Creates (or truncates) fileName and writes the single address
 * "127.0.0.1" to it, without a trailing newline.
 */
void ConstructBigData::constructIps(char* fileName){
    std::ofstream sink;
    sink.open(fileName, std::ios::out);
    sink << "127.0.0.1";
    sink.flush();
    sink.close();
}
/**
 * Creates (or truncates) fileName and fills it with 9,000,000 random
 * dotted-quad addresses, one per line.
 *
 * Each octet is rand() % 256; the generator is seeded from the current
 * time. Nothing is written when the file cannot be opened.
 */
void ConstructBigData::constructIps(std::string fileName){
    const int kAddressCount = 9000000;

    std::ofstream outfile(fileName.c_str(), std::ios::out);
    if (!outfile) {
        return;
    }

    srand((unsigned) time(NULL));
    for (int i = 0; i < kAddressCount; ++i) {
        for (int j = 0; j < 4; ++j) {
            const int octet = rand() % 256;
            outfile << octet << (j < 3 ? '.' : '\n');
        }
        // No flush here: flushing after each of the millions of records
        // defeats the stream buffer; close() below flushes once.
    }
    outfile.close();
}
/**
 * Splits a file of dotted-quad addresses into five partition files.
 *
 * Every line "a.b.c.d" is combined into one number
 * (((a * 256 + b) * 256 + c) * 256 + d) and written, followed by a newline,
 * to outfile<N>.txt where N is that number modulo 5. Identical addresses
 * therefore always end up in the same partition. Lines that do not parse as
 * four numbers separated by single characters are skipped.
 *
 * @param fileName path of the input file.
 */
void ConstructBigData::filePartition(std::string fileName){
    const int kPartitionCount = 5;

    std::ifstream infile(fileName.c_str(), std::ios::in);

    // The partitions are opened (and thereby truncated) before the input is
    // checked, so a failed run never leaves results of an earlier run behind.
    std::ofstream partitions[kPartitionCount];
    for (int i = 0; i < kPartitionCount; ++i) {
        std::stringstream name;
        name << "outfile" << i << ".txt";
        partitions[i].open(name.str().c_str(), std::ios::out);
    }

    if(!infile){
        return;
    }

    std::string line;
    // Looping on getline() itself also handles a last line without a
    // trailing newline, which an eof()-based loop would drop.
    while (std::getline(infile, line)) {
        std::istringstream fields(line);
        unsigned short val1, val2, val3, val4;
        unsigned char ch1, ch2, ch3;
        if (!(fields >> val1 >> ch1 >> val2 >> ch2 >> val3 >> ch3 >> val4)) {
            continue; // blank or malformed line
        }

        // Widen before shifting: in int arithmetic a first octet >= 128
        // would be shifted into the sign bit.
        unsigned long ipval = val1;
        ipval = (ipval << 8) + val2;
        ipval = (ipval << 8) + val3;
        ipval = (ipval << 8) + val4;

        partitions[ipval % kPartitionCount] << line << '\n';
    }
    // The streams flush and close themselves when they go out of scope.
}
void ConstructBigData::printMpq(){
for(int i=0;i<hashMpqLen;++i){
if(mpq.m_HashIndexTable[i].bExists){
if(mpq.m_HashIndexTable[i].count > 1)
printf("%s,%d\n",mpq.m_HashIndexTable[i].test_filename,mpq.m_HashIndexTable[i].count);
}
}
}
/**
 * Processes the partition files outfile0.txt .. outfile4.txt one by one:
 * every line of a partition is passed to mpq.SetHashTable(), then
 * "from <file>->" is printed and Max() reports the partition's top entry
 * and clears the table for the next partition.
 */
void ConstructBigData::findMax(){
    const int fileNum = 5;
    for(int i=0;i<fileNum;++i){
        std::stringstream ss;
        ss << "outfile" << i << ".txt";
        const std::string partitionName = ss.str();

        std::ifstream infile(partitionName.c_str(),std::ios::in);
        std::string buffer;
        // Loop on getline() itself: a stream that failed to open never
        // reports eof(), so an eof()-based loop would spin forever, and it
        // would also drop a last line that has no trailing newline.
        while (std::getline(infile, buffer)) {
            // A false return means the table has no room left for this key;
            // it is skipped, later lines may still match stored keys.
            mpq.SetHashTable(buffer);
        }
        infile.close();

        printf("from %s->",partitionName.c_str());
        Max();
    }
}
void ConstructBigData::Max(){
int index = 0;
for (int i = 0; i < hashMpqLen; ++i) {
if (mpq.m_HashIndexTable[i].bExists) {
if(mpq.m_HashIndexTable[i].count > mpq.m_HashIndexTable[index].count){
index = i;
}
}
}
if(mpq.m_HashIndexTable[index].bExists){
printf("%s,%d\n",mpq.m_HashIndexTable[index].test_filename,mpq.m_HashIndexTable[index].count);
}
mpq.reset(hashMpqLen);
}
五.main.cpp实现文件,如下:
#include "ConstructBigData.h"
#include <iostream>
#include <stdio.h>
/**
 * Runs the whole demo: generate bigdata.txt, partition it, then report the
 * most frequent address of each partition. Command-line arguments are not
 * used.
 */
int main(int argc,char** argv){
    (void)argc;
    (void)argv;

    // Table with ten million slots for the per-partition counting.
    ConstructBigData bd(10000000);
    const std::string fileName("bigdata.txt");

    bd.constructIps(fileName);
    bd.filePartition(fileName);
    bd.findMax();

    return 0;
}
说明:关于运行时间什么的,这里懒得做了,有空的时候再加上吧。
PS:初写文章,文笔生涩之处,各位请见谅,若有疑问或者交流的,可加本人YY号:301558660
转载请注明出处:山水间博客,http://blog.csdn.net/linyanwen99/article/details/8183120
- OpenRTMFP/Cumulus开发笔记(7) Cumulus大数据处理实例(续)
- OpenRTMFP/Cumulus开发笔记(6) Cumulus大数据处理实例
- OpenRTMFP/Cumulus开发笔记(4) Cumulus线程详解 (续1)
- OpenRTMFP/Cumulus开发笔记(1) 部署Cumulus
- OpenRTMFP/Cumulus开发笔记(8) Cumulus服务器端处理高并发连接实例
- OpenRTMFP/Cumulus开发笔记(3) Cumulus线程详解
- OpenRTMFP/Cumulus开发笔记(2) Cumulus相关AMF数据的解析
- OpenRTMFP/Cumulus开发笔记(5) Cumulus实时视频流的播放
- OpenRTMFP/Cumulus Primer(7)CumulusServer启动流程分析(续3)
- OpenRTMFP/Cumulus Primer(7)CumulusServer 启动流程分析(续3)
- Cumulus开发环境配置
- OpenRTMFP/Cumulus Primer(5)CumulusServer启动流程分析(续1)
- OpenRTMFP/Cumulus Primer(6)CumulusServer启动流程分析(续2)
- OpenRTMFP/Cumulus Primer(12)IO管理之IO流(续)
- OpenRTMFP/Cumulus Primer(17)AMF解析之AMFReader(续1)
- OpenRTMFP/Cumulus Primer(18)AMF解析之AMFReader(续2)
- OpenRTMFP/Cumulus Primer(5)CumulusServer启动流程分析(续1)
- OpenRTMFP/Cumulus Primer(6)CumulusServer启动流程分析(续2)
- 1.HttpServletResponse对象
- 数据库事务正确执行的四个基本要素
- Unity3D Android/Unity Remote Android
- 查询表中相同数据的sql
- 如何在Drupal 首页实现图片滑动效果?
- OpenRTMFP/Cumulus开发笔记(7) Cumulus大数据处理实例(续)
- 找出相加能得给定数的连续的数
- Android 4.2 打开设置中隐藏的开发者选项
- 理解 Thread.Sleep 函数 ,Sleep(0) 释放当前线程所剩余的时间片,让线程马上回到就绪队列而非等待队列
- 设置MySQL的最大连接数-普通只有100个
- excel文件的导出功能
- Oracle 查看 杀掉锁表进程
- java+Jsoup 正则过滤html网页标签
- dwr