大数据的四则运算

来源：互联网发布：mac降调软件编辑：程序博客网时间：2024/04/29 20:15

（一）大数据的应用

很多时候我们会面临计算机内置数据类型不够用的情况。比如要统计全世界QQ用户的年访问量，这个数据将非常庞大。VS开发平台上最大的整型是 long long，其范围为：最大值 9223372036854775807，最小值 -9223372036854775808；用16进制表示，最大值为 0x7fffffffffffffff，最小值为 0x8000000000000000。

（二）自定义类型

BigData的实现思路：当处理的数据没有超过内置类型的范围时，将数据保存到 long long 类型的成员中；超过范围时，以字符串的形式保存到 string 成员中，需要做四则运算时，再将每一位字符转换为数字进行运算。思路就是这么简单，难点在于实现的细节。

（三）初始化方式：定义两个构造函数。传入的参数如果是 long long 范围内的整数，则匹配 BigData(INT64 data = UN_INIT)；如果超出范围，则以字符串形式传入，匹配 BigData(const char *pData)。

（四）加减法的实现

1. 两个数都没有超出范围（又分为计算结果有没有超出范围）：结果没有超出范围时，直接用内置类型相加

2. 两个数都超出了范围（同号相加时结果必然超出范围，按字符串逐位运算）

3. 两个数中有一个超出了范围（同号相加时结果必然超出范围，按字符串逐位运算）

在以上三种情况的基础上再考虑符号；减法需要考虑的情况与之类似。

小技巧：运算传参时，可以把位数较长的数据固定传给左边的参数，方便逐位相加。更多的细节问题请看代码中的注释。

（五）乘法的实现

分情况讨论的方式与加减法相似。乘法的主要思路是逐位相乘、错位相加。

（六）除法的实现（相对复杂些）

如果被除数的位数比除数少（或位数相同但数值更小），商自然为0，余数就是被除数本身。被除数较长时，先计算除数的位数，从被除数的高位取出相应的位数，与除数循环相减得到一位商，再补入被除数的下一位继续运算（规则与手算竖式除法一致）。

BigData.h

#ifndef BIG_DATA_H
#define BIG_DATA_H

#include <string>
#include <iostream>

// Arbitrary bit pattern used as the default constructor argument, i.e. the
// value a BigData takes when it is constructed without an initializer.
#define UN_INIT 0xcccccccccccccccc
// Largest value representable by long long.
#define MAX_INT64 0x7fffffffffffffff
// Smallest value representable by long long.
// It is spelled as a signed expression on purpose: the hexadecimal literal
// 0x8000000000000000 does not fit in a signed 64-bit type and therefore has
// an unsigned type, which silently turns every comparison and division that
// uses it into unsigned arithmetic.
#define MIN_INT64 (-0x7fffffffffffffff - 1)

typedef long long INT64;

// Signed decimal integer of arbitrary size.
//
// The value is kept as a decimal string (m_strData: a sign character followed
// by the digits) and, while it fits, also as a native 64-bit integer
// (m_llValue). The arithmetic operators use native arithmetic when both
// operands and the result fit in INT64 and digit-by-digit string arithmetic
// otherwise.
class BigData
{
public:
    // Builds the number from a native integer.
    BigData(INT64 data = UN_INIT);
    // Builds the number from decimal text such as "-12345789" or "+42".
    BigData(const char *pData);

    // NOTE(review): operator+ takes a non-const reference, unlike the other
    // arithmetic operators, so temporaries cannot appear on the right-hand
    // side. The signature is left untouched here because it has to match the
    // out-of-line definition.
    BigData operator+(BigData& bigData);
    BigData operator-(const BigData& bigData);
    BigData operator*(const BigData& bigData);
    BigData operator/(const BigData& bigData);
    BigData operator%(const BigData& bigData);

    //=======================================
    bool operator<(const BigData& bigData);
    bool operator>(const BigData& bigData);
    bool operator==(const BigData& bigData);

    // Member form of stream output; invoked as `value << stream`.
    std::ostream& operator<<(std::ostream& _cout);

    friend std::ostream& operator<<(std::ostream& _cout, const BigData& bigData);
    // NOTE(review): bigData is received by value, so an extraction through
    // this operator can only fill a copy of the caller's object — confirm
    // whether a reference was intended.
    friend std::istream& operator>>(std::istream& _cin, BigData bigData);

private:
    // String arithmetic helpers. Operands and results are decimal strings
    // whose first character is the sign.
    std::string Add(std::string left, std::string right);
    std::string Sub(std::string left, std::string right);
    std::string Mul(std::string left, std::string right);
    std::string Div(std::string left, std::string right);

    // Fills m_strData with the decimal text of m_llValue.
    void INT64ToString();
    // True when the value held in m_strData lies outside the INT64 range.
    bool IsINT64Owerflow()const;
    // True when the LSize digits at pLeft are numerically >= the RSize digits
    // at pRight.
    bool IsLeftStrBig(char *pLeft, size_t LSize, char *pRight, size_t RSize);
    // Repeatedly subtracts the right digits from the left digits in place and
    // returns the number of subtractions as a digit character.
    char SubLoop(char *pLeft, size_t LSize, char *pRight, size_t RSize);

private:
    long long m_llValue;     // native value; meaningful only while it fits in INT64
    std::string m_strData;   // sign character followed by the decimal digits
};

#endif

#include "BigData.h"

#include <algorithm>
#include <cassert>
#include <cstring>
#include <limits>
#include <ostream>
#include <string>
#include <utility>

// Representation used throughout this file:
//   m_strData is a sign character ('+' or '-') followed by the decimal digits
//   without leading zeros; zero is always stored as "+0".
//   m_llValue mirrors the value while it fits in INT64 and is 0 otherwise, so
//   it must only be read after IsINT64Owerflow() returned false.

namespace
{
// The limits are taken from <limits> rather than from the MAX_INT64/MIN_INT64
// macros so that the overflow checks below are always done in signed
// arithmetic.
const INT64 kInt64Max = std::numeric_limits<INT64>::max();
const INT64 kInt64Min = std::numeric_limits<INT64>::min();

// True when lhs + rhs is representable as INT64.
bool AddFits(INT64 lhs, INT64 rhs)
{
    return (rhs >= 0) ? (lhs <= kInt64Max - rhs) : (lhs >= kInt64Min - rhs);
}

// True when lhs - rhs is representable as INT64.
bool SubFits(INT64 lhs, INT64 rhs)
{
    return (rhs >= 0) ? (lhs >= kInt64Min + rhs) : (lhs <= kInt64Max + rhs);
}

// True when lhs * rhs is representable as INT64.
bool MulFits(INT64 lhs, INT64 rhs)
{
    if (0 == lhs || 0 == rhs)
    {
        return true;
    }
    if (lhs > 0)
    {
        return (rhs > 0) ? (lhs <= kInt64Max / rhs) : (rhs >= kInt64Min / lhs);
    }
    // lhs < 0. None of the divisions below can be MIN / -1.
    return (rhs > 0) ? (lhs >= kInt64Min / rhs) : (rhs >= kInt64Max / lhs);
}

// Writes a signed decimal string, omitting a leading '+'.
std::ostream& WriteDecimal(std::ostream& os, const std::string& strData)
{
    if (!strData.empty() && '+' == strData[0])
    {
        return os << (strData.c_str() + 1);
    }
    return os << strData;
}
} // namespace

// Builds the number from a native integer and caches its decimal text.
BigData::BigData(INT64 data)
    : m_llValue(data)
    , m_strData("")
{
    INT64ToString();
}

// Builds the number from decimal text.
//
// Accepted shapes: an optional '+' or '-' followed by digits. Leading zeros
// are dropped and parsing stops at the first non-digit, so "12457aaa123"
// yields 12457 and "000001234567" yields 1234567. Text that starts with
// neither a sign nor a digit (for example "a23456789"), a lone sign, and a
// null pointer all yield zero.
BigData::BigData(const char *_pData)
    : m_llValue(0)
    , m_strData("+0")
{
    assert(NULL != _pData);
    if (NULL == _pData)
    {
        return;
    }

    const char *pData = _pData;
    char cSymbol = '+';
    if ('+' == *pData || '-' == *pData)
    {
        cSymbol = *pData;
        ++pData;
    }
    else if (*pData < '0' || *pData > '9')
    {
        return; // not a number: keep the zero set up above
    }

    // Drop leading zeros.
    while ('0' == *pData)
    {
        ++pData;
    }

    // Collect the run of digits; anything after it is ignored.
    std::string strDigits;
    while (*pData >= '0' && *pData <= '9')
    {
        strDigits.push_back(*pData);
        ++pData;
    }
    if (strDigits.empty())
    {
        return; // "0", "-000", "+", ...: normalised to "+0"
    }

    m_strData.assign(1, cSymbol);
    m_strData += strDigits;

    // Mirror the value natively only when it fits. Negative numbers are
    // accumulated downwards so that INT64's minimum can be reached without
    // passing through an unrepresentable positive magnitude.
    if (!IsINT64Owerflow())
    {
        for (std::string::size_type i = 0; i < strDigits.size(); ++i)
        {
            const int iDigit = strDigits[i] - '0';
            m_llValue = m_llValue * 10 + (('-' == cSymbol) ? -iDigit : iDigit);
        }
    }
}

// Returns *this + bigData.
BigData BigData::operator+(BigData& bigData)
{
    // Fast path: both operands and the sum fit in INT64.
    if (!IsINT64Owerflow() && !bigData.IsINT64Owerflow() &&
        AddFits(m_llValue, bigData.m_llValue))
    {
        return BigData(m_llValue + bigData.m_llValue);
    }

    // String path: equal signs add the magnitudes, different signs subtract
    // them; both helpers derive the result sign from the left operand.
    std::string strRet;
    if (m_strData[0] == bigData.m_strData[0])
    {
        strRet = Add(m_strData, bigData.m_strData);
    }
    else
    {
        strRet = Sub(m_strData, bigData.m_strData);
    }
    return BigData(strRet.c_str());
}

// Returns *this - bigData.
BigData BigData::operator-(const BigData& bigData)
{
    // Fast path: both operands and the difference fit in INT64.
    if (!IsINT64Owerflow() && !bigData.IsINT64Owerflow() &&
        SubFits(m_llValue, bigData.m_llValue))
    {
        return BigData(m_llValue - bigData.m_llValue);
    }

    // String path: a - b with different signs is sign(a) * (|a| + |b|),
    // with equal signs it is sign(a) * (|a| - |b|).
    std::string strRet;
    if (m_strData[0] != bigData.m_strData[0])
    {
        strRet = Add(m_strData, bigData.m_strData);
    }
    else
    {
        strRet = Sub(m_strData, bigData.m_strData);
    }
    return BigData(strRet.c_str());
}

// Returns *this * bigData.
BigData BigData::operator*(const BigData& bigData)
{
    const bool bLeftNative = !IsINT64Owerflow();
    const bool bRightNative = !bigData.IsINT64Owerflow();

    // m_llValue is only trustworthy for operands that fit, so the zero test
    // has to be guarded by the overflow test.
    if ((bLeftNative && 0 == m_llValue) ||
        (bRightNative && 0 == bigData.m_llValue))
    {
        return BigData(INT64(0));
    }

    if (bLeftNative && bRightNative && MulFits(m_llValue, bigData.m_llValue))
    {
        return BigData(m_llValue * bigData.m_llValue);
    }
    return BigData(Mul(m_strData, bigData.m_strData).c_str());
}

// Returns the quotient of *this / bigData, truncated toward zero.
// Division by zero trips the assertion in debug builds and yields 0 otherwise.
BigData BigData::operator/(const BigData& bigData)
{
    const bool bLeftNative = !IsINT64Owerflow();
    const bool bRightNative = !bigData.IsINT64Owerflow();

    if (bRightNative && 0 == bigData.m_llValue)
    {
        // A bare string literal is always true inside assert(), so the
        // condition is forced to false to make the check effective.
        assert(false && "除数不能为0！");
        return BigData(INT64(0));
    }

    // MIN / -1 does not fit in INT64, so that single case is left to the
    // string path together with every overflowed operand.
    if (bLeftNative && bRightNative &&
        !(kInt64Min == m_llValue && -1 == bigData.m_llValue))
    {
        return BigData(m_llValue / bigData.m_llValue);
    }
    return BigData(Div(m_strData, bigData.m_strData).c_str());
}

// Adds the magnitudes of two signed decimal strings.
// The result carries the sign of the `left` argument as passed in and always
// has one extra leading digit for the final carry (possibly '0').
std::string BigData::Add(std::string left, std::string right)
{
    // Remember the sign before the operands may be swapped below.
    const char cSymbol = left[0];

    int iLSize = static_cast<int>(left.size());
    int iRSize = static_cast<int>(right.size());
    if (iLSize < iRSize)
    {
        // Keep the longer operand on the left so one loop covers all digits.
        std::swap(left, right);
        std::swap(iLSize, iRSize);
    }

    std::string strRet(iLSize + 1, '0');
    strRet[0] = cSymbol;

    // e.g. left = "+9999999", right = "+1"  ->  "+10000000"
    char cStep = 0; // carry
    for (int iIdx = 1; iIdx < iLSize; ++iIdx)
    {
        char cRet = left[iLSize - iIdx] - '0' + cStep;
        if (iIdx < iRSize)
        {
            cRet += (right[iRSize - iIdx] - '0');
        }
        strRet[iLSize - iIdx + 1] = (cRet % 10 + '0');
        cStep = cRet / 10;
    }
    strRet[1] = (cStep + '0');
    return strRet;
}

// Subtracts the magnitude of `right` from the magnitude of `left`.
// The result carries the sign of `left`, flipped when |left| < |right|.
// Leading zeros may remain in the result.
std::string BigData::Sub(std::string left, std::string right)
{
    int iLSize = static_cast<int>(left.size());
    int iRSize = static_cast<int>(right.size());
    char cSymbol = left[0];

    // Compare the digits only: the sign characters must not take part,
    // otherwise '+' < '-' would decide the order for mixed-sign operands.
    if (iLSize < iRSize ||
        (iLSize == iRSize &&
         left.compare(1, std::string::npos, right, 1, std::string::npos) < 0))
    {
        std::swap(left, right);
        std::swap(iLSize, iRSize);
        cSymbol = ('+' == cSymbol) ? '-' : '+';
    }

    std::string strRet(iLSize, '0');
    strRet[0] = cSymbol;

    // Walk both numbers from the least significant digit, borrowing from the
    // next digit of `left` whenever a difference goes negative.
    for (int iIdx = 1; iIdx < iLSize; ++iIdx)
    {
        char cRet = left[iLSize - iIdx] - '0';
        if (iIdx < iRSize)
        {
            cRet -= (right[iRSize - iIdx] - '0');
        }
        if (cRet < 0)
        {
            left[iLSize - iIdx - 1] -= 1;
            cRet += 10;
        }
        strRet[iLSize - iIdx] = (cRet + '0');
    }
    return strRet;
}

// Multiplies two signed decimal strings: each digit of the shorter operand is
// multiplied with the longer one and accumulated at its shifted position.
// Leading zeros may remain in the result.
std::string BigData::Mul(std::string left, std::string right)
{
    int iLSize = static_cast<int>(left.size());
    int iRSize = static_cast<int>(right.size());
    if (iLSize > iRSize)
    {
        // Keep the shorter operand on the left (outer loop).
        std::swap(left, right);
        std::swap(iLSize, iRSize);
    }

    const char cSymbol = (left[0] == right[0]) ? '+' : '-';

    // One sign character plus (iLSize - 1) + (iRSize - 1) digits.
    std::string strRet(iLSize + iRSize - 1, '0');
    strRet[0] = cSymbol;

    const int iDataLen = static_cast<int>(strRet.size());
    int iOffset = 0; // shift of the current partial product
    for (int iIdx = 1; iIdx < iLSize; ++iIdx)
    {
        const char cLeft = left[iLSize - iIdx] - '0';
        if (0 == cLeft)
        {
            ++iOffset; // a zero digit contributes nothing
            continue;
        }

        char cStep = 0; // carry
        for (int iRIdx = 1; iRIdx < iRSize; ++iRIdx)
        {
            char cRet = cLeft * (right[iRSize - iRIdx] - '0');
            cRet += cStep;
            cRet += (strRet[iDataLen - iOffset - iRIdx] - '0');
            strRet[iDataLen - iOffset - iRIdx] = cRet % 10 + '0';
            cStep = cRet / 10;
        }
        strRet[iDataLen - iOffset - iRSize] += cStep;
        ++iOffset;
    }
    return strRet;
}

// Long division of two signed decimal strings; returns the quotient
// (truncated toward zero) with its sign, possibly with leading zeros.
// `right` must not be zero.
std::string BigData::Div(std::string left, std::string right)
{
    const char cSymbol = (left[0] == right[0]) ? '+' : '-';

    int iLSize = static_cast<int>(left.size());
    const int iRSize = static_cast<int>(right.size());
    if (iLSize < iRSize ||
        (iLSize == iRSize && strcmp(left.c_str() + 1, right.c_str() + 1) < 0))
    {
        return "0"; // |left| < |right|
    }
    if ("+1" == right || "-1" == right)
    {
        left[0] = cSymbol;
        return left;
    }

    std::string strRet(1, cSymbol);

    // left and right are local copies, so their digits may be edited in place.
    char *pLeft = &left[1];
    char *pRight = &right[1];
    int iDataLen = 1; // length of the current working window at pLeft
    iLSize -= 1;      // from here on: number of digits of the dividend

    // e.g. "2422222222" / "33"
    for (int iIdx = 0; iIdx < iLSize;)
    {
        if ('0' == *pLeft)
        {
            // Nothing is left over and the next digit is 0: quotient digit 0.
            strRet.append(1, '0');
            ++pLeft;
            ++iIdx;
            continue;
        }

        if (!IsLeftStrBig(pLeft, iDataLen, pRight, iRSize - 1))
        {
            // Window still smaller than the divisor: emit 0, bring down a digit.
            strRet.append(1, '0');
            ++iDataLen;
            if (iIdx + iDataLen > iLSize)
            {
                break;
            }
            continue;
        }

        // Repeated subtraction yields this quotient digit and leaves the
        // remainder in the window.
        strRet.append(1, SubLoop(pLeft, iDataLen, pRight, iRSize - 1));

        // Skip the zeros the subtraction left at the front of the window.
        while ('0' == *pLeft && iDataLen > 0)
        {
            ++pLeft;
            ++iIdx;
            --iDataLen;
        }

        ++iDataLen; // bring down the next digit
        if (iIdx + iDataLen > iLSize)
        {
            break;
        }
    }
    return strRet;
}

// True when the LSize digits at pLeft are numerically >= the RSize digits at
// pRight. Both digit runs are expected to have no leading zeros.
bool BigData::IsLeftStrBig(char *pLeft, size_t LSize, char *pRight, size_t RSize)
{
    assert(NULL != pLeft && NULL != pRight);
    if (LSize > RSize)
    {
        return true;
    }
    return LSize == RSize && strncmp(pLeft, pRight, LSize) >= 0;
}

// Subtracts the RSize digits at pRight from the LSize digits at pLeft, in
// place, for as long as the left value stays >= the right value.
// Returns the number of subtractions as a digit character ('0' + count).
char BigData::SubLoop(char *pLeft, size_t LSize, char *pRight, size_t RSize)
{
    assert(NULL != pLeft && NULL != pRight);

    char cRet = '0';
    while (IsLeftStrBig(pLeft, LSize, pRight, RSize))
    {
        // left -= right, digit by digit from the least significant end.
        int iLDataLen = static_cast<int>(LSize) - 1;
        int iRDataLen = static_cast<int>(RSize) - 1;
        while (iRDataLen >= 0 && iLDataLen >= 0)
        {
            if (pLeft[iLDataLen] < pRight[iRDataLen])
            {
                // Borrow from the next higher digit.
                pLeft[iLDataLen - 1] -= 1;
                pLeft[iLDataLen] += 10;
            }
            pLeft[iLDataLen] = pLeft[iLDataLen] - pRight[iRDataLen] + '0';
            --iLDataLen;
            --iRDataLen;
        }

        // Drop leading zeros so the next size comparison stays meaningful.
        // Only the local pointer/length move; the caller repeats this step.
        while ('0' == *pLeft && LSize > 0)
        {
            ++pLeft;
            --LSize;
        }
        ++cRet;
    }
    return cRet;
}

// Stores the decimal text of m_llValue (sign followed by digits) in m_strData.
void BigData::INT64ToString()
{
    // Work on the unsigned magnitude: negating INT64's minimum as a signed
    // value would overflow.
    unsigned long long ullMagnitude = static_cast<unsigned long long>(m_llValue);
    char cSymbol = '+';
    if (m_llValue < 0)
    {
        cSymbol = '-';
        ullMagnitude = 0ULL - ullMagnitude;
    }

    // Digits come out least significant first; do/while keeps the "0" of zero.
    std::string strReversed;
    do
    {
        strReversed.push_back(static_cast<char>('0' + ullMagnitude % 10));
        ullMagnitude /= 10;
    } while (0 != ullMagnitude);

    m_strData.assign(1, cSymbol);
    m_strData.append(strReversed.rbegin(), strReversed.rend());
}

// True when the value held in m_strData lies outside the INT64 range.
bool BigData::IsINT64Owerflow()const
{
    // Compare against the limit that has the same sign. With equal lengths
    // and no leading zeros, lexicographic order equals numeric order.
    const std::string strLimit =
        ('+' == m_strData[0]) ? "+9223372036854775807" : "-9223372036854775808";

    if (m_strData.size() > strLimit.size())
    {
        return true;
    }
    return m_strData.size() == strLimit.size() && m_strData > strLimit;
}

// Stream output in the usual `stream << value` form (declared as a friend in
// the header). Positive numbers are written without the leading '+'.
std::ostream& operator<<(std::ostream& _cout, const BigData& bigData)
{
    return WriteDecimal(_cout, bigData.m_strData);
}

// Member form of stream output, invoked as `value << stream`; writes the same
// text as the friend operator above.
std::ostream& BigData::operator<<(std::ostream& _cout)
{
    return WriteDecimal(_cout, m_strData);
}

0 0