浮点数量化2^n定点数

来源:互联网 发布:软件编程招聘 编辑:程序博客网 时间:2024/04/29 17:00

Target:

将浮点数量化到最近的2^n定点数

Code:

E.g. 量化为 3 bit,最小量化步长为 0.125(2^-3),可表示区间为 [-1, 0.5]

#include <iostream>#include <cmath>using namespace std;template <typename Dtype>int getArrayLen(Dtype& array){return (sizeof(array) / sizeof(array[0]));}template <typename Dtype>Dtype quantization(const Dtype data, const int N, const Dtype abs_min_2_pow){if(data == Dtype(0))return Dtype(0);const int exponent = (1<<(N-1)) - 2;const Dtype maxValue = abs_min_2_pow * Dtype(1<<exponent);const Dtype minValue = -1 * maxValue * Dtype(2);if (data <= minValue) {return minValue;} else if (data >= maxValue){return maxValue;} else {Dtype sign_index = (data > 0) ? Dtype(1) : Dtype(-1);Dtype fabs_data = fabs(data);const int mul = int(floor(log(fabs_data / abs_min_2_pow) / log(2.0)));Dtype mul_2 = 1<<mul;if (mul < 0){return (fabs_data < abs_min_2_pow / 2) ? Dtype(0) : sign_index * abs_min_2_pow;} else {Dtype upperbound = abs_min_2_pow * mul_2 * 2;Dtype lowerbound = abs_min_2_pow * mul_2;Dtype diff = upperbound - lowerbound;diff /= 2;return (fabs_data - lowerbound >= diff) ? sign_index * upperbound : sign_index * lowerbound;}}}int main(int argc, char *argv[]){float a[] = {0, -0.00001, -0.0624999, -0.0625, -0.0626, -0.124, -0.125, -0.126, -0.1874, -0.1875, -0.1876, -0.249, -0.25, -0.251, -0.499, -0.5, -0.501, -0.749, -0.75, -0.751,1.23, 2.4, -1.23, -2.4};for(int i = 0; i < getArrayLen(a); i++){cout << a[i] << "\t" << quantization(a[i], 3, (float)0.125) << endl;}return 0;}
Output:

0	0
-1e-05	0
-0.0624999	0
-0.0625	-0.125
-0.0626	-0.125
-0.124	-0.125
-0.125	-0.125
-0.126	-0.125
-0.1874	-0.125
-0.1875	-0.25
-0.1876	-0.25
-0.249	-0.25
-0.25	-0.25
-0.251	-0.25
-0.499	-0.5
-0.5	-0.5
-0.501	-0.5
-0.749	-0.5
-0.75	-1
-0.751	-1
1.23	0.5
2.4	0.5
-1.23	-1
-2.4	-1




0 0
原创粉丝点击