cuda julia & ripple

来源:互联网 发布:sshd linux 编辑:程序博客网 时间:2024/03/28 21:18
// chaterConsole.cpp : defines the entry point for the console application.
//
// Renders a Julia-set image and a radial "ripple" pattern on the GPU
// (one kernel each) and displays both with OpenCV.  Adapted from the
// NVIDIA "CUDA by Example" samples (chapters 4 and 5).
//#include "stdafx.h"
/*
 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual property and
 * proprietary rights in and to this software and related documentation.
 * Any use, reproduction, disclosure, or distribution of this software
 * and related documentation without an express license agreement from
 * NVIDIA Corporation is strictly prohibited.
 *
 * Please refer to the applicable NVIDIA end user license agreement (EULA)
 * associated with this source code for terms and conditions that govern
 * your use of this NVIDIA software.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>                     // std::cout (was missing; OpenCV happened to pull it in)
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <opencv2/opencv.hpp>           // forward slashes: portable include paths
#include <opencv2/gpu/gpu.hpp>

#define DIM 1024   // image width and height, in pixels

// Minimal complex-number type usable from device code.
// NOTE: the original declared the struct itself __device__; that qualifier
// belongs on functions/variables, not type declarations, so it is dropped.
struct cuComplex {
    float r;   // real part
    float i;   // imaginary part

    __device__ cuComplex(float a, float b) : r(a), i(b) {}

    // Squared magnitude |z|^2 — avoids a sqrt in the escape test.
    __device__ float magnitude2(void) { return r * r + i * i; }

    __device__ cuComplex operator*(const cuComplex& a) {
        return cuComplex(r * a.r - i * a.i, i * a.r + r * a.i);
    }
    __device__ cuComplex operator+(const cuComplex& a) {
        return cuComplex(r + a.r, i + a.i);
    }
};

// Returns 1 if pixel (x, y) is in the Julia set (the iterate of
// z -> z^2 + c stays bounded for 200 iterations), 0 otherwise.
__device__ int julia(int x, int y) {
    const float scale = 1.5f;
    // Map the pixel to the complex plane, centered on the image.
    float jx = scale * (float)(DIM / 2 - x) / (DIM / 2);
    float jy = scale * (float)(DIM / 2 - y) / (DIM / 2);

    cuComplex c(-0.8f, 0.156f);   // the Julia constant (float literals, not double)
    cuComplex a(jx, jy);

    for (int i = 0; i < 200; i++) {
        a = a * a + c;
        if (a.magnitude2() > 1000.0f)
            return 0;             // escaped -> not in the set
    }
    return 1;
}

// Julia renderer.  Launch layout: one 1-thread block per pixel,
// grid = (DIM, DIM).  ptr holds 3 ints per pixel (OpenCV B,G,R order);
// set pixels are painted blue (255, 0, 0).
__global__ void drawJuliaKernel(int *ptr) {
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;

    int juliaValue = julia(x, y);           // 0 or 1 (int — was stored in a float)
    ptr[offset * 3 + 0] = 255 * juliaValue; // blue channel
    ptr[offset * 3 + 1] = 0;
    ptr[offset * 3 + 2] = 0;
}

// Ripple renderer.  Launch layout: 2D grid of 16x16-thread blocks covering
// DIM x DIM pixels.  Writes the same grey value to all three channels.
__global__ void drawRippleKernel(int *ptr) {
    // Map from threadIdx/blockIdx to pixel position.
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // Distance from the image center; damped cosine of it gives the ripple.
    float fx = x - DIM / 2;
    float fy = y - DIM / 2;
    float d = sqrtf(fx * fx + fy * fy);
    // cosf keeps the math in single precision (the original called the
    // double-precision cos, forcing a silent double round-trip per pixel).
    unsigned char grey = (unsigned char)(128.0f +
        127.0f * cosf(d / 10.0f) / (d / 10.0f + 1.0f));

    ptr[offset * 3 + 0] = grey;
    ptr[offset * 3 + 1] = grey;
    ptr[offset * 3 + 2] = grey;
}

// Renders the Julia image into c and the ripple image into a on the GPU.
//   c, a : host buffers of `size` ints each (size = DIM * DIM * 3)
// Returns cudaSuccess on success.  On any failure a diagnostic has been
// printed to stderr and all device memory is released before returning.
cudaError_t drawWithCuda(int *c, int *a, unsigned int size) {
    int *dev_c = 0;
    int *dev_a = 0;
    cudaError_t cudaStatus;
    // Launch configurations are declared before the first `goto Error` —
    // the original declared them after it, which jumps over a dim3
    // initialization and is ill-formed C++.
    dim3 grid(DIM, DIM);               // one 1-thread block per pixel (Julia)
    dim3 blocks(DIM / 16, DIM / 16);   // 2D tiling for the ripple
    dim3 threads(16, 16);

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate the two device output buffers.
    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    drawJuliaKernel<<<grid, 1>>>(dev_c);
    // Check each launch separately: the original checked only after both
    // kernels, so a ripple launch error was reported as a Julia failure.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "drawJuliaKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    drawRippleKernel<<<blocks, threads>>>(dev_a);
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "drawRippleKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for both kernels to finish, and returns
    // any errors encountered during execution.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching drawJuliaKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy both rendered images from GPU buffers back to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }
    cudaStatus = cudaMemcpy(a, dev_a, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    // cudaFree(NULL) is a no-op, so this is safe on every exit path.
    cudaFree(dev_a);
    cudaFree(dev_c);
    return cudaStatus;
}

int main() {
    const int pixSize = DIM * DIM;          // number of pixels per image
    const int arraySize = DIM * DIM * 3;    // ints per image: 3 channels/pixel

    int *c = (int *)malloc(arraySize * sizeof(int));
    if (c) {
        std::cout << "Memory Allocated at" << c << std::endl;
    } else {
        std::cout << "Not Enough Memory!" << c << std::endl;
        return 1;   // original fell through and later dereferenced NULL
    }

    int *a = (int *)malloc(arraySize * sizeof(int));
    if (a) {        // BUG FIX: the original re-tested `c` here
        std::cout << "Memory Allocated at" << a << std::endl;
    } else {
        std::cout << "Not Enough Memory!" << a << std::endl;
        free(c);
        return 1;
    }

    // Render both images on the GPU.
    cudaError_t cudaStatus = drawWithCuda(c, a, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "drawWithCuda failed!");
        free(a);
        free(c);
        return 1;
    }

    cv::Mat colorImg  = cv::Mat::zeros(DIM, DIM, CV_8UC3);
    cv::Mat rippleImg = cv::Mat::zeros(DIM, DIM, CV_8UC3);
    //colorImg = cv::imread("kinect 1.bmp", CV_LOAD_IMAGE_COLOR);

    // Pack the int-per-channel GPU results into 8-bit 3-channel Mats.
    uchar* p_mat = colorImg.data;
    uchar* p_ripple = rippleImg.data;
    for (int i = 0; i < pixSize; i++) {     // int index: pixSize is int (avoids signed/unsigned mix)
        p_mat[i * 3 + 0] = (uchar)c[i * 3 + 0];
        p_mat[i * 3 + 1] = (uchar)c[i * 3 + 1];
        p_mat[i * 3 + 2] = (uchar)c[i * 3 + 2];
        p_ripple[i * 3 + 0] = (uchar)a[i * 3 + 0];
        p_ripple[i * 3 + 1] = (uchar)a[i * 3 + 1];
        p_ripple[i * 3 + 2] = (uchar)a[i * 3 + 2];
    }

    cv::imshow("color image", colorImg);
    cv::imshow("ripple image", rippleImg);
    cv::waitKey(3000);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        free(a);
        free(c);
        return 1;
    }

    free(a);
    free(c);
    return 0;
}



0 0