文章目录
- 一、简介
- 二、实现代码
- 三、实现效果
- 参考资料
一、简介
这里使用CUDA实现一种计算点集与点集之间距离的方法,其思路很简单:对一个点集中的每个点,计算它到另一个点集中所有点的最小距离,并将结果保存到一个数组中。通过这种方式可以快速地计算出点集与点集之间的距离,当然也可以将这种方法应用到计算点云与点云之间的距离。
二、实现代码
这里我直接引入了点云库PCL,方便后续的使用。
CMakeLists.txt
cmake_minimum_required(VERSION 3.19 FATAL_ERROR)
project(CudaTest LANGUAGES CUDA CXX)

# Language standards. CMAKE_CUDA_STANDARD (not a bare CUDA_STANDARD variable)
# is what initialises the CUDA_STANDARD property of targets created below.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)

# Symbol visibility and position-independent code.
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN TRUE)
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)

# CUDA code generation settings.
set(CMAKE_CUDA_ARCHITECTURES "60;61;62;70;72;75;80;86;87")
set(CMAKE_CUDA_SEPARABLE_COMPILATION TRUE)
set(CMAKE_CUDA_RESOLVE_DEVICE_SYMBOLS TRUE)
# NOTE(review): the two variables below look like FindCUDA-era options with a
# CMAKE_ prefix added; confirm that anything actually reads them.
set(CMAKE_CUDA_HOST_COMPILATION_CPP FALSE)
set(CMAKE_CUDA_PROPAGATE_HOST_FLAGS FALSE)

# CUDA
find_package(CUDAToolkit REQUIRED)

# PCL
find_package(PCL 1.9 REQUIRED)
include_directories(${PCL_INCLUDE_DIRS})
# link_directories() only affects targets created after it is called, so it
# has to come before add_executable().
link_directories(${PCL_LIBRARY_DIRS})
add_definitions(${PCL_DEFINITIONS})

# Collect the sources that sit next to this CMakeLists.txt.
file(GLOB header_list *.h *.cuh)
file(GLOB source_list *.hpp *.cpp *.cu)

add_executable(${PROJECT_NAME} ${header_list})
target_sources(${PROJECT_NAME} PRIVATE ${source_list})

# Compile definitions that depend on the PCL version that was found.
if(PCL_VERSION VERSION_LESS 1.7)
  set_property(TARGET ${PROJECT_NAME} APPEND PROPERTY COMPILE_DEFINITIONS PCL_VER_1_6_OR_OLDER)
endif()

if(PCL_VERSION VERSION_GREATER 1.7)
  set_property(TARGET ${PROJECT_NAME} APPEND PROPERTY COMPILE_DEFINITIONS LP_PCL_PATCH_ENABLED)
endif()

target_link_libraries(${PROJECT_NAME} PRIVATE ${PCL_LIBRARIES})
target_link_libraries(${PROJECT_NAME} PRIVATE CUDA::cusparse CUDA::cusolver)
ComputeDistances.cuh
#ifndef COMPUTE_DISTANCES_GPU_CUH
#define COMPUTE_DISTANCES_GPU_CUH

#include <vector>
#include <math.h>
#include <Eigen/Dense>

#include <cuda_runtime.h>

// Host-side function declarations.

/**
 * Computes nearest-neighbour distances between two point sets on the GPU.
 *
 * @param points1 First point set.
 * @param points2 Second point set.
 * @param dis1    Output, resized to points1.size(); dis1[i] receives the
 *                smallest Euclidean distance from points1[i] to any point of
 *                points2.
 * @param dis2    Output, resized to points2.size(); dis2[j] receives the
 *                smallest Euclidean distance from points2[j] to any point of
 *                points1.
 */
void computeDisByGpu(std::vector<Eigen::Vector3f>& points1, std::vector<Eigen::Vector3f>& points2, std::vector<float>& dis1, std::vector<float>& dis2);

/**
 * Returns the device count reported by cudaGetDeviceCount().
 */
int getCudaDeviceCount();

#endif // COMPUTE_DISTANCES_GPU_CUH
CalculateDistances.cu
#include "ComputeDistances.cuh"

#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <algorithm>
#include <limits>
#include <stdexcept>
#include <string>

// Usable from both CPU (host) and GPU (device) code
/**
 * Minimal 3-D float vector whose members are callable from host and device.
 */
struct Point3f
{
    float x, y, z;

    // Constructors
    __host__ __device__ Point3f() : x(0), y(0), z(0) {}
    __host__ __device__ Point3f(float px, float py, float pz) : x(px), y(py), z(pz) {}

    // Vector addition
    __host__ __device__ Point3f operator+(const Point3f& p) const
    {
        return Point3f(x + p.x, y + p.y, z + p.z);
    }

    // Vector subtraction
    __host__ __device__ Point3f operator-(const Point3f& p) const
    {
        return Point3f(x - p.x, y - p.y, z - p.z);
    }

    // Multiplication by a scalar
    __host__ __device__ Point3f operator*(float s) const
    {
        return Point3f(x * s, y * s, z * s);
    }

    // Cross product
    __host__ __device__ Point3f Cross(const Point3f& p) const
    {
        return Point3f(y * p.z - z * p.y, z * p.x - x * p.z, x * p.y - y * p.x);
    }

    // Dot product
    __host__ __device__ float Dot(const Point3f& p) const
    {
        return (x * p.x + y * p.y + z * p.z);
    }

    // Euclidean length
    __host__ __device__ float Module() const
    {
        return sqrtf(x * x + y * y + z * z);
    }
};

// computeDisByGpu() copies the raw bytes of Eigen::Vector3f arrays into
// Point3f device buffers, so both types must be three tightly packed floats.
static_assert(sizeof(Point3f) == 3 * sizeof(float), "Point3f must be three packed floats");
static_assert(sizeof(Eigen::Vector3f) == sizeof(Point3f), "Eigen::Vector3f and Point3f must have the same size");

/**
 * Returns the smallest Euclidean distance from `query` to any of the `count`
 * points in `candidates`, or INFINITY when `count` is not positive.
 */
static __device__ float minDistanceToSet(const Point3f& query, const Point3f* candidates, int count)
{
    float best = INFINITY;
    for (int i = 0; i < count; ++i)
    {
        const float distance = (query - candidates[i]).Module();
        if (distance < best)
        {
            best = distance;
        }
    }
    return best;
}

/**
 * Brute-force nearest-neighbour distances in both directions.
 *
 * Launch layout: 1-D grid of 1-D blocks; blockIdx.x * blockDim.x + threadIdx.x
 * is the point index. The grid must supply at least max(num1, num2) threads.
 * No shared memory is used.
 *
 * Thread idx writes dis1[idx] (nearest point of points2 to points1[idx]) when
 * idx < num1, and dis2[idx] (nearest point of points1 to points2[idx]) when
 * idx < num2.
 *
 * @param points1 Device array of num1 points.
 * @param points2 Device array of num2 points.
 * @param dis1    Device array of num1 floats (output).
 * @param dis2    Device array of num2 floats (output).
 * @param num1    Number of points in points1; expected to be non-negative.
 * @param num2    Number of points in points2; expected to be non-negative.
 */
__global__ void calculateDistances(const Point3f* points1, const Point3f* points2, float* dis1, float* dis2, int num1, int num2)
{
    // Unsigned index: the padded thread count may exceed INT_MAX by up to one
    // block, which would wrap to a negative value in a signed int.
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (num1 > 0 && idx < static_cast<unsigned int>(num1))
    {
        dis1[idx] = minDistanceToSet(points1[idx], points2, num2);
    }

    if (num2 > 0 && idx < static_cast<unsigned int>(num2))
    {
        dis2[idx] = minDistanceToSet(points2[idx], points1, num1);
    }
}

namespace
{

// Throws std::runtime_error naming `what` when a CUDA runtime call failed.
void throwIfCudaFailed(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        throw std::runtime_error(std::string(what) + " failed: " + cudaGetErrorString(status));
    }
}

// Owns one cudaMalloc allocation and releases it when leaving scope, so that
// device memory is not leaked when a later CUDA call fails.
template <typename T>
class DeviceBuffer
{
public:
    explicit DeviceBuffer(size_t count)
    {
        throwIfCudaFailed(cudaMalloc(reinterpret_cast<void**>(&ptr_), count * sizeof(T)), "cudaMalloc");
    }

    ~DeviceBuffer() { cudaFree(ptr_); }

    DeviceBuffer(const DeviceBuffer&) = delete;
    DeviceBuffer& operator=(const DeviceBuffer&) = delete;

    T* get() const { return ptr_; }

private:
    T* ptr_ = nullptr;
};

} // namespace

/**
 * Computes nearest-neighbour distances between two point sets on the GPU.
 *
 * @param points1 First point set.
 * @param points2 Second point set.
 * @param dis1    Output, resized to points1.size(); dis1[i] is the smallest
 *                Euclidean distance from points1[i] to any point of points2.
 * @param dis2    Output, resized to points2.size(); dis2[j] is the smallest
 *                Euclidean distance from points2[j] to any point of points1.
 *
 * If either set is empty, every distance measured against it is INFINITY and
 * no GPU work is issued.
 *
 * @throws std::length_error  if a point set has more than INT_MAX points
 *                            (the kernel takes the counts as int).
 * @throws std::runtime_error if any CUDA runtime call or the kernel fails.
 */
void computeDisByGpu(std::vector<Eigen::Vector3f>& points1, std::vector<Eigen::Vector3f>& points2, std::vector<float>& dis1, std::vector<float>& dis2)
{
    const size_t count1 = points1.size();
    const size_t count2 = points2.size();

    const size_t maxKernelCount = static_cast<size_t>(std::numeric_limits<int>::max());
    if (count1 > maxKernelCount || count2 > maxKernelCount)
    {
        throw std::length_error("computeDisByGpu: point set too large for int indexing");
    }

    // With an empty set there is nothing to measure against: the kernel would
    // leave every distance at INFINITY. Produce that directly and skip the
    // zero-byte allocations and the zero-block launch.
    if (count1 == 0 || count2 == 0)
    {
        dis1.assign(count1, INFINITY);
        dis2.assign(count2, INFINITY);
        return;
    }

    // Allocate GPU device memory
    DeviceBuffer<Point3f> d_points1(count1);
    DeviceBuffer<Point3f> d_points2(count2);
    DeviceBuffer<float> d_dis1(count1);
    DeviceBuffer<float> d_dis2(count2);

    // Copy the input points to the device
    throwIfCudaFailed(cudaMemcpy(d_points1.get(), points1.data(), count1 * sizeof(Point3f), cudaMemcpyHostToDevice),
                      "cudaMemcpy(points1, host to device)");
    throwIfCudaFailed(cudaMemcpy(d_points2.get(), points2.data(), count2 * sizeof(Point3f), cudaMemcpyHostToDevice),
                      "cudaMemcpy(points2, host to device)");

    // One thread per index of the larger set
    const unsigned int threadsPerBlock = 256;
    const size_t maxCount = std::max(count1, count2);
    const unsigned int blockCount = static_cast<unsigned int>((maxCount + threadsPerBlock - 1) / threadsPerBlock);

    calculateDistances<<<dim3(blockCount), dim3(threadsPerBlock)>>>(
        d_points1.get(), d_points2.get(), d_dis1.get(), d_dis2.get(),
        static_cast<int>(count1), static_cast<int>(count2));
    throwIfCudaFailed(cudaGetLastError(), "calculateDistances launch");
    throwIfCudaFailed(cudaDeviceSynchronize(), "calculateDistances execution");

    // Copy the results back to the host
    dis1.resize(count1);
    dis2.resize(count2);
    throwIfCudaFailed(cudaMemcpy(dis1.data(), d_dis1.get(), count1 * sizeof(float), cudaMemcpyDeviceToHost),
                      "cudaMemcpy(dis1, device to host)");
    throwIfCudaFailed(cudaMemcpy(dis2.data(), d_dis2.get(), count2 * sizeof(float), cudaMemcpyDeviceToHost),
                      "cudaMemcpy(dis2, device to host)");
}

/**
 * Returns the device count reported by cudaGetDeviceCount(), or 0 when the
 * query itself fails, so callers never see an uninitialised value.
 */
int getCudaDeviceCount()
{
    int count = 0;
    if (cudaGetDeviceCount(&count) != cudaSuccess)
    {
        return 0;
    }
    return count;
}
main.cpp
#include <iostream>
#include <vector>
#include <Eigen/Core>
#include "ComputeDistances.cuh"int main()
{int num = getCudaDeviceCount();std::cout << "GPU数量:" << num << std::endl;if (!num) return -1;// 定义和初始化点云数据std::vector<Eigen::Vector3f> points1 = {Eigen::Vector3f(0.0f, 0.0f, 0.0f),Eigen::Vector3f(1.0f, 1.0f, 1.0f),Eigen::Vector3f(2.0f, 2.0f, 2.0f)};std::vector<Eigen::Vector3f> points2 = {Eigen::Vector3f(3.0f, 3.0f, 3.0f),Eigen::Vector3f(4.0f, 4.0f, 4.0f),Eigen::Vector3f(5.0f, 5.0f, 5.0f)};// 用于存储结果的数组std::vector<float> dis1, dis2;// 调用主机端函数,执行距离计算computeDisByGpu(points1, points2, dis1, dis2);// 输出结果std::cout << "Minimum distances from points1 to points2:" << std::endl;for (float d : dis1) {std::cout << d << std::endl;}std::cout << "Minimum distances from points2 to points1:" << std::endl;for (float d : dis2) {std::cout << d << std::endl;}return 0;
}
三、实现效果
参考资料
[1]https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html