cuda小白
原始API链接 NPP
GPU架构近些年也有不少的变化,具体的可以参考别的博主的介绍,都比较详细。还有一些cuda中的专有名词的含义,可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》
常见的NppStatus,可以看这里。
如有问题,请指出,谢谢
本节内容主要涉及使用NPP进行图像色域变换等操作,功能类似的接口就不细说,仅介绍一些常见的。由于对Debayer、不同yuv之间的转换(Color Sampling Format Conversion)、Gamma校正、Complement Color Key(一直不清楚这是什么)以及Color Processing(调色相关)接触不多,后面就不介绍了,按需查阅。
Color Model Conversion
当前模块主要涉及到的RGB,BGR,YUV,YUV420,YUV422,NV12,YCbCr,YCbCr422和YCbCr420之间的相互转换,以及批处理(分为同样ROI和不同ROI两个接口)的接口。其他还有一些未接触过的XYZ,LUV,Lab,YCC,CMYK_JPEG,HLS,HSV等与RGB或者BGR之间的转换。
本文就拿比较简单的转换,以RGB和YUV之间的相互转换为例子。
// The RGB and YUV variants shown here are all three-channel. Packed-to-packed conversions use the
// C3R suffix, planar-to-planar conversions use P3R, and mixed layouts use P3C3R or C3P3R.
// =========== How RGB2YUV is computed ===========
// Npp32f nY = 0.299F * R + 0.587F * G + 0.114F * B;
// Npp32f nU = (0.492F * ((Npp32f)nB - nY)) + 128.0F;
// Npp32f nV = (0.877F * ((Npp32f)nR - nY)) + 128.0F;
// if (nV > 255.0F)
// nV = 255.0F;
NppStatus nppiRGBToYUV_8u_C3R(const Npp8u *pSrc,int nSrcStep,Npp8u *pDst,int nDstStep,NppiSize oSizeROI);
// =========== How YUV2RGB is computed ===========
// Npp32f nY = (Npp32f)Y;
// Npp32f nU = (Npp32f)U - 128.0F;
// Npp32f nV = (Npp32f)V - 128.0F;
// Npp32f nR = nY + 1.140F * nV;
// if (nR < 0.0F) nR = 0.0F;
// if (nR > 255.0F) nR = 255.0F;
// Npp32f nG = nY - 0.394F * nU - 0.581F * nV;
// if (nG < 0.0F) nG = 0.0F;
// if (nG > 255.0F) nG = 255.0F;
// Npp32f nB = nY + 2.032F * nU;
// if (nB < 0.0F) nB = 0.0F;
// if (nB > 255.0F) nB = 255.0F;
NppStatus nppiYUVToRGB_8u_C3R(const Npp8u *pSrc,int nSrcStep,Npp8u *pDst,int nDstStep,NppiSize oSizeROI);
code
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <cuda_runtime.h>
#include <npp.h>
#include <opencv2/opencv.hpp>

// Frees a device pointer if it is set and resets it to nullptr, so a repeated call is harmless.
#define CUDA_FREE(ptr) do { if ((ptr) != nullptr) { cudaFree(ptr); (ptr) = nullptr; } } while (0)

// Returns true when `err` is cudaSuccess; otherwise prints `what` and the CUDA error string.
static bool cudaOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return true;
    }
    std::cerr << "[GPU] ERROR " << what << " failed: " << cudaGetErrorString(err) << std::endl;
    return false;
}

// Owns a single device allocation and releases it in the destructor, so every
// early return from main() frees the device memory.
struct DeviceBuffer {
    Npp8u *ptr = nullptr;

    DeviceBuffer() = default;
    DeviceBuffer(const DeviceBuffer &) = delete;
    DeviceBuffer &operator=(const DeviceBuffer &) = delete;
    ~DeviceBuffer() { CUDA_FREE(ptr); }

    // Allocates `bytes` bytes on the device; returns false (after logging) on failure.
    bool allocate(size_t bytes) {
        return cudaOk(cudaMalloc((void **)&ptr, bytes), "cudaMalloc");
    }
};

// Converts ../dog.png RGB -> YUV with NPP, dumps the packed YUV bytes to ../yuv.bin,
// then converts a half-size ROI back to RGB and writes ../out_rgb.jpg.
// Returns EXIT_SUCCESS on success and EXIT_FAILURE on any error.
int main() {
    const std::string directory = "../";

    // =============== load image ===============
    const std::string in_file = directory + "dog.png";
    cv::Mat image_dog = cv::imread(in_file);
    if (image_dog.empty()) {
        std::cerr << "[CPU] ERROR failed to read " << in_file << std::endl;
        return EXIT_FAILURE;
    }
    // cv::imread delivers BGR, while nppiRGBToYUV expects RGB channel order.
    cv::cvtColor(image_dog, image_dog, cv::COLOR_BGR2RGB);

    const int image_width = image_dog.cols;
    const int image_height = image_dog.rows;
    const int step = image_width * 3;  // bytes per row of a packed 3-channel 8-bit image
    const size_t image_size = static_cast<size_t>(step) * image_height;

    // =============== device memory ===============
    DeviceBuffer in_buf, yuv_buf, out_buf;
    if (!in_buf.allocate(image_size) || !yuv_buf.allocate(image_size) || !out_buf.allocate(image_size)) {
        return EXIT_FAILURE;
    }
    if (!cudaOk(cudaMemcpy(in_buf.ptr, image_dog.data, image_size, cudaMemcpyHostToDevice),
                "cudaMemcpy(host -> device)")) {
        return EXIT_FAILURE;
    }
    // Only the half-size ROI of out_buf is written below; clear the rest so the
    // saved image does not contain uninitialized device memory.
    if (!cudaOk(cudaMemset(out_buf.ptr, 0, image_size), "cudaMemset")) {
        return EXIT_FAILURE;
    }

    NppiSize roi1, roi2;
    roi1.width = image_width;
    roi1.height = image_height;
    roi2.width = image_width / 2;
    roi2.height = image_height / 2;

    // =============== nppiRGBToYUV_8u_C3R ===============
    NppStatus status = nppiRGBToYUV_8u_C3R(in_buf.ptr, step, yuv_buf.ptr, step, roi1);
    if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiRGBToYUV_8u_C3R failed, status = " << status << std::endl;
        return EXIT_FAILURE;
    }

    std::vector<Npp8u> host_yuv(image_size);
    if (!cudaOk(cudaMemcpy(host_yuv.data(), yuv_buf.ptr, image_size, cudaMemcpyDeviceToHost),
                "cudaMemcpy(yuv device -> host)")) {
        return EXIT_FAILURE;
    }

    const std::string out_yuv_file = directory + "yuv.bin";
    FILE *fp = fopen(out_yuv_file.c_str(), "wb");
    if (fp == nullptr) {
        std::cerr << "[CPU] ERROR failed to open " << out_yuv_file << std::endl;
        return EXIT_FAILURE;
    }
    const size_t written = fwrite(host_yuv.data(), 1, image_size, fp);
    fclose(fp);
    if (written != image_size) {
        std::cerr << "[CPU] ERROR short write to " << out_yuv_file << std::endl;
        return EXIT_FAILURE;
    }

    // =============== nppiYUVToRGB_8u_C3R ===============
    // Only the top-left quarter (roi2) is converted back; the rest stays black.
    cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
    status = nppiYUVToRGB_8u_C3R(yuv_buf.ptr, step, out_buf.ptr, step, roi2);
    if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiYUVToRGB_8u_C3R failed, status = " << status << std::endl;
        return EXIT_FAILURE;
    }
    if (!cudaOk(cudaMemcpy(out_image.data, out_buf.ptr, image_size, cudaMemcpyDeviceToHost),
                "cudaMemcpy(rgb device -> host)")) {
        return EXIT_FAILURE;
    }
    // cv::imwrite expects BGR channel order.
    cv::cvtColor(out_image, out_image, cv::COLOR_RGB2BGR);
    if (!cv::imwrite(directory + "out_rgb.jpg", out_image)) {
        std::cerr << "[CPU] ERROR failed to write " << directory << "out_rgb.jpg" << std::endl;
        return EXIT_FAILURE;
    }

    // Device buffers are released by the DeviceBuffer destructors.
    return EXIT_SUCCESS;
}
make
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# provides imported targets, so no hard-coded /usr/local/cuda path or glob is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# nppicc holds the NPP color-conversion functions; nppc is the NPP core library.
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppicc
)
result
yuv的结果,使用7yuv工具查看
使用转换之后的yuv,再转回RGB,在限定ROI之后的结果:
注意点:
- 可以使用7yuv进行yuv图像的查看,7yuv,下载对应的版本即可,可以体验。
- 还有一个接口是基于JPEG的,主要介绍的是JPEG的Color Conversion,由于平时接触较少,这里暂时不介绍了,按需参考《JPEG Color Conversion》。
ColorToGray Conversion
当前模块也主要分为三个大方向:RGBToGray,ColorToGray和GradientColorToGray。三个大方向都只选取一个比较常见的进行介绍:
// Fixed weights: nGray = 0.299F * R + 0.587F * G + 0.114F * B;
NppStatus nppiRGBToGray_8u_C3C1R(const Npp8u *pSrc,int nSrcStep,Npp8u *pDst,int nDstStep,NppiSize oSizeROI);
// Caller-supplied weights: nGray = aCoeffs[0] * R + aCoeffs[1] * G + aCoeffs[2] * B;
NppStatus nppiColorToGray_8u_C3C1R(const Npp8u *pSrc,int nSrcStep,Npp8u *pDst,int nDstStep,NppiSize oSizeROI,const Npp32f aCoeffs[3]);
// eNorm is an NppiNorm: max (inf) / sum (L1) / square root of sum of squares (L2)
NppStatus nppiGradientColorToGray_8u_C3C1R(const Npp8u *pSrc,int nSrcStep,Npp8u *pDst,int nDstStep,NppiSize oSizeROI,NppiNorm eNorm);
code
#include <cstdlib>
#include <iostream>
#include <string>

#include <cuda_runtime.h>
#include <npp.h>
#include <opencv2/opencv.hpp>

// Frees a device pointer if it is set and resets it to nullptr, so a repeated call is harmless.
#define CUDA_FREE(ptr) do { if ((ptr) != nullptr) { cudaFree(ptr); (ptr) = nullptr; } } while (0)

// Returns true when `err` is cudaSuccess; otherwise prints `what` and the CUDA error string.
static bool cudaOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return true;
    }
    std::cerr << "[GPU] ERROR " << what << " failed: " << cudaGetErrorString(err) << std::endl;
    return false;
}

// Owns a single device allocation and releases it in the destructor, so every
// early return from main() frees the device memory.
struct DeviceBuffer {
    Npp8u *ptr = nullptr;

    DeviceBuffer() = default;
    DeviceBuffer(const DeviceBuffer &) = delete;
    DeviceBuffer &operator=(const DeviceBuffer &) = delete;
    ~DeviceBuffer() { CUDA_FREE(ptr); }

    // Allocates `bytes` bytes on the device; returns false (after logging) on failure.
    bool allocate(size_t bytes) {
        return cudaOk(cudaMalloc((void **)&ptr, bytes), "cudaMalloc");
    }
};

// Copies a single-channel device image of `bytes` bytes into `gray` and writes it to `path`.
static bool saveGray(const Npp8u *device_src, size_t bytes, cv::Mat &gray, const std::string &path) {
    if (!cudaOk(cudaMemcpy(gray.data, device_src, bytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy(device -> host)")) {
        return false;
    }
    if (!cv::imwrite(path, gray)) {
        std::cerr << "[CPU] ERROR failed to write " << path << std::endl;
        return false;
    }
    return true;
}

// Converts ../dog.png to gray with three NPP functions (RGBToGray, ColorToGray,
// GradientColorToGray) and writes one JPEG per result.
// Returns EXIT_SUCCESS on success and EXIT_FAILURE on any error.
int main() {
    const std::string directory = "../";

    // =============== load image ===============
    const std::string in_file = directory + "dog.png";
    cv::Mat image_dog = cv::imread(in_file);
    if (image_dog.empty()) {
        std::cerr << "[CPU] ERROR failed to read " << in_file << std::endl;
        return EXIT_FAILURE;
    }
    // cv::imread delivers BGR; swap to RGB. cv::COLOR_BGR2RGB performs the same
    // channel swap as the legacy CV_RGB2BGR macro, which OpenCV 4 headers no longer define.
    cv::cvtColor(image_dog, image_dog, cv::COLOR_BGR2RGB);

    const int image_width = image_dog.cols;
    const int image_height = image_dog.rows;
    const int src_step = image_width * 3;  // bytes per row of the packed 3-channel input
    const int dst_step = image_width;      // bytes per row of the 1-channel output
    const size_t image_size = static_cast<size_t>(image_width) * image_height;

    // =============== device memory ===============
    DeviceBuffer in_buf, out_buf1, out_buf2, out_buf3;
    if (!in_buf.allocate(image_size * 3) || !out_buf1.allocate(image_size) ||
        !out_buf2.allocate(image_size) || !out_buf3.allocate(image_size)) {
        return EXIT_FAILURE;
    }
    if (!cudaOk(cudaMemcpy(in_buf.ptr, image_dog.data, image_size * 3, cudaMemcpyHostToDevice),
                "cudaMemcpy(host -> device)")) {
        return EXIT_FAILURE;
    }

    NppiSize roi1;
    roi1.width = image_width;
    roi1.height = image_height;

    cv::Mat out_image_gray = cv::Mat::zeros(image_height, image_width, CV_8UC1);

    // =============== nppiRGBToGray_8u_C3C1R ===============
    NppStatus status = nppiRGBToGray_8u_C3C1R(in_buf.ptr, src_step, out_buf1.ptr, dst_step, roi1);
    if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiRGBToGray_8u_C3C1R failed, status = " << status << std::endl;
        return EXIT_FAILURE;
    }
    if (!saveGray(out_buf1.ptr, image_size, out_image_gray, directory + "rgb2gray.jpg")) {
        return EXIT_FAILURE;
    }

    // =============== nppiColorToGray_8u_C3C1R ===============
    const Npp32f coeffs[3] = { 0.1f, 0.8f, 0.1f };
    status = nppiColorToGray_8u_C3C1R(in_buf.ptr, src_step, out_buf2.ptr, dst_step, roi1, coeffs);
    if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiColorToGray_8u_C3C1R failed, status = " << status << std::endl;
        return EXIT_FAILURE;
    }
    // Output file name kept as in the original program.
    if (!saveGray(out_buf2.ptr, image_size, out_image_gray, directory + "color2rgb.jpg")) {
        return EXIT_FAILURE;
    }

    // =============== nppiGradientColorToGray_8u_C3C1R ===============
    status = nppiGradientColorToGray_8u_C3C1R(in_buf.ptr, src_step, out_buf3.ptr, dst_step, roi1, nppiNormInf);
    if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiGradientColorToGray_8u_C3C1R failed, status = " << status << std::endl;
        return EXIT_FAILURE;
    }
    if (!saveGray(out_buf3.ptr, image_size, out_image_gray, directory + "gradient.jpg")) {
        return EXIT_FAILURE;
    }

    // Device buffers are released by the DeviceBuffer destructors.
    return EXIT_SUCCESS;
}
make
cmake_minimum_required(VERSION 3.20)
project(test)

find_package(OpenCV REQUIRED)
# FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
# provides imported targets, so no hard-coded /usr/local/cuda path or glob is needed.
find_package(CUDAToolkit REQUIRED)

add_executable(test test.cpp)
target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
# nppicc holds the NPP color-conversion functions; nppc is the NPP core library.
target_link_libraries(test PRIVATE
    ${OpenCV_LIBS}
    CUDA::cudart
    CUDA::nppc
    CUDA::nppicc
)
result
注意点:
- Color2Gray的接口与RGB2Gray的区别主要在于三个通道的加权系数:RGB2Gray使用固定系数(0.299、0.587、0.114),而Color2Gray的系数可以通过aCoeffs自行指定。由于结果都是灰度图,ColorToGray与直接RGB转灰度的结果差异不易从肉眼看出来,但是换用不同的系数组合,可以明显看出图像是有区别的。