一个 hipsolver 特征值示例

1,原理

通过雅可比旋转,对对称矩阵计算特征值和特征向量;

通过初等正交变换,每次把其中一个非主对角元素消成零,最终只剩主对角线非零元素为特征值,同时把初等变换累积下来,构成特征向量。

2,源码

yevj.cpp

计算一个512 阶的矩阵的特征值和特征向量:

#include <hip/hip_runtime.h>
#include <hipsolver/hipsolver.h>#include <iostream>
#include <random>
#include <vector>
#include <functional>
#include <sstream>
#include <stdexcept>
#include <string>#define NA 512
#define PR_D 0constexpr int error_exit_code = -1;inline int report_validation_result(int errors)
{if(errors){std::cout << "Validation failed. Errors: " << errors << std::endl;return error_exit_code;}std::cout << "Validation passed." << std::endl;return 0;
}template<typename T>
void multiply_matrices(T        alpha,T        beta,int      m,int      n,int      k,const T* A,int      stride1_a,int      stride2_a,const T* B,int      stride1_b,int      stride2_b,T*       C,int      stride_c)
{for(int i1 = 0; i1 < m; ++i1){for(int i2 = 0; i2 < n; ++i2){T t = T(0.0);for(int i3 = 0; i3 < k; ++i3){t += A[i1 * stride1_a + i3 * stride2_a] * B[i3 * stride1_b + i2 * stride2_b];}C[i1 + i2 * stride_c] = beta * C[i1 + i2 * stride_c] + alpha * t;}}
}template<class BidirectionalIterator>
inline std::string format_range(const BidirectionalIterator begin, const BidirectionalIterator end)
{std::stringstream sstream;sstream << "[ ";for(auto it = begin; it != end; ++it){sstream << *it;if(it != std::prev(end)){sstream << ", ";}}sstream << " ]";return sstream.str();
}inline const char* hipsolverStatusToString(hipsolverStatus_t status)
{switch(status){case HIPSOLVER_STATUS_SUCCESS: return "HIPSOLVER_STATUS_SUCCESS";case HIPSOLVER_STATUS_NOT_INITIALIZED: return "HIPSOLVER_STATUS_NOT_INITIALIZED";case HIPSOLVER_STATUS_ALLOC_FAILED: return "HIPSOLVER_STATUS_ALLOC_FAILED";case HIPSOLVER_STATUS_INVALID_VALUE: return "HIPSOLVER_STATUS_INVALID_VALUE";case HIPSOLVER_STATUS_MAPPING_ERROR: return "HIPSOLVER_STATUS_MAPPING_ERROR";case HIPSOLVER_STATUS_EXECUTION_FAILED: return "HIPSOLVER_STATUS_EXECUTION_FAILED";case HIPSOLVER_STATUS_INTERNAL_ERROR: return "HIPSOLVER_STATUS_INTERNAL_ERROR";case HIPSOLVER_STATUS_NOT_SUPPORTED: return "HIPSOLVER_STATUS_NOT_SUPPORTED";case HIPSOLVER_STATUS_ARCH_MISMATCH: return "HIPSOLVER_STATUS_ARCH_MISMATCH";case HIPSOLVER_STATUS_HANDLE_IS_NULLPTR: return "HIPSOLVER_STATUS_HANDLE_IS_NULLPTR";case HIPSOLVER_STATUS_INVALID_ENUM: return "HIPSOLVER_STATUS_INVALID_ENUM";case HIPSOLVER_STATUS_UNKNOWN: return "HIPSOLVER_STATUS_UNKNOWN";
#if (hipsolverVersionMajor == 1 && hipsolverVersionMinor >= 8) || hipsolverVersionMajor >= 2case HIPSOLVER_STATUS_ZERO_PIVOT: return "HIPSOLVER_STATUS_ZERO_PIVOT";
#endif}// We don't use default so that the compiler warns if any valid enums are missing from the// switch. If the value is not a valid hipsolverStatus_t, we return the following.return "<undefined hipsolverStatus_t value>";
}#define HIP_CHECK(condition)                                                                \{                                                                                       \const hipError_t error = condition;                                                 \if(error != hipSuccess)                                                             \{                                                                                   \std::cerr << "An error encountered: \"" << hipGetErrorString(error) << "\" at " \<< __FILE__ << ':' << __LINE__ << std::endl;                          \std::exit(error_exit_code);                                                     \}                                                                                   \}#define HIPSOLVER_CHECK(condition)                                                            \{                                                                                         \const hipsolverStatus_t status = condition;                                           \if(status != HIPSOLVER_STATUS_SUCCESS)                                                \{                                                                                     \std::cerr << "hipSOLVER error encountered: \"" << hipsolverStatusToString(status) \<< "\" at " << __FILE__ << ':' << __LINE__ << std::endl;                \std::exit(error_exit_code);                                                       \}                                                                                     \}void init_matrix(std::vector<double> &A, int n, int lda)
{std::default_random_engine             generator;std::uniform_real_distribution<double> distribution(0., 2.);auto                                   random_number = std::bind(distribution, generator);for(int i = 0; i < n; i++){A[(lda + 1) * i] = random_number();for(int j = 0; j < i; j++){A[i * lda + j] = A[j * lda + i] = random_number();}}
}int main(const int argc, char* argv[])
{int n = NA;if(n <= 0){std::cout << "Value of 'n' should be greater than 0" << std::endl;return 0;}const int lda = n;// 2. Data vectorsstd::vector<double> A(n * lda); // Input matrixstd::vector<double> V(n * lda); // Resulting eigenvectorsstd::vector<double> W(n); // resulting eigenvalues// 3. Generate a random symmetric matrixinit_matrix(A, n, lda);// 4. Set hipsolver parametersconst hipsolverEigMode_t  jobz = HIPSOLVER_EIG_MODE_VECTOR;const hipsolverFillMode_t uplo = HIPSOLVER_FILL_MODE_LOWER;hipsolverSyevjInfo_t params;HIPSOLVER_CHECK(hipsolverCreateSyevjInfo(&params));HIPSOLVER_CHECK(hipsolverXsyevjSetMaxSweeps(params, 100));HIPSOLVER_CHECK(hipsolverXsyevjSetTolerance(params, 1.e-7));HIPSOLVER_CHECK(hipsolverXsyevjSetSortEig(params, 1));// 5. Reserve and copy data to devicedouble* d_A    = nullptr;double* d_W    = nullptr;int*    d_info = nullptr;HIP_CHECK(hipMalloc(&d_A, sizeof(double) * A.size()));HIP_CHECK(hipMalloc(&d_W, sizeof(double) * W.size()));HIP_CHECK(hipMalloc(&d_info, sizeof(int)));HIP_CHECK(hipMemcpy(d_A, A.data(), sizeof(double) * A.size(), hipMemcpyHostToDevice));// 6. Initialize hipsolverhipsolverHandle_t hipsolver_handle;HIPSOLVER_CHECK(hipsolverCreate(&hipsolver_handle));// 7. Get and reserve the working space on device.int     lwork  = 0;double* d_work = nullptr;HIPSOLVER_CHECK(hipsolverDsyevj_bufferSize(hipsolver_handle, jobz, uplo, n, d_A, lda, d_W, &lwork, params));std::cout<< "LL:: 1 lwork = "<<lwork<<"bytes"<<std::endl;lwork += 64;
//    lwork = ((lwork+64-1)/64)*64;std::cout<< "LL:: 2 lwork = "<<lwork<<"bytes"<<std::endl;HIP_CHECK(hipMalloc(&d_work, sizeof(double) * lwork));// 8. Compute eigenvectors and eigenvaluesHIPSOLVER_CHECK(hipsolverDsyevj(hipsolver_handle,jobz,uplo,n,d_A,lda,d_W,d_work,lwork,d_info,params));// 9. Get results from host.int info = 0;HIP_CHECK(hipMemcpy(V.data(), d_A, sizeof(double) * V.size(), hipMemcpyDeviceToHost));HIP_CHECK(hipMemcpy(W.data(), d_W, sizeof(double) * W.size(), hipMemcpyDeviceToHost));HIP_CHECK(hipMemcpy(&info, d_info, sizeof(int), hipMemcpyDeviceToHost));// 10. Print resultsif(info == 0){std::cout << "SYEVJ converges." << std::endl;}else if(info > 0){std::cout << "SYEVJ does not converge (" << info << " elements did not converge)."<< std::endl;}std::cout << "\nGiven the n x n square input matrix A; we computed the linearly independent ""eigenvectors V and the associated eigenvalues W."<< std::endl;
#if PR_Dstd::cout << "A = " << format_range(A.begin(), A.end()) << std::endl;std::cout << "W = " << format_range(W.begin(), W.end()) << std::endl;std::cout << "V = " << format_range(V.begin(), V.end()) << std::endl;
#endifint    sweeps   = 0;double residual = 0;HIPSOLVER_CHECK(hipsolverXsyevjGetSweeps(hipsolver_handle, params, &sweeps));HIPSOLVER_CHECK(hipsolverXsyevjGetResidual(hipsolver_handle, params, &residual));std::cout << "\nWhich was computed in " << sweeps << " sweeps, with a residual of " << residual<< std::endl;// 11. Validate that 'AV == VD' and that 'AV - VD == 0'.std::cout << "\nLet D be the diagonal constructed from W.\n"<< "The right multiplication of A * V should result in V * D [AV == VD]:"<< std::endl;// Right multiplication of the input matrix with the eigenvectors.std::vector<double> AV(n * lda);multiply_matrices(1.0, 0.0, n, n, n, A.data(), lda, 1, V.data(), 1, lda, AV.data(), lda);
#if PR_Dstd::cout << "AV = " << format_range(AV.begin(), AV.end()) << std::endl;
#endif// Construct the diagonal D from eigenvalues W.std::vector<double> D(n * n);for(int i = 0; i < n; i++){D[(n + 1) * i] = W[i];}// Scale eigenvectors V with W by multiplying V with D.std::vector<double> VD(n * lda);multiply_matrices(1.0, 0.0, n, n, n, V.data(), 1, lda, D.data(), lda, 1, VD.data(), lda);
#if PR_Dstd::cout << "VD = " << format_range(VD.begin(), VD.end()) << std::endl;
#endifdouble epsilon = 1.0e5 * std::numeric_limits<double>::epsilon();int    errors  = 0;double mse     = 0;for(int i = 0; i < n * n; i++){double diff = (AV[i] - VD[i]);diff *= diff;mse += diff;errors += (diff > epsilon);}mse /= n * n;std::cout << "\nMean Square Error of [AV == VD]:\n  " << mse << std::endl;// 12. Clean up device allocations.HIPSOLVER_CHECK(hipsolverDestroy(hipsolver_handle));HIPSOLVER_CHECK(hipsolverDestroySyevjInfo(params));HIP_CHECK(hipFree(d_A));HIP_CHECK(hipFree(d_W));HIP_CHECK(hipFree(d_work));HIP_CHECK(hipFree(d_info));return report_validation_result(errors);
}

第一种 Makefile:

使用g++ 编译

SRC := hipsolver_Dsyevj.cppEXE := hipsolver_Dsyevjall: $(EXE)INC      := -I/opt/rocm/include
LD_FLAGS := -L/opt/rocm/lib -lamdhip64 -lrocblas -lhipsolver -lrocsolver -D__HIP_PLATFORM_AMD__%: %.cppg++ $< -o $@ $(INC) $(LD_FLAGS).PHONY: clean
clean:rm -rf $(EXE)

第二种 Makefile:

使用hipcc编译

EXAMPLE := hipsolver_syevj
COMMON_INCLUDE_DIR := ./Common
GPU_RUNTIME := HIP# HIP variables
ROCM_INSTALL_DIR := /opt/rocm
CUDA_INSTALL_DIR := /usr/local/cudaHIP_INCLUDE_DIR       := $(ROCM_INSTALL_DIR)/include
HIPSOLVER_INCLUDE_DIR := $(HIP_INCLUDE_DIR)HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc
CUDACXX ?= $(CUDA_INSTALL_DIR)/bin/nvcc# Common variables and flags
CXX_STD   := c++17
ICXXFLAGS := -std=$(CXX_STD)
ICPPFLAGS := -isystem $(HIPSOLVER_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR)
ILDFLAGS  := -L $(ROCM_INSTALL_DIR)/lib
ILDLIBS   := -lhipsolverifeq ($(GPU_RUNTIME), CUDA)CXXFLAGS += -x cuCPPFLAGS += -D__HIP_PLATFORM_NVIDIA__COMPILER := $(CUDACXX)
else ifeq ($(GPU_RUNTIME), HIP)CXXFLAGS ?= -Wall -WextraCPPFLAGS += -D__HIP_PLATFORM_AMD__COMPILER := $(HIPCXX)
else$(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP)
endifICXXFLAGS += $(CXXFLAGS)
ICPPFLAGS += $(CPPFLAGS)
ILDFLAGS  += $(LDFLAGS)
ILDLIBS   += $(LDLIBS)$(EXAMPLE): hipsolver_Dsyevj.cpp$(COMPILER) -g $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS)clean:$(RM) $(EXAMPLE).PHONY: clean

3,编译运行

make

 

4,提炼出来了

 

git@github.com:Kleenelan/yevj_batched_little_rocm.git

5, 参考

GitHub - amd/rocm-examples

结束

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.rhkb.cn/news/294915.html

如若内容造成侵权/违法违规/事实不符,请联系长河编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

CAS(Compare And Swap)

目录 CAS概念 乐观锁与悲观锁 ABA问题 Unsafe类 ​编辑 原子类 基本类型原子类 原子引用类 原子数组 原子更新器类 原子累加器 CAS概念 CAS是Compare And Swap的缩写&#xff0c;中文翻译成&#xff1a;比较并交换&#xff0c;实现无锁并发时常用到的一种技术。它一…

element plus的el-image图片发布到nginx不显示

问题&#xff1a; <el-image alt""src"/img/month-b.png" class"card-icon"style"width: 89px;height: 89px;right: -7px;top: -5px;"/> 部署到nginx二级路由访问地址是&#xff1a; http://192.168.1.207/divided/# 这时候使用…

总结jvm中GC机制(垃圾回收)

前言 本篇博客博主将介绍jvm中的GC机制&#xff0c;坐好板凳发车啦~~ 一.GC相关 1.1回收栈内存 对于虚拟机栈&#xff0c;本地方法栈这部分区域而言&#xff0c;其生命周期与相关线程相关&#xff0c;随线程而生&#xff0c;随线程而灭。并且这三个区域的内存分配与回收具有…

OpenHarmony:全流程讲解如何编写ADC平台驱动以及应用程序

ADC&#xff08;Analog to Digital Converter&#xff09;&#xff0c;即模拟-数字转换器&#xff0c;可将模拟信号转换成对应的数字信号&#xff0c;便于存储与计算等操作。除电源线和地线之外&#xff0c;ADC只需要1根线与被测量的设备进行连接。 一、案例简介 该程序是基于…

github本地仓库push到远程仓库

1.从远程仓库clone到本地 2.生成SSH秘钥&#xff0c;为push做准备 在Ubuntu命令行输入一下内容 [rootlocalhost ~]# ssh-keygen -t rsa < 建立密钥对&#xff0c;-t代表类型&#xff0c;有RSA和DSA两种 Generating public/private rsa key pair. Enter file in whi…

Synchronized锁升级过程

无锁-->偏向锁---> 轻量级锁---->重量级锁 ①、从无锁到偏向锁&#xff1a; 当一个线程首次访问同步块时&#xff0c;如果此对象无锁状态且偏向锁未被禁用&#xff0c;JVM 会将该对象头的锁标记改为偏向锁状态&#xff0c;并记录下当前线程的 ID。此时&#xff0c;对…

Java进阶-反射的详解与应用

本文深入探讨了Java反射机制的核心概念、应用实例及其在现代Java开发中的重要性。文章首先介绍了反射的基本原理和能力&#xff0c;包括在运行时动态获取类信息、操作对象字段和方法的能力。随后&#xff0c;通过具体代码示例&#xff0c;展示了如何利用反射进行字段访问、方法…

3.5网安学习第三阶段第五周回顾(个人学习记录使用)

本周重点 ①SSRF服务器端请求伪造 ②序列化和反序列化 ③Vaudit代码审计 本周主要内容 ①SSRF服务器端请求伪造 一、概述 SSRF: server site request forgery (服务器端请求伪造)。 SSR: 服务端请求&#xff0c;A服务器通过函数向B服务器发送请求。 SSRF发生的前提条件…

Linux:入门篇

文章目录 前言1. Linuxd的安装环境2.Linux的简单介绍2.1 新建目录2.2 新建文件 3.指令到底是什么&#xff1f;4.shell命令以及运行原理5.总结 前言 很多人对于Linux的学习总是感觉无法下手&#xff0c;不知道从何开始学习&#xff0c;相信这篇文章将会为你提供一个清晰的思路。…

高精度算法(加、减、乘、除,使用c++实现)

一、概念 在我们进行计算的过程中&#xff0c;经常会遇到几十位&#xff0c;甚至几百位的数字的计算问题&#xff0c;也有可能会遇到小数点后几十位&#xff0c;几百位的情况&#xff0c;而我们面对这样的情况下&#xff0c; 和 的数据范围显然是不够使用的了。因此这时&am…

Node.js-知识点学习总结归纳

Node.js-知识点学习总结归纳 安装nodenode运行方式通过Node.js直接运行js文件&#xff08;也就不用通过网页html了&#xff09;绝对路径调用:相对路径调用&#xff1a;直接运行js命令&#xff1a; Vscode控制台使用node运行js文件 安装node 这个就不用讲了吧&#xff0c;网上搜…

开源知识库平台Raneto--使用Docker部署Raneto

文章目录 一、Raneto介绍1.1 Raneto简介1.2 知识库介绍 二、阿里云环境2.1 环境规划2.2 部署介绍 三、环境检查3.1 检查Docker服务状态3.2 检查Docker版本3.3 检查docker compose 版本 四、下载Raneto镜像五、部署Raneto知识库平台5.1 创建挂载目录5.2 编辑config.js文件5.3 编…

Sui Basecamp日程公布,两天超50场密集分享等你来参加

随着4月的来临&#xff0c;我们也怀着激动的心情迎来了Sui全球旗舰品牌会议Sui Basecamp的个位数倒计时。 Sui Basecamp将在4月10–11日巴黎区块链周期间举行&#xff0c;Web3构建者、知名企业和信仰者齐聚一堂&#xff0c;在这里共同创造、学习和建立联系。Basecamp将由具有对…

左值与右值,以及c++11的相关特性。

目录 左值 右值 左值引用总结&#xff1a; 右值引用总结&#xff1a; 右值引用使用场景和意义&#xff1a; 1、左值引用的使用场景&#xff1a; 编译器优化1&#xff1a; 2、移动构造与移动赋值&#xff1a; 3、右值引用的使用场景&#xff1a; 编译器优化2&#xff1a…

excel匹配替换脱敏身份证等数据

假如excel sheet1中有脱敏的身份证号码和姓名&#xff0c;如&#xff1a; sheet2中有未脱敏的数据数据 做法如下&#xff1a; 1、在sheet2的C列用公式 LEFT(A2,6)&REPT("*",8)&RIGHT(A2,4) 做出脱敏数据&#xff0c;用来与sheet1的脱敏数据匹配 2、在sheet…

【卫星家族】 | 高分六号卫星影像及获取

1. 卫星简介 高分六号卫星&#xff08;GF-6&#xff09;于2018年6月2日在酒泉卫星发射中心成功发射&#xff0c;是高分专项中的一颗低轨光学遥感卫星&#xff0c;也是我国首颗精准农业观测的高分卫星&#xff0c;具有高分辨率、宽覆盖、高质量成像、高效能成像、国产化率高等特…

Mysql数据库:故障分析与配置优化

目录 前言 一、Mysql逻辑架构图 二、Mysql单实例常见故障 1、无法通过套接字连接到本地MySQL服务器 2、用户rootlocalhost访问被拒绝 3、远程连接数据库时连接很慢 4、无法打开以MYI结尾的索引文件 5、超出最大连接错误数量限制 6、连接过多 7、配置文件/etc/my.cnf权…

python 哔哩哔哩视频去水印

使用python 去除视频中的水印 1. 需要安装的包 pip install moviepy pip install numpy pip install opencv_python pip install tqdm 2. 代码 import cv2 import numpy as np import glob from moviepy.editor import VideoFileClip import os from tqdm import tqdm# 判…

2024 年每个程序员都应该尝试的 8 个AI工具

随着人工智能技术的极速发展&#xff0c;新的 AI 工具正以前所未有的速度涌现&#xff0c;为开发者们带来了前所未有的机会和挑战。在这个不断演进的时代&#xff0c;掌握最新的 AI 技术已成为每个程序员的必修课。 在本文中&#xff0c;我们收集了8 个程序员在 2024 年值得尝…

idea 报错 Could not list the contents of folder “ftps

idea 报错 Could not list the contents of folder "ftps 解决方案 这里看到了网上的解决方案&#xff0c;顺便再记录一下。打开 【高级】菜单 - 取消勾选 被动模式。然后点击测试连接&#xff0c;显示连接成功&#xff01; ftp中的主动模式和被动模式 主动模式&…