当前位置: 首页 > news >正文

记录几个学习 CUDA 编程的例子。

向量加和

#include <iostream>

// Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, n).
//
// Launch layout: 1-D grid of 1-D blocks. Any grid size is valid: each thread
// starts at its global index and then advances by the total number of
// launched threads, so a grid with fewer than n threads still covers every
// element. No shared memory is used.
//
// Parameters:
//   n    - number of elements; nothing is read or written when n <= 0.
//   a, b - input arrays holding at least n floats each.
//   c    - output array holding at least n floats.
__global__ void vectorAdd(int n, const float* a, const float* b, float* c) {
    // 64-bit index math so blockIdx.x * blockDim.x cannot wrap on large launches.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    for (long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < n; i += stride) {
        c[i] = a[i] + b[i];
    }
}
// Checks the cudaError_t produced by `call`. On failure it prints the source
// location and the runtime's error string to stderr and makes the enclosing
// function return 1, so it is only usable inside functions returning int.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t cuda_check_err_ = (call);                                \
        if (cuda_check_err_ != cudaSuccess) {                                \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__     \
                      << ": " << cudaGetErrorString(cuda_check_err_)         \
                      << std::endl;                                          \
            return 1;                                                        \
        }                                                                    \
    } while (0)
#endif

// Host driver for vectorAdd: fills two 2^20-element managed arrays with
// constants, adds them on the device, and prints the first, middle and last
// results. Returns 0 on success and 1 if any CUDA call fails.
int main() {
    int n = 1 << 20;
    size_t size = static_cast<size_t>(n) * sizeof(float);

    // Managed allocations are written by the host below and read by the kernel.
    float *a, *b, *c;
    CUDA_CHECK(cudaMallocManaged(&a, size));
    CUDA_CHECK(cudaMallocManaged(&b, size));
    CUDA_CHECK(cudaMallocManaged(&c, size));

    for (int i = 0; i < n; i++) {
        a[i] = 1.1f;
        b[i] = 2.3f;
    }

    // Ceil-divide so the grid has at least one thread per element.
    int threadPerBlock = 256;
    int blockPerGrid = (n + threadPerBlock - 1) / threadPerBlock;
    vectorAdd<<<blockPerGrid, threadPerBlock>>>(n, a, b, c);
    // A launch does not return a status; configuration errors surface here.
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel before the host reads c; execution errors surface here.
    CUDA_CHECK(cudaDeviceSynchronize());

    std::cout << c[0] << " " << c[n / 2] << " " << c[n - 1] << std::endl;

    CUDA_CHECK(cudaFree(a));
    CUDA_CHECK(cudaFree(b));
    CUDA_CHECK(cudaFree(c));
    return 0;
}

矩阵乘法

#include <iostream>

// Dense matrix product on row-major arrays: (m, n) @ (n, k) = (m, k).
//
// Launch layout: 2-D grid of 2-D blocks, one thread per output element. The
// x dimension indexes output columns and the y dimension indexes output rows,
// so the grid must span at least k threads in x and m threads in y. Threads
// that fall outside the output do nothing. No shared memory is used.
//
// Parameters:
//   m, n, k - matrix dimensions as described above.
//   a       - left operand, m * n floats, element (r, i) at a[r * n + i].
//   b       - right operand, n * k floats, element (i, col) at b[i * k + col].
//   c       - output, m * k floats, element (r, col) at c[r * k + col].
__global__ void matrixMul(int m,
                          int n,
                          int k,
                          const float* a,
                          const float* b,
                          float* c) {
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;
    if (col >= k || row >= m) {
        return;
    }

    // Offsets are formed in size_t so row * n and i * k cannot overflow int
    // when the matrices are large.
    const size_t a_row_start = static_cast<size_t>(row) * n;
    float sum = 0.0f;
    for (int i = 0; i < n; i++) {
        sum += a[a_row_start + i] * b[static_cast<size_t>(i) * k + col];
    }
    c[static_cast<size_t>(row) * k + col] = sum;
}
// Checks the cudaError_t produced by `call`. On failure it prints the source
// location and the runtime's error string to stderr and makes the enclosing
// function return 1, so it is only usable inside functions returning int.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t cuda_check_err_ = (call);                                \
        if (cuda_check_err_ != cudaSuccess) {                                \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__     \
                      << ": " << cudaGetErrorString(cuda_check_err_)         \
                      << std::endl;                                          \
            return 1;                                                        \
        }                                                                    \
    } while (0)
#endif

// Host driver for matrixMul: multiplies a 3x2 matrix of 2.0 by a 2x4 matrix
// of 3.0 on the device and prints three entries of the 3x4 result (first
// element, first element of the third row, last element). Returns 0 on
// success and 1 if any CUDA call fails.
int main() {
    int m = 3;
    int n = 2;
    int k = 4;

    // Widen before multiplying so the byte counts are computed in size_t.
    size_t size_a = static_cast<size_t>(m) * n * sizeof(float);
    size_t size_b = static_cast<size_t>(n) * k * sizeof(float);
    size_t size_c = static_cast<size_t>(m) * k * sizeof(float);

    float *a, *b, *c;
    CUDA_CHECK(cudaMallocManaged(&a, size_a));
    CUDA_CHECK(cudaMallocManaged(&b, size_b));
    CUDA_CHECK(cudaMallocManaged(&c, size_c));

    for (int i = 0; i < m * n; i++) {
        a[i] = 2.0f;
    }
    for (int i = 0; i < n * k; i++) {
        b[i] = 3.0f;
    }

    // x covers the k output columns, y covers the m output rows (ceil-divide).
    dim3 threadPerBlock(16, 16);
    dim3 blockPerGrid((k + threadPerBlock.x - 1) / threadPerBlock.x,
                      (m + threadPerBlock.y - 1) / threadPerBlock.y);
    matrixMul<<<blockPerGrid, threadPerBlock>>>(m, n, k, a, b, c);
    // A launch does not return a status; configuration errors surface here.
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel before the host reads c; execution errors surface here.
    CUDA_CHECK(cudaDeviceSynchronize());

    std::cout << c[0] << " " << c[2 * k] << " " << c[(m - 1) * k + k - 1]
              << std::endl;

    CUDA_CHECK(cudaFree(a));
    CUDA_CHECK(cudaFree(b));
    CUDA_CHECK(cudaFree(c));
    return 0;
}

单通道(不含通道维度)的二维卷积

#include <iostream>

// Single-channel 2-D convolution with zero padding and stride, on row-major
// arrays. The kernel window is applied without flipping: each output element
// is the sum of img[y][x] * kernel[i][j] over the window, and window
// positions that fall outside the image contribute nothing.
//
// Launch layout: 2-D grid of 2-D blocks, one thread per output element. The
// x dimension indexes output columns and the y dimension indexes output rows,
// so the grid must span at least Wout threads in x and Hout threads in y.
// Threads outside the output return immediately. No shared memory is used.
//
// Parameters:
//   H, W             - input image height and width.
//   kH, kW           - kernel height and width.
//   padH, padW       - zero padding applied on each side, vertical/horizontal.
//   strideH, strideW - vertical/horizontal step between windows.
//   Hout, Wout       - output height and width (computed by the caller).
//   img              - input image, H * W floats.
//   kernel           - convolution weights, kH * kW floats.
//   output           - result, Hout * Wout floats.
__global__ void conv2D(int H,
                       int W,
                       int kH,
                       int kW,
                       int padH,
                       int padW,
                       int strideH,
                       int strideW,
                       int Hout,
                       int Wout,
                       const float* img,
                       const float* kernel,
                       float* output) {
    const int ox = blockDim.x * blockIdx.x + threadIdx.x;  // current output column
    const int oy = blockDim.y * blockIdx.y + threadIdx.y;  // current output row
    if (ox >= Wout || oy >= Hout) {
        return;
    }

    // Top-left input coordinate of the window; negative inside the padding.
    const int in_x0 = ox * strideW - padW;
    const int in_y0 = oy * strideH - padH;

    float sum = 0.0f;
    for (int i = 0; i < kH; i++) {
        const int in_yi = in_y0 + i;
        for (int j = 0; j < kW; j++) {
            const int in_xi = in_x0 + j;
            // Skip taps that land in the zero padding.
            if (in_xi >= 0 && in_xi < W && in_yi >= 0 && in_yi < H) {
                sum += img[in_yi * W + in_xi] * kernel[i * kW + j];
            }
        }
    }
    output[oy * Wout + ox] = sum;
}

// Checks the cudaError_t produced by `call`. On failure it prints the source
// location and the runtime's error string to stderr and makes the enclosing
// function return 1, so it is only usable inside functions returning int.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t cuda_check_err_ = (call);                                \
        if (cuda_check_err_ != cudaSuccess) {                                \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__     \
                      << ": " << cudaGetErrorString(cuda_check_err_)         \
                      << std::endl;                                          \
            return 1;                                                        \
        }                                                                    \
    } while (0)
#endif

// Host driver for conv2D: convolves a 12x12 image of 10.0 with a 3x3 kernel
// of 0.5 (padding 1, stride 1) and prints the output one row per line.
// Returns 0 on success and 1 if any CUDA call fails.
int main() {
    int H = 12, W = 12;
    int kH = 3, kW = 3;
    int padH = 1, padW = 1;
    int strideH = 1, strideW = 1;

    // Output extent for a padded, strided window.
    int Hout = (H + 2 * padH - kH) / strideH + 1;
    int Wout = (W + 2 * padW - kW) / strideW + 1;

    // Widen before multiplying so the byte counts are computed in size_t.
    size_t img_size = static_cast<size_t>(H) * W * sizeof(float);
    size_t kernel_size = static_cast<size_t>(kH) * kW * sizeof(float);
    size_t out_size = static_cast<size_t>(Hout) * Wout * sizeof(float);

    float *img, *kernel, *output;
    CUDA_CHECK(cudaMallocManaged(&img, img_size));
    CUDA_CHECK(cudaMallocManaged(&kernel, kernel_size));
    CUDA_CHECK(cudaMallocManaged(&output, out_size));

    for (int i = 0; i < H * W; i++) {
        img[i] = 10.0f;
    }
    for (int i = 0; i < kH * kW; i++) {
        kernel[i] = 0.5f;
    }

    // x covers the Wout output columns, y covers the Hout output rows.
    dim3 threadPerBlock(16, 16);
    dim3 blockPerGrid((Wout + threadPerBlock.x - 1) / threadPerBlock.x,
                      (Hout + threadPerBlock.y - 1) / threadPerBlock.y);
    conv2D<<<blockPerGrid, threadPerBlock>>>(H, W, kH, kW, padH, padW, strideH,
                                             strideW, Hout, Wout, img, kernel,
                                             output);
    // A launch does not return a status; configuration errors surface here.
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel before the host reads output.
    CUDA_CHECK(cudaDeviceSynchronize());

    for (int i = 0; i < Hout; i++) {
        for (int j = 0; j < Wout; j++) {
            std::cout << output[i * Wout + j] << " ";
        }
        std::cout << std::endl;
    }

    // Release the managed buffers.
    CUDA_CHECK(cudaFree(img));
    CUDA_CHECK(cudaFree(kernel));
    CUDA_CHECK(cudaFree(output));
    return 0;
}

参考文献

  • CUDA 入门教程:更简单的介绍 (更新版)
http://www.sczhlp.com/news/41188/

相关文章:

  • 百度推广要自己做网站吗网络营销常用的工具有哪些
  • 做网站需要可信认证吗手机网站快速建站
  • 教着做美食的网站it培训课程
  • ae有么有做gif的网站深圳网络推广哪家
  • wordpress翻页数字南京 seo 价格
  • 碎碎念(十四)
  • 精选 2 款 .NET 开源、实用的缓存框架,帮助开发者更轻松地处理系统缓存!
  • try关键字
  • 8月25-27日集训小记 - L
  • 征途网站开发背景培训心得
  • 做图必备素材网站每天新闻早知道
  • 手机网站排名优化软件怎么在百度上设置自己的门店
  • app设计制作软件如何优化关键词排名快速首页
  • 谁家的网站做的比较好百度关键词搜索查询
  • 广州市 网站 建设广州最新消息今天
  • 帝国网站网站手机版怎么做百度官网认证多少钱
  • 自己制作网站的步骤网站备案查询
  • 做网站不优化黄冈黄页88网黄冈房产估价
  • 营销网站制作哪家有名独立站建站平台有哪些
  • 终末之诗
  • 本地coze调试
  • redis中的五种基本数据结构和常用命令
  • Chrome-Chrome安装axure RP扩展程序用于查看原型图
  • 重生之从零开始的神经网络算法学习之路——第二篇 深入Scikit-learn(分类问题与交叉验证)
  • 企业营销网站怎样做网络seo是什么意思
  • 成都网站建设优化江门百度seo公司
  • 网页设计用dw哪个版本好企业专业搜索引擎优化
  • oa办公系统如何使用搜索引擎优化的核心是
  • 做网站都用什么技术浙江百度推广开户
  • 新手如何制作网站百度一下网页