public inbox for git-commits@fedoraproject.org
help / color / mirror / Atom feed
* [rpms/rocclr] epel10: A smoke test for rocclr
@ 2026-06-10 23:25 Tom Rix
0 siblings, 0 replies; only message in thread
From: Tom Rix @ 2026-06-10 23:25 UTC (permalink / raw)
To: git-commits
A new commit has been pushed.
Repo : rpms/rocclr
Branch : epel10
Commit : c88f80a57e18e7127b543d4bf38a6c8a66aa370b
Author : Tom Rix <Tom.Rix@amd.com>
Date : 2026-05-19T13:59:14-07:00
Stats : +112/-0 in 2 file(s)
URL : https://src.fedoraproject.org/rpms/rocclr/c/c88f80a57e18e7127b543d4bf38a6c8a66aa370b?branch=epel10
Log:
A smoke test for rocclr
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
---
diff --git a/test.hip b/test.hip
new file mode 100644
index 0000000..fe16a74
--- /dev/null
+++ b/test.hip
@@ -0,0 +1,104 @@
+#include <iostream>
+#include <vector>
+#include <cmath>
+
+// Include the main HIP header
+#include <hip/hip_runtime.h>
+
+// --- 1. Define the Kernel ---
+// '__global__' marks vector_add as a kernel: a function that runs on the
+// GPU and is launched from host code. Each thread that executes it adds
+// one pair of elements.
+__global__ void vector_add(const int* a, const int* b, int* c, int n) {
+    // Flatten the 1D launch geometry into a single element index: the
+    // block's starting offset (blockIdx.x * blockDim.x) plus this thread's
+    // position inside the block (threadIdx.x).
+    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Threads whose index is at or past n have no element to process;
+    // leave early so they never touch the arrays.
+    if (idx >= n) {
+        return;
+    }
+    c[idx] = a[idx] + b[idx];
+}
+
+// --- 2. Main Program Execution ---
+int main() {
+    const int N = 1024 * 1024;  // Elements per vector (about one million).
+    const auto bytes = N * sizeof(int);  // Size in bytes of each buffer.
+    std::cout << "Starting HIP Vector Addition on " << N << " elements..." << std::endl;
+
+    // Host inputs (h_a, h_b) and the buffer the device result is copied into.
+    std::vector<int> h_a(N);
+    std::vector<int> h_b(N);
+    std::vector<int> h_c(N);
+    for (int i = 0; i < N; ++i) {
+        h_a[i] = i;
+        h_b[i] = i * 2;
+    }
+
+    // Device buffers start out null so release() is safe to call no matter
+    // how far setup got.
+    int* d_a = nullptr;
+    int* d_b = nullptr;
+    int* d_c = nullptr;
+    auto release = [&]() {
+        static_cast<void>(hipFree(d_a));
+        static_cast<void>(hipFree(d_b));
+        static_cast<void>(hipFree(d_c));
+    };
+
+    // Returns true when a HIP call succeeded. Otherwise reports the error,
+    // frees the device buffers, and returns false so main can exit non-zero.
+    auto ok = [&](hipError_t status, const char* what) {
+        if (status != hipSuccess) {
+            std::cerr << what << ": " << hipGetErrorString(status) << std::endl;
+            release();
+        }
+        return status == hipSuccess;
+    };
+
+    // Check each allocation on its own so a failure of the first or second
+    // hipMalloc is not masked by a later success.
+    if (!ok(hipMalloc(reinterpret_cast<void**>(&d_a), bytes), "Error allocating device memory") ||
+        !ok(hipMalloc(reinterpret_cast<void**>(&d_b), bytes), "Error allocating device memory") ||
+        !ok(hipMalloc(reinterpret_cast<void**>(&d_c), bytes), "Error allocating device memory")) {
+        return 1;
+    }
+
+    // Copy both inputs host -> device.
+    if (!ok(hipMemcpy(d_a, h_a.data(), bytes, hipMemcpyHostToDevice), "Error copying a to device") ||
+        !ok(hipMemcpy(d_b, h_b.data(), bytes, hipMemcpyHostToDevice), "Error copying b to device")) {
+        return 1;
+    }
+
+    // One thread per element; round the block count up so all N are covered.
+    const int threadsPerBlock = 1024;
+    const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
+    vector_add<<<blocksPerGrid, threadsPerBlock>>>(d_a, d_b, d_c, N);
+
+    // hipGetLastError reports launch failures; hipDeviceSynchronize waits for
+    // the kernel and reports failures that occur while it runs.
+    if (!ok(hipGetLastError(), "Kernel launch failed") ||
+        !ok(hipDeviceSynchronize(), "Kernel execution failed") ||
+        !ok(hipMemcpy(h_c.data(), d_c, bytes, hipMemcpyDeviceToHost), "Error copying result to host")) {
+        return 1;
+    }
+    release();
+
+    // Verify every element so a wrong result fails the smoke test.
+    for (int i = 0; i < N; ++i) {
+        if (h_c[i] != h_a[i] + h_b[i]) {
+            std::cerr << "Mismatch at C[" << i << "]: got " << h_c[i] << ", expected " << h_a[i] + h_b[i] << std::endl;
+            return 1;
+        }
+    }
+
+    std::cout << "\nComputation finished successfully." << std::endl;
+    std::cout << "First 5 results (C[i] = A[i] + B[i]):" << std::endl;
+    for (int i = 0; i < 5; ++i) {
+        std::cout << "C[" << i << "] = " << h_c[i] << " (Expected: " << i << " + " << i * 2 << ")" << std::endl;
+    }
+    return 0;
+}
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..101664f
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,8 @@
+#! /usr/bin/bash
+# Smoke test: build test.hip for the first detected GPU and run it.
+set -e
+gpu=$(rocm_agent_enumerator | grep -v -x gfx000 | head -n 1)
+[ -n "${gpu}" ] || { echo "No AMD GPU detected; skipping."; exit 0; }
+OUT=$(mktemp -d); trap 'rm -rf "${OUT}"' EXIT
+/usr/lib64/rocm/llvm/bin/clang++ -v --offload-arch="${gpu}" test.hip -lamdhip64 -o "${OUT}/test"
+"${OUT}/test"
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2026-06-10 23:25 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-10 23:25 [rpms/rocclr] epel10: A smoke test for rocclr Tom Rix
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox