[rpms/rocclr] epel10: A smoke test for rocclr

public inbox for git-commits@fedoraproject.org
help / color / mirror / Atom feed

* [rpms/rocclr] epel10: A smoke test for rocclr
@ 2026-06-10 23:25 Tom Rix
  0 siblings, 0 replies; only message in thread
From: Tom Rix @ 2026-06-10 23:25 UTC (permalink / raw)
  To: git-commits

            A new commit has been pushed.

            Repo   : rpms/rocclr
            Branch : epel10
            Commit : c88f80a57e18e7127b543d4bf38a6c8a66aa370b
            Author : Tom Rix <Tom.Rix@amd.com>
            Date   : 2026-05-19T13:59:14-07:00
            Stats  : +112/-0 in 2 file(s)
            URL    : https://src.fedoraproject.org/rpms/rocclr/c/c88f80a57e18e7127b543d4bf38a6c8a66aa370b?branch=epel10

            Log:
            A smoke test for rocclr

Signed-off-by: Tom Rix <Tom.Rix@amd.com>

---
diff --git a/test.hip b/test.hip
new file mode 100644
index 0000000..fe16a74
--- /dev/null
+++ b/test.hip
@@ -0,0 +1,104 @@
+#include <iostream>
+#include <vector>
+#include <cmath>
+
+// Include the main HIP header
+#include <hip/hip_runtime.h>
+
+// --- 1. Define the Kernel ---
+// vector_add: element-wise sum of two int arrays, c[k] = a[k] + b[k].
+// '__global__' marks this as a kernel; main() launches it with <<<grid, block>>>.
+//   a, b : input arrays, read at indices below n
+//   c    : output array, written at indices below n
+//   n    : number of elements to process
+__global__ void vector_add(const int* a, const int* b, int* c, int n) {
+    // Global 1D index: the block's starting offset (blockIdx.x * blockDim.x)
+    // plus this thread's position inside the block (threadIdx.x).
+    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    // The launch may contain more threads than elements; threads whose index
+    // is at or past n have nothing to do.
+    if (idx >= n) {
+        return;
+    }
+    c[idx] = a[idx] + b[idx];
+}
+
+// --- 2. Main Program Execution ---
+// Adds two N-element vectors on the GPU and verifies every element on the host.
+// Returns 0 only when every HIP call succeeds and all results match, otherwise 1.
+int main() {
+    // --- Setup Parameters ---
+    const int N = 1024 * 1024; // Vector size (1024 * 1024 elements)
+    const auto bytes = N * sizeof(int); // Size of each vector in bytes
+    std::cout << "Starting HIP Vector Addition on " << N << " elements..." << std::endl;
+
+    // --- Host (CPU) Data ---
+    std::vector<int> h_a(N);
+    std::vector<int> h_b(N);
+    std::vector<int> h_c(N);
+    for (int i = 0; i < N; ++i) {
+        h_a[i] = i;
+        h_b[i] = i * 2;
+    }
+
+    // --- Device (GPU) Memory Pointers ---
+    // Null-initialized so cleanup stays safe when an allocation never happened.
+    int* d_a = nullptr;
+    int* d_b = nullptr;
+    int* d_c = nullptr;
+
+    // Releases the device buffers; results are ignored because this is best-effort teardown.
+    auto cleanup = [&]() { (void)hipFree(d_a); (void)hipFree(d_b); (void)hipFree(d_c); };
+
+    // Returns false on hipSuccess; otherwise prints the error, frees device memory, returns true.
+    auto failed = [&](hipError_t status, const char* what) {
+        if (status == hipSuccess) return false;
+        std::cerr << what << ": " << hipGetErrorString(status) << std::endl;
+        cleanup();
+        return true;
+    };
+
+    // --- Memory Allocation on Device ---
+    // Each allocation is checked on its own so an early failure cannot be masked by a later success.
+    if (failed(hipMalloc(reinterpret_cast<void**>(&d_a), bytes), "Error allocating device memory") ||
+        failed(hipMalloc(reinterpret_cast<void**>(&d_b), bytes), "Error allocating device memory") ||
+        failed(hipMalloc(reinterpret_cast<void**>(&d_c), bytes), "Error allocating device memory")) {
+        return 1;
+    }
+
+    // --- Data Transfer: Host -> Device ---
+    if (failed(hipMemcpy(d_a, h_a.data(), bytes, hipMemcpyHostToDevice), "Error copying input to device") ||
+        failed(hipMemcpy(d_b, h_b.data(), bytes, hipMemcpyHostToDevice), "Error copying input to device")) {
+        return 1;
+    }
+
+    // --- Kernel Launch ---
+    // One thread per element; the block count is rounded up so a partial last block is covered.
+    const int threadsPerBlock = 1024;
+    const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
+    vector_add<<<blocksPerGrid, threadsPerBlock>>>(d_a, d_b, d_c, N);
+
+    // Check the launch, then wait for the kernel and check its completion, then fetch the results.
+    if (failed(hipGetLastError(), "Kernel launch failed") ||
+        failed(hipDeviceSynchronize(), "Kernel execution failed") ||
+        failed(hipMemcpy(h_c.data(), d_c, bytes, hipMemcpyDeviceToHost), "Error copying results to host")) {
+        return 1;
+    }
+    cleanup();
+
+    // --- Verification ---
+    // Every element is compared so that a wrong result fails the smoke test.
+    for (int i = 0; i < N; ++i) {
+        if (h_c[i] != h_a[i] + h_b[i]) {
+            std::cerr << "Mismatch at C[" << i << "]: got " << h_c[i] << ", expected " << h_a[i] + h_b[i] << std::endl;
+            return 1;
+        }
+    }
+
+    std::cout << "\nComputation finished successfully." << std::endl;
+    std::cout << "First 5 results (C[i] = A[i] + B[i]):" << std::endl;
+    for (int i = 0; i < 5; ++i) {
+        std::cout << "C[" << i << "] = " << h_c[i] << " (Expected: " << i << " + " << i * 2 << ")" << std::endl;
+    }
+    return 0;
+}

diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..101664f
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,8 @@
+#! /usr/bin/bash
+
+gpu=`rocm_agent_enumerator`
+OUT=$(mktemp -d)
+/usr/lib64/rocm/llvm/bin/clang++ -v --offload-arch=${gpu} test.hip -lamdhip64 -o ${OUT}/test
+if [ -f ${OUT}/test ]; then
+    ${OUT}/test
+fi

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-06-10 23:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-10 23:25 [rpms/rocclr] epel10: A smoke test for rocclr Tom Rix

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox