diff --git a/Tests/CudaOnly/SeparateCompilation/file4.cu b/Tests/CudaOnly/SeparateCompilation/file4.cu
index f2ef8e7ef4..f2e5e6ec91 100644
--- a/Tests/CudaOnly/SeparateCompilation/file4.cu
+++ b/Tests/CudaOnly/SeparateCompilation/file4.cu
@@ -4,17 +4,44 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static __global__ void file4_kernel(result_type& r, int x)
+// Stores file1_func(x) through r and also calls file2_func(x), whose
+// result is kept only in a local. r must point to device-writable memory.
+static __global__ void file4_kernel(result_type* r, int x)
 {
   // call static_func which is a method that is defined in the
   // static library that is always out of date
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
+// Runs file4_kernel on one thread with a managed result_type buffer and
+// returns the sum field it wrote. Returns -1 if the allocation, the launch,
+// or the kernel execution reports a CUDA error.
+// NOTE(review): -1 is an arbitrary failure value; confirm callers do not
+// rely on a specific result when CUDA is unavailable.
 EXPORT int file4_launch_kernel(int x)
 {
-  result_type r;
+  // Start from null so a failed allocation never leaves a garbage pointer.
+  result_type* r = nullptr;
+  if (cudaMallocManaged(&r, sizeof(result_type)) != cudaSuccess) {
+    return -1;
+  }
+
   file4_kernel<<<1, 1>>>(r, x);
-  return r.sum;
+
+  // Launch errors surface through cudaGetLastError; errors raised while the
+  // kernel runs surface when cudaDeviceSynchronize waits for it to finish.
+  cudaError_t err = cudaGetLastError();
+  if (err == cudaSuccess) {
+    err = cudaDeviceSynchronize();
+  }
+
+  // Only read the buffer when the kernel is known to have completed.
+  int sum = -1;
+  if (err == cudaSuccess) {
+    sum = r->sum;
+  }
+  cudaFree(r);
+
+  return sum;
 }
diff --git a/Tests/CudaOnly/SeparateCompilation/file5.cu b/Tests/CudaOnly/SeparateCompilation/file5.cu
index 9b2c92a1d6..007152160b 100644
--- a/Tests/CudaOnly/SeparateCompilation/file5.cu
+++ b/Tests/CudaOnly/SeparateCompilation/file5.cu
@@ -4,17 +4,44 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static __global__ void file5_kernel(result_type& r, int x)
+// Stores file1_func(x) through r and also calls file2_func(x), whose
+// result is kept only in a local. r must point to device-writable memory.
+static __global__ void file5_kernel(result_type* r, int x)
 {
   // call static_func which is a method that is defined in the
   // static library that is always out of date
-  r = file1_func(x);
+  *r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
 
+// Runs file5_kernel on one thread with a managed result_type buffer and
+// returns the sum field it wrote. Returns -1 if the allocation, the launch,
+// or the kernel execution reports a CUDA error.
+// NOTE(review): -1 is an arbitrary failure value; confirm callers do not
+// rely on a specific result when CUDA is unavailable.
 EXPORT int file5_launch_kernel(int x)
 {
-  result_type r;
+  // Start from null so a failed allocation never leaves a garbage pointer.
+  result_type* r = nullptr;
+  if (cudaMallocManaged(&r, sizeof(result_type)) != cudaSuccess) {
+    return -1;
+  }
+
   file5_kernel<<<1, 1>>>(r, x);
-  return r.sum;
+
+  // Launch errors surface through cudaGetLastError; errors raised while the
+  // kernel runs surface when cudaDeviceSynchronize waits for it to finish.
+  cudaError_t err = cudaGetLastError();
+  if (err == cudaSuccess) {
+    err = cudaDeviceSynchronize();
+  }
+
+  // Only read the buffer when the kernel is known to have completed.
+  int sum = -1;
+  if (err == cudaSuccess) {
+    sum = r->sum;
+  }
+  cudaFree(r);
+
+  return sum;
 }