summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-08-24 19:11:52 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-08-24 19:11:52 +0000
commit: d0a275228ea6453b6d63cf29750abc54cda6ac44 (patch)
tree: 91c28d7b36d0c5f0ebdffdbaec84bdcdcc7505cb /libclc/amdgcn-amdhsa
parent: Use isTargetMachO instead of isTargetDarwin. (diff)
download: llvm-project-d0a275228ea6453b6d63cf29750abc54cda6ac44.tar.gz
llvm-project-d0a275228ea6453b6d63cf29750abc54cda6ac44.tar.bz2
llvm-project-d0a275228ea6453b6d63cf29750abc54cda6ac44.zip
amdgcn: Also correct get_local_size type for HSA
llvm-svn: 279656
Diffstat (limited to 'libclc/amdgcn-amdhsa')
-rw-r--r-- libclc/amdgcn-amdhsa/lib/workitem/get_local_size.ll | 13
1 files changed, 8 insertions, 5 deletions
diff --git a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.ll b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.ll
index c4df02792667..ff4b81118476 100644
--- a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.ll
+++ b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.ll
@@ -1,6 +1,6 @@
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
-define i32 @get_local_size(i32 %dim) #1 {
+define i64 @get_local_size(i32 %dim) #1 {
%dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
%xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 1
@@ -13,19 +13,22 @@ define i32 @get_local_size(i32 %dim) #1 {
x_dim:
%x_size = and i32 %xy_size, 65535
- ret i32 %x_size
+ %x_size.ext = zext i32 %x_size to i64
+ ret i64 %x_size.ext
y_dim:
%y_size = lshr i32 %xy_size, 16
- ret i32 %y_size
+ %y_size.ext = zext i32 %y_size to i64
+ ret i64 %y_size.ext
z_dim:
%z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 2
%z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1
- ret i32 %z_size
+ %z_size.ext = zext i32 %z_size to i64
+ ret i64 %z_size.ext
default:
- ret i32 1
+ ret i64 1
}
attributes #0 = { nounwind readnone }