# Review notes (comments only; the two dump lines below are unchanged).
#
# This paste is a whitespace-collapsed compiler dump of a single compute
# shader in three forms, in this order:
#   1. the NIR listing   ("shader: MESA_SHADER_COMPUTE" ... "impl main { ... }")
#   2. the LLVM IR       ("Compute Shader LLVM IR:", function @main)
#   3. the machine code  ("Compute Shader disasm:", label main / BB0_0)
# The break between the two physical lines falls in the middle of an LLVM
# instruction ("%8 = bitcast float %7" / "to <1 x i32>"), so the lines must be
# read as one continuous stream.
#
# Arithmetic as written at each stage:
#   NIR:    load 32 bits from the SSBO at offset 0, ushr by 9,
#           umul_high with 0xff803fe1, ushr by 9, store to the SSBO at offset 4.
#   LLVM:   lshr i32 by 9, zext to i64, mul by 4286595041 (= 0xff803fe1),
#           lshr i64 by 41, trunc to i32, store at offset 4.
#   disasm: v_lshrrev_b32 by 9, v_mul_hi_i32_i24 with literal 0xff803fe1,
#           v_lshrrev_b32 by 9, buffer_store_dword at offset:4.
#
# NOTE(review): the NIR multiply is the unsigned `umul_high` and the LLVM IR
# multiplies a zero-extended value by the full 32-bit constant 0xff803fe1, but
# the disassembly selects `v_mul_hi_i32_i24`. Going by the mnemonic this is a
# signed 24-bit multiply, and 0xff803fe1 does not fit in 24 bits, so the
# machine code may not compute the same value as the IR. Presumably this
# mismatch is why the dump was captured -- confirm the instruction semantics
# against the AMD ISA reference before relying on this.
#
# NOTE(review): in the LLVM IR, `insertelement <2 x i32> , i32 %2, i64 0` has
# no vector operand, which is not valid IR. The operand was probably lost when
# the text was pasted (a `<...>` constant may have been stripped as markup).
# The NIR counterpart is `pack_64_2x32_split %24, %0 (0x0)` and the disassembly
# sets s3 to 0, so the missing constant presumably had a zero upper element --
# TODO confirm against the original dump.
# Pastebin GLrsszIo shader: MESA_SHADER_COMPUTE source_blake3: {0xfc5b2bd3, 0x3dea8f67, 0xb508fa10, 0x16485912, 0xeb430793, 0x7b04353e, 0x8709b25e, 0x1df3b158} internal: false workgroup_size: 1, 1, 1 stage: 5 next_stage: 0 num_ssbos: 1 subgroup_size: 0 bit_sizes_int: 0x20 separate_shader: true writes_memory: true ptr_size: 0 inputs: 0 outputs: 0 uniforms: 0 decl_var ssbo INTERP_MODE_NONE restrict block (~0, 0, 3) decl_function main (0 params) impl main { block b0: // preds: 32 %24 = @load_scalar_arg_amd (base=1, arg_upper_bound_u32_amd=0) 32 %0 = load_const (0x00000000) 64 %19 = pack_64_2x32_split %24, %0 (0x0) 32x4 %21 = @load_smem_amd (%19, %0 (0x0)) (align_mul=16, align_offset=0) 32 %5 = @load_ssbo (%21, %0 (0x0)) (access=none, align_mul=1073741824, align_offset=0) 32 %30 = load_const (0xff803fe1 = -8372255 = 4286595041) 32 %6 = load_const (0x00000009) 32 %7 = ushr %5, %6 (0x9) 32 %31 = umul_high %7, %30 (0xff803fe1) 32 %33 = ushr %31, %6 (0x9) 32 %25 = load_const (0x00000004) @store_ssbo (%33, %21, %25 (0x4)) (wrmask=x, access=none, align_mul=1073741824, align_offset=4) // succs: b1 block b1: } Compute Shader LLVM IR: ; ModuleID = 'mesa-shader' target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-mesa-mesa3d" @compute_lds = external addrspace(3) global [0 x i8], align 65536 define amdgpu_cs void @main(ptr addrspace(6) inreg noalias align 4 dereferenceable(18446744073709551615) %0, <3 x i32> %1) #0 { main_body: %2 = ptrtoint ptr addrspace(6) %0 to i32 %3 = insertelement <2 x i32> , i32 %2, i64 0 %4 = bitcast <2 x i32> %3 to i64 %5 = inttoptr i64 %4 to ptr addrspace(4) %6 = load <4 x i32>, ptr addrspace(4) %5, align 16, !invariant.load !0 %7 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %6, i32 0, i32 0, i32 0) #4 %8 = bitcast float %7 
to <1 x i32> %9 = extractelement <1 x i32> %8, i64 0 %10 = lshr i32 %9, 9 %11 = zext nneg i32 %10 to i64 %12 = mul nuw nsw i64 %11, 4286595041 %sum.shift = lshr i64 %12, 41 %13 = trunc i64 %sum.shift to i32 %14 = bitcast i32 %13 to float call void @llvm.amdgcn.raw.buffer.store.f32(float %14, <4 x i32> %6, i32 4, i32 0, i32 0) #4 ret void } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare align 4 ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #1 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg) #2 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #3 attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+DumpCode" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #2 = { nocallback nofree nosync nounwind willreturn memory(read) } attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) } attributes #4 = { nounwind } !0 = !{} Compute Shader disasm: main: BB0_0: s_mov_b32 s3, 0 ; BE830080 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C00A0001 00000000 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, off, s[0:3], 0 ; E0500000 80000000 s_waitcnt vmcnt(0) ; BF8C0F70 v_lshrrev_b32_e32 v0, 9, v0 ; 20000089 v_mul_hi_i32_i24_e32 v0, 0xff803fe1, v0 ; 0E0000FF FF803FE1 v_lshrrev_b32_e32 v0, 9, v0 ; 20000089 buffer_store_dword v0, off, s[0:3], 0 offset:4 ; E0700004 80000000 s_endpgm ; BF810000