# Pastebin GLrsszIo
shader: MESA_SHADER_COMPUTE
source_blake3: {0xfc5b2bd3, 0x3dea8f67, 0xb508fa10, 0x16485912, 0xeb430793, 0x7b04353e, 0x8709b25e, 0x1df3b158}
internal: false
workgroup_size: 1, 1, 1
stage: 5
next_stage: 0
num_ssbos: 1
subgroup_size: 0
bit_sizes_int: 0x20
separate_shader: true
writes_memory: true
ptr_size: 0
inputs: 0
outputs: 0
uniforms: 0
// Declarations: one restrict-qualified SSBO block variable and the entry point.
decl_var ssbo INTERP_MODE_NONE restrict block  (~0, 0, 3)
decl_function main (0 params)

// main: reads one 32-bit word at offset 0 of an SSBO, computes
//   umul_high(word >> 9, 0xff803fe1) >> 9
// and writes the 32-bit result at offset 4 of the same SSBO.
impl main {
    block b0:   // preds: 
    // Form a 64-bit address: scalar argument %24 is the low half, constant 0 the high half.
    32    %24 = @load_scalar_arg_amd (base=1, arg_upper_bound_u32_amd=0)
    32     %0 = load_const (0x00000000)
    64    %19 = pack_64_2x32_split %24, %0 (0x0)
    // Load four 32-bit words from that address; %21 is then used as the buffer
    // operand of both load_ssbo and store_ssbo (presumably the buffer descriptor).
    32x4  %21 = @load_smem_amd (%19, %0 (0x0)) (align_mul=16, align_offset=0)
    // Input word: SSBO offset 0.
    32     %5 = @load_ssbo (%21, %0 (0x0)) (access=none, align_mul=1073741824, align_offset=0)
    // NOTE(review): 0xff803fe1 (4286595041) equals ceil(2^41 / 513), so the
    // ushr 9 / umul_high / ushr 9 sequence below looks like a strength-reduced
    // unsigned division of (%5 >> 9) by 513 -- confirm against the original shader.
    // The multiply is unsigned: the constant must be treated as 4286595041, not -8372255.
    32    %30 = load_const (0xff803fe1 = -8372255 = 4286595041)
    32     %6 = load_const (0x00000009)
    32     %7 = ushr %5, %6 (0x9)
    32    %31 = umul_high %7, %30 (0xff803fe1)
    32    %33 = ushr %31, %6 (0x9)
    // Output word: SSBO offset 4 (only the x component is written).
    32    %25 = load_const (0x00000004)
                @store_ssbo (%33, %21, %25 (0x4)) (wrmask=x, access=none, align_mul=1073741824, align_offset=4)
                // succs: b1 
    block b1:
}

Compute Shader LLVM IR:

; ModuleID = 'mesa-shader'
; Module-level target description for the amdgcn-mesa-mesa3d triple.
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-mesa-mesa3d"

; External zero-length addrspace(3) array; it is not referenced by any function
; visible in this module dump.
@compute_lds = external addrspace(3) global [0 x i8], align 65536

; Compute-shader entry point.
;   %0 - 32-bit addrspace(6) pointer marked inreg; it is widened below and used
;        to fetch the <4 x i32> buffer resource.
;   %1 - <3 x i32> argument; never used in the body.
; Effect: loads one dword at buffer offset 0, computes
;   trunc((zext(word >> 9) * 4286595041) >> 41)
; and stores the result at buffer offset 4. The 64-bit multiply followed by
; lshr 41 is the high 32 bits of the unsigned 32x32 product, shifted right by 9.
define amdgpu_cs void @main(ptr addrspace(6) inreg noalias align 4 dereferenceable(18446744073709551615) %0, <3 x i32> %1) #0 {
main_body:
  ; Widen the 32-bit pointer to a 64-bit addrspace(4) pointer: element 0 (low
  ; half) is the pointer value, element 1 (high half) is 0.
  %2 = ptrtoint ptr addrspace(6) %0 to i32
  %3 = insertelement <2 x i32> <i32 poison, i32 0>, i32 %2, i64 0
  %4 = bitcast <2 x i32> %3 to i64
  %5 = inttoptr i64 %4 to ptr addrspace(4)
  ; Invariant load of the four dwords passed as the resource operand to both
  ; buffer intrinsics below.
  %6 = load <4 x i32>, ptr addrspace(4) %5, align 16, !invariant.load !0
  ; The intrinsics are typed f32; the value is bitcast to/from i32 and only
  ; integer arithmetic is performed on it.
  %7 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %6, i32 0, i32 0, i32 0) #4
  %8 = bitcast float %7 to <1 x i32>
  %9 = extractelement <1 x i32> %8, i64 0
  ; After the shift %10 has at most 23 significant bits (hence zext nneg).
  %10 = lshr i32 %9, 9
  %11 = zext nneg i32 %10 to i64
  ; Unsigned multiply by the positive 64-bit constant 4286595041 (0xff803fe1).
  %12 = mul nuw nsw i64 %11, 4286595041
  ; 41 = 32 (take the high half of the product) + 9 (the trailing ushr).
  %sum.shift = lshr i64 %12, 41
  %13 = trunc i64 %sum.shift to i32
  %14 = bitcast i32 %13 to float
  ; Store the result at offset 4 through the same resource.
  call void @llvm.amdgcn.raw.buffer.store.f32(float %14, <4 x i32> %6, i32 4, i32 0, i32 0) #4
  ret void
}

; NOTE(review): llvm.amdgcn.implicit.buffer.ptr is declared here but is not
; called by @main in this dump.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #1

; Buffer load intrinsic used by @main to read the input dword.
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg) #2

; Buffer store intrinsic used by @main to write the result dword.
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #3

; #0 is attached to @main; #1-#3 to the intrinsic declarations above; #4 to the
; two intrinsic call sites inside @main.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+DumpCode" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #2 = { nocallback nofree nosync nounwind willreturn memory(read) }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) }
attributes #4 = { nounwind }

; Empty metadata node referenced by !invariant.load in @main.
!0 = !{}

Compute Shader
disasm:
; Machine code generated for the compute shader. The hex words after ';' on each
; line appear to be the instruction encodings.
main:
BB0_0:
	; Zero s3 so that s[2:3] forms a 64-bit address with a zero upper half
	; (matches the pointer widening in the LLVM IR), then load four dwords
	; into s[0:3] for use as the buffer resource.
	s_mov_b32 s3, 0                                ; BE830080
	s_load_dwordx4 s[0:3], s[2:3], 0x0             ; C00A0001 00000000
	; Wait for the scalar load before s[0:3] is consumed.
	s_waitcnt lgkmcnt(0)                           ; BF8C007F
	; Input dword at buffer offset 0.
	buffer_load_dword v0, off, s[0:3], 0           ; E0500000 80000000
	s_waitcnt vmcnt(0)                             ; BF8C0F70
	v_lshrrev_b32_e32 v0, 9, v0                    ; 20000089
	; NOTE(review): likely miscompile. The NIR (umul_high) and the LLVM IR
	; (zext + mul i64 by 4286595041 + lshr 41) both require an UNSIGNED
	; 32x32 multiply-high by 0xff803fe1. v_mul_hi_i32_i24 is, per its mnemonic,
	; a SIGNED 24-bit multiply-high: only the low 24 bits of the literal
	; (0x803fe1, sign bit set) would be used, i.e. -8372255 instead of
	; 4286595041, which would give a different result for non-zero inputs.
	; Confirm against the AMD ISA manual and report to the backend.
	v_mul_hi_i32_i24_e32 v0, 0xff803fe1, v0        ; 0E0000FF FF803FE1
	v_lshrrev_b32_e32 v0, 9, v0                    ; 20000089
	; Result dword written at buffer offset 4.
	buffer_store_dword v0, off, s[0:3], 0 offset:4 ; E0700004 80000000
	s_endpgm                                       ; BF810000