Loop termination handling
Nowadays, LLVM, at least nominally, does not assume that loops must progress,
but requires mustprogress annotation for that.
But it seems like alive2 doesn't deal with that.
For example,
; ModuleID = '/tmp/test.ll'
source_filename = "/tmp/test.ll"
target triple = "x86_64"
declare void @escape_inner(i8, i8, i8, i1, i8) #0
declare void @escape_outer(i8, i8, i8, i1, i8) #0
define i8 @src(i8 %val, i8 %start, i8 %extraoffset) {
entry:
br label %loop
loop:
%iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
%nbits = add nsw i8 %iv, %extraoffset
%val.shifted = ashr i8 %val, %nbits
%val.shifted.iszero = icmp eq i8 %val.shifted, 0
%iv.next = add i8 %iv, 1
call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)
br i1 %val.shifted.iszero, label %end, label %loop
end:
%iv.res = phi i8 [ %iv, %loop ]
%nbits.res = phi i8 [ %nbits, %loop ]
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
%val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
%iv.next.res = phi i8 [ %iv.next, %loop ]
call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
ret i8 %iv.res
}
define i8 @tgt(i8 %val, i8 %start, i8 %extraoffset) #0 {
entry:
%val.numleadingzeros = call i8 @llvm.ctlz.i8(i8 %val, i1 false)
%val.numactivebits = sub nuw nsw i8 8, %val.numleadingzeros
%0 = sub i8 0, %extraoffset
%val.numactivebits.offset = add nsw i8 %val.numactivebits, %0
%iv.final = call i8 @llvm.smax.i8(i8 %val.numactivebits.offset, i8 %start)
%loop.backedgetakencount = sub nsw i8 %iv.final, %start
%loop.tripcount = add nuw nsw i8 %loop.backedgetakencount, 1
br label %loop
loop: ; preds = %loop, %entry
%loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
%loop.iv.next = add nuw nsw i8 %loop.iv, 1
%loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
%iv = add nsw i8 %loop.iv, %start
%nbits = add nsw i8 %iv, %extraoffset
%val.shifted = ashr i8 %val, %nbits
%iv.next = add i8 %iv, 1
call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %loop.ivcheck, i8 %iv.next)
br i1 %loop.ivcheck, label %end, label %loop
end: ; preds = %loop
%iv.res = phi i8 [ %iv.final, %loop ]
%nbits.res = phi i8 [ %nbits, %loop ]
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
%val.shifted.iszero.res = phi i1 [ %loop.ivcheck, %loop ]
%iv.next.res = phi i8 [ %iv.next, %loop ]
call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)
ret i8 %iv.res
}
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare i8 @llvm.ctlz.i8(i8, i1 immarg) #1
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare i8 @llvm.smax.i8(i8, i8) #1
attributes #0 = { "target-cpu"="core-avx2" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
presumably shouldn't be a valid refinement - the original loop is endless for negative %start,
while the new one is not.
Right, none of this stuff is supported yet. Neither non-termination of function calls. But I heard @grigoryfedyukovich will fix all of this soonish! 😁
Even assuming that alive currently only supports mustprogress loops,
i don't think it handles even them correctly.
E.g.:
target triple = "x86_64"
declare void @escape_inner(i8, i8, i1, i8) #0
declare void @escape_outer(i8, i8, i1, i8) #0
declare i8 @gen.i8() #0
define i8 @src(i8 %x, i8 %bit) {
entry:
%bitmask = shl i8 1, %bit
br label %loop
loop:
%x.curr = phi i8 [ %x, %entry ], [ %x.next, %loop ]
%x.curr.bitmasked = and i8 %x.curr, %bitmask
%x.curr.isbitunset = icmp eq i8 %x.curr.bitmasked, 0
%x.next = lshr i8 %x.curr, 1
call void @escape_inner(i8 %x.curr, i8 %x.curr.bitmasked, i1 %x.curr.isbitunset, i8 %x.next)
br i1 %x.curr.isbitunset, label %loop, label %end
end:
%x.curr.res = phi i8 [ %x.curr, %loop ]
%x.curr.bitmasked.res = phi i8 [ %x.curr.bitmasked, %loop ]
%x.curr.isbitunset.res = phi i1 [ %x.curr.isbitunset, %loop ]
%x.next.res = phi i8 [ %x.next, %loop ]
call void @escape_outer(i8 %x.curr.res, i8 %x.curr.bitmasked.res, i1 %x.curr.isbitunset.res, i8 %x.next.res)
ret i8 %x.curr.res
}
define i8 @tgt(i8 %x, i8 %bit) #0 {
entry:
%bitmask = shl i8 1, %bit
%bit.mask = sub i8 0, %bitmask
%x.masked = and i8 %x, %bit.mask
%x.masked.numleadingzeros = call i8 @llvm.ctlz.i8(i8 %x.masked, i1 false)
%x.masked.numactivebits = sub i8 42, %x.masked.numleadingzeros
%x.masked.leadingonepos = add i8 %x.masked.numactivebits, 0
%loop.backedgetakencount = sub i8 %bit, %x.masked.leadingonepos
%loop.tripcount = add i8 %loop.backedgetakencount, 1
%x.curr = lshr i8 %x, %loop.backedgetakencount
%x.next = lshr i8 %x.curr, 1
br label %loop
loop: ; preds = %loop, %entry
%loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
%0 = phi i8 [ %x, %entry ], [ %1, %loop ]
%x.curr.bitmasked = and i8 %0, %bitmask
%x.curr.isbitunset = icmp eq i8 %x.curr.bitmasked, 0
%1 = lshr i8 %0, 1
call void @escape_inner(i8 %0, i8 %x.curr.bitmasked, i1 %x.curr.isbitunset, i8 %1)
%loop.iv.next = add i8 %loop.iv, 1
%loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
br i1 %loop.ivcheck, label %end, label %loop
end: ; preds = %loop
%x.curr.res = phi i8 [ %x.curr, %loop ]
%x.curr.bitmasked.res = phi i8 [ %x.curr.bitmasked, %loop ]
%x.curr.isbitunset.res = phi i1 [ %x.curr.isbitunset, %loop ]
%x.next.res = phi i8 [ %x.next, %loop ]
call void @escape_outer(i8 %x.curr.res, i8 %x.curr.bitmasked.res, i1 %x.curr.isbitunset.res, i8 %x.next.res)
ret i8 %x.curr.res
}
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare i8 @llvm.ctlz.i8(i8, i1 immarg) #1
attributes #0 = { "target-cpu"="core-avx2" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
Surely that is not a valid refinement? This is causing (me, but likely everyone) quite a bit of pain..
FWIW, i'm essentially waiting on this & other related bugs being implemented before dealing with https://github.com/llvm/llvm-project/issues/51011
FWIW, i'm essentially waiting on this & other related bugs being implemented before dealing with llvm/llvm-project#51011
I'm sorry, I know this feature is useful, but it's quite hard to implement. It's not just an implementation issue; we need to understand the theory behind it. It's maybe weeks of work. The src function of your first example loops forever (because of the ashr), so we need a way to do refinement between either partial traces (without false-positives) or prove non-termination. There hasn't been much work around synthesizing ranking functions for bit-vectors is not very well developed. Grigory worked on this stuff in the recent past, maybe he can help.
Regarding your second example, I see it uses lshr rather than ashr. The loop terminates, right? Alive should be able to verify it properly I hope (given enough unrolling)? What's the issue? (I didn't read it in detail TBH).