
Commit 46ed084

Add test
1 parent dda95d9 commit 46ed084

File tree: 1 file changed

Lines changed: 210 additions & 0 deletions
@@ -0,0 +1,210 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -loop-reduce %s -S -o - | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Tests where the loop termination condition is not generated by a compare.

; The call to get.active.lane.mask in the loop should use the postincrement
; value of %iv.
define void @lane_mask(ptr %dst, i64 %n) #0 {
; CHECK-LABEL: define void @lane_mask(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[VSCALEX4:%.*]] = shl i64 [[VSCALE]], 2
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IV]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
; CHECK-NEXT: tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr align 4 [[SCEVGEP]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[VSCALEX4]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[VSCALEX4]], [[IV]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP1]], i64 [[N]])
; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %vscalex4 = shl i64 %vscale, 2
  %active.lane.mask.entry = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %n)
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %active.lane.mask = phi <vscale x 4 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %loop ]
  %gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
  tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr %gep, i32 4, <vscale x 4 x i1> %active.lane.mask)
  %iv.next = add i64 %iv, %vscalex4
  %active.lane.mask.next = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %iv.next, i64 %n)
  %cond = extractelement <vscale x 4 x i1> %active.lane.mask.next, i64 0
  br i1 %cond, label %loop, label %exit

exit:
  ret void
}

; The store between the call and the branch should cause get.active.lane.mask to
; use a preincrement value.
; FIXME: We could use a postincrement value by moving the call and
; extractelement to after the store.
define void @lane_mask_not_last(ptr %dst, i64 %n) #0 {
; CHECK-LABEL: define void @lane_mask_not_last(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[VSCALEX4:%.*]] = shl i64 [[VSCALE]], 2
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[VSCALEX4]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[VSCALEX4]], [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IV]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP0]], i64 [[N]])
; CHECK-NEXT: tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr align 4 [[SCEVGEP]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %vscalex4 = shl i64 %vscale, 2
  %active.lane.mask.entry = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %n)
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %active.lane.mask = phi <vscale x 4 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %loop ]
  %gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
  %iv.next = add i64 %iv, %vscalex4
  %active.lane.mask.next = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %iv.next, i64 %n)
  tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> splat (i32 1), ptr %gep, i32 4, <vscale x 4 x i1> %active.lane.mask)
  %cond = extractelement <vscale x 4 x i1> %active.lane.mask.next, i64 0
  br i1 %cond, label %loop, label %exit

exit:
  ret void
}

; The call to cmp_fn in the loop should use the postincrement value of %iv.
define void @uses_cmp_fn(ptr %dst, i64 %n) {
; CHECK-LABEL: define void @uses_cmp_fn(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[LOOP]] ], [ [[DST]], %[[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[LOOP]] ], [ 1, %[[ENTRY]] ]
; CHECK-NEXT: store i32 0, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[COND:%.*]] = tail call i1 @cmp_fn(i64 [[LSR_IV]])
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
  store i32 0, ptr %gep, align 4
  %iv.next = add i64 %iv, 1
  %cond = tail call i1 @cmp_fn(i64 %iv.next)
  br i1 %cond, label %loop, label %exit

exit:
  ret void
}

; The store between the call and the branch should cause cmp_fn to use a
; preincrement value. We can't move the call after the store as the call could
; have side effects.
define void @uses_cmp_fn_not_last(ptr %dst, i64 %n) {
; CHECK-LABEL: define void @uses_cmp_fn_not_last(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[LOOP]] ], [ [[DST]], %[[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[LOOP]] ], [ 1, %[[ENTRY]] ]
; CHECK-NEXT: [[COND:%.*]] = tail call i1 @cmp_fn(i64 [[LSR_IV]])
; CHECK-NEXT: store i32 0, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
  %iv.next = add i64 %iv, 1
  %cond = tail call i1 @cmp_fn(i64 %iv.next)
  store i32 0, ptr %gep, align 4
  br i1 %cond, label %loop, label %exit

exit:
  ret void
}

; cmp2 will use a preincrement induction variable as it isn't directly the loop
; termination condition.
; FIXME: We could potentially handle this by examining the operands of the 'and'
; instruction.
define void @cmp_and(ptr %dst, i64 %n) {
; CHECK-LABEL: define void @cmp_and(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[LOOP]] ], [ [[DST]], %[[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[TMP0]], %[[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = load i64, ptr [[LSR_IV1]], align 8
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[VAL]], [[N]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[LSR_IV]], 0
; CHECK-NEXT: [[COND:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr inbounds nuw i32, ptr %dst, i64 %iv
  %val = load i64, ptr %gep, align 8
  %iv.next = add i64 %iv, 1
  %cmp1 = icmp ne i64 %val, %n
  %cmp2 = icmp ne i64 %iv.next, %n
  %cond = and i1 %cmp1, %cmp2
  br i1 %cond, label %loop, label %exit

exit:
  ret void
}


declare i64 @llvm.vscale.i64()
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
declare void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32>, ptr captures(none), i32 immarg, <vscale x 4 x i1>)
declare i1 @cmp_fn(i64)

attributes #0 = { "target-features"="+sve2" }
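
For reference, lit substitutes %s in the RUN line with the path of the test file itself, so the checks can be reproduced by hand along these lines (a sketch assuming opt and FileCheck from a local LLVM build are on PATH; the file name lsr-postinc-term-cond.ll is illustrative):

# Run LSR over the test input and verify its output against the CHECK lines.
opt -loop-reduce lsr-postinc-term-cond.ll -S -o - | FileCheck lsr-postinc-term-cond.ll

If the expected IR changes, the CHECK lines can be regenerated with utils/update_test_checks.py, as the NOTE at the top of the file indicates.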
