/llvm-project/clang/test/OpenMP/tile_codegen_for_dependent.cpp

https://github.com/KhronosGroup/libclcxx · C++ · 193 lines · 10 code · 7 blank · 176 comment · 1 complexity · 9bb37ec3a67b8e72f96b3a4c01d21bc0 MD5 · raw file

  1. // Check code generation
  2. // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
  3. // Check same results after serialization round-trip
  4. // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
  5. // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
  6. // expected-no-diagnostics
  7. // The loop trip count used by #pragma omp for depends on code generated
  8. // by #pragma omp tile. Check that these PreInits are emitted before
  9. // the code generated by #pragma omp for.
  10. #ifndef HEADER
  11. #define HEADER
  12. // Placeholder for the loop body: an empty variadic function with C linkage, called from func() with the loop variables.
  13. extern "C" void body(...) {}
  14. // IR-LABEL: @func(
  15. // IR-NEXT: [[ENTRY:.*]]:
  16. // IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4
  17. // IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4
  18. // IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4
  19. // IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4
  20. // IR-NEXT: %[[TMP:.+]] = alloca i32, align 4
  21. // IR-NEXT: %[[I:.+]] = alloca i32, align 4
  22. // IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
  23. // IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
  24. // IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
  25. // IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
  26. // IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
  27. // IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
  28. // IR-NEXT: %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4
  29. // IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4
  30. // IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4
  31. // IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
  32. // IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
  33. // IR-NEXT: %[[DOTFLOOR_0_IV_I12:.+]] = alloca i32, align 4
  34. // IR-NEXT: %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4
  35. // IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
  36. // IR-NEXT: store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
  37. // IR-NEXT: store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
  38. // IR-NEXT: store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
  39. // IR-NEXT: %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
  40. // IR-NEXT: store i32 %[[TMP1]], i32* %[[I]], align 4
  41. // IR-NEXT: %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4
  42. // IR-NEXT: store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4
  43. // IR-NEXT: %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4
  44. // IR-NEXT: store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
  45. // IR-NEXT: %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
  46. // IR-NEXT: store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
  47. // IR-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
  48. // IR-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
  49. // IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
  50. // IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1
  51. // IR-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
  52. // IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
  53. // IR-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
  54. // IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
  55. // IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1
  56. // IR-NEXT: store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
  57. // IR-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
  58. // IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP9]], 1
  59. // IR-NEXT: store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
  60. // IR-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
  61. // IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP10]], -3
  62. // IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4
  63. // IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
  64. // IR-NEXT: store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
  65. // IR-NEXT: store i32 0, i32* %[[DOTFLOOR_0_IV_I]], align 4
  66. // IR-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
  67. // IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP11]]
  68. // IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
  69. // IR-EMPTY:
  70. // IR-NEXT: [[OMP_PRECOND_THEN]]:
  71. // IR-NEXT: store i32 0, i32* %[[DOTOMP_LB]], align 4
  72. // IR-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
  73. // IR-NEXT: store i32 %[[TMP12]], i32* %[[DOTOMP_UB]], align 4
  74. // IR-NEXT: store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
  75. // IR-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
  76. // IR-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
  77. // IR-NEXT: %[[TMP13:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
  78. // IR-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
  79. // IR-NEXT: %[[CMP13:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]]
  80. // IR-NEXT: br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
  81. // IR-EMPTY:
  82. // IR-NEXT: [[COND_TRUE]]:
  83. // IR-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
  84. // IR-NEXT: br label %[[COND_END:.+]]
  85. // IR-EMPTY:
  86. // IR-NEXT: [[COND_FALSE]]:
  87. // IR-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
  88. // IR-NEXT: br label %[[COND_END]]
  89. // IR-EMPTY:
  90. // IR-NEXT: [[COND_END]]:
  91. // IR-NEXT: %[[COND:.+]] = phi i32 [ %[[TMP15]], %[[COND_TRUE]] ], [ %[[TMP16]], %[[COND_FALSE]] ]
  92. // IR-NEXT: store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
  93. // IR-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
  94. // IR-NEXT: store i32 %[[TMP17]], i32* %[[DOTOMP_IV]], align 4
  95. // IR-NEXT: br label %[[OMP_INNER_FOR_COND:.+]]
  96. // IR-EMPTY:
  97. // IR-NEXT: [[OMP_INNER_FOR_COND]]:
  98. // IR-NEXT: %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
  99. // IR-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
  100. // IR-NEXT: %[[ADD14:.+]] = add i32 %[[TMP19]], 1
  101. // IR-NEXT: %[[CMP15:.+]] = icmp ult i32 %[[TMP18]], %[[ADD14]]
  102. // IR-NEXT: br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
  103. // IR-EMPTY:
  104. // IR-NEXT: [[OMP_INNER_FOR_BODY]]:
  105. // IR-NEXT: %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
  106. // IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP20]], 4
  107. // IR-NEXT: %[[ADD16:.+]] = add i32 0, %[[MUL]]
  108. // IR-NEXT: store i32 %[[ADD16]], i32* %[[DOTFLOOR_0_IV_I12]], align 4
  109. // IR-NEXT: %[[TMP21:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
  110. // IR-NEXT: store i32 %[[TMP21]], i32* %[[DOTTILE_0_IV_I]], align 4
  111. // IR-NEXT: br label %[[FOR_COND:.+]]
  112. // IR-EMPTY:
  113. // IR-NEXT: [[FOR_COND]]:
  114. // IR-NEXT: %[[TMP22:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
  115. // IR-NEXT: %[[TMP23:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
  116. // IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP23]], 1
  117. // IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
  118. // IR-NEXT: %[[ADD18:.+]] = add nsw i32 %[[TMP24]], 4
  119. // IR-NEXT: %[[CMP19:.+]] = icmp ult i32 %[[ADD17]], %[[ADD18]]
  120. // IR-NEXT: br i1 %[[CMP19]], label %[[COND_TRUE20:.+]], label %[[COND_FALSE22:.+]]
  121. // IR-EMPTY:
  122. // IR-NEXT: [[COND_TRUE20]]:
  123. // IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
  124. // IR-NEXT: %[[ADD21:.+]] = add i32 %[[TMP25]], 1
  125. // IR-NEXT: br label %[[COND_END24:.+]]
  126. // IR-EMPTY:
  127. // IR-NEXT: [[COND_FALSE22]]:
  128. // IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
  129. // IR-NEXT: %[[ADD23:.+]] = add nsw i32 %[[TMP26]], 4
  130. // IR-NEXT: br label %[[COND_END24]]
  131. // IR-EMPTY:
  132. // IR-NEXT: [[COND_END24]]:
  133. // IR-NEXT: %[[COND25:.+]] = phi i32 [ %[[ADD21]], %[[COND_TRUE20]] ], [ %[[ADD23]], %[[COND_FALSE22]] ]
  134. // IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP22]], %[[COND25]]
  135. // IR-NEXT: br i1 %[[CMP26]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
  136. // IR-EMPTY:
  137. // IR-NEXT: [[FOR_BODY]]:
  138. // IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
  139. // IR-NEXT: %[[TMP28:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
  140. // IR-NEXT: %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
  141. // IR-NEXT: %[[MUL27:.+]] = mul i32 %[[TMP28]], %[[TMP29]]
  142. // IR-NEXT: %[[ADD28:.+]] = add i32 %[[TMP27]], %[[MUL27]]
  143. // IR-NEXT: store i32 %[[ADD28]], i32* %[[I]], align 4
  144. // IR-NEXT: %[[TMP30:.+]] = load i32, i32* %[[START_ADDR]], align 4
  145. // IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[END_ADDR]], align 4
  146. // IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
  147. // IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[I]], align 4
  148. // IR-NEXT: call void (...) @body(i32 %[[TMP30]], i32 %[[TMP31]], i32 %[[TMP32]], i32 %[[TMP33]])
  149. // IR-NEXT: br label %[[FOR_INC:.+]]
  150. // IR-EMPTY:
  151. // IR-NEXT: [[FOR_INC]]:
  152. // IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
  153. // IR-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP34]], 1
  154. // IR-NEXT: store i32 %[[INC]], i32* %[[DOTTILE_0_IV_I]], align 4
  155. // IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
  156. // IR-EMPTY:
  157. // IR-NEXT: [[FOR_END]]:
  158. // IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]]
  159. // IR-EMPTY:
  160. // IR-NEXT: [[OMP_BODY_CONTINUE]]:
  161. // IR-NEXT: br label %[[OMP_INNER_FOR_INC:.+]]
  162. // IR-EMPTY:
  163. // IR-NEXT: [[OMP_INNER_FOR_INC]]:
  164. // IR-NEXT: %[[TMP35:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
  165. // IR-NEXT: %[[ADD29:.+]] = add i32 %[[TMP35]], 1
  166. // IR-NEXT: store i32 %[[ADD29]], i32* %[[DOTOMP_IV]], align 4
  167. // IR-NEXT: br label %[[OMP_INNER_FOR_COND]]
  168. // IR-EMPTY:
  169. // IR-NEXT: [[OMP_INNER_FOR_END]]:
  170. // IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]]
  171. // IR-EMPTY:
  172. // IR-NEXT: [[OMP_LOOP_EXIT]]:
  173. // IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
  174. // IR-NEXT: br label %[[OMP_PRECOND_END]]
  175. // IR-EMPTY:
  176. // IR-NEXT: [[OMP_PRECOND_END]]:
  177. // IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
  178. // IR-NEXT: ret void
  179. // IR-NEXT: }
  // Function under test. The loop is tiled with a tile size of 4, and the
  // worksharing directive written above the tile directive is applied to the
  // result of that tiling. start, end and step are runtime values, so the trip
  // count is computed in the emitted code rather than folded to a constant.
  // Each iteration forwards start, end, step and the current i to body().
  180. extern "C" void func(int start, int end, int step) {
  181. #pragma omp for
  182. #pragma omp tile sizes(4)
  183. for (int i = start; i < end; i += step)
  184. body(start, end, step, i);
  185. }
  186. #endif /* HEADER */