tree-vect-loop-manip.c

  1. /* Vectorizer Specific Loop Manipulations
  2. Copyright (C) 2003-2015 Free Software Foundation, Inc.
  3. Contributed by Dorit Naishlos <dorit@il.ibm.com>
  4. and Ira Rosen <irar@il.ibm.com>
  5. This file is part of GCC.
  6. GCC is free software; you can redistribute it and/or modify it under
  7. the terms of the GNU General Public License as published by the Free
  8. Software Foundation; either version 3, or (at your option) any later
  9. version.
  10. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  13. for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with GCC; see the file COPYING3. If not see
  16. <http://www.gnu.org/licenses/>. */
  17. #include "config.h"
  18. #include "system.h"
  19. #include "coretypes.h"
  20. #include "dumpfile.h"
  21. #include "tm.h"
  22. #include "hash-set.h"
  23. #include "machmode.h"
  24. #include "vec.h"
  25. #include "double-int.h"
  26. #include "input.h"
  27. #include "alias.h"
  28. #include "symtab.h"
  29. #include "wide-int.h"
  30. #include "inchash.h"
  31. #include "tree.h"
  32. #include "fold-const.h"
  33. #include "predict.h"
  34. #include "hard-reg-set.h"
  35. #include "input.h"
  36. #include "function.h"
  37. #include "dominance.h"
  38. #include "cfg.h"
  39. #include "cfganal.h"
  40. #include "basic-block.h"
  41. #include "gimple-pretty-print.h"
  42. #include "tree-ssa-alias.h"
  43. #include "internal-fn.h"
  44. #include "gimple-expr.h"
  45. #include "is-a.h"
  46. #include "gimple.h"
  47. #include "gimplify.h"
  48. #include "gimple-iterator.h"
  49. #include "gimplify-me.h"
  50. #include "gimple-ssa.h"
  51. #include "tree-cfg.h"
  52. #include "tree-phinodes.h"
  53. #include "ssa-iterators.h"
  54. #include "stringpool.h"
  55. #include "tree-ssanames.h"
  56. #include "tree-ssa-loop-manip.h"
  57. #include "tree-into-ssa.h"
  58. #include "tree-ssa.h"
  59. #include "tree-pass.h"
  60. #include "cfgloop.h"
  61. #include "diagnostic-core.h"
  62. #include "tree-scalar-evolution.h"
  63. #include "tree-vectorizer.h"
  64. #include "langhooks.h"
  65. /*************************************************************************
  66. Simple Loop Peeling Utilities
  67. Utilities to support loop peeling for vectorization purposes.
  68. *************************************************************************/
  69. /* Renames the use *OP_P. */
  70. static void
  71. rename_use_op (use_operand_p op_p)
  72. {
  73. tree new_name;
  74. if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  75. return;
  76. new_name = get_current_def (USE_FROM_PTR (op_p));
  77. /* Something defined outside of the loop. */
  78. if (!new_name)
  79. return;
  80. /* An ordinary ssa name defined in the loop. */
  81. SET_USE (op_p, new_name);
  82. }
  83. /* Renames the variables in basic block BB. */
  84. static void
  85. rename_variables_in_bb (basic_block bb)
  86. {
  87. gimple stmt;
  88. use_operand_p use_p;
  89. ssa_op_iter iter;
  90. edge e;
  91. edge_iterator ei;
  92. struct loop *loop = bb->loop_father;
  93. for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
  94. gsi_next (&gsi))
  95. {
  96. stmt = gsi_stmt (gsi);
  97. FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  98. rename_use_op (use_p);
  99. }
  100. FOR_EACH_EDGE (e, ei, bb->preds)
  101. {
  102. if (!flow_bb_inside_loop_p (loop, e->src))
  103. continue;
  104. for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
  105. gsi_next (&gsi))
  106. rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
  107. }
  108. }
  109. typedef struct
  110. {
  111. tree from, to;
  112. basic_block bb;
  113. } adjust_info;
  114. /* A stack of values to be adjusted in debug stmts. We have to
  115. process them LIFO, so that the closest substitution applies. If we
  116. processed them FIFO, without the stack, we might substitute uses
  117. with a PHI DEF that would soon become non-dominant, and when we got
  118. to the suitable one, it wouldn't have anything to substitute any
  119. more. */
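/* An illustrative sketch (the SSA names are hypothetical): suppose we
   push the adjustment x_1 -> x_7 (a PHI in bb M1) and later
   x_1 -> x_9 (a PHI in bb M2, dominated by M1).  Processing LIFO
   rewrites the uses dominated by M2 to x_9 first, and the remaining
   ones to x_7.  Processing FIFO would rewrite all uses to x_7,
   including those where x_7 is about to stop being the dominating
   definition, and by the time the x_9 adjustment is processed no
   uses of x_1 would be left to substitute.  */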
  120. static vec<adjust_info, va_heap> adjust_vec;
  121. /* Adjust any debug stmts that referenced AI->from values to use the
  122. loop-closed AI->to, if the references are dominated by AI->bb and
  123. not by the definition of AI->from. */
  124. static void
  125. adjust_debug_stmts_now (adjust_info *ai)
  126. {
  127. basic_block bbphi = ai->bb;
  128. tree orig_def = ai->from;
  129. tree new_def = ai->to;
  130. imm_use_iterator imm_iter;
  131. gimple stmt;
  132. basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
  133. gcc_assert (dom_info_available_p (CDI_DOMINATORS));
  134. /* Adjust any debug stmts that held onto non-loop-closed
  135. references. */
  136. FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
  137. {
  138. use_operand_p use_p;
  139. basic_block bbuse;
  140. if (!is_gimple_debug (stmt))
  141. continue;
  142. gcc_assert (gimple_debug_bind_p (stmt));
  143. bbuse = gimple_bb (stmt);
  144. if ((bbuse == bbphi
  145. || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
  146. && !(bbuse == bbdef
  147. || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
  148. {
  149. if (new_def)
  150. FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
  151. SET_USE (use_p, new_def);
  152. else
  153. {
  154. gimple_debug_bind_reset_value (stmt);
  155. update_stmt (stmt);
  156. }
  157. }
  158. }
  159. }
  160. /* Adjust debug stmts as scheduled before. */
  161. static void
  162. adjust_vec_debug_stmts (void)
  163. {
  164. if (!MAY_HAVE_DEBUG_STMTS)
  165. return;
  166. gcc_assert (adjust_vec.exists ());
  167. while (!adjust_vec.is_empty ())
  168. {
  169. adjust_debug_stmts_now (&adjust_vec.last ());
  170. adjust_vec.pop ();
  171. }
  172. adjust_vec.release ();
  173. }
  174. /* Adjust any debug stmts that referenced FROM values to use the
  175. loop-closed TO, if the references are dominated by BB and not by
  176. the definition of FROM. If adjust_vec is non-NULL, adjustments
  177. will be postponed until adjust_vec_debug_stmts is called. */
  178. static void
  179. adjust_debug_stmts (tree from, tree to, basic_block bb)
  180. {
  181. adjust_info ai;
  182. if (MAY_HAVE_DEBUG_STMTS
  183. && TREE_CODE (from) == SSA_NAME
  184. && ! SSA_NAME_IS_DEFAULT_DEF (from)
  185. && ! virtual_operand_p (from))
  186. {
  187. ai.from = from;
  188. ai.to = to;
  189. ai.bb = bb;
  190. if (adjust_vec.exists ())
  191. adjust_vec.safe_push (ai);
  192. else
  193. adjust_debug_stmts_now (&ai);
  194. }
  195. }
  196. /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
  197. to adjust any debug stmts that referenced the old phi arg,
  198. presumably non-loop-closed references left over from other
  199. transformations. */
  200. static void
  201. adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
  202. {
  203. tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
  204. SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
  205. if (MAY_HAVE_DEBUG_STMTS)
  206. adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
  207. gimple_bb (update_phi));
  208. }
  209. /* Update PHI nodes for a guard of the LOOP.
  210. Input:
  211. - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
  212. controls whether LOOP is to be executed. GUARD_EDGE is the edge that
  213. originates from the guard-bb, skips LOOP and reaches the (unique) exit
  214. bb of LOOP. This loop-exit-bb is an empty bb with one successor.
  215. We denote this bb NEW_MERGE_BB because before the guard code was added
  216. it had a single predecessor (the LOOP header), and now it became a merge
  217. point of two paths - the path that ends with the LOOP exit-edge, and
  218. the path that ends with GUARD_EDGE.
  219. - NEW_EXIT_BB: New basic block that is added by this function between LOOP
  220. and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
  221. ===> The CFG before the guard-code was added:
  222. LOOP_header_bb:
  223. loop_body
  224. if (exit_loop) goto update_bb
  225. else goto LOOP_header_bb
  226. update_bb:
  227. ==> The CFG after the guard-code was added:
  228. guard_bb:
  229. if (LOOP_guard_condition) goto new_merge_bb
  230. else goto LOOP_header_bb
  231. LOOP_header_bb:
  232. loop_body
  233. if (exit_loop_condition) goto new_merge_bb
  234. else goto LOOP_header_bb
  235. new_merge_bb:
  236. goto update_bb
  237. update_bb:
  238. ==> The CFG after this function:
  239. guard_bb:
  240. if (LOOP_guard_condition) goto new_merge_bb
  241. else goto LOOP_header_bb
  242. LOOP_header_bb:
  243. loop_body
  244. if (exit_loop_condition) goto new_exit_bb
  245. else goto LOOP_header_bb
  246. new_exit_bb:
  247. new_merge_bb:
  248. goto update_bb
  249. update_bb:
  250. This function:
  251. 1. creates and updates the relevant phi nodes to account for the new
  252. incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
  253. 1.1. Create phi nodes at NEW_MERGE_BB.
  254. 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
  255. UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB was added.
  256. 2. preserves loop-closed-ssa-form by creating the required phi nodes
  257. at the exit of LOOP (i.e., in NEW_EXIT_BB).
  258. There are two flavors to this function:
  259. slpeel_update_phi_nodes_for_guard1:
  260. Here the guard controls whether we enter or skip LOOP, where LOOP is a
  261. prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
  262. for variables that have phis in the loop header.
  263. slpeel_update_phi_nodes_for_guard2:
  264. Here the guard controls whether we enter or skip LOOP, where LOOP is an
  265. epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
  266. for variables that have phis in the loop exit.
  267. I.e., the overall structure is:
  268. loop1_preheader_bb:
  269. guard1 (goto loop1/merge1_bb)
  270. loop1
  271. loop1_exit_bb:
  272. guard2 (goto merge1_bb/merge2_bb)
  273. merge1_bb
  274. loop2
  275. loop2_exit_bb
  276. merge2_bb
  277. next_bb
  278. slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
  279. loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
  280. that have phis in loop1->header).
  281. slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
  282. loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
  283. that have phis in next_bb). It also adds some of these phis to
  284. loop1_exit_bb.
  285. slpeel_update_phi_nodes_for_guard1 is always called before
  286. slpeel_update_phi_nodes_for_guard2. They are both needed in order
  287. to create correct data-flow and loop-closed-ssa-form.
  288. Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
  289. that change between iterations of a loop (and therefore have a phi-node
  290. at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
  291. phis for variables that are used out of the loop (and therefore have
  292. loop-closed exit phis). Some variables may be both updated between
  293. iterations and used after the loop. This is why in loop1_exit_bb we
  294. may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
  295. and exit phis (created by slpeel_update_phi_nodes_for_guard2).
  296. - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
  297. an original loop, i.e., we have:
  298. orig_loop
  299. guard_bb (goto LOOP/new_merge)
  300. new_loop <-- LOOP
  301. new_exit
  302. new_merge
  303. next_bb
  304. If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
  305. have:
  306. new_loop
  307. guard_bb (goto LOOP/new_merge)
  308. orig_loop <-- LOOP
  309. new_exit
  310. new_merge
  311. next_bb
  312. The SSA names defined in the original loop have a current
  313. reaching definition that records the corresponding new
  314. ssa-name used in the new duplicated loop copy.
  315. */
  316. /* Function slpeel_update_phi_nodes_for_guard1
  317. Input:
  318. - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
  319. - DEFS - a bitmap of ssa names to mark new names for which we recorded
  320. information.
  321. In the context of the overall structure, we have:
  322. loop1_preheader_bb:
  323. guard1 (goto loop1/merge1_bb)
  324. LOOP-> loop1
  325. loop1_exit_bb:
  326. guard2 (goto merge1_bb/merge2_bb)
  327. merge1_bb
  328. loop2
  329. loop2_exit_bb
  330. merge2_bb
  331. next_bb
  332. For each name updated between loop iterations (i.e., for each name that has
  333. an entry (loop-header) phi in LOOP) we create a new phi in:
  334. 1. merge1_bb (to account for the edge from guard1)
  335. 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
  336. */
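/* An illustrative example (hypothetical SSA names): if LOOP's header
   has x_1 = PHI <x_0 (preheader), x_2 (latch)>, this function creates
   x_m = PHI <x_2 (new_exit_e), x_0 (guard_edge)> in NEW_MERGE_BB and
   x_e = PHI <x_2 (loop exit)> in NEW_EXIT_BB, and then makes the
   NEW_MERGE_BB phi take x_e rather than x_2 on new_exit_e.  */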
  337. static void
  338. slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
  339. bool is_new_loop, basic_block *new_exit_bb)
  340. {
  341. gphi *orig_phi, *new_phi;
  342. gphi *update_phi, *update_phi2;
  343. tree guard_arg, loop_arg;
  344. basic_block new_merge_bb = guard_edge->dest;
  345. edge e = EDGE_SUCC (new_merge_bb, 0);
  346. basic_block update_bb = e->dest;
  347. basic_block orig_bb = loop->header;
  348. edge new_exit_e;
  349. tree current_new_name;
  350. gphi_iterator gsi_orig, gsi_update;
  351. /* Create new bb between loop and new_merge_bb. */
  352. *new_exit_bb = split_edge (single_exit (loop));
  353. new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
  354. for (gsi_orig = gsi_start_phis (orig_bb),
  355. gsi_update = gsi_start_phis (update_bb);
  356. !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
  357. gsi_next (&gsi_orig), gsi_next (&gsi_update))
  358. {
  359. source_location loop_locus, guard_locus;
  360. tree new_res;
  361. orig_phi = gsi_orig.phi ();
  362. update_phi = gsi_update.phi ();
  363. /** 1. Handle new-merge-point phis **/
  364. /* 1.1. Generate new phi node in NEW_MERGE_BB: */
  365. new_res = copy_ssa_name (PHI_RESULT (orig_phi));
  366. new_phi = create_phi_node (new_res, new_merge_bb);
  367. /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
  368. of LOOP. Set the two phi args in NEW_PHI for these edges: */
  369. loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
  370. loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
  371. EDGE_SUCC (loop->latch,
  372. 0));
  373. guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
  374. guard_locus
  375. = gimple_phi_arg_location_from_edge (orig_phi,
  376. loop_preheader_edge (loop));
  377. add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
  378. add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
  379. /* 1.3. Update phi in successor block. */
  380. gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
  381. || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
  382. adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
  383. update_phi2 = new_phi;
  384. /** 2. Handle loop-closed-ssa-form phis **/
  385. if (virtual_operand_p (PHI_RESULT (orig_phi)))
  386. continue;
  387. /* 2.1. Generate new phi node in NEW_EXIT_BB: */
  388. new_res = copy_ssa_name (PHI_RESULT (orig_phi));
  389. new_phi = create_phi_node (new_res, *new_exit_bb);
  390. /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
  391. add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
  392. /* 2.3. Update phi in successor of NEW_EXIT_BB: */
  393. gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
  394. adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
  395. PHI_RESULT (new_phi));
  396. /* 2.4. Record the newly created name with set_current_def.
  397. We want to find a name such that
  398. name = get_current_def (orig_loop_name)
  399. and to set its current definition as follows:
  400. set_current_def (name, new_phi_name)
  401. If LOOP is a new loop then loop_arg is already the name we're
  402. looking for. If LOOP is the original loop, then loop_arg is
  403. the orig_loop_name and the relevant name is recorded in its
  404. current reaching definition. */
  405. if (is_new_loop)
  406. current_new_name = loop_arg;
  407. else
  408. {
  409. current_new_name = get_current_def (loop_arg);
  410. /* current_def is not available only if the variable does not
  411. change inside the loop, in which case we also don't care
  412. about recording a current_def for it because we won't be
  413. trying to create loop-exit-phis for it. */
  414. if (!current_new_name)
  415. continue;
  416. }
  417. tree new_name = get_current_def (current_new_name);
  418. /* Because of peeled_chrec optimization it is possible that we have
  419. set this earlier. Verify the PHI has the same value. */
  420. if (new_name)
  421. {
  422. gimple phi = SSA_NAME_DEF_STMT (new_name);
  423. gcc_assert (gimple_code (phi) == GIMPLE_PHI
  424. && gimple_bb (phi) == *new_exit_bb
  425. && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
  426. == loop_arg));
  427. continue;
  428. }
  429. set_current_def (current_new_name, PHI_RESULT (new_phi));
  430. }
  431. }
  432. /* Function slpeel_update_phi_nodes_for_guard2
  433. Input:
  434. - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
  435. In the context of the overall structure, we have:
  436. loop1_preheader_bb:
  437. guard1 (goto loop1/merge1_bb)
  438. loop1
  439. loop1_exit_bb:
  440. guard2 (goto merge1_bb/merge2_bb)
  441. merge1_bb
  442. LOOP-> loop2
  443. loop2_exit_bb
  444. merge2_bb
  445. next_bb
  446. For each name used outside the loop (i.e., for each name that has an exit
  447. phi in next_bb) we create a new phi in:
  448. 1. merge2_bb (to account for the edge from guard_bb)
  449. 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
  450. 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
  451. if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
  452. */
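/* An illustrative example (hypothetical SSA names): if next_bb has the
   exit phi x_3 = PHI <x_2 (e)> and the loop copy recorded x_5 as the
   current_def of x_2, then for a new LOOP this function creates
   x_m = PHI <x_5 (new_exit_e), x_2 (guard_edge)> in merge2_bb and
   x_e = PHI <x_5 (loop exit)> in loop2_exit_bb, and makes the next_bb
   phi use x_m.  */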
  453. static void
  454. slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
  455. bool is_new_loop, basic_block *new_exit_bb)
  456. {
  457. gphi *orig_phi, *new_phi;
  458. gphi *update_phi, *update_phi2;
  459. tree guard_arg, loop_arg;
  460. basic_block new_merge_bb = guard_edge->dest;
  461. edge e = EDGE_SUCC (new_merge_bb, 0);
  462. basic_block update_bb = e->dest;
  463. edge new_exit_e;
  464. tree orig_def, orig_def_new_name;
  465. tree new_name, new_name2;
  466. tree arg;
  467. gphi_iterator gsi;
  468. /* Create new bb between loop and new_merge_bb. */
  469. *new_exit_bb = split_edge (single_exit (loop));
  470. new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
  471. for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
  472. {
  473. tree new_res;
  474. update_phi = gsi.phi ();
  475. orig_phi = update_phi;
  476. orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
  477. /* This loop-closed-phi actually doesn't represent a use
  478. out of the loop - the phi arg is a constant. */
  479. if (TREE_CODE (orig_def) != SSA_NAME)
  480. continue;
  481. orig_def_new_name = get_current_def (orig_def);
  482. arg = NULL_TREE;
  483. /** 1. Handle new-merge-point phis **/
  484. /* 1.1. Generate new phi node in NEW_MERGE_BB: */
  485. new_res = copy_ssa_name (PHI_RESULT (orig_phi));
  486. new_phi = create_phi_node (new_res, new_merge_bb);
  487. /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
  488. of LOOP. Set the two PHI args in NEW_PHI for these edges: */
  489. new_name = orig_def;
  490. new_name2 = NULL_TREE;
  491. if (orig_def_new_name)
  492. {
  493. new_name = orig_def_new_name;
  494. /* Some variables have both loop-entry-phis and loop-exit-phis.
  495. Such variables were given yet newer names by phis placed in
  496. guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
  497. new_name2 = get_current_def (get_current_def (orig_name)). */
  498. new_name2 = get_current_def (new_name);
  499. }
  500. if (is_new_loop)
  501. {
  502. guard_arg = orig_def;
  503. loop_arg = new_name;
  504. }
  505. else
  506. {
  507. guard_arg = new_name;
  508. loop_arg = orig_def;
  509. }
  510. if (new_name2)
  511. guard_arg = new_name2;
  512. add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
  513. add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
  514. /* 1.3. Update phi in successor block. */
  515. gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
  516. adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
  517. update_phi2 = new_phi;
  518. /** 2. Handle loop-closed-ssa-form phis **/
  519. /* 2.1. Generate new phi node in NEW_EXIT_BB: */
  520. new_res = copy_ssa_name (PHI_RESULT (orig_phi));
  521. new_phi = create_phi_node (new_res, *new_exit_bb);
  522. /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
  523. add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
  524. /* 2.3. Update phi in successor of NEW_EXIT_BB: */
  525. gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
  526. adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
  527. PHI_RESULT (new_phi));
  528. /** 3. Handle loop-closed-ssa-form phis for first loop **/
  529. /* 3.1. Find the relevant names that need an exit-phi in
  530. GUARD_BB, i.e. names for which
  531. slpeel_update_phi_nodes_for_guard1 had not already created a
  532. phi node. This is the case for names that are used outside
  533. the loop (and therefore need an exit phi) but are not updated
  534. across loop iterations (and therefore don't have a
  535. loop-header-phi).
  536. slpeel_update_phi_nodes_for_guard1 is responsible for
  537. creating loop-exit phis in GUARD_BB for names that have a
  538. loop-header-phi. When such a phi is created we also record
  539. the new name in its current definition. If this new name
  540. exists, then guard_arg was set to this new name (see 1.2
  541. above). Therefore, if guard_arg is not this new name, this
  542. is an indication that an exit-phi in GUARD_BB was not yet
  543. created, so we take care of it here. */
  544. if (guard_arg == new_name2)
  545. continue;
  546. arg = guard_arg;
  547. /* 3.2. Generate new phi node in GUARD_BB: */
  548. new_res = copy_ssa_name (PHI_RESULT (orig_phi));
  549. new_phi = create_phi_node (new_res, guard_edge->src);
  550. /* 3.3. GUARD_BB has one incoming edge: */
  551. gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
  552. add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
  553. UNKNOWN_LOCATION);
  554. /* 3.4. Update phi in successor of GUARD_BB: */
  555. gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
  556. == guard_arg);
  557. adjust_phi_and_debug_stmts (update_phi2, guard_edge,
  558. PHI_RESULT (new_phi));
  559. }
  560. }
  561. /* Make the LOOP iterate NITERS times. This is done by adding a new IV
  562. that starts at zero, increases by one, and has NITERS as its limit.
  563. Assumption: the exit-condition of LOOP is the last stmt in the loop. */
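/* For illustration (hypothetical SSA names), assuming the exit edge is
   taken when the condition is true, the rewritten exit test looks like:
     ivtmp_1 = PHI <0 (preheader), ivtmp_2 (latch)>
     ...
     ivtmp_2 = ivtmp_1 + 1;
     if (ivtmp_2 >= NITERS) goto exit_bb; else goto latch;  */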
  564. void
  565. slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
  566. {
  567. tree indx_before_incr, indx_after_incr;
  568. gcond *cond_stmt;
  569. gcond *orig_cond;
  570. edge exit_edge = single_exit (loop);
  571. gimple_stmt_iterator loop_cond_gsi;
  572. gimple_stmt_iterator incr_gsi;
  573. bool insert_after;
  574. tree init = build_int_cst (TREE_TYPE (niters), 0);
  575. tree step = build_int_cst (TREE_TYPE (niters), 1);
  576. source_location loop_loc;
  577. enum tree_code code;
  578. orig_cond = get_loop_exit_condition (loop);
  579. gcc_assert (orig_cond);
  580. loop_cond_gsi = gsi_for_stmt (orig_cond);
  581. standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  582. create_iv (init, step, NULL_TREE, loop,
  583. &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
  584. indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
  585. true, NULL_TREE, true,
  586. GSI_SAME_STMT);
  587. niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
  588. true, GSI_SAME_STMT);
  589. code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  590. cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
  591. NULL_TREE);
  592. gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
  593. /* Remove old loop exit test: */
  594. gsi_remove (&loop_cond_gsi, true);
  595. free_stmt_vec_info (orig_cond);
  596. loop_loc = find_loop_location (loop);
  597. if (dump_enabled_p ())
  598. {
  599. if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
  600. dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
  601. LOCATION_LINE (loop_loc));
  602. dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
  603. dump_printf (MSG_NOTE, "\n");
  604. }
  605. loop->nb_iterations = niters;
  606. }
  607. /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
  608. For each pair of PHI arguments in FROM->dest and TO->dest taken
  609. from those edges, ensure that the TO->dest argument has its
  610. current_def set to that of the FROM->dest argument. */
  611. static void
  612. slpeel_duplicate_current_defs_from_edges (edge from, edge to)
  613. {
  614. gimple_stmt_iterator gsi_from, gsi_to;
  615. for (gsi_from = gsi_start_phis (from->dest),
  616. gsi_to = gsi_start_phis (to->dest);
  617. !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
  618. gsi_next (&gsi_from), gsi_next (&gsi_to))
  619. {
  620. gimple from_phi = gsi_stmt (gsi_from);
  621. gimple to_phi = gsi_stmt (gsi_to);
  622. tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
  623. tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
  624. if (TREE_CODE (from_arg) == SSA_NAME
  625. && TREE_CODE (to_arg) == SSA_NAME
  626. && get_current_def (to_arg) == NULL_TREE)
  627. set_current_def (to_arg, get_current_def (from_arg));
  628. }
  629. }
  630. /* Given LOOP this function generates a new copy of it and puts it
  631. on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
  632. non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
  633. basic blocks from SCALAR_LOOP instead of LOOP, but to either the
  634. entry or exit of LOOP. */
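/* Roughly, the two placements look like (a sketch; E is the edge
   passed in):
     at-exit:   preheader -> LOOP -> forwarder -> new_loop -> exit_dest
     at-entry:  entry -> new_loop -> forwarder -> LOOP -> exit
   where the surviving forwarder block serves as the pre-header of the
   second of the two loops.  */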
  635. struct loop *
  636. slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
  637. struct loop *scalar_loop, edge e)
  638. {
  639. struct loop *new_loop;
  640. basic_block *new_bbs, *bbs;
  641. bool at_exit;
  642. bool was_imm_dom;
  643. basic_block exit_dest;
  644. edge exit, new_exit;
  645. exit = single_exit (loop);
  646. at_exit = (e == exit);
  647. if (!at_exit && e != loop_preheader_edge (loop))
  648. return NULL;
  649. if (scalar_loop == NULL)
  650. scalar_loop = loop;
  651. bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  652. get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
  653. /* Check whether duplication is possible. */
  654. if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
  655. {
  656. free (bbs);
  657. return NULL;
  658. }
  659. /* Generate new loop structure. */
  660. new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  661. duplicate_subloops (scalar_loop, new_loop);
  662. exit_dest = exit->dest;
  663. was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
  664. exit_dest) == loop->header ?
  665. true : false);
  666. /* Also copy the pre-header, this avoids jumping through hoops to
  667. duplicate the loop entry PHI arguments. Create an empty
  668. pre-header unconditionally for this. */
  669. basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  670. edge entry_e = single_pred_edge (preheader);
  671. bbs[scalar_loop->num_nodes] = preheader;
  672. new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  673. exit = single_exit (scalar_loop);
  674. copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
  675. &exit, 1, &new_exit, NULL,
  676. e->src, true);
  677. exit = single_exit (loop);
  678. basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
  679. add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
  680. if (scalar_loop != loop)
  681. {
  682. /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
  683. SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
  684. but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
  685. the LOOP SSA_NAMEs (on the exit edge and edge from latch to
  686. header) to have current_def set, so copy them over. */
  687. slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
  688. exit);
  689. slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
  690. 0),
  691. EDGE_SUCC (loop->latch, 0));
  692. }
  693. if (at_exit) /* Add the loop copy at exit. */
  694. {
  695. if (scalar_loop != loop)
  696. {
  697. gphi_iterator gsi;
  698. new_exit = redirect_edge_and_branch (new_exit, exit_dest);
  699. for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
  700. gsi_next (&gsi))
  701. {
  702. gphi *phi = gsi.phi ();
  703. tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
  704. location_t orig_locus
  705. = gimple_phi_arg_location_from_edge (phi, e);
  706. add_phi_arg (phi, orig_arg, new_exit, orig_locus);
  707. }
  708. }
  709. redirect_edge_and_branch_force (e, new_preheader);
  710. flush_pending_stmts (e);
  711. set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
  712. if (was_imm_dom)
  713. set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
  714. /* And remove the now-unnecessary forwarder again. Keep the other
  715. one so we have a proper pre-header for the loop at the exit edge. */
  716. redirect_edge_pred (single_succ_edge (preheader),
  717. single_pred (preheader));
  718. delete_basic_block (preheader);
  719. set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
  720. loop_preheader_edge (scalar_loop)->src);
  721. }
  722. else /* Add the copy at entry. */
  723. {
  724. if (scalar_loop != loop)
  725. {
  726. /* Remove the now-unnecessary forwarder of scalar_loop again. */
  727. redirect_edge_pred (single_succ_edge (preheader),
  728. single_pred (preheader));
  729. delete_basic_block (preheader);
  730. set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
  731. loop_preheader_edge (scalar_loop)->src);
  732. preheader = split_edge (loop_preheader_edge (loop));
  733. entry_e = single_pred_edge (preheader);
  734. }
  735. redirect_edge_and_branch_force (entry_e, new_preheader);
  736. flush_pending_stmts (entry_e);
  737. set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
  738. redirect_edge_and_branch_force (new_exit, preheader);
  739. flush_pending_stmts (new_exit);
  740. set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
  741. /* And remove the now-unnecessary forwarder again. Keep the other
  742. one so we have a proper pre-header for the loop at the exit edge. */
  743. redirect_edge_pred (single_succ_edge (new_preheader),
  744. single_pred (new_preheader));
  745. delete_basic_block (new_preheader);
  746. set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
  747. loop_preheader_edge (new_loop)->src);
  748. }
  749. for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
  750. rename_variables_in_bb (new_bbs[i]);
  751. if (scalar_loop != loop)
  752. {
  753. /* Update new_loop->header PHIs, so that on the preheader
  754. edge they are the ones from loop rather than scalar_loop. */
  755. gphi_iterator gsi_orig, gsi_new;
  756. edge orig_e = loop_preheader_edge (loop);
  757. edge new_e = loop_preheader_edge (new_loop);
  758. for (gsi_orig = gsi_start_phis (loop->header),
  759. gsi_new = gsi_start_phis (new_loop->header);
  760. !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
  761. gsi_next (&gsi_orig), gsi_next (&gsi_new))
  762. {
  763. gphi *orig_phi = gsi_orig.phi ();
  764. gphi *new_phi = gsi_new.phi ();
  765. tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
  766. location_t orig_locus
  767. = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
  768. add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
  769. }
  770. }
  771. free (new_bbs);
  772. free (bbs);
  773. #ifdef ENABLE_CHECKING
  774. verify_dominators (CDI_DOMINATORS);
  775. #endif
  776. return new_loop;
  777. }
  778. /* Given the condition statement COND, insert it as the last statement
  779. of GUARD_BB. EXIT_BB is the basic block reached when the loop is
  780. skipped; assumes that it is the single exit of the guarded loop.
  781. Returns the skip edge; new stmts are inserted on COND_EXPR_STMT_LIST. */
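/* After this function, GUARD_BB ends like the following sketch:
     <COND_EXPR_STMT_LIST and any stmts from gimplifying COND>
     if (COND) goto EXIT_BB;       <- new edge, EDGE_TRUE_VALUE
     <fall through to the loop>    <- old edge, now EDGE_FALSE_VALUE  */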
  782. static edge
  783. slpeel_add_loop_guard (basic_block guard_bb, tree cond,
  784. gimple_seq cond_expr_stmt_list,
  785. basic_block exit_bb, basic_block dom_bb,
  786. int probability)
  787. {
  788. gimple_stmt_iterator gsi;
  789. edge new_e, enter_e;
  790. gcond *cond_stmt;
  791. gimple_seq gimplify_stmt_list = NULL;
  792. enter_e = EDGE_SUCC (guard_bb, 0);
  793. enter_e->flags &= ~EDGE_FALLTHRU;
  794. enter_e->flags |= EDGE_FALSE_VALUE;
  795. gsi = gsi_last_bb (guard_bb);
  796. cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
  797. NULL_TREE);
  798. if (gimplify_stmt_list)
  799. gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  800. cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  801. if (cond_expr_stmt_list)
  802. gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
  803. gsi = gsi_last_bb (guard_bb);
  804. gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
  805. /* Add new edge to connect guard block to the merge/loop-exit block. */
  806. new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  807. new_e->count = guard_bb->count;
  808. new_e->probability = probability;
  809. new_e->count = apply_probability (enter_e->count, probability);
  810. enter_e->count -= new_e->count;
  811. enter_e->probability = inverse_probability (probability);
  812. set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  813. return new_e;
  814. }
  815. /* This function verifies that the following restrictions apply to LOOP:
  816. (1) it is innermost
  817. (2) it consists of exactly 2 basic blocks - header, and an empty latch.
  818. (3) it is single entry, single exit
  819. (4) its exit condition is the last stmt in the header
  820. (5) E is the entry/exit edge of LOOP.
  821. */
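/* I.e., the only accepted loop shape is, as a sketch:
     header:  <loop body stmts>
              if (exit_cond) goto exit_bb; else goto latch;
     latch:   goto header;   <- empty latch  */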
  822. bool
  823. slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
  824. {
  825. edge exit_e = single_exit (loop);
  826. edge entry_e = loop_preheader_edge (loop);
  827. gcond *orig_cond = get_loop_exit_condition (loop);
  828. gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  829. if (loop->inner
  830. /* All loops have an outer scope; the only case where loop->outer is NULL is for
  831. the function itself. */
  832. || !loop_outer (loop)
  833. || loop->num_nodes != 2
  834. || !empty_block_p (loop->latch)
  835. || !single_exit (loop)
  836. /* Verify that new loop exit condition can be trivially modified. */
  837. || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
  838. || (e != exit_e && e != entry_e))
  839. return false;
  840. return true;
  841. }
  842. #ifdef ENABLE_CHECKING
  843. static void
  844. slpeel_verify_cfg_after_peeling (struct loop *first_loop,
  845. struct loop *second_loop)
  846. {
  847. basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  848. basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  849. basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
  850. /* A guard that controls whether the second_loop is to be executed or skipped
  851. is placed in first_loop->exit. first_loop->exit therefore has two
  852. successors - one is the preheader of second_loop, and the other is a bb
  853. after second_loop.
  854. */
  855. gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
  856. /* 1. Verify that one of the successors of first_loop->exit is the preheader
  857. of second_loop. */
  858. /* The preheader of new_loop is expected to have two predecessors:
  859. first_loop->exit and the block that precedes first_loop. */
  860. gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
  861. && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
  862. && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
  863. || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
  864. && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
  865. /* Verify that the other successor of first_loop->exit is after the
  866. second_loop. */
  867. /* TODO */
  868. }
  869. #endif
  870. /* If the run-time cost model check determines that vectorization is
  871. not profitable, and hence the scalar loop should be executed, set
  872. FIRST_NITERS to the entire scalar iteration count. This allows all
  873. the iterations to be executed in the prologue-peeled scalar loop. */
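/* A sketch of the emitted construct (names illustrative):
     cond_bb:
       if (scalar_loop_iters <= TH) goto then_bb;
       else goto bb_before_first_loop;
     then_bb:
       prologue_after_cost_adjust = scalar_loop_iters;
     bb_before_first_loop:
       FIRST_NITERS' = PHI <prologue_after_cost_adjust (then_bb),
                            FIRST_NITERS (cond_bb)>  */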
  874. static void
  875. set_prologue_iterations (basic_block bb_before_first_loop,
  876. tree *first_niters,
  877. struct loop *loop,
  878. unsigned int th,
  879. int probability)
  880. {
  881. edge e;
  882. basic_block cond_bb, then_bb;
  883. tree var, prologue_after_cost_adjust_name;
  884. gimple_stmt_iterator gsi;
  885. gphi *newphi;
  886. edge e_true, e_false, e_fallthru;
  887. gcond *cond_stmt;
  888. gimple_seq stmts = NULL;
  889. tree cost_pre_condition = NULL_TREE;
  890. tree scalar_loop_iters =
  891. unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
  892. e = single_pred_edge (bb_before_first_loop);
  893. cond_bb = split_edge (e);
  894. e = single_pred_edge (bb_before_first_loop);
  895. then_bb = split_edge (e);
  896. set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
  897. e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
  898. EDGE_FALSE_VALUE);
  899. set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
  900. e_true = EDGE_PRED (then_bb, 0);
  901. e_true->flags &= ~EDGE_FALLTHRU;
  902. e_true->flags |= EDGE_TRUE_VALUE;
  903. e_true->probability = probability;
  904. e_false->probability = inverse_probability (probability);
  905. e_true->count = apply_probability (cond_bb->count, probability);
  906. e_false->count = cond_bb->count - e_true->count;
  907. then_bb->frequency = EDGE_FREQUENCY (e_true);
  908. then_bb->count = e_true->count;
  909. e_fallthru = EDGE_SUCC (then_bb, 0);
  910. e_fallthru->count = then_bb->count;
  911. gsi = gsi_last_bb (cond_bb);
  912. cost_pre_condition =
  913. fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
  914. build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  915. cost_pre_condition =
  916. force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
  917. NULL_TREE, false, GSI_CONTINUE_LINKING);
  918. cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
  919. NULL_TREE, NULL_TREE);
  920. gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
  921. var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
  922. "prologue_after_cost_adjust");
  923. prologue_after_cost_adjust_name =
  924. force_gimple_operand (scalar_loop_iters, &stmts, false, var);
  925. gsi = gsi_last_bb (then_bb);
  926. if (stmts)
  927. gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
  928. newphi = create_phi_node (var, bb_before_first_loop);
  929. add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
  930. UNKNOWN_LOCATION);
  931. add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
  932. *first_niters = PHI_RESULT (newphi);
  933. }
  934. /* Function slpeel_tree_peel_loop_to_edge.
  935. Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
  936. that is placed on the entry (exit) edge E of LOOP. After this transformation
  937. we have two loops one after the other - first-loop iterates FIRST_NITERS
  938. times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
  939. If the cost model indicates that it is profitable to emit a scalar
  940. loop instead of the vector one, then the prolog (epilog) loop will iterate
  941. for all of the unchanged scalar iterations of the loop.
  942. Input:
  943. - LOOP: the loop to be peeled.
  944. - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
  945. should be copied.
  946. - E: the exit or entry edge of LOOP.
  947. If it is the entry edge, we peel the first iterations of LOOP. In this
  948. case first-loop is LOOP, and second-loop is the newly created loop.
  949. If it is the exit edge, we peel the last iterations of LOOP. In this
  950. case, first-loop is the newly created loop, and second-loop is LOOP.
  951. - NITERS: the number of iterations that LOOP iterates.
  952. - FIRST_NITERS: the number of iterations that the first-loop should iterate.
  953. - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
  954. for updating the loop bound of the first-loop to FIRST_NITERS. If it
  955. is false, the caller of this function may want to take care of this
  956. (this can be useful if we don't want new stmts added to first-loop).
  957. - TH: cost model profitability threshold of iterations for vectorization.
  958. - CHECK_PROFITABILITY: specifies whether the cost model check, having
  959. not occurred during versioning, needs to occur during
  960. prologue generation, or whether, having not occurred
  961. during prologue generation, it needs to occur during
  962. epilogue generation.
  963. - BOUND1 is the upper bound on number of iterations of the first loop (if known)
  964. - BOUND2 is the upper bound on number of iterations of the second loop (if known)
  965. Output:
  966. The function returns a pointer to the new loop-copy, or NULL if it failed
  967. to perform the transformation.
  968. The function generates two if-then-else guards: one before the first loop,
  969. and the other before the second loop:
  970. The first guard is:
  971. if (FIRST_NITERS == 0) then skip the first loop,
  972. and go directly to the second loop.
  973. The second guard is:
  974. if (FIRST_NITERS == NITERS) then skip the second loop.
  975. If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
  976. then the generated condition is combined with COND_EXPR and the
  977. statements in COND_EXPR_STMT_LIST are emitted together with it.
  978. FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
  979. FORNOW the resulting code will not be in loop-closed-ssa form.
  980. */
  981. static struct loop *
  982. slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
  983. edge e, tree *first_niters,
  984. tree niters, bool update_first_loop_count,
  985. unsigned int th, bool check_profitability,
  986. tree cond_expr, gimple_seq cond_expr_stmt_list,
  987. int bound1, int bound2)
  988. {
  989. struct loop *new_loop = NULL, *first_loop, *second_loop;
  990. edge skip_e;
  991. tree pre_condition = NULL_TREE;
  992. basic_block bb_before_second_loop, bb_after_second_loop;
  993. basic_block bb_before_first_loop;
  994. basic_block bb_between_loops;
  995. basic_block new_exit_bb;
  996. gphi_iterator gsi;
  997. edge exit_e = single_exit (loop);
  998. source_location loop_loc;
  999. /* There are many aspects to how likely the first loop is going to be executed.
  1000. Without a histogram we can't really do a good job. Simply set it to
  1001. 2/3, so the first loop is not reordered to the end of the function and
  1002. the hot path through it stays short. */
  1003. int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  1004. int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  1005. int probability_of_second_loop;
  1006. if (!slpeel_can_duplicate_loop_p (loop, e))
  1007. return NULL;
  1008. /* We might have a queued need to update virtual SSA form. As we
  1009. delete the update SSA machinery below after doing a regular
  1010. incremental SSA update during loop copying, make sure we don't
  1011. lose that fact.
  1012. ??? Needing to update virtual SSA form by renaming is unfortunate
  1013. but not all of the vectorizer code inserting new loads / stores
  1014. properly assigns virtual operands to those statements. */
  1015. update_ssa (TODO_update_ssa_only_virtuals);
  1016. /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
  1017. in the exit bb and rename all the uses after the loop. This simplifies
  1018. the *guard[12] routines, which assume loop closed SSA form for all PHIs
  1019. (but normally loop closed SSA form doesn't require virtual PHIs to be
  1020. in the same form). Doing this early simplifies checking which
  1021. uses should be renamed. */
  1022. for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
  1023. if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
  1024. {
  1025. gphi *phi = gsi.phi ();
  1026. for (gsi = gsi_start_phis (exit_e->dest);
  1027. !gsi_end_p (gsi); gsi_next (&gsi))
  1028. if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
  1029. break;
  1030. if (gsi_end_p (gsi))
  1031. {
  1032. tree new_vop = copy_ssa_name (PHI_RESULT (phi));
  1033. gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
  1034. tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
  1035. imm_use_iterator imm_iter;
  1036. gimple stmt;
  1037. use_operand_p use_p;
  1038. add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
  1039. gimple_phi_set_result (new_phi, new_vop);
  1040. FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
  1041. if (stmt != new_phi && gimple_bb (stmt) != loop->header)
  1042. FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
  1043. SET_USE (use_p, new_vop);
  1044. }
  1045. break;
  1046. }
  1047. /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
  1048. Resulting CFG would be:
  1049. first_loop:
  1050. do {
  1051. } while ...
  1052. second_loop:
  1053. do {
  1054. } while ...
  1055. orig_exit_bb:
  1056. */
  1057. if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
  1058. e)))
  1059. {
  1060. loop_loc = find_loop_location (loop);
  1061. dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
  1062. "tree_duplicate_loop_to_edge_cfg failed.\n");
  1063. return NULL;
  1064. }
  1065. if (MAY_HAVE_DEBUG_STMTS)
  1066. {
  1067. gcc_assert (!adjust_vec.exists ());
  1068. adjust_vec.create (32);
  1069. }
  1070. if (e == exit_e)
  1071. {
  1072. /* NEW_LOOP was placed after LOOP. */
  1073. first_loop = loop;
  1074. second_loop = new_loop;
  1075. }
  1076. else
  1077. {
  1078. /* NEW_LOOP was placed before LOOP. */
  1079. first_loop = new_loop;
  1080. second_loop = loop;
  1081. }
  1082. /* 2. Add the guard code in one of the following ways:
  1083. 2.a Add the guard that controls whether the first loop is executed.
  1084. This occurs when this function is invoked for prologue or epilogue
  1085. generation and when the cost model check can be done at compile time.
  1086. Resulting CFG would be:
  1087. bb_before_first_loop:
  1088. if (FIRST_NITERS == 0) GOTO bb_before_second_loop
  1089. GOTO first-loop
  1090. first_loop:
  1091. do {
  1092. } while ...
  1093. bb_before_second_loop:
  1094. second_loop:
  1095. do {
  1096. } while ...
  1097. orig_exit_bb:
  1098. 2.b Add the cost model check that allows the prologue
  1099. to iterate for all of the unchanged scalar
  1100. iterations of the loop in the event that the cost
  1101. model indicates that the scalar loop is more
  1102. profitable than the vector one. This occurs when
  1103. this function is invoked for prologue generation
  1104. and the cost model check needs to be done at run
  1105. time.
  1106. Resulting CFG after prologue peeling would be:
  1107. if (scalar_loop_iterations <= th)
  1108. FIRST_NITERS = scalar_loop_iterations
  1109. bb_before_first_loop:
  1110. if (FIRST_NITERS == 0) GOTO bb_before_second_loop
  1111. GOTO first-loop
  1112. first_loop:
  1113. do {
  1114. } while ...
  1115. bb_before_second_loop:
  1116. second_loop:
  1117. do {
  1118. } while ...
  1119. orig_exit_bb:
  1120. 2.c Add the cost model check that allows the epilogue
  1121. to iterate for the entire unchanged scalar
  1122. iterations of the loop in the event that the cost
  1123. model indicates that the scalar loop is more
  1124. profitable than the vector one. This occurs when
  1125. this function is invoked for epilogue generation
  1126. and the cost model check needs to be done at run
  1127. time. This check is combined with any pre-existing
  1128. check in COND_EXPR to avoid versioning.
  1129. Resulting CFG after prologue peeling would be:
  1130. bb_before_first_loop:
  1131. if ((scalar_loop_iterations <= th)
  1132. ||
  1133. FIRST_NITERS == 0) GOTO bb_before_second_loop
  1134. GOTO first-loop
  1135. first_loop:
  1136. do {
  1137. } while ...
  1138. bb_before_second_loop:
  1139. second_loop:
  1140. do {
  1141. } while ...
  1142. orig_exit_bb:
  1143. */
  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  /* Loop copying inserted a forwarder block for us here.  */
  bb_before_second_loop = single_exit (first_loop)->dest;
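
  /* The second loop is reached either by skipping the first loop
     (probability 1 - first_guard_probability) or by executing it and then
     not skipping the second loop (probability first_guard_probability
     * second_guard_probability); the sum below computes exactly that with
     fixed-point probabilities scaled by REG_BR_PROB_BASE.  */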
  probability_of_second_loop = (inverse_probability (first_guard_probability)
                                + combine_probabilities (second_guard_probability,
                                                         first_guard_probability));
  /* Theoretically preheader edge of first loop and exit edge should have
     same frequencies.  Loop exit probabilities are however easy to get wrong.
     It is safer to copy value from original loop entry.  */
  bb_before_second_loop->frequency
    = combine_probabilities (bb_before_first_loop->frequency,
                             probability_of_second_loop);
  bb_before_second_loop->count
    = apply_probability (bb_before_first_loop->count,
                         probability_of_second_loop);
  single_succ_edge (bb_before_second_loop)->count
    = bb_before_second_loop->count;
  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
      tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
      unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
        limit = limit + 1;
      if (check_profitability
          && th > limit)
        limit = th;
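
      /* E.g. with VF == 4 and no peeling for gaps, limit == 3 and the
         guard below reads "niters - 1 < 3", i.e. the first (vectorized)
         loop is skipped whenever the loop runs fewer than VF iterations.  */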
      pre_condition =
        fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
                     build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
      if (cond_expr)
        {
          pre_condition =
            fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                         pre_condition,
                         fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
                                      cond_expr));
        }
    }
  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
        set_prologue_iterations (bb_before_first_loop, first_niters,
                                 loop, th, first_guard_probability);

      pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
                     build_int_cst (TREE_TYPE (*first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  cond_expr_stmt_list,
                                  bb_before_second_loop, bb_before_first_loop,
                                  inverse_probability (first_guard_probability));
  scale_loop_profile (first_loop, first_guard_probability,
                      check_profitability && (int) th > bound1 ? th : bound1);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
                                      first_loop == new_loop,
                                      &new_exit_bb);
  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */
  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));
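
  /* The second loop can be skipped entirely when the first loop already
     performs all NITERS iterations, i.e. when FIRST_NITERS == NITERS.  */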
  pre_condition =
    fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop, bb_before_first_loop,
                                  inverse_probability (second_guard_probability));
  scale_loop_profile (second_loop, probability_of_second_loop, bound2);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                      second_loop == new_loop, &new_exit_bb);
  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);

  delete_update_ssa ();

  adjust_vec_debug_stmts ();

  return new_loop;
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if it succeeds and UNKNOWN_LOCATION if not.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gimple phi;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "virtual phi. skip.\n");
          continue;
        }

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduc phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }
      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are a polynomial of degree >= 2.  */
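      /* For example, an evolution like {0, +, {1, +, 1}}_1, whose step is
         itself a CHREC, denotes a degree-2 polynomial and is rejected
         here.  */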
      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
     - LOOP - a loop that is going to be vectorized.  The last few iterations
              of LOOP were peeled.
     - NITERS - the number of iterations that LOOP executes (before it is
                vectorized), i.e., the number of times the ivs should be bumped.
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
                  coming out from LOOP on which there are uses of the LOOP ivs
                  (this is the path from LOOP->exit to epilog_loop->preheader).

                  The new definitions of the ivs are placed in LOOP->exit.
                  The phi args associated with the edge UPDATE_E in the bb
                  UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
                                  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gphi *phi, *phi1;
  gphi_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;
      stmt_vec_info stmt_info;

      phi = gsi.phi ();
      phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "virtual phi. skip.\n");
          continue;
        }

      /* Skip reduction phis.  */
      stmt_info = vinfo_for_stmt (phi);
      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduc phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));
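
      /* Compute the value the IV reaches after NITERS iterations:
         init_expr + niters * step_expr, using a POINTER_PLUS_EXPR when
         the IV has pointer type.  */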
      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
                                          true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).

   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
   test.  */
void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
                                tree ni_name, tree ratio_mult_vf_name,
                                unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;
  int max_iter;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_do_peeling_for_loop_bound ===\n");

  initialize_original_copy_tables ();

  loop_num = loop->num;

  new_loop
    = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
                                     &ratio_mult_vf_name, ni_name, false,
                                     th, check_profitability,
                                     cond_expr, cond_expr_stmt_list,
                                     0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */
  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* For vectorization factor N, we need to copy last N-1 values in epilogue
     and this means N-2 loopback edge executions.

     PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
     will execute at least LOOP_VINFO_VECT_FACTOR times.  */
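  /* E.g. with VF == 4, max_iter is 2 in the plain case (at most 3
     peeled values, hence at most 2 latch executions) and 6 when peeling
     for gaps.  */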
  max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
              ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
              : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
  if (check_profitability)
    max_iter = MAX (max_iter, (int) th - 1);
  record_niter_bound (new_loop, max_iter, false, true);
  dump_printf (MSG_NOTE,
               "Setting upper bound of nb iterations for epilogue "
               "loop to %d\n", max_iter);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
                                 int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  gimple_seq stmts;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  gimple dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  pe = loop_preheader_edge (loop);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      gimple_seq new_stmts = NULL;
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
        ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                &new_stmts, offset, loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
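
      /* E.g. for a V4SI access 8 bytes past a 16-byte boundary:
         byte_misalign == 8, elem_misalign == 8 >> 2 == 2, and
         iters == (4 - 2) & 3 == 2 scalar iterations reach alignment.  */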
      *bound = nelements;
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  stmts = NULL;
  iters_name = force_gimple_operand (iters, &stmts, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmts)
    {
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_drs ===\n");

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
                               unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  tree niters_of_prolog_loop;
  tree wide_prolog_niters;
  struct loop *new_loop;
  int max_iter;
  int bound = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "loop peeled for vectorization to enhance"
                     " alignment\n");

  initialize_original_copy_tables ();

  gimple_seq stmts = NULL;
  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
                                                           ni_name,
                                                           &bound);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop times.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
                                   loop_preheader_edge (loop),
                                   &niters_of_prolog_loop, ni_name, true,
                                   th, check_profitability, NULL_TREE, NULL,
                                   bound, 0);

  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif
  /* For vectorization factor N, we need to copy at most N-1 values
     for alignment and this means N-2 loopback edge executions.  */
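  /* E.g. with VF == 4 the prologue runs at most 3 iterations, i.e. at
     most 2 latch executions, so max_iter == 2.  */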
  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
  if (check_profitability)
    max_iter = MAX (max_iter, (int) th - 1);
  record_niter_bound (new_loop, max_iter, false, true);
  dump_printf (MSG_NOTE,
               "Setting upper bound of nb iterations for prologue "
               "loop to %d\n", max_iter);

  /* Update number of times loop executes.  */
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
      TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
  LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
      TREE_TYPE (ni_name),
      LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);

  if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
    wide_prolog_niters = niters_of_prolog_loop;
  else
    {
      gimple_seq seq = NULL;
      edge pe = loop_preheader_edge (loop);
      tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
      wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
                                                 var);
      if (seq)
        {
          /* Insert stmt on loop preheader edge.  */
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment must
   be checked at runtime.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and that this
        test can be done as a&(n-1) == 0.  For example, for 16
        byte vectors the test is a&0xf == 0.  */
static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
                                   gimple_seq *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  vec<gimple> may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  gimple ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
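
  /* E.g. for 16-byte vectors MASK is 0xf.  OR-ing all the addresses
     before masking is sufficient because (a | b) & mask == 0 iff
     a & mask == 0 and b & mask == 0.  */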
  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
     of the first vector of the i'th data reference.  */

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree new_or_tmp_name;
      gimple addr_stmt, or_stmt;
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      bool negative = tree_int_cst_compare
        (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
      tree offset = negative
        ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
        vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
                                              offset, loop);
      if (new_stmt_list != NULL)
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "orptrs%d", i);
          new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
          or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
                                         or_tmp_name, addr_tmp_name);
          gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
          or_tmp_name = new_or_tmp_name;
        }
      else
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask */
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
                                  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
                                and_tmp_name, ptrsize_zero);
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                              *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data reference
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.  If it is NULL, then the function
                is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
                to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */
void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  tree part_cond_expr;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
     &&
     ...
     &&
     ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
     || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */

  if (comp_alias_ddrs.is_empty ())
    return;

  for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
    {
      const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
      const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
      tree segment_length_a = dr_a.seg_len;
      tree segment_length_b = dr_b.seg_len;

      tree addr_base_a
        = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
      tree addr_base_b
        = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "create runtime check for data references ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
          dump_printf (MSG_NOTE, " and ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
          dump_printf (MSG_NOTE, "\n");
        }

      tree seg_a_min = addr_base_a;
      tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
      /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
         bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
         [a, a+12).  */
      if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
        {
          tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
          seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
          seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
        }

      tree seg_b_min = addr_base_b;
      tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
      if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
        {
          tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
          seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
          seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
        }
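
      /* The two segments are disjoint iff one ends before the other
         begins: seg_a_max <= seg_b_min || seg_b_max <= seg_a_min.  */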
      part_cond_expr =
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
          fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
          fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));

      if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                  *cond_expr, part_cond_expr);
      else
        *cond_expr = part_cond_expr;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created %u versioning for alias checks.\n",
                     comp_alias_ddrs.length ());

  comp_alias_ddrs.release ();
}
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and alignment tests are passed.

   The test generated to check which version of the loop is executed
   is modified to also check for profitability as indicated by the
   cost model initially.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  */
void
vect_loop_versioning (loop_vec_info loop_vinfo,
                      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
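  /* I.e. assume the vectorized version is taken four times out of five.  */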
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  if (check_profitability)
    {
      cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
                               build_int_cst (TREE_TYPE (scalar_loop_iters), th));
      cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
                                          is_gimple_condexpr, NULL_TREE);
    }
  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
                                       &cond_expr_stmt_list);

  if (version_alias)
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
                                      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
         scale LOOP's frequencies instead.  */
      loop_version (scalar_loop, cond_expr, &condition_bb,
                    prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
      scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
         while we need to move it above LOOP's preheader.  */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
                  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
                  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
                                      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
                               single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
                               single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
                               condition_bb);
    }
  else
    loop_version (loop, cond_expr, &condition_bb,
                  prob, prob, REG_BR_PROB_BASE - prob, true);

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop versioned for vectorization because of "
                         "possible aliasing\n");
      if (version_align)
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop versioned for vectorization to enhance "
                         "alignment\n");
    }
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from loop, but scalar_loop instead,
     merge_bb will already have just a single predecessor.  */
  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          tree new_res;
          orig_phi = gsi.phi ();
          new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          new_phi = create_phi_node (new_res, new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e,
                       gimple_phi_arg_location_from_edge (orig_phi, e));
          adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
        }
    }

  /* End loop-exit-fixes after versioning.  */

  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
                             GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}