tree-ssa-tail-merge.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753
  1. /* Tail merging for gimple.
  2. Copyright (C) 2011-2015 Free Software Foundation, Inc.
  3. Contributed by Tom de Vries (tom@codesourcery.com)
  4. This file is part of GCC.
  5. GCC is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3, or (at your option)
  8. any later version.
  9. GCC is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with GCC; see the file COPYING3. If not see
  15. <http://www.gnu.org/licenses/>. */
  16. /* Pass overview.
  17. MOTIVATIONAL EXAMPLE
  18. gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
  19. hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
  20. {
  21. struct FILED.1638 * fpD.2605;
  22. charD.1 fileNameD.2604[1000];
  23. intD.0 D.3915;
  24. const charD.1 * restrict outputFileName.0D.3914;
  25. # BLOCK 2 freq:10000
  26. # PRED: ENTRY [100.0%] (fallthru,exec)
  27. # PT = nonlocal { D.3926 } (restr)
  28. outputFileName.0D.3914_3
  29. = (const charD.1 * restrict) outputFileNameD.2600_2(D);
  30. # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
  31. # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
  32. # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
  33. sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
  34. # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
  35. # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
  36. # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
  37. D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
  38. if (D.3915_4 == 0)
  39. goto <bb 3>;
  40. else
  41. goto <bb 4>;
  42. # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
  43. # BLOCK 3 freq:1000
  44. # PRED: 2 [10.0%] (true,exec)
  45. # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
  46. # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
  47. # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
  48. freeD.898 (ctxD.2601_5(D));
  49. goto <bb 7>;
  50. # SUCC: 7 [100.0%] (fallthru,exec)
  51. # BLOCK 4 freq:9000
  52. # PRED: 2 [90.0%] (false,exec)
  53. # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
  54. # PT = nonlocal escaped
  55. # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
  56. # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
  57. fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
  58. if (fpD.2605_8 == 0B)
  59. goto <bb 5>;
  60. else
  61. goto <bb 6>;
  62. # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
  63. # BLOCK 5 freq:173
  64. # PRED: 4 [1.9%] (true,exec)
  65. # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
  66. # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
  67. # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
  68. freeD.898 (ctxD.2601_5(D));
  69. goto <bb 7>;
  70. # SUCC: 7 [100.0%] (fallthru,exec)
  71. # BLOCK 6 freq:8827
  72. # PRED: 4 [98.1%] (false,exec)
  73. # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
  74. # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
  75. # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
  76. fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
  77. # SUCC: 7 [100.0%] (fallthru,exec)
  78. # BLOCK 7 freq:10000
  79. # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
  80. 6 [100.0%] (fallthru,exec)
  81. # PT = nonlocal null
  82. # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
  83. # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
  84. .MEMD.3923_18(6)>
  85. # VUSE <.MEMD.3923_11>
  86. return ctxD.2601_1;
  87. # SUCC: EXIT [100.0%]
  88. }
  89. bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
  90. same successors, and the same operations.
  91. CONTEXT
  92. A technique called tail merging (or cross jumping) can fix the example
  93. above. For a block, we look for common code at the end (the tail) of the
  94. predecessor blocks, and insert jumps from one block to the other.
  95. The example is a special case for tail merging, in that 2 whole blocks
  96. can be merged, rather than just the end parts of it.
  97. We currently only focus on whole block merging, so in that sense
  98. calling this pass tail merge is a bit of a misnomer.
  99. We distinguish 2 kinds of situations in which blocks can be merged:
  100. - same operations, same predecessors. The successor edges coming from one
  101. block are redirected to come from the other block.
  102. - same operations, same successors. The predecessor edges entering one block
  103. are redirected to enter the other block. Note that this operation might
  104. involve introducing phi operations.
  105. For efficient implementation, we would like to value number the blocks, and
  106. have a comparison operator that tells us whether the blocks are equal.
  107. Besides being runtime efficient, block value numbering should also abstract
  108. from irrelevant differences in order of operations, much like normal value
  109. numbering abstracts from irrelevant order of operations.
  110. For the first situation (same_operations, same predecessors), normal value
  111. numbering fits well. We can calculate a block value number based on the
  112. value numbers of the defs and vdefs.
  113. For the second situation (same operations, same successors), this approach
  114. doesn't work so well. We can illustrate this using the example. The calls
  115. to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
  116. remain different in value numbering, since they represent different memory
  117. states. So the resulting vdefs of the frees will be different in value
  118. numbering, so the block value numbers will be different.
  119. The reason why we call the blocks equal is not because they define the same
  120. values, but because uses in the blocks use (possibly different) defs in the
  121. same way. To be able to detect this efficiently, we need to do some kind of
  122. reverse value numbering, meaning number the uses rather than the defs, and
  123. calculate a block value number based on the value number of the uses.
  124. Ideally, a block comparison operator will also indicate which phis are needed
  125. to merge the blocks.
  126. For the moment, we don't do block value numbering, but we do insn-by-insn
  127. matching, using scc value numbers to match operations with results, and
  128. structural comparison otherwise, while ignoring vop mismatches.
  129. IMPLEMENTATION
  130. 1. The pass first determines all groups of blocks with the same successor
  131. blocks.
  132. 2. Within each group, it tries to determine clusters of equal basic blocks.
  133. 3. The clusters are applied.
  134. 4. The same successor groups are updated.
  135. 5. This process is repeated from 2 onwards, until no more changes.
  136. LIMITATIONS/TODO
  137. - block only
  138. - handles only 'same operations, same successors'.
  139. It handles same predecessors as a special subcase though.
  140. - does not implement the reverse value numbering and block value numbering.
  141. - improve memory allocation: use garbage collected memory, obstacks,
  142. allocpools where appropriate.
  143. - no insertion of gimple_reg phis. We only introduce vop-phis.
  144. - handle blocks with gimple_reg phi_nodes.
  145. PASS PLACEMENT
  146. This 'pass' is not a stand-alone gimple pass, but runs as part of
  147. pass_pre, in order to share the value numbering.
  148. SWITCHES
  149. - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
  150. #include "config.h"
  151. #include "system.h"
  152. #include "coretypes.h"
  153. #include "tm.h"
  154. #include "hash-set.h"
  155. #include "machmode.h"
  156. #include "vec.h"
  157. #include "double-int.h"
  158. #include "input.h"
  159. #include "alias.h"
  160. #include "symtab.h"
  161. #include "wide-int.h"
  162. #include "inchash.h"
  163. #include "real.h"
  164. #include "tree.h"
  165. #include "fold-const.h"
  166. #include "stor-layout.h"
  167. #include "trans-mem.h"
  168. #include "inchash.h"
  169. #include "tm_p.h"
  170. #include "predict.h"
  171. #include "hard-reg-set.h"
  172. #include "input.h"
  173. #include "function.h"
  174. #include "dominance.h"
  175. #include "cfg.h"
  176. #include "cfganal.h"
  177. #include "cfgcleanup.h"
  178. #include "basic-block.h"
  179. #include "flags.h"
  180. #include "hash-table.h"
  181. #include "tree-ssa-alias.h"
  182. #include "internal-fn.h"
  183. #include "tree-eh.h"
  184. #include "gimple-expr.h"
  185. #include "is-a.h"
  186. #include "gimple.h"
  187. #include "gimple-iterator.h"
  188. #include "gimple-ssa.h"
  189. #include "tree-cfg.h"
  190. #include "tree-phinodes.h"
  191. #include "ssa-iterators.h"
  192. #include "tree-into-ssa.h"
  193. #include "params.h"
  194. #include "gimple-pretty-print.h"
  195. #include "tree-ssa-sccvn.h"
  196. #include "tree-dump.h"
  197. #include "cfgloop.h"
  198. #include "tree-pass.h"
  199. #include "trans-mem.h"
  200. /* Describes a group of bbs with the same successors. The successor bbs are
  201. cached in succs, and the successor edge flags are cached in succ_flags.
  202. If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
  203. it's marked in inverse.
  204. Additionally, the hash value for the struct is cached in hashval, and
  205. in_worklist indicates whether it's currently part of worklist. */
  206. struct same_succ_def
  207. {
  208. /* The bbs that have the same successor bbs. */
  209. bitmap bbs;
  210. /* The successor bbs. */
  211. bitmap succs;
  212. /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
  213. bb. */
  214. bitmap inverse;
  215. /* The edge flags for each of the successor bbs. */
  216. vec<int> succ_flags;
  217. /* Indicates whether the struct is currently in the worklist. */
  218. bool in_worklist;
  219. /* The hash value of the struct. */
  220. hashval_t hashval;
  221. /* hash_table support. */
  222. typedef same_succ_def value_type;
  223. typedef same_succ_def compare_type;
  224. static inline hashval_t hash (const value_type *);
  225. static int equal (const value_type *, const compare_type *);
  226. static void remove (value_type *);
  227. };
  228. typedef struct same_succ_def *same_succ;
  229. typedef const struct same_succ_def *const_same_succ;
  230. /* hash routine for hash_table support, returns hashval of E. */
  231. inline hashval_t
  232. same_succ_def::hash (const value_type *e)
  233. {
  234. return e->hashval;
  235. }
  236. /* A group of bbs where 1 bb from bbs can replace the other bbs. */
  237. struct bb_cluster_def
  238. {
  239. /* The bbs in the cluster. */
  240. bitmap bbs;
  241. /* The preds of the bbs in the cluster. */
  242. bitmap preds;
  243. /* Index in all_clusters vector. */
  244. int index;
  245. /* The bb to replace the cluster with. */
  246. basic_block rep_bb;
  247. };
  248. typedef struct bb_cluster_def *bb_cluster;
  249. typedef const struct bb_cluster_def *const_bb_cluster;
  /* Per-bb information.  One instance hangs off each basic block's aux field
     (allocated in init_worklist, released in delete_worklist) and is accessed
     through the BB_* macros.  */

  struct aux_bb_info
  {
    /* The number of statements in the bb as counted by same_succ_hash: debug
       statements and statements for which stmt_local_def holds are not
       counted.  */
    int size;
    /* The same_succ that this bb is a member of.  Stays unset for bbs that
       find_same_succ_bb skips, and is cleared by same_succ_flush_bb.  */
    same_succ bb_same_succ;
    /* The cluster that this bb is a member of.  */
    bb_cluster cluster;
    /* The vop state at the exit of a bb.  This is shortlived data, used to
       communicate data between update_block_by and update_vuses.  */
    tree vop_at_exit;
    /* The bb that either contains or is dominated by the dependencies of the
       bb.  Maintained by update_dep_bb.  */
    basic_block dep_bb;
  };
  /* Accessors for the fields of the struct aux_bb_info that hangs off the aux
     pointer of basic block BB.  Each expands to an lvalue, so the macros can
     be used both to read and to assign the field.

     BB is parenthesized in the expansion so that any pointer-valued
     expression, e.g. BB_SIZE (bbs + 1) or BB_SIZE (cond ? a : b), binds as
     intended instead of having "->aux" attach to its last operand only.  */
  #define BB_SIZE(bb) (((struct aux_bb_info *) (bb)->aux)->size)
  #define BB_SAME_SUCC(bb) (((struct aux_bb_info *) (bb)->aux)->bb_same_succ)
  #define BB_CLUSTER(bb) (((struct aux_bb_info *) (bb)->aux)->cluster)
  #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *) (bb)->aux)->vop_at_exit)
  #define BB_DEP_BB(bb) (((struct aux_bb_info *) (bb)->aux)->dep_bb)
  272. /* Returns true if the only effect a statement STMT has, is to define locally
  273. used SSA_NAMEs. */
  274. static bool
  275. stmt_local_def (gimple stmt)
  276. {
  277. basic_block bb, def_bb;
  278. imm_use_iterator iter;
  279. use_operand_p use_p;
  280. tree val;
  281. def_operand_p def_p;
  282. if (gimple_vdef (stmt) != NULL_TREE
  283. || gimple_has_side_effects (stmt)
  284. || gimple_could_trap_p_1 (stmt, false, false)
  285. || gimple_vuse (stmt) != NULL_TREE)
  286. return false;
  287. def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
  288. if (def_p == NULL)
  289. return false;
  290. val = DEF_FROM_PTR (def_p);
  291. if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
  292. return false;
  293. def_bb = gimple_bb (stmt);
  294. FOR_EACH_IMM_USE_FAST (use_p, iter, val)
  295. {
  296. if (is_gimple_debug (USE_STMT (use_p)))
  297. continue;
  298. bb = gimple_bb (USE_STMT (use_p));
  299. if (bb == def_bb)
  300. continue;
  301. if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
  302. && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
  303. continue;
  304. return false;
  305. }
  306. return true;
  307. }
  308. /* Let GSI skip forwards over local defs. */
  309. static void
  310. gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
  311. {
  312. gimple stmt;
  313. while (true)
  314. {
  315. if (gsi_end_p (*gsi))
  316. return;
  317. stmt = gsi_stmt (*gsi);
  318. if (!stmt_local_def (stmt))
  319. return;
  320. gsi_next_nondebug (gsi);
  321. }
  322. }
  323. /* VAL1 and VAL2 are either:
  324. - uses in BB1 and BB2, or
  325. - phi alternatives for BB1 and BB2.
  326. Return true if the uses have the same gvn value. */
  327. static bool
  328. gvn_uses_equal (tree val1, tree val2)
  329. {
  330. gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
  331. if (val1 == val2)
  332. return true;
  333. if (vn_valueize (val1) != vn_valueize (val2))
  334. return false;
  335. return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
  336. && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
  337. }
  338. /* Prints E to FILE. */
  339. static void
  340. same_succ_print (FILE *file, const same_succ e)
  341. {
  342. unsigned int i;
  343. bitmap_print (file, e->bbs, "bbs:", "\n");
  344. bitmap_print (file, e->succs, "succs:", "\n");
  345. bitmap_print (file, e->inverse, "inverse:", "\n");
  346. fprintf (file, "flags:");
  347. for (i = 0; i < e->succ_flags.length (); ++i)
  348. fprintf (file, " %x", e->succ_flags[i]);
  349. fprintf (file, "\n");
  350. }
  351. /* Prints same_succ VE to VFILE. */
  352. inline int
  353. ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
  354. {
  355. const same_succ e = *pe;
  356. same_succ_print (file, e);
  357. return 1;
  358. }
  359. /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
  360. static void
  361. update_dep_bb (basic_block use_bb, tree val)
  362. {
  363. basic_block dep_bb;
  364. /* Not a dep. */
  365. if (TREE_CODE (val) != SSA_NAME)
  366. return;
  367. /* Skip use of global def. */
  368. if (SSA_NAME_IS_DEFAULT_DEF (val))
  369. return;
  370. /* Skip use of local def. */
  371. dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
  372. if (dep_bb == use_bb)
  373. return;
  374. if (BB_DEP_BB (use_bb) == NULL
  375. || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
  376. BB_DEP_BB (use_bb) = dep_bb;
  377. }
  378. /* Update BB_DEP_BB, given the dependencies in STMT. */
  379. static void
  380. stmt_update_dep_bb (gimple stmt)
  381. {
  382. ssa_op_iter iter;
  383. use_operand_p use;
  384. FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
  385. update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
  386. }
  387. /* Calculates hash value for same_succ VE. */
  388. static hashval_t
  389. same_succ_hash (const_same_succ e)
  390. {
  391. inchash::hash hstate (bitmap_hash (e->succs));
  392. int flags;
  393. unsigned int i;
  394. unsigned int first = bitmap_first_set_bit (e->bbs);
  395. basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
  396. int size = 0;
  397. gimple stmt;
  398. tree arg;
  399. unsigned int s;
  400. bitmap_iterator bs;
  401. for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
  402. !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
  403. {
  404. stmt = gsi_stmt (gsi);
  405. stmt_update_dep_bb (stmt);
  406. if (stmt_local_def (stmt))
  407. continue;
  408. size++;
  409. hstate.add_int (gimple_code (stmt));
  410. if (is_gimple_assign (stmt))
  411. hstate.add_int (gimple_assign_rhs_code (stmt));
  412. if (!is_gimple_call (stmt))
  413. continue;
  414. if (gimple_call_internal_p (stmt))
  415. hstate.add_int (gimple_call_internal_fn (stmt));
  416. else
  417. {
  418. inchash::add_expr (gimple_call_fn (stmt), hstate);
  419. if (gimple_call_chain (stmt))
  420. inchash::add_expr (gimple_call_chain (stmt), hstate);
  421. }
  422. for (i = 0; i < gimple_call_num_args (stmt); i++)
  423. {
  424. arg = gimple_call_arg (stmt, i);
  425. arg = vn_valueize (arg);
  426. inchash::add_expr (arg, hstate);
  427. }
  428. }
  429. hstate.add_int (size);
  430. BB_SIZE (bb) = size;
  431. for (i = 0; i < e->succ_flags.length (); ++i)
  432. {
  433. flags = e->succ_flags[i];
  434. flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
  435. hstate.add_int (flags);
  436. }
  437. EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
  438. {
  439. int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
  440. for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
  441. !gsi_end_p (gsi);
  442. gsi_next (&gsi))
  443. {
  444. gphi *phi = gsi.phi ();
  445. tree lhs = gimple_phi_result (phi);
  446. tree val = gimple_phi_arg_def (phi, n);
  447. if (virtual_operand_p (lhs))
  448. continue;
  449. update_dep_bb (bb, val);
  450. }
  451. }
  452. return hstate.end ();
  453. }
  454. /* Returns true if E1 and E2 have 2 successors, and if the successor flags
  455. are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
  456. the other edge flags. */
  457. static bool
  458. inverse_flags (const_same_succ e1, const_same_succ e2)
  459. {
  460. int f1a, f1b, f2a, f2b;
  461. int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
  462. if (e1->succ_flags.length () != 2)
  463. return false;
  464. f1a = e1->succ_flags[0];
  465. f1b = e1->succ_flags[1];
  466. f2a = e2->succ_flags[0];
  467. f2b = e2->succ_flags[1];
  468. if (f1a == f2a && f1b == f2b)
  469. return false;
  470. return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
  471. }
  472. /* Compares SAME_SUCCs E1 and E2. */
  473. int
  474. same_succ_def::equal (const value_type *e1, const compare_type *e2)
  475. {
  476. unsigned int i, first1, first2;
  477. gimple_stmt_iterator gsi1, gsi2;
  478. gimple s1, s2;
  479. basic_block bb1, bb2;
  480. if (e1->hashval != e2->hashval)
  481. return 0;
  482. if (e1->succ_flags.length () != e2->succ_flags.length ())
  483. return 0;
  484. if (!bitmap_equal_p (e1->succs, e2->succs))
  485. return 0;
  486. if (!inverse_flags (e1, e2))
  487. {
  488. for (i = 0; i < e1->succ_flags.length (); ++i)
  489. if (e1->succ_flags[i] != e2->succ_flags[i])
  490. return 0;
  491. }
  492. first1 = bitmap_first_set_bit (e1->bbs);
  493. first2 = bitmap_first_set_bit (e2->bbs);
  494. bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
  495. bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
  496. if (BB_SIZE (bb1) != BB_SIZE (bb2))
  497. return 0;
  498. gsi1 = gsi_start_nondebug_bb (bb1);
  499. gsi2 = gsi_start_nondebug_bb (bb2);
  500. gsi_advance_fw_nondebug_nonlocal (&gsi1);
  501. gsi_advance_fw_nondebug_nonlocal (&gsi2);
  502. while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
  503. {
  504. s1 = gsi_stmt (gsi1);
  505. s2 = gsi_stmt (gsi2);
  506. if (gimple_code (s1) != gimple_code (s2))
  507. return 0;
  508. if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
  509. return 0;
  510. gsi_next_nondebug (&gsi1);
  511. gsi_next_nondebug (&gsi2);
  512. gsi_advance_fw_nondebug_nonlocal (&gsi1);
  513. gsi_advance_fw_nondebug_nonlocal (&gsi2);
  514. }
  515. return 1;
  516. }
  517. /* Alloc and init a new SAME_SUCC. */
  518. static same_succ
  519. same_succ_alloc (void)
  520. {
  521. same_succ same = XNEW (struct same_succ_def);
  522. same->bbs = BITMAP_ALLOC (NULL);
  523. same->succs = BITMAP_ALLOC (NULL);
  524. same->inverse = BITMAP_ALLOC (NULL);
  525. same->succ_flags.create (10);
  526. same->in_worklist = false;
  527. return same;
  528. }
  529. /* Delete same_succ E. */
  530. void
  531. same_succ_def::remove (same_succ e)
  532. {
  533. BITMAP_FREE (e->bbs);
  534. BITMAP_FREE (e->succs);
  535. BITMAP_FREE (e->inverse);
  536. e->succ_flags.release ();
  537. XDELETE (e);
  538. }
  539. /* Reset same_succ SAME. */
  540. static void
  541. same_succ_reset (same_succ same)
  542. {
  543. bitmap_clear (same->bbs);
  544. bitmap_clear (same->succs);
  545. bitmap_clear (same->inverse);
  546. same->succ_flags.truncate (0);
  547. }
  /* Hash table holding the same_succ groups.  Entries are inserted by
     find_same_succ_bb using their cached hashval.  The table is created in
     init_worklist and deleted in delete_worklist.  */
  static hash_table<same_succ_def> *same_succ_htab;

  /* Array that is used to store the edge flags for a successor.  It is
     indexed by the index of the successor bb and serves as scratch space in
     find_same_succ_bb.  */
  static int *same_succ_edge_flags;

  /* Bitmap that is used to mark bbs that are recently deleted.  */
  static bitmap deleted_bbs;

  /* Bitmap that is used to mark predecessors of bbs that are
     deleted.  */
  static bitmap deleted_bb_preds;
  556. /* Prints same_succ_htab to stderr. */
  557. extern void debug_same_succ (void);
  558. DEBUG_FUNCTION void
  559. debug_same_succ ( void)
  560. {
  561. same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
  562. }
  /* Vector of same_succ groups to process.  A group is pushed by
     add_to_worklist, which also sets its in_worklist flag.  */
  static vec<same_succ> worklist;
  565. /* Prints worklist to FILE. */
  566. static void
  567. print_worklist (FILE *file)
  568. {
  569. unsigned int i;
  570. for (i = 0; i < worklist.length (); ++i)
  571. same_succ_print (file, worklist[i]);
  572. }
  573. /* Adds SAME to worklist. */
  574. static void
  575. add_to_worklist (same_succ same)
  576. {
  577. if (same->in_worklist)
  578. return;
  579. if (bitmap_count_bits (same->bbs) < 2)
  580. return;
  581. same->in_worklist = true;
  582. worklist.safe_push (same);
  583. }
  584. /* Add BB to same_succ_htab. */
  585. static void
  586. find_same_succ_bb (basic_block bb, same_succ *same_p)
  587. {
  588. unsigned int j;
  589. bitmap_iterator bj;
  590. same_succ same = *same_p;
  591. same_succ *slot;
  592. edge_iterator ei;
  593. edge e;
  594. if (bb == NULL
  595. /* Be conservative with loop structure. It's not evident that this test
  596. is sufficient. Before tail-merge, we've just called
  597. loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
  598. set, so there's no guarantee that the loop->latch value is still valid.
  599. But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
  600. start of pre, we've kept that property intact throughout pre, and are
  601. keeping it throughout tail-merge using this test. */
  602. || bb->loop_father->latch == bb)
  603. return;
  604. bitmap_set_bit (same->bbs, bb->index);
  605. FOR_EACH_EDGE (e, ei, bb->succs)
  606. {
  607. int index = e->dest->index;
  608. bitmap_set_bit (same->succs, index);
  609. same_succ_edge_flags[index] = e->flags;
  610. }
  611. EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
  612. same->succ_flags.safe_push (same_succ_edge_flags[j]);
  613. same->hashval = same_succ_hash (same);
  614. slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
  615. if (*slot == NULL)
  616. {
  617. *slot = same;
  618. BB_SAME_SUCC (bb) = same;
  619. add_to_worklist (same);
  620. *same_p = NULL;
  621. }
  622. else
  623. {
  624. bitmap_set_bit ((*slot)->bbs, bb->index);
  625. BB_SAME_SUCC (bb) = *slot;
  626. add_to_worklist (*slot);
  627. if (inverse_flags (same, *slot))
  628. bitmap_set_bit ((*slot)->inverse, bb->index);
  629. same_succ_reset (same);
  630. }
  631. }
  632. /* Find bbs with same successors. */
  633. static void
  634. find_same_succ (void)
  635. {
  636. same_succ same = same_succ_alloc ();
  637. basic_block bb;
  638. FOR_EACH_BB_FN (bb, cfun)
  639. {
  640. find_same_succ_bb (bb, &same);
  641. if (same == NULL)
  642. same = same_succ_alloc ();
  643. }
  644. same_succ_def::remove (same);
  645. }
  646. /* Initializes worklist administration. */
  647. static void
  648. init_worklist (void)
  649. {
  650. alloc_aux_for_blocks (sizeof (struct aux_bb_info));
  651. same_succ_htab = new hash_table<same_succ_def> (n_basic_blocks_for_fn (cfun));
  652. same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
  653. deleted_bbs = BITMAP_ALLOC (NULL);
  654. deleted_bb_preds = BITMAP_ALLOC (NULL);
  655. worklist.create (n_basic_blocks_for_fn (cfun));
  656. find_same_succ ();
  657. if (dump_file && (dump_flags & TDF_DETAILS))
  658. {
  659. fprintf (dump_file, "initial worklist:\n");
  660. print_worklist (dump_file);
  661. }
  662. }
  663. /* Deletes worklist administration. */
  664. static void
  665. delete_worklist (void)
  666. {
  667. free_aux_for_blocks ();
  668. delete same_succ_htab;
  669. same_succ_htab = NULL;
  670. XDELETEVEC (same_succ_edge_flags);
  671. same_succ_edge_flags = NULL;
  672. BITMAP_FREE (deleted_bbs);
  673. BITMAP_FREE (deleted_bb_preds);
  674. worklist.release ();
  675. }
  676. /* Mark BB as deleted, and mark its predecessors. */
  677. static void
  678. mark_basic_block_deleted (basic_block bb)
  679. {
  680. edge e;
  681. edge_iterator ei;
  682. bitmap_set_bit (deleted_bbs, bb->index);
  683. FOR_EACH_EDGE (e, ei, bb->preds)
  684. bitmap_set_bit (deleted_bb_preds, e->src->index);
  685. }
  686. /* Removes BB from its corresponding same_succ. */
  687. static void
  688. same_succ_flush_bb (basic_block bb)
  689. {
  690. same_succ same = BB_SAME_SUCC (bb);
  691. BB_SAME_SUCC (bb) = NULL;
  692. if (bitmap_single_bit_set_p (same->bbs))
  693. same_succ_htab->remove_elt_with_hash (same, same->hashval);
  694. else
  695. bitmap_clear_bit (same->bbs, bb->index);
  696. }
  697. /* Removes all bbs in BBS from their corresponding same_succ. */
  698. static void
  699. same_succ_flush_bbs (bitmap bbs)
  700. {
  701. unsigned int i;
  702. bitmap_iterator bi;
  703. EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
  704. same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
  705. }
  706. /* Release the last vdef in BB, either normal or phi result. */
  707. static void
  708. release_last_vdef (basic_block bb)
  709. {
  710. for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
  711. gsi_prev_nondebug (&i))
  712. {
  713. gimple stmt = gsi_stmt (i);
  714. if (gimple_vdef (stmt) == NULL_TREE)
  715. continue;
  716. mark_virtual_operand_for_renaming (gimple_vdef (stmt));
  717. return;
  718. }
  719. for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
  720. gsi_next (&i))
  721. {
  722. gphi *phi = i.phi ();
  723. tree res = gimple_phi_result (phi);
  724. if (!virtual_operand_p (res))
  725. continue;
  726. mark_virtual_phi_result_for_renaming (phi);
  727. return;
  728. }
  729. }
  730. /* For deleted_bb_preds, find bbs with same successors. */
  731. static void
  732. update_worklist (void)
  733. {
  734. unsigned int i;
  735. bitmap_iterator bi;
  736. basic_block bb;
  737. same_succ same;
  738. bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
  739. bitmap_clear (deleted_bbs);
  740. bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
  741. same_succ_flush_bbs (deleted_bb_preds);
  742. same = same_succ_alloc ();
  743. EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
  744. {
  745. bb = BASIC_BLOCK_FOR_FN (cfun, i);
  746. gcc_assert (bb != NULL);
  747. find_same_succ_bb (bb, &same);
  748. if (same == NULL)
  749. same = same_succ_alloc ();
  750. }
  751. same_succ_def::remove (same);
  752. bitmap_clear (deleted_bb_preds);
  753. }
  754. /* Prints cluster C to FILE. */
  755. static void
  756. print_cluster (FILE *file, bb_cluster c)
  757. {
  758. if (c == NULL)
  759. return;
  760. bitmap_print (file, c->bbs, "bbs:", "\n");
  761. bitmap_print (file, c->preds, "preds:", "\n");
  762. }
  763. /* Prints cluster C to stderr. */
  764. extern void debug_cluster (bb_cluster);
  765. DEBUG_FUNCTION void
  766. debug_cluster (bb_cluster c)
  767. {
  768. print_cluster (stderr, c);
  769. }
  770. /* Update C->rep_bb, given that BB is added to the cluster. */
  771. static void
  772. update_rep_bb (bb_cluster c, basic_block bb)
  773. {
  774. /* Initial. */
  775. if (c->rep_bb == NULL)
  776. {
  777. c->rep_bb = bb;
  778. return;
  779. }
  780. /* Current needs no deps, keep it. */
  781. if (BB_DEP_BB (c->rep_bb) == NULL)
  782. return;
  783. /* Bb needs no deps, change rep_bb. */
  784. if (BB_DEP_BB (bb) == NULL)
  785. {
  786. c->rep_bb = bb;
  787. return;
  788. }
  789. /* Bb needs last deps earlier than current, change rep_bb. A potential
  790. problem with this, is that the first deps might also be earlier, which
  791. would mean we prefer longer lifetimes for the deps. To be able to check
  792. for this, we would have to trace BB_FIRST_DEP_BB as well, besides
  793. BB_DEP_BB, which is really BB_LAST_DEP_BB.
  794. The benefit of choosing the bb with last deps earlier, is that it can
  795. potentially be used as replacement for more bbs. */
  796. if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
  797. c->rep_bb = bb;
  798. }
  799. /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
  800. static void
  801. add_bb_to_cluster (bb_cluster c, basic_block bb)
  802. {
  803. edge e;
  804. edge_iterator ei;
  805. bitmap_set_bit (c->bbs, bb->index);
  806. FOR_EACH_EDGE (e, ei, bb->preds)
  807. bitmap_set_bit (c->preds, e->src->index);
  808. update_rep_bb (c, bb);
  809. }
  810. /* Allocate and init new cluster. */
  811. static bb_cluster
  812. new_cluster (void)
  813. {
  814. bb_cluster c;
  815. c = XCNEW (struct bb_cluster_def);
  816. c->bbs = BITMAP_ALLOC (NULL);
  817. c->preds = BITMAP_ALLOC (NULL);
  818. c->rep_bb = NULL;
  819. return c;
  820. }
  821. /* Delete clusters. */
  822. static void
  823. delete_cluster (bb_cluster c)
  824. {
  825. if (c == NULL)
  826. return;
  827. BITMAP_FREE (c->bbs);
  828. BITMAP_FREE (c->preds);
  829. XDELETE (c);
  830. }
  831. /* Array that contains all clusters. */
  832. static vec<bb_cluster> all_clusters;
  833. /* Allocate all cluster vectors. */
  834. static void
  835. alloc_cluster_vectors (void)
  836. {
  837. all_clusters.create (n_basic_blocks_for_fn (cfun));
  838. }
  839. /* Reset all cluster vectors. */
  840. static void
  841. reset_cluster_vectors (void)
  842. {
  843. unsigned int i;
  844. basic_block bb;
  845. for (i = 0; i < all_clusters.length (); ++i)
  846. delete_cluster (all_clusters[i]);
  847. all_clusters.truncate (0);
  848. FOR_EACH_BB_FN (bb, cfun)
  849. BB_CLUSTER (bb) = NULL;
  850. }
  851. /* Delete all cluster vectors. */
  852. static void
  853. delete_cluster_vectors (void)
  854. {
  855. unsigned int i;
  856. for (i = 0; i < all_clusters.length (); ++i)
  857. delete_cluster (all_clusters[i]);
  858. all_clusters.release ();
  859. }
  860. /* Merge cluster C2 into C1. */
  861. static void
  862. merge_clusters (bb_cluster c1, bb_cluster c2)
  863. {
  864. bitmap_ior_into (c1->bbs, c2->bbs);
  865. bitmap_ior_into (c1->preds, c2->preds);
  866. }
  867. /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
  868. all_clusters, or merge c with existing cluster. */
  869. static void
  870. set_cluster (basic_block bb1, basic_block bb2)
  871. {
  872. basic_block merge_bb, other_bb;
  873. bb_cluster merge, old, c;
  874. if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
  875. {
  876. c = new_cluster ();
  877. add_bb_to_cluster (c, bb1);
  878. add_bb_to_cluster (c, bb2);
  879. BB_CLUSTER (bb1) = c;
  880. BB_CLUSTER (bb2) = c;
  881. c->index = all_clusters.length ();
  882. all_clusters.safe_push (c);
  883. }
  884. else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
  885. {
  886. merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
  887. other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
  888. merge = BB_CLUSTER (merge_bb);
  889. add_bb_to_cluster (merge, other_bb);
  890. BB_CLUSTER (other_bb) = merge;
  891. }
  892. else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
  893. {
  894. unsigned int i;
  895. bitmap_iterator bi;
  896. old = BB_CLUSTER (bb2);
  897. merge = BB_CLUSTER (bb1);
  898. merge_clusters (merge, old);
  899. EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
  900. BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
  901. all_clusters[old->index] = NULL;
  902. update_rep_bb (merge, old->rep_bb);
  903. delete_cluster (old);
  904. }
  905. else
  906. gcc_unreachable ();
  907. }
  908. /* Return true if gimple operands T1 and T2 have the same value. */
  909. static bool
  910. gimple_operand_equal_value_p (tree t1, tree t2)
  911. {
  912. if (t1 == t2)
  913. return true;
  914. if (t1 == NULL_TREE
  915. || t2 == NULL_TREE)
  916. return false;
  917. if (operand_equal_p (t1, t2, 0))
  918. return true;
  919. return gvn_uses_equal (t1, t2);
  920. }
  921. /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
  922. gimple_bb (s2) are members of SAME_SUCC. */
  923. static bool
  924. gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
  925. {
  926. unsigned int i;
  927. tree lhs1, lhs2;
  928. basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  929. tree t1, t2;
  930. bool inv_cond;
  931. enum tree_code code1, code2;
  932. if (gimple_code (s1) != gimple_code (s2))
  933. return false;
  934. switch (gimple_code (s1))
  935. {
  936. case GIMPLE_CALL:
  937. if (!gimple_call_same_target_p (s1, s2))
  938. return false;
  939. t1 = gimple_call_chain (s1);
  940. t2 = gimple_call_chain (s2);
  941. if (!gimple_operand_equal_value_p (t1, t2))
  942. return false;
  943. if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
  944. return false;
  945. for (i = 0; i < gimple_call_num_args (s1); ++i)
  946. {
  947. t1 = gimple_call_arg (s1, i);
  948. t2 = gimple_call_arg (s2, i);
  949. if (!gimple_operand_equal_value_p (t1, t2))
  950. return false;
  951. }
  952. lhs1 = gimple_get_lhs (s1);
  953. lhs2 = gimple_get_lhs (s2);
  954. if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
  955. return true;
  956. if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
  957. return false;
  958. if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
  959. return vn_valueize (lhs1) == vn_valueize (lhs2);
  960. return operand_equal_p (lhs1, lhs2, 0);
  961. case GIMPLE_ASSIGN:
  962. lhs1 = gimple_get_lhs (s1);
  963. lhs2 = gimple_get_lhs (s2);
  964. if (TREE_CODE (lhs1) != SSA_NAME
  965. && TREE_CODE (lhs2) != SSA_NAME)
  966. return (operand_equal_p (lhs1, lhs2, 0)
  967. && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
  968. gimple_assign_rhs1 (s2)));
  969. else if (TREE_CODE (lhs1) == SSA_NAME
  970. && TREE_CODE (lhs2) == SSA_NAME)
  971. return operand_equal_p (gimple_assign_rhs1 (s1),
  972. gimple_assign_rhs1 (s2), 0);
  973. return false;
  974. case GIMPLE_COND:
  975. t1 = gimple_cond_lhs (s1);
  976. t2 = gimple_cond_lhs (s2);
  977. if (!gimple_operand_equal_value_p (t1, t2))
  978. return false;
  979. t1 = gimple_cond_rhs (s1);
  980. t2 = gimple_cond_rhs (s2);
  981. if (!gimple_operand_equal_value_p (t1, t2))
  982. return false;
  983. code1 = gimple_expr_code (s1);
  984. code2 = gimple_expr_code (s2);
  985. inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
  986. != bitmap_bit_p (same_succ->inverse, bb2->index));
  987. if (inv_cond)
  988. {
  989. bool honor_nans = HONOR_NANS (t1);
  990. code2 = invert_tree_comparison (code2, honor_nans);
  991. }
  992. return code1 == code2;
  993. default:
  994. return false;
  995. }
  996. }
  997. /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
  998. Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
  999. processed statements. */
  1000. static void
  1001. gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
  1002. bool *vuse_escaped)
  1003. {
  1004. gimple stmt;
  1005. tree lvuse;
  1006. while (true)
  1007. {
  1008. if (gsi_end_p (*gsi))
  1009. return;
  1010. stmt = gsi_stmt (*gsi);
  1011. lvuse = gimple_vuse (stmt);
  1012. if (lvuse != NULL_TREE)
  1013. {
  1014. *vuse = lvuse;
  1015. if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
  1016. *vuse_escaped = true;
  1017. }
  1018. if (!stmt_local_def (stmt))
  1019. return;
  1020. gsi_prev_nondebug (gsi);
  1021. }
  1022. }
  1023. /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
  1024. clusters them. */
  1025. static void
  1026. find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
  1027. {
  1028. gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  1029. gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  1030. tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  1031. bool vuse_escaped = false;
  1032. gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  1033. gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
  1034. while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
  1035. {
  1036. gimple stmt1 = gsi_stmt (gsi1);
  1037. gimple stmt2 = gsi_stmt (gsi2);
  1038. /* What could be better than to this this here is to blacklist the bb
  1039. containing the stmt, when encountering the stmt f.i. in
  1040. same_succ_hash. */
  1041. if (is_tm_ending (stmt1)
  1042. || is_tm_ending (stmt2))
  1043. return;
  1044. if (!gimple_equal_p (same_succ, stmt1, stmt2))
  1045. return;
  1046. gsi_prev_nondebug (&gsi1);
  1047. gsi_prev_nondebug (&gsi2);
  1048. gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  1049. gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
  1050. }
  1051. if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
  1052. return;
  1053. /* If the incoming vuses are not the same, and the vuse escaped into an
  1054. SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
  1055. which potentially means the semantics of one of the blocks will be changed.
  1056. TODO: make this check more precise. */
  1057. if (vuse_escaped && vuse1 != vuse2)
  1058. return;
  1059. if (dump_file)
  1060. fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
  1061. bb1->index, bb2->index);
  1062. set_cluster (bb1, bb2);
  1063. }
  1064. /* Returns whether for all phis in DEST the phi alternatives for E1 and
  1065. E2 are equal. */
  1066. static bool
  1067. same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
  1068. {
  1069. int n1 = e1->dest_idx, n2 = e2->dest_idx;
  1070. gphi_iterator gsi;
  1071. for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
  1072. {
  1073. gphi *phi = gsi.phi ();
  1074. tree lhs = gimple_phi_result (phi);
  1075. tree val1 = gimple_phi_arg_def (phi, n1);
  1076. tree val2 = gimple_phi_arg_def (phi, n2);
  1077. if (virtual_operand_p (lhs))
  1078. continue;
  1079. if (operand_equal_for_phi_arg_p (val1, val2))
  1080. continue;
  1081. if (gvn_uses_equal (val1, val2))
  1082. continue;
  1083. return false;
  1084. }
  1085. return true;
  1086. }
  1087. /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
  1088. phi alternatives for BB1 and BB2 are equal. */
  1089. static bool
  1090. same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
  1091. {
  1092. unsigned int s;
  1093. bitmap_iterator bs;
  1094. edge e1, e2;
  1095. basic_block succ;
  1096. EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
  1097. {
  1098. succ = BASIC_BLOCK_FOR_FN (cfun, s);
  1099. e1 = find_edge (bb1, succ);
  1100. e2 = find_edge (bb2, succ);
  1101. if (e1->flags & EDGE_COMPLEX
  1102. || e2->flags & EDGE_COMPLEX)
  1103. return false;
  1104. /* For all phis in bb, the phi alternatives for e1 and e2 need to have
  1105. the same value. */
  1106. if (!same_phi_alternatives_1 (succ, e1, e2))
  1107. return false;
  1108. }
  1109. return true;
  1110. }
  1111. /* Return true if BB has non-vop phis. */
  1112. static bool
  1113. bb_has_non_vop_phi (basic_block bb)
  1114. {
  1115. gimple_seq phis = phi_nodes (bb);
  1116. gimple phi;
  1117. if (phis == NULL)
  1118. return false;
  1119. if (!gimple_seq_singleton_p (phis))
  1120. return true;
  1121. phi = gimple_seq_first_stmt (phis);
  1122. return !virtual_operand_p (gimple_phi_result (phi));
  1123. }
  1124. /* Returns true if redirecting the incoming edges of FROM to TO maintains the
  1125. invariant that uses in FROM are dominates by their defs. */
  1126. static bool
  1127. deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
  1128. {
  1129. basic_block cd, dep_bb = BB_DEP_BB (to);
  1130. edge_iterator ei;
  1131. edge e;
  1132. bitmap from_preds = BITMAP_ALLOC (NULL);
  1133. if (dep_bb == NULL)
  1134. return true;
  1135. FOR_EACH_EDGE (e, ei, from->preds)
  1136. bitmap_set_bit (from_preds, e->src->index);
  1137. cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
  1138. BITMAP_FREE (from_preds);
  1139. return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
  1140. }
  1141. /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
  1142. replacement bb) and vice versa maintains the invariant that uses in the
  1143. replacement are dominates by their defs. */
  1144. static bool
  1145. deps_ok_for_redirect (basic_block bb1, basic_block bb2)
  1146. {
  1147. if (BB_CLUSTER (bb1) != NULL)
  1148. bb1 = BB_CLUSTER (bb1)->rep_bb;
  1149. if (BB_CLUSTER (bb2) != NULL)
  1150. bb2 = BB_CLUSTER (bb2)->rep_bb;
  1151. return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
  1152. && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
  1153. }
  1154. /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
  1155. static void
  1156. find_clusters_1 (same_succ same_succ)
  1157. {
  1158. basic_block bb1, bb2;
  1159. unsigned int i, j;
  1160. bitmap_iterator bi, bj;
  1161. int nr_comparisons;
  1162. int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
  1163. EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
  1164. {
  1165. bb1 = BASIC_BLOCK_FOR_FN (cfun, i);
  1166. /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
  1167. phi-nodes in bb1 and bb2, with the same alternatives for the same
  1168. preds. */
  1169. if (bb_has_non_vop_phi (bb1))
  1170. continue;
  1171. nr_comparisons = 0;
  1172. EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
  1173. {
  1174. bb2 = BASIC_BLOCK_FOR_FN (cfun, j);
  1175. if (bb_has_non_vop_phi (bb2))
  1176. continue;
  1177. if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
  1178. continue;
  1179. /* Limit quadratic behaviour. */
  1180. nr_comparisons++;
  1181. if (nr_comparisons > max_comparisons)
  1182. break;
  1183. /* This is a conservative dependency check. We could test more
  1184. precise for allowed replacement direction. */
  1185. if (!deps_ok_for_redirect (bb1, bb2))
  1186. continue;
  1187. if (!(same_phi_alternatives (same_succ, bb1, bb2)))
  1188. continue;
  1189. find_duplicate (same_succ, bb1, bb2);
  1190. }
  1191. }
  1192. }
  1193. /* Find clusters of bbs which can be merged. */
  1194. static void
  1195. find_clusters (void)
  1196. {
  1197. same_succ same;
  1198. while (!worklist.is_empty ())
  1199. {
  1200. same = worklist.pop ();
  1201. same->in_worklist = false;
  1202. if (dump_file && (dump_flags & TDF_DETAILS))
  1203. {
  1204. fprintf (dump_file, "processing worklist entry\n");
  1205. same_succ_print (dump_file, same);
  1206. }
  1207. find_clusters_1 (same);
  1208. }
  1209. }
  1210. /* Returns the vop phi of BB, if any. */
  1211. static gphi *
  1212. vop_phi (basic_block bb)
  1213. {
  1214. gphi *stmt;
  1215. gphi_iterator gsi;
  1216. for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  1217. {
  1218. stmt = gsi.phi ();
  1219. if (! virtual_operand_p (gimple_phi_result (stmt)))
  1220. continue;
  1221. return stmt;
  1222. }
  1223. return NULL;
  1224. }
  1225. /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
  1226. static void
  1227. replace_block_by (basic_block bb1, basic_block bb2)
  1228. {
  1229. edge pred_edge;
  1230. edge e1, e2;
  1231. edge_iterator ei;
  1232. unsigned int i;
  1233. gphi *bb2_phi;
  1234. bb2_phi = vop_phi (bb2);
  1235. /* Mark the basic block as deleted. */
  1236. mark_basic_block_deleted (bb1);
  1237. /* Redirect the incoming edges of bb1 to bb2. */
  1238. for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
  1239. {
  1240. pred_edge = EDGE_PRED (bb1, i - 1);
  1241. pred_edge = redirect_edge_and_branch (pred_edge, bb2);
  1242. gcc_assert (pred_edge != NULL);
  1243. if (bb2_phi == NULL)
  1244. continue;
  1245. /* The phi might have run out of capacity when the redirect added an
  1246. argument, which means it could have been replaced. Refresh it. */
  1247. bb2_phi = vop_phi (bb2);
  1248. add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
  1249. pred_edge, UNKNOWN_LOCATION);
  1250. }
  1251. bb2->frequency += bb1->frequency;
  1252. if (bb2->frequency > BB_FREQ_MAX)
  1253. bb2->frequency = BB_FREQ_MAX;
  1254. bb2->count += bb1->count;
  1255. /* Merge the outgoing edge counts from bb1 onto bb2. */
  1256. gcov_type out_sum = 0;
  1257. FOR_EACH_EDGE (e1, ei, bb1->succs)
  1258. {
  1259. e2 = find_edge (bb2, e1->dest);
  1260. gcc_assert (e2);
  1261. e2->count += e1->count;
  1262. out_sum += e2->count;
  1263. }
  1264. /* Recompute the edge probabilities from the new merged edge count.
  1265. Use the sum of the new merged edge counts computed above instead
  1266. of bb2's merged count, in case there are profile count insanities
  1267. making the bb count inconsistent with the edge weights. */
  1268. FOR_EACH_EDGE (e2, ei, bb2->succs)
  1269. {
  1270. e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
  1271. }
  1272. /* Do updates that use bb1, before deleting bb1. */
  1273. release_last_vdef (bb1);
  1274. same_succ_flush_bb (bb1);
  1275. delete_basic_block (bb1);
  1276. }
  1277. /* Bbs for which update_debug_stmt need to be called. */
  1278. static bitmap update_bbs;
  1279. /* For each cluster in all_clusters, merge all cluster->bbs. Returns
  1280. number of bbs removed. */
  1281. static int
  1282. apply_clusters (void)
  1283. {
  1284. basic_block bb1, bb2;
  1285. bb_cluster c;
  1286. unsigned int i, j;
  1287. bitmap_iterator bj;
  1288. int nr_bbs_removed = 0;
  1289. for (i = 0; i < all_clusters.length (); ++i)
  1290. {
  1291. c = all_clusters[i];
  1292. if (c == NULL)
  1293. continue;
  1294. bb2 = c->rep_bb;
  1295. bitmap_set_bit (update_bbs, bb2->index);
  1296. bitmap_clear_bit (c->bbs, bb2->index);
  1297. EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
  1298. {
  1299. bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
  1300. bitmap_clear_bit (update_bbs, bb1->index);
  1301. replace_block_by (bb1, bb2);
  1302. nr_bbs_removed++;
  1303. }
  1304. }
  1305. return nr_bbs_removed;
  1306. }
  1307. /* Resets debug statement STMT if it has uses that are not dominated by their
  1308. defs. */
  1309. static void
  1310. update_debug_stmt (gimple stmt)
  1311. {
  1312. use_operand_p use_p;
  1313. ssa_op_iter oi;
  1314. basic_block bbuse;
  1315. if (!gimple_debug_bind_p (stmt))
  1316. return;
  1317. bbuse = gimple_bb (stmt);
  1318. FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
  1319. {
  1320. tree name = USE_FROM_PTR (use_p);
  1321. gimple def_stmt = SSA_NAME_DEF_STMT (name);
  1322. basic_block bbdef = gimple_bb (def_stmt);
  1323. if (bbdef == NULL || bbuse == bbdef
  1324. || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
  1325. continue;
  1326. gimple_debug_bind_reset_value (stmt);
  1327. update_stmt (stmt);
  1328. break;
  1329. }
  1330. }
  1331. /* Resets all debug statements that have uses that are not
  1332. dominated by their defs. */
  1333. static void
  1334. update_debug_stmts (void)
  1335. {
  1336. basic_block bb;
  1337. bitmap_iterator bi;
  1338. unsigned int i;
  1339. EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
  1340. {
  1341. gimple stmt;
  1342. gimple_stmt_iterator gsi;
  1343. bb = BASIC_BLOCK_FOR_FN (cfun, i);
  1344. for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  1345. {
  1346. stmt = gsi_stmt (gsi);
  1347. if (!is_gimple_debug (stmt))
  1348. continue;
  1349. update_debug_stmt (stmt);
  1350. }
  1351. }
  1352. }
  1353. /* Runs tail merge optimization. */
  1354. unsigned int
  1355. tail_merge_optimize (unsigned int todo)
  1356. {
  1357. int nr_bbs_removed_total = 0;
  1358. int nr_bbs_removed;
  1359. bool loop_entered = false;
  1360. int iteration_nr = 0;
  1361. int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
  1362. if (!flag_tree_tail_merge
  1363. || max_iterations == 0)
  1364. return 0;
  1365. timevar_push (TV_TREE_TAIL_MERGE);
  1366. if (!dom_info_available_p (CDI_DOMINATORS))
  1367. {
  1368. /* PRE can leave us with unreachable blocks, remove them now. */
  1369. delete_unreachable_blocks ();
  1370. calculate_dominance_info (CDI_DOMINATORS);
  1371. }
  1372. init_worklist ();
  1373. while (!worklist.is_empty ())
  1374. {
  1375. if (!loop_entered)
  1376. {
  1377. loop_entered = true;
  1378. alloc_cluster_vectors ();
  1379. update_bbs = BITMAP_ALLOC (NULL);
  1380. }
  1381. else
  1382. reset_cluster_vectors ();
  1383. iteration_nr++;
  1384. if (dump_file && (dump_flags & TDF_DETAILS))
  1385. fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
  1386. find_clusters ();
  1387. gcc_assert (worklist.is_empty ());
  1388. if (all_clusters.is_empty ())
  1389. break;
  1390. nr_bbs_removed = apply_clusters ();
  1391. nr_bbs_removed_total += nr_bbs_removed;
  1392. if (nr_bbs_removed == 0)
  1393. break;
  1394. free_dominance_info (CDI_DOMINATORS);
  1395. if (iteration_nr == max_iterations)
  1396. break;
  1397. calculate_dominance_info (CDI_DOMINATORS);
  1398. update_worklist ();
  1399. }
  1400. if (dump_file && (dump_flags & TDF_DETAILS))
  1401. fprintf (dump_file, "htab collision / search: %f\n",
  1402. same_succ_htab->collisions ());
  1403. if (nr_bbs_removed_total > 0)
  1404. {
  1405. if (MAY_HAVE_DEBUG_STMTS)
  1406. {
  1407. calculate_dominance_info (CDI_DOMINATORS);
  1408. update_debug_stmts ();
  1409. }
  1410. if (dump_file && (dump_flags & TDF_DETAILS))
  1411. {
  1412. fprintf (dump_file, "Before TODOs.\n");
  1413. dump_function_to_file (current_function_decl, dump_file, dump_flags);
  1414. }
  1415. mark_virtual_operands_for_renaming (cfun);
  1416. }
  1417. delete_worklist ();
  1418. if (loop_entered)
  1419. {
  1420. delete_cluster_vectors ();
  1421. BITMAP_FREE (update_bbs);
  1422. }
  1423. timevar_pop (TV_TREE_TAIL_MERGE);
  1424. return todo;
  1425. }