12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433 |
- /* SLP - Basic Block Vectorization
- Copyright (C) 2007-2015 Free Software Foundation, Inc.
- Contributed by Dorit Naishlos <dorit@il.ibm.com>
- and Ira Rosen <irar@il.ibm.com>
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3, or (at your option) any later
- version.
- GCC is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>. */
- #include "config.h"
- #include "system.h"
- #include "coretypes.h"
- #include "dumpfile.h"
- #include "tm.h"
- #include "hash-set.h"
- #include "machmode.h"
- #include "vec.h"
- #include "double-int.h"
- #include "input.h"
- #include "alias.h"
- #include "symtab.h"
- #include "wide-int.h"
- #include "inchash.h"
- #include "tree.h"
- #include "fold-const.h"
- #include "stor-layout.h"
- #include "target.h"
- #include "predict.h"
- #include "hard-reg-set.h"
- #include "function.h"
- #include "basic-block.h"
- #include "gimple-pretty-print.h"
- #include "tree-ssa-alias.h"
- #include "internal-fn.h"
- #include "gimple-expr.h"
- #include "is-a.h"
- #include "gimple.h"
- #include "gimple-iterator.h"
- #include "gimple-ssa.h"
- #include "tree-phinodes.h"
- #include "ssa-iterators.h"
- #include "stringpool.h"
- #include "tree-ssanames.h"
- #include "tree-pass.h"
- #include "cfgloop.h"
- #include "hashtab.h"
- #include "rtl.h"
- #include "flags.h"
- #include "statistics.h"
- #include "real.h"
- #include "fixed-value.h"
- #include "insn-config.h"
- #include "expmed.h"
- #include "dojump.h"
- #include "explow.h"
- #include "calls.h"
- #include "emit-rtl.h"
- #include "varasm.h"
- #include "stmt.h"
- #include "expr.h"
- #include "recog.h" /* FIXME: for insn_data */
- #include "insn-codes.h"
- #include "optabs.h"
- #include "tree-vectorizer.h"
- #include "langhooks.h"
- #include "gimple-walk.h"
/* Extract the location of the basic block in the source code.
   Return the location of the first statement in BB that carries one,
   or UNKNOWN_LOCATION if BB is NULL or none of its statements has a
   location.  (Note: this never returns NULL; the sentinel is
   UNKNOWN_LOCATION.)  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  /* Walk statements in order; the first one with an explicit location
     determines the block's location.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
- /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
- static void
- vect_free_slp_tree (slp_tree node)
- {
- int i;
- slp_tree child;
- if (!node)
- return;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_free_slp_tree (child);
- SLP_TREE_CHILDREN (node).release ();
- SLP_TREE_SCALAR_STMTS (node).release ();
- SLP_TREE_VEC_STMTS (node).release ();
- SLP_TREE_LOAD_PERMUTATION (node).release ();
- free (node);
- }
- /* Free the memory allocated for the SLP instance. */
- void
- vect_free_slp_instance (slp_instance instance)
- {
- vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
- SLP_INSTANCE_LOADS (instance).release ();
- SLP_INSTANCE_BODY_COST_VEC (instance).release ();
- free (instance);
- }
- /* Create an SLP node for SCALAR_STMTS. */
- static slp_tree
- vect_create_new_slp_node (vec<gimple> scalar_stmts)
- {
- slp_tree node;
- gimple stmt = scalar_stmts[0];
- unsigned int nops;
- if (is_gimple_call (stmt))
- nops = gimple_call_num_args (stmt);
- else if (is_gimple_assign (stmt))
- {
- nops = gimple_num_ops (stmt) - 1;
- if (gimple_assign_rhs_code (stmt) == COND_EXPR)
- nops++;
- }
- else
- return NULL;
- node = XNEW (struct _slp_tree);
- SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
- SLP_TREE_VEC_STMTS (node).create (0);
- SLP_TREE_CHILDREN (node).create (nops);
- SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
- return node;
- }
- /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
- operand. */
- static vec<slp_oprnd_info>
- vect_create_oprnd_info (int nops, int group_size)
- {
- int i;
- slp_oprnd_info oprnd_info;
- vec<slp_oprnd_info> oprnds_info;
- oprnds_info.create (nops);
- for (i = 0; i < nops; i++)
- {
- oprnd_info = XNEW (struct _slp_oprnd_info);
- oprnd_info->def_stmts.create (group_size);
- oprnd_info->first_dt = vect_uninitialized_def;
- oprnd_info->first_op_type = NULL_TREE;
- oprnd_info->first_pattern = false;
- oprnds_info.quick_push (oprnd_info);
- }
- return oprnds_info;
- }
- /* Free operands info. */
- static void
- vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
- {
- int i;
- slp_oprnd_info oprnd_info;
- FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
- {
- oprnd_info->def_stmts.release ();
- XDELETE (oprnd_info);
- }
- oprnds_info.release ();
- }
- /* Find the place of the data-ref in STMT in the interleaving chain that starts
- from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
- static int
- vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
- {
- gimple next_stmt = first_stmt;
- int result = 0;
- if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
- return -1;
- do
- {
- if (next_stmt == stmt)
- return result;
- result++;
- next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
- }
- while (next_stmt);
- return -1;
- }
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1, if the error could be corrected by swapping operands of the
   operation return 1, if everything is ok return 0.  */

static int
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
			     gimple stmt, bool first,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  /* Gimple-operand index of the first RHS operand; overridden below for
     calls, whose arguments start at a higher op index.  */
  int first_op_idx = 1;
  bool commutative = false;
  /* True when the first operand is the COND_EXPR comparison, whose two
     sub-operands are processed as separate SLP operands.  */
  bool first_op_cond = false;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Work out how many operands STMT has and where they start.  */
  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	{
	  /* The comparison inside the COND_EXPR contributes an extra
	     operand; swapping it is handled specially below.  */
	  first_op_cond = true;
	  commutative = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  /* Set when we have swapped the first two operands to retry matching;
     the actual IR swap is performed once, at the end.  */
  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      /* Fetch operand I, honoring a pending swap of operands 0 and 1
	 (swapped ? !i : i maps 0<->1 while leaving later operands alone).  */
      if (first_op_cond)
	{
	  if (i == 0 || i == 1)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
				  swapped ? !i : i);
	  else
	    oprnd = gimple_op (stmt, first_op_idx + i - 1);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      /* The operand must have an analyzable definition.  */
      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt)
	  || (!def_stmt && dt != vect_constant_def))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't find def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	      || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
		  && gimple_code (def_stmt) != GIMPLE_PHI))
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  /* Mixing pattern and non-pattern defs across group members is a
	     mismatch; for a commutative op try once with operands swapped.  */
	  if (!first && !oprnd_info->first_pattern)
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  /* Follow the pattern: analyze the related stmt instead.  */
	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  /* Re-derive DEF from the pattern stmt's result.  */
	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	      def = gimple_phi_result (def_stmt);
	      break;

	    case GIMPLE_ASSIGN:
	      def = gimple_assign_lhs (def_stmt);
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (first)
	{
	  /* First stmt of the group: record what later stmts must match.  */
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	case vect_reduction_def:
	  break;

	case vect_internal_def:
	  /* Only internal defs are collected for recursive SLP building.  */
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  The retry above only re-read operands in swapped
     order; commit the swap to the IR now that matching succeeded.  */
  if (swapped)
    {
      if (first_op_cond)
	{
	  /* For a COND_EXPR, swap the comparison operands and invert the
	     comparison code to preserve semantics.  */
	  tree cond = gimple_assign_rhs1 (stmt);
	  swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
			     &TREE_OPERAND (cond, 1));
	  TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}
- /* Verify if the scalar stmts STMTS are isomorphic, require data
- permutation or are of unsupported types of operation. Return
- true if they are, otherwise return false and indicate in *MATCHES
- which stmts are not isomorphic to the first one. If MATCHES[0]
- is false then this indicates the comparison could not be
- carried out or the stmts will never be vectorized by SLP. */
- static bool
- vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
- vec<gimple> stmts, unsigned int group_size,
- unsigned nops, unsigned int *max_nunits,
- unsigned int vectorization_factor, bool *matches)
- {
- unsigned int i;
- gimple stmt = stmts[0];
- enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
- enum tree_code first_cond_code = ERROR_MARK;
- tree lhs;
- bool need_same_oprnds = false;
- tree vectype, scalar_type, first_op1 = NULL_TREE;
- optab optab;
- int icode;
- machine_mode optab_op2_mode;
- machine_mode vec_mode;
- struct data_reference *first_dr;
- HOST_WIDE_INT dummy;
- gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
- tree cond;
- /* For every stmt in NODE find its def stmt/s. */
- FOR_EACH_VEC_ELT (stmts, i, stmt)
- {
- matches[i] = false;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- dump_printf (MSG_NOTE, "\n");
- }
- /* Fail to vectorize statements marked as unvectorizable. */
- if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: unvectorizable statement ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- lhs = gimple_get_lhs (stmt);
- if (lhs == NULL_TREE)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: not GIMPLE_ASSIGN nor "
- "GIMPLE_CALL ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- if (is_gimple_assign (stmt)
- && gimple_assign_rhs_code (stmt) == COND_EXPR
- && (cond = gimple_assign_rhs1 (stmt))
- && !COMPARISON_CLASS_P (cond))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: condition is not "
- "comparison ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
- vectype = get_vectype_for_scalar_type (scalar_type);
- if (!vectype)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: unsupported data-type ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- scalar_type);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- /* In case of multiple types we need to detect the smallest type. */
- if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
- {
- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
- if (bb_vinfo)
- vectorization_factor = *max_nunits;
- }
- if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
- {
- rhs_code = CALL_EXPR;
- if (gimple_call_internal_p (call_stmt)
- || gimple_call_tail_p (call_stmt)
- || gimple_call_noreturn_p (call_stmt)
- || !gimple_call_nothrow_p (call_stmt)
- || gimple_call_chain (call_stmt))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: unsupported call type ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- call_stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- }
- else
- rhs_code = gimple_assign_rhs_code (stmt);
- /* Check the operation. */
- if (i == 0)
- {
- first_stmt_code = rhs_code;
- /* Shift arguments should be equal in all the packed stmts for a
- vector shift with scalar shift operand. */
- if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
- || rhs_code == LROTATE_EXPR
- || rhs_code == RROTATE_EXPR)
- {
- vec_mode = TYPE_MODE (vectype);
- /* First see if we have a vector/vector shift. */
- optab = optab_for_tree_code (rhs_code, vectype,
- optab_vector);
- if (!optab
- || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
- {
- /* No vector/vector shift, try for a vector/scalar shift. */
- optab = optab_for_tree_code (rhs_code, vectype,
- optab_scalar);
- if (!optab)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: no optab.\n");
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- icode = (int) optab_handler (optab, vec_mode);
- if (icode == CODE_FOR_nothing)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: "
- "op not supported by target.\n");
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- optab_op2_mode = insn_data[icode].operand[2].mode;
- if (!VECTOR_MODE_P (optab_op2_mode))
- {
- need_same_oprnds = true;
- first_op1 = gimple_assign_rhs2 (stmt);
- }
- }
- }
- else if (rhs_code == WIDEN_LSHIFT_EXPR)
- {
- need_same_oprnds = true;
- first_op1 = gimple_assign_rhs2 (stmt);
- }
- }
- else
- {
- if (first_stmt_code != rhs_code
- && (first_stmt_code != IMAGPART_EXPR
- || rhs_code != REALPART_EXPR)
- && (first_stmt_code != REALPART_EXPR
- || rhs_code != IMAGPART_EXPR)
- && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
- && (first_stmt_code == ARRAY_REF
- || first_stmt_code == BIT_FIELD_REF
- || first_stmt_code == INDIRECT_REF
- || first_stmt_code == COMPONENT_REF
- || first_stmt_code == MEM_REF)))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: different operation "
- "in stmt ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Mismatch. */
- continue;
- }
- if (need_same_oprnds
- && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: different shift "
- "arguments in ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Mismatch. */
- continue;
- }
- if (rhs_code == CALL_EXPR)
- {
- gimple first_stmt = stmts[0];
- if (gimple_call_num_args (stmt) != nops
- || !operand_equal_p (gimple_call_fn (first_stmt),
- gimple_call_fn (stmt), 0)
- || gimple_call_fntype (first_stmt)
- != gimple_call_fntype (stmt))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: different calls in ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Mismatch. */
- continue;
- }
- }
- }
- /* Grouped store or load. */
- if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
- {
- if (REFERENCE_CLASS_P (lhs))
- {
- /* Store. */
- ;
- }
- else
- {
- /* Load. */
- unsigned unrolling_factor
- = least_common_multiple
- (*max_nunits, group_size) / group_size;
- /* FORNOW: Check that there is no gap between the loads
- and no gap between the groups when we need to load
- multiple groups at once.
- ??? We should enhance this to only disallow gaps
- inside vectors. */
- if ((unrolling_factor > 1
- && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
- && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
- /* If the group is split up then GROUP_GAP
- isn't correct here, nor is GROUP_FIRST_ELEMENT. */
- || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
- || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
- && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: grouped "
- "loads have gaps ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- /* Check that the size of interleaved loads group is not
- greater than the SLP group size. */
- unsigned ncopies
- = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (loop_vinfo
- && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
- && ((GROUP_SIZE (vinfo_for_stmt (stmt))
- - GROUP_GAP (vinfo_for_stmt (stmt)))
- > ncopies * group_size))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: the number "
- "of interleaved loads is greater than "
- "the SLP group size ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- old_first_load = first_load;
- first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
- if (prev_first_load)
- {
- /* Check that there are no loads from different interleaving
- chains in the same node. */
- if (prev_first_load != first_load)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
- vect_location,
- "Build SLP failed: different "
- "interleaving chains in one node ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Mismatch. */
- continue;
- }
- }
- else
- prev_first_load = first_load;
- /* In some cases a group of loads is just the same load
- repeated N times. Only analyze its cost once. */
- if (first_load == stmt && old_first_load != first_load)
- {
- first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
- if (vect_supportable_dr_alignment (first_dr, false)
- == dr_unaligned_unsupported)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
- vect_location,
- "Build SLP failed: unsupported "
- "unaligned load ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- }
- }
- } /* Grouped access. */
- else
- {
- if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
- {
- /* Not grouped load. */
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: not grouped load ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* FORNOW: Not grouped loads are not supported. */
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- /* Not memory operation. */
- if (TREE_CODE_CLASS (rhs_code) != tcc_binary
- && TREE_CODE_CLASS (rhs_code) != tcc_unary
- && rhs_code != COND_EXPR
- && rhs_code != CALL_EXPR)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: operation");
- dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- if (rhs_code == COND_EXPR)
- {
- tree cond_expr = gimple_assign_rhs1 (stmt);
- if (i == 0)
- first_cond_code = TREE_CODE (cond_expr);
- else if (first_cond_code != TREE_CODE (cond_expr))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: different"
- " operation");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Mismatch. */
- continue;
- }
- }
- }
- matches[i] = true;
- }
- for (i = 0; i < group_size; ++i)
- if (!matches[i])
- return false;
- return true;
- }
/* Recursively build an SLP tree starting from NODE.
   Return FALSE if the def-stmts are not isomorphic, require a data
   permutation or are of unsupported types of operation; otherwise
   return TRUE.  MATCHES[I] records for each scalar stmt of the group
   whether it matched the group's first stmt; MATCHES[0] == false
   signals a fatal mismatch to the caller.  NPERMUTES counts operand
   swaps tried so far (bounded to avoid exponential re-matching),
   and MAX_TREE_SIZE bounds the number of nodes built.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  gimple stmt;

  matches[0] = false;

  /* Determine the operand count from the group's first stmt; only
     calls and assignments can form an SLP node.  */
  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      /* A COND_EXPR carries the condition as an extra operand.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  /* Verify the stmts of this node are isomorphic.  */
  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                           stmt, (i == 0), &oprnds_info))
        {
        case 0:
          break;
        case -1:
          /* Fatal mismatch: no point continuing with other stmts.  */
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return false;
        case 1:
          /* Recoverable mismatch for this stmt only; it may still be
             fixed up by the operand-swapping retry below.  */
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      /* Remember state so a failed child build can be rolled back.  */
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      /* Bound the total amount of work spent building the tree.  */
      if (++this_tree_size > max_tree_size)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
          /* Ownership of the def-stmts vector moved into CHILD.  */
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of not successful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again ... */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      /* Child build failed for good; release it and give up.  */
      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}
- /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
- static void
- vect_print_slp_tree (int dump_kind, slp_tree node)
- {
- int i;
- gimple stmt;
- slp_tree child;
- if (!node)
- return;
- dump_printf (dump_kind, "node ");
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
- {
- dump_printf (dump_kind, "\n\tstmt %d ", i);
- dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
- }
- dump_printf (dump_kind, "\n");
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_print_slp_tree (dump_kind, child);
- }
- /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
- If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
- J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
- stmts in NODE are to be marked. */
- static void
- vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
- {
- int i;
- gimple stmt;
- slp_tree child;
- if (!node)
- return;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
- if (j < 0 || i == j)
- STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_mark_slp_stmts (child, mark, j);
- }
- /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
- static void
- vect_mark_slp_stmts_relevant (slp_tree node)
- {
- int i;
- gimple stmt;
- stmt_vec_info stmt_info;
- slp_tree child;
- if (!node)
- return;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
- {
- stmt_info = vinfo_for_stmt (stmt);
- gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
- || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
- STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
- }
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_mark_slp_stmts_relevant (child);
- }
- /* Rearrange the statements of NODE according to PERMUTATION. */
- static void
- vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
- vec<unsigned> permutation)
- {
- gimple stmt;
- vec<gimple> tmp_stmts;
- unsigned int i;
- slp_tree child;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_slp_rearrange_stmts (child, group_size, permutation);
- gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
- tmp_stmts.create (group_size);
- tmp_stmts.quick_grow_cleared (group_size);
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
- tmp_stmts[permutation[i]] = stmt;
- SLP_TREE_SCALAR_STMTS (node).release ();
- SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
- }
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  On success this may rearrange the stmts
   of the instance (reduction case) and releases load permutations
   that turn out not to be needed.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          /* No permutation recorded means the identity permutation.  */
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          /* A repeated index means two loads of the same element.  */
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            /* A missing index means a gap in the sequence.  */
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);
      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment compications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              /* Each load must be the interleaving-chain successor of
                 the previous one.  */
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ??? This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      /* Each node must load a single element (all indices equal), and
         distinct nodes must load distinct elements.  */
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  /* Finally verify the target can actually generate each permutation
     (dry-run of the transform, ANALYZE_ONLY == true).  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}
- /* Find the first load in the loop that belongs to INSTANCE.
- When loads are in several SLP nodes, there can be a case in which the first
- load does not appear in the first SLP node to be transformed, causing
- incorrect order of statements. Since we generate all the loads together,
- they must be inserted before the first load of the SLP instance and not
- before the first load of the first node of the instance. */
- static gimple
- vect_find_first_load_in_slp_instance (slp_instance instance)
- {
- int i, j;
- slp_tree load_node;
- gimple first_load = NULL, load;
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
- first_load = get_earlier_stmt (load, first_load);
- return first_load;
- }
- /* Find the last store in SLP INSTANCE. */
- static gimple
- vect_find_last_store_in_slp_instance (slp_instance instance)
- {
- int i;
- slp_tree node;
- gimple last_store = NULL, store;
- node = SLP_INSTANCE_TREE (instance);
- for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
- last_store = get_later_stmt (store, last_store);
- return last_store;
- }
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE,
   accumulating body costs into the instance's body cost vector and
   prologue costs into PROLOGUE_COST_VEC.  NCOPIES_FOR_COST is the
   number of vector stmts generated per scalar stmt.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          /* NOTE: this I shadows the outer unsigned I for the scan below.  */
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ??? Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                /* One permutation cost covers the whole group.  */
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}
- /* Compute the cost for the SLP instance INSTANCE. */
- static void
- vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
- slp_instance instance, unsigned nunits)
- {
- stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
- unsigned ncopies_for_cost;
- stmt_info_for_cost *si;
- unsigned i;
- /* Calculate the number of vector stmts to create based on the unrolling
- factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
- GROUP_SIZE / NUNITS otherwise. */
- unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
- ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
- prologue_cost_vec.create (10);
- body_cost_vec.create (10);
- SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
- vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
- instance, SLP_INSTANCE_TREE (instance),
- &prologue_cost_vec, ncopies_for_cost);
- /* Record the prologue costs, which were delayed until we were
- sure that SLP was successful. Unlike the body costs, we know
- the final values now regardless of the loop vectorization factor. */
- void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
- : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
- FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
- {
- struct _stmt_vec_info *stmt_info
- = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
- (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
- si->misalign, vect_prologue);
- }
- prologue_cost_vec.release ();
- }
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  /* Determine the vector type and group size: STMT is either the head
     of an interleaving group (store or reduction chain) or one stmt of
     a plain reduction group.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          /* Reduction chain: only possible in loop vectorization.  */
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      /* Plain reduction group: its size is the number of reductions.  */
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      /* Basic-block SLP cannot unroll; bail out early.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          /* Prefer the pattern stmt over the original if one exists.  */
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, matches, &npermutes, NULL,
                           max_tree_size))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP\n");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance; it takes ownership of NODE and LOADS.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              /* Identity permutation; no need to keep it.  */
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Frees NODE and LOADS as well.  */
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                             new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.
   NOTE(review): in the loop case this returns TRUE even when no
   instance was built — callers appear to inspect the recorded
   instances rather than the return value; confirm before relying
   on the result.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                  unsigned max_tree_size)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
                                   max_tree_size))
      ok = true;

  if (bb_vinfo && !ok)
    {
      /* BB SLP is all-or-nothing on the store groups.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.\n");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
                                       max_tree_size))
          ok = true;
        else
          /* Any failing chain aborts the whole analysis.  */
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
                                    max_tree_size))
    ok = true;

  return true;
}
- /* For each possible SLP instance decide whether to SLP it and calculate overall
- unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
- least one instance. */
- bool
- vect_make_slp_decision (loop_vec_info loop_vinfo)
- {
- unsigned int i, unrolling_factor = 1;
- vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
- slp_instance instance;
- int decided_to_slp = 0;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
- "\n");
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- {
- /* FORNOW: SLP if you can. */
- if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
- unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
- /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
- call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
- loop-based vectorization. Such stmts will be marked as HYBRID. */
- vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
- decided_to_slp++;
- }
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
- if (decided_to_slp && dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Decided to SLP %d instances. Unrolling factor %d\n",
- decided_to_slp, unrolling_factor);
- return (decided_to_slp > 0);
- }
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.
   I is the lane (scalar-stmt index) being examined; STYPE carries a
   hybrid marking being propagated down from an ancestor node.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* We always get the pattern stmt here, but for immediate
         uses we have to use the LHS of the original stmt.  */
      gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
        stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
        FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
          {
            /* Uses outside the loop cannot force hybrid SLP.  */
            if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
              continue;
            use_vinfo = vinfo_for_stmt (use_stmt);
            /* Look through pattern stmts to the relevant info.  */
            if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
                && STMT_VINFO_RELATED_STMT (use_vinfo))
              use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
            /* A relevant non-SLP use makes this stmt hybrid, except a
               reduction PHI, which is handled by the reduction code.  */
            if (!STMT_SLP_TYPE (use_vinfo)
                && (STMT_VINFO_RELEVANT (use_vinfo)
                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
                && !(gimple_code (use_stmt) == GIMPLE_PHI
                     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
              stype = hybrid;
          }
    }

  if (stype == hybrid)
    STMT_SLP_TYPE (stmt_vinfo) = hybrid;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    vect_detect_hybrid_slp_stmts (child, i, stype);
}
- /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
- static tree
- vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
- {
- walk_stmt_info *wi = (walk_stmt_info *)data;
- struct loop *loopp = (struct loop *)wi->info;
- if (wi->is_lhs)
- return NULL_TREE;
- if (TREE_CODE (*tp) == SSA_NAME
- && !SSA_NAME_IS_DEFAULT_DEF (*tp))
- {
- gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
- if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
- && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
- STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
- }
- return NULL_TREE;
- }
- static tree
- vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
- walk_stmt_info *)
- {
- /* If the stmt is in a SLP instance then this isn't a reason
- to mark use definitions in other SLP instances as hybrid. */
- if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
- *handled = true;
- return NULL_TREE;
- }
/* Find stmts that must be both vectorized and SLPed.

   Two passes: (1) walk every pattern stmt in the loop, marking pure-SLP
   defs used by it as hybrid (immediate uses inside pattern stmts are not
   recorded in the IL, so this cannot be done via FOR_EACH_IMM_USE_STMT);
   (2) walk each SLP instance tree lane by lane via
   vect_detect_hybrid_slp_stmts, marking and propagating hybrid-ness.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
		     "\n");

  /* First walk all pattern stmt in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      walk_stmt_info wi;
	      memset (&wi, 0, sizeof (wi));
	      /* Pass the loop to the operand callback through wi.info.  */
	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
	      /* Walk the main pattern stmt that replaces STMT ...  */
	      gimple_stmt_iterator gsi2
		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
				vect_detect_hybrid_slp_1, &wi);
	      /* ... and any auxiliary stmts of its definition sequence.  */
	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
			       vect_detect_hybrid_slp_2,
			       vect_detect_hybrid_slp_1, &wi);
	    }
	}
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
				      i, pure_slp);
    }
}
- /* Create and initialize a new bb_vec_info struct for BB, as well as
- stmt_vec_info structs for all the stmts in it. */
- static bb_vec_info
- new_bb_vec_info (basic_block bb)
- {
- bb_vec_info res = NULL;
- gimple_stmt_iterator gsi;
- res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
- BB_VINFO_BB (res) = bb;
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- gimple_set_uid (stmt, 0);
- set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
- }
- BB_VINFO_GROUPED_STORES (res).create (10);
- BB_VINFO_SLP_INSTANCES (res).create (2);
- BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
- bb->aux = res;
- return res;
- }
- /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
- stmts in the basic block. */
- static void
- destroy_bb_vec_info (bb_vec_info bb_vinfo)
- {
- vec<slp_instance> slp_instances;
- slp_instance instance;
- basic_block bb;
- gimple_stmt_iterator si;
- unsigned i;
- if (!bb_vinfo)
- return;
- bb = BB_VINFO_BB (bb_vinfo);
- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
- {
- gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- if (stmt_info)
- /* Free stmt_vec_info. */
- free_stmt_vec_info (stmt);
- }
- vect_destroy_datarefs (NULL, bb_vinfo);
- free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
- BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
- slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- vect_free_slp_instance (instance);
- BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
- destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
- free (bb_vinfo);
- bb->aux = NULL;
- }
- /* Analyze statements contained in SLP tree node after recursively analyzing
- the subtree. Return TRUE if the operations are supported. */
- static bool
- vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
- {
- bool dummy;
- int i;
- gimple stmt;
- slp_tree child;
- if (!node)
- return true;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (!vect_slp_analyze_node_operations (bb_vinfo, child))
- return false;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
- {
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- gcc_assert (stmt_info);
- gcc_assert (PURE_SLP_STMT (stmt_info));
- if (!vect_analyze_stmt (stmt, &dummy, node))
- return false;
- }
- return true;
- }
- /* Analyze statements in SLP instances of the basic block. Return TRUE if the
- operations are supported. */
- static bool
- vect_slp_analyze_operations (bb_vec_info bb_vinfo)
- {
- vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
- slp_instance instance;
- int i;
- for (i = 0; slp_instances.iterate (i, &instance); )
- {
- if (!vect_slp_analyze_node_operations (bb_vinfo,
- SLP_INSTANCE_TREE (instance)))
- {
- vect_free_slp_instance (instance);
- slp_instances.ordered_remove (i);
- }
- else
- i++;
- }
- if (!slp_instances.length ())
- return false;
- return true;
- }
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.

   LIFE has one entry per lane of NODE; a true entry means the lane's
   scalar stmt stays live after vectorization and so contributes no
   scalar saving.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      /* Lane already known to stay live — nothing to account.  */
      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt)
		&& (gimple_code (use_stmt) == GIMPLE_PHI
		    || gimple_bb (use_stmt) != bb
		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
	      {
		(*life)[i] = true;
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      /* Classify the stmt to pick the matching scalar cost kind.  */
      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    stmt_cost = vect_get_stmt_cost (scalar_load);
	  else
	    stmt_cost = vect_get_stmt_cost (scalar_store);
	}
      else
	stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Accumulate the children's costs; LIFE is shared so lanes marked live
     here are skipped in the children too.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}
/* Check if vectorization of the basic block is profitable.  Returns true
   iff the target-computed vector cost (inside + prologue + epilogue) is
   strictly smaller than the accumulated scalar cost of all SLP
   instances.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i, j;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *ci;

  /* Calculate vector costs: replay each instance's recorded body costs
     into the target cost model.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
	{
	  stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
	  (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
				stmt_info, ci->misalign, vect_body);
	}
    }

  /* Calculate scalar cost.  A fresh, zeroed LIFE vector per instance
     tracks which lanes stay live (see vect_bb_slp_scalar_cost).  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
					      SLP_INSTANCE_TREE (instance),
					      &life);
    }

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  Runs the full BB-SLP
   analysis pipeline (data refs, accesses, patterns, alignment, SLP tree
   building, dependences, operations, cost model) and returns the
   constructed bb_vec_info on success, or NULL (after destroying any
   partial state) on the first failing step.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  unsigned n_stmts = 0;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* SLP needs at least two data refs to group.  */
  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: not enough data-refs in "
			 "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
     if (dump_enabled_p ())
       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			"not vectorized: unhandled data access in "
			"basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  vect_pattern_recog (NULL, bb_vinfo);

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad data alignment in basic "
			 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: failed to find SLP opportunities "
			 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  /* Mark all the statements that we do not want to vectorize.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
      if (STMT_SLP_TYPE (vinfo) != pure_slp)
	STMT_VINFO_VECTORIZABLE (vinfo) = false;
    }

  /* Analyze dependences.  At this point all stmts not participating in
     vectorization have to be marked.  Dependence analysis assumes
     that we either vectorize all SLP instances or none at all.  */
  if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data dependence "
			 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported alignment in basic "
			 "block.\n");
      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
- bb_vec_info
- vect_slp_analyze_bb (basic_block bb)
- {
- bb_vec_info bb_vinfo;
- int insns = 0;
- gimple_stmt_iterator gsi;
- unsigned int vector_sizes;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (!is_gimple_debug (stmt)
- && !gimple_nop_p (stmt)
- && gimple_code (stmt) != GIMPLE_LABEL)
- insns++;
- }
- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: too many instructions in "
- "basic block.\n");
- return NULL;
- }
- /* Autodetect first vector size we try. */
- current_vector_size = 0;
- vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
- while (1)
- {
- bb_vinfo = vect_slp_analyze_bb_1 (bb);
- if (bb_vinfo)
- return bb_vinfo;
- destroy_bb_vec_info (bb_vinfo);
- vector_sizes &= ~current_vector_size;
- if (vector_sizes == 0
- || current_vector_size == 0)
- return NULL;
- /* Try the next biggest vector size. */
- current_vector_size = 1 << floor_log2 (vector_sizes);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "***** Re-trying analysis with "
- "vector size %d\n", current_vector_size);
- }
- }
- /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
- the number of created vector stmts depends on the unrolling factor).
- However, the actual number of vector stmts for every SLP node depends on
- VF which is set later in vect_analyze_operations (). Hence, SLP costs
- should be updated. In this function we assume that the inside costs
- calculated in vect_model_xxx_cost are linear in ncopies. */
- void
- vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
- {
- unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
- slp_instance instance;
- stmt_vector_for_cost body_cost_vec;
- stmt_info_for_cost *si;
- void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "=== vect_update_slp_costs_according_to_vf ===\n");
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- {
- /* We assume that costs are linear in ncopies. */
- int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
- /* Record the instance's instructions in the target cost model.
- This was delayed until here because the count of instructions
- isn't known beforehand. */
- body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
- FOR_EACH_VEC_ELT (body_cost_vec, j, si)
- (void) add_stmt_cost (data, si->count * ncopies, si->kind,
- vinfo_for_stmt (si->stmt), si->misalign,
- vect_body);
- }
- }
- /* For constant and loop invariant defs of SLP_NODE this function returns
- (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
- OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
- scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
- REDUC_INDEX is the index of the reduction operand in the statements, unless
- it is -1. */
- static void
- vect_get_constant_vectors (tree op, slp_tree slp_node,
- vec<tree> *vec_oprnds,
- unsigned int op_num, unsigned int number_of_vectors,
- int reduc_index)
- {
- vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
- gimple stmt = stmts[0];
- stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
- unsigned nunits;
- tree vec_cst;
- tree *elts;
- unsigned j, number_of_places_left_in_vector;
- tree vector_type;
- tree vop;
- int group_size = stmts.length ();
- unsigned int vec_num, i;
- unsigned number_of_copies = 1;
- vec<tree> voprnds;
- voprnds.create (number_of_vectors);
- bool constant_p, is_store;
- tree neutral_op = NULL;
- enum tree_code code = gimple_expr_code (stmt);
- gimple def_stmt;
- struct loop *loop;
- gimple_seq ctor_seq = NULL;
- if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
- && reduc_index != -1)
- {
- op_num = reduc_index - 1;
- op = gimple_op (stmt, reduc_index);
- /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
- we need either neutral operands or the original operands. See
- get_initial_def_for_reduction() for details. */
- switch (code)
- {
- case WIDEN_SUM_EXPR:
- case DOT_PROD_EXPR:
- case PLUS_EXPR:
- case MINUS_EXPR:
- case BIT_IOR_EXPR:
- case BIT_XOR_EXPR:
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
- neutral_op = build_real (TREE_TYPE (op), dconst0);
- else
- neutral_op = build_int_cst (TREE_TYPE (op), 0);
- break;
- case MULT_EXPR:
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
- neutral_op = build_real (TREE_TYPE (op), dconst1);
- else
- neutral_op = build_int_cst (TREE_TYPE (op), 1);
- break;
- case BIT_AND_EXPR:
- neutral_op = build_int_cst (TREE_TYPE (op), -1);
- break;
- /* For MIN/MAX we don't have an easy neutral operand but
- the initial values can be used fine here. Only for
- a reduction chain we have to force a neutral element. */
- case MAX_EXPR:
- case MIN_EXPR:
- if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
- neutral_op = NULL;
- else
- {
- def_stmt = SSA_NAME_DEF_STMT (op);
- loop = (gimple_bb (stmt))->loop_father;
- neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
- loop_preheader_edge (loop));
- }
- break;
- default:
- neutral_op = NULL;
- }
- }
- if (STMT_VINFO_DATA_REF (stmt_vinfo))
- {
- is_store = true;
- op = gimple_assign_rhs1 (stmt);
- }
- else
- is_store = false;
- gcc_assert (op);
- if (CONSTANT_CLASS_P (op))
- constant_p = true;
- else
- constant_p = false;
- vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
- gcc_assert (vector_type);
- nunits = TYPE_VECTOR_SUBPARTS (vector_type);
- /* NUMBER_OF_COPIES is the number of times we need to use the same values in
- created vectors. It is greater than 1 if unrolling is performed.
- For example, we have two scalar operands, s1 and s2 (e.g., group of
- strided accesses of size two), while NUNITS is four (i.e., four scalars
- of this type can be packed in a vector). The output vector will contain
- two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
- will be 2).
- If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
- containing the operands.
- For example, NUNITS is four as before, and the group size is 8
- (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
- {s5, s6, s7, s8}. */
- number_of_copies = least_common_multiple (nunits, group_size) / group_size;
- number_of_places_left_in_vector = nunits;
- elts = XALLOCAVEC (tree, nunits);
- for (j = 0; j < number_of_copies; j++)
- {
- for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
- {
- if (is_store)
- op = gimple_assign_rhs1 (stmt);
- else
- {
- switch (code)
- {
- case COND_EXPR:
- if (op_num == 0 || op_num == 1)
- {
- tree cond = gimple_assign_rhs1 (stmt);
- op = TREE_OPERAND (cond, op_num);
- }
- else
- {
- if (op_num == 2)
- op = gimple_assign_rhs2 (stmt);
- else
- op = gimple_assign_rhs3 (stmt);
- }
- break;
- case CALL_EXPR:
- op = gimple_call_arg (stmt, op_num);
- break;
- case LSHIFT_EXPR:
- case RSHIFT_EXPR:
- case LROTATE_EXPR:
- case RROTATE_EXPR:
- op = gimple_op (stmt, op_num + 1);
- /* Unlike the other binary operators, shifts/rotates have
- the shift count being int, instead of the same type as
- the lhs, so make sure the scalar is the right type if
- we are dealing with vectors of
- long long/long/short/char. */
- if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
- op = fold_convert (TREE_TYPE (vector_type), op);
- break;
- default:
- op = gimple_op (stmt, op_num + 1);
- break;
- }
- }
- if (reduc_index != -1)
- {
- loop = (gimple_bb (stmt))->loop_father;
- def_stmt = SSA_NAME_DEF_STMT (op);
- gcc_assert (loop);
- /* Get the def before the loop. In reduction chain we have only
- one initial value. */
- if ((j != (number_of_copies - 1)
- || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
- && i != 0))
- && neutral_op)
- op = neutral_op;
- else
- op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
- loop_preheader_edge (loop));
- }
- /* Create 'vect_ = {op0,op1,...,opn}'. */
- number_of_places_left_in_vector--;
- if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
- {
- if (CONSTANT_CLASS_P (op))
- {
- op = fold_unary (VIEW_CONVERT_EXPR,
- TREE_TYPE (vector_type), op);
- gcc_assert (op && CONSTANT_CLASS_P (op));
- }
- else
- {
- tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
- gimple init_stmt;
- op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
- init_stmt
- = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
- gimple_seq_add_stmt (&ctor_seq, init_stmt);
- op = new_temp;
- }
- }
- elts[number_of_places_left_in_vector] = op;
- if (!CONSTANT_CLASS_P (op))
- constant_p = false;
- if (number_of_places_left_in_vector == 0)
- {
- number_of_places_left_in_vector = nunits;
- if (constant_p)
- vec_cst = build_vector (vector_type, elts);
- else
- {
- vec<constructor_elt, va_gc> *v;
- unsigned k;
- vec_alloc (v, nunits);
- for (k = 0; k < nunits; ++k)
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
- vec_cst = build_constructor (vector_type, v);
- }
- voprnds.quick_push (vect_init_vector (stmt, vec_cst,
- vector_type, NULL));
- if (ctor_seq != NULL)
- {
- gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
- gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
- gsi_insert_seq_before_without_update (&gsi, ctor_seq,
- GSI_SAME_STMT);
- ctor_seq = NULL;
- }
- }
- }
- }
- /* Since the vectors are created in the reverse order, we should invert
- them. */
- vec_num = voprnds.length ();
- for (j = vec_num; j != 0; j--)
- {
- vop = voprnds[j - 1];
- vec_oprnds->quick_push (vop);
- }
- voprnds.release ();
- /* In case that VF is greater than the unrolling factor needed for the SLP
- group of stmts, NUMBER_OF_VECTORS to be created is greater than
- NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
- to replicate the vectors. */
- while (number_of_vectors > vec_oprnds->length ())
- {
- tree neutral_vec = NULL;
- if (neutral_op)
- {
- if (!neutral_vec)
- neutral_vec = build_vector_from_val (vector_type, neutral_op);
- vec_oprnds->quick_push (neutral_vec);
- }
- else
- {
- for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
- vec_oprnds->quick_push (vop);
- }
- }
- }
- /* Get vectorized definitions from SLP_NODE that contains corresponding
- vectorized def-stmts. */
- static void
- vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
- {
- tree vec_oprnd;
- gimple vec_def_stmt;
- unsigned int i;
- gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
- FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
- {
- gcc_assert (vec_def_stmt);
- vec_oprnd = gimple_get_lhs (vec_def_stmt);
- vec_oprnds->quick_push (vec_oprnd);
- }
- }
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds, int reduc_index)
{
  gimple first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
	{
	  child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	  gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

	  if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
	      || (related
		  && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
	    {
	      /* The number of vector defs is determined by the number of
		 vector statements in the node from which we get those
		 statements.  */
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
	      vectorized_defs = true;
	      child_index++;
	    }
	}

      if (!vectorized_defs)
	{
	  if (i == 0)
	    {
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      /* Number of vector stmts was calculated according to LHS in
		 vect_schedule_slp_instance (), fix it by replacing LHS with
		 RHS, if necessary.  See vect_get_smallest_scalar_type () for
		 details.  */
	      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
					     &rhs_size_unit);
	      if (rhs_size_unit != lhs_size_unit)
		{
		  number_of_vects *= rhs_size_unit;
		  number_of_vects /= lhs_size_unit;
		}
	    }
	}

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects, reduc_index);

      vec_oprnds->quick_push (vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
	return;
    }
}
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  Pad with NULLs up to the full count so the indexed
     stores below are in range.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
				       first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;

      /* Advance to the input vectors of the next copy.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  The vec stmt recorded is the
     last permute generated.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   M is the offset added to FIRST_MASK_ELEMENT; INDEX is the position of
   this element within the mask being assembled (MASK holds the elements
   emitted so far).  NUMBER_OF_MASK_FIXES, MASK_FIXED and
   NEEDS_FIRST_VECTOR carry state across consecutive calls while one mask
   of MASK_NUNITS elements is built.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
		       unsigned char *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "permutation requires at least two vectors ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* The mask requires the next vector.  */
  while (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (dump_enabled_p ())
            {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "permutation requires at "
			       "least three vectors ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  Already-emitted elements in MASK are rebased too.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;
      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
- /* Generate vector permute statements from a list of loads in DR_CHAIN.
- If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
- permute statements for the SLP node NODE of the SLP instance
- SLP_NODE_INSTANCE. */
- bool
- vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
- gimple_stmt_iterator *gsi, int vf,
- slp_instance slp_node_instance, bool analyze_only)
- {
- gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree mask_element_type = NULL_TREE, mask_type;
- int i, j, k, nunits, vec_index = 0, scalar_index;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- gimple next_scalar_stmt;
- int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
- int first_mask_element;
- int index, unroll_factor, current_mask_element, ncopies;
- unsigned char *mask;
- bool only_one_vec = false, need_next_vector = false;
- int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
- int number_of_mask_fixes = 1;
- bool mask_fixed = false;
- bool needs_first_vector = false;
- machine_mode mode;
- mode = TYPE_MODE (vectype);
- if (!can_vec_perm_p (mode, false, NULL))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "no vect permute for ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
- /* The generic VEC_PERM_EXPR code always uses an integral type of the
- same size as the vector element being permuted. */
- mask_element_type = lang_hooks.types.type_for_mode
- (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
- mask_type = get_vectype_for_scalar_type (mask_element_type);
- nunits = TYPE_VECTOR_SUBPARTS (vectype);
- mask = XALLOCAVEC (unsigned char, nunits);
- unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
- /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
- unrolling factor. */
- orig_vec_stmts_num = group_size *
- SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
- if (orig_vec_stmts_num == 1)
- only_one_vec = true;
- /* Number of copies is determined by the final vectorization factor
- relatively to SLP_NODE_INSTANCE unrolling factor. */
- ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
- if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
- return false;
- /* Generate permutation masks for every NODE. Number of masks for each NODE
- is equal to GROUP_SIZE.
- E.g., we have a group of three nodes with three loads from the same
- location in each node, and the vector size is 4. I.e., we have a
- a0b0c0a1b1c1... sequence and we need to create the following vectors:
- for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
- for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
- ...
- The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
- The last mask is illegal since we assume two operands for permute
- operation, and the mask element values can't be outside that range.
- Hence, the last mask must be converted into {2,5,5,5}.
- For the first two permutations we need the first and the second input
- vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
- we need the second and the third vectors: {b1,c1,a2,b2} and
- {c2,a3,b3,c3}. */
- {
- scalar_index = 0;
- index = 0;
- vect_stmts_counter = 0;
- vec_index = 0;
- first_vec_index = vec_index++;
- if (only_one_vec)
- second_vec_index = first_vec_index;
- else
- second_vec_index = vec_index++;
- for (j = 0; j < unroll_factor; j++)
- {
- for (k = 0; k < group_size; k++)
- {
- i = SLP_TREE_LOAD_PERMUTATION (node)[k];
- first_mask_element = i + j * group_size;
- if (!vect_get_mask_element (stmt, first_mask_element, 0,
- nunits, only_one_vec, index,
 - 				      mask, &current_mask_element,
- &need_next_vector,
- &number_of_mask_fixes, &mask_fixed,
- &needs_first_vector))
- return false;
- gcc_assert (current_mask_element < 2 * nunits);
- mask[index++] = current_mask_element;
- if (index == nunits)
- {
- index = 0;
- if (!can_vec_perm_p (mode, false, mask))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
- vect_location,
- "unsupported vect permute { ");
- for (i = 0; i < nunits; ++i)
- dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
- mask[i]);
- dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
- }
- return false;
- }
- if (!analyze_only)
- {
- int l;
- tree mask_vec, *mask_elts;
- mask_elts = XALLOCAVEC (tree, nunits);
- for (l = 0; l < nunits; ++l)
- mask_elts[l] = build_int_cst (mask_element_type,
- mask[l]);
- mask_vec = build_vector (mask_type, mask_elts);
- if (need_next_vector)
- {
- first_vec_index = second_vec_index;
- second_vec_index = vec_index;
- }
- next_scalar_stmt
- = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
- vect_create_mask_and_perm (stmt, next_scalar_stmt,
- mask_vec, first_vec_index, second_vec_index,
- gsi, node, vectype, dr_chain,
- ncopies, vect_stmts_counter++);
- }
- }
- }
- }
- }
- return true;
- }
- /* Vectorize SLP instance tree in postorder. */
- static bool
- vect_schedule_slp_instance (slp_tree node, slp_instance instance,
- unsigned int vectorization_factor)
- {
- gimple stmt;
- bool grouped_store, is_store;
- gimple_stmt_iterator si;
- stmt_vec_info stmt_info;
- unsigned int vec_stmts_size, nunits, group_size;
- tree vectype;
- int i;
- slp_tree child;
- if (!node)
- return false;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_schedule_slp_instance (child, instance, vectorization_factor);
- stmt = SLP_TREE_SCALAR_STMTS (node)[0];
- stmt_info = vinfo_for_stmt (stmt);
- /* VECTYPE is the type of the destination. */
- vectype = STMT_VINFO_VECTYPE (stmt_info);
- nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
- group_size = SLP_INSTANCE_GROUP_SIZE (instance);
- /* For each SLP instance calculate number of vector stmts to be created
- for the scalar stmts in each node of the SLP tree. Number of vector
- elements in one vector iteration is the number of scalar elements in
- one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
- size. */
- vec_stmts_size = (vectorization_factor * group_size) / nunits;
- if (!SLP_TREE_VEC_STMTS (node).exists ())
- {
- SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
- }
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE,vect_location,
- "------>vectorizing SLP node starting from: ");
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- dump_printf (MSG_NOTE, "\n");
- }
- /* Loads should be inserted before the first load. */
- if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
- && STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
- && SLP_TREE_LOAD_PERMUTATION (node).exists ())
- si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
- else if (is_pattern_stmt_p (stmt_info))
- si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
- else
- si = gsi_for_stmt (stmt);
- /* Stores should be inserted just before the last store. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
- {
- gimple last_store = vect_find_last_store_in_slp_instance (instance);
- if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
- last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
- si = gsi_for_stmt (last_store);
- }
- /* Mark the first element of the reduction chain as reduction to properly
- transform the node. In the analysis phase only the last element of the
- chain is marked as reduction. */
- if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
- {
- STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
- }
- is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
- return is_store;
- }
- /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
- For loop vectorization this is done in vectorizable_call, but for SLP
- it needs to be deferred until end of vect_schedule_slp, because multiple
- SLP instances may refer to the same scalar stmt. */
- static void
- vect_remove_slp_scalar_calls (slp_tree node)
- {
- gimple stmt, new_stmt;
- gimple_stmt_iterator gsi;
- int i;
- slp_tree child;
- tree lhs;
- stmt_vec_info stmt_info;
- if (!node)
- return;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_remove_slp_scalar_calls (child);
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
- {
- if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
- continue;
- stmt_info = vinfo_for_stmt (stmt);
- if (stmt_info == NULL
- || is_pattern_stmt_p (stmt_info)
- || !PURE_SLP_STMT (stmt_info))
- continue;
- lhs = gimple_call_lhs (stmt);
- new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
- set_vinfo_for_stmt (new_stmt, stmt_info);
- set_vinfo_for_stmt (stmt, NULL);
- STMT_VINFO_STMT (stmt_info) = new_stmt;
- gsi = gsi_for_stmt (stmt);
- gsi_replace (&gsi, new_stmt, false);
- SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
- }
- }
- /* Generate vector code for all SLP instances in the loop/basic block. */
- bool
- vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
- {
- vec<slp_instance> slp_instances;
- slp_instance instance;
- unsigned int i, vf;
- bool is_store = false;
- if (loop_vinfo)
- {
- slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
- vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- }
- else
- {
- slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
- vf = 1;
- }
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- {
- /* Schedule the tree of INSTANCE. */
- is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
- instance, vf);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vectorizing stmts using SLP.\n");
- }
- FOR_EACH_VEC_ELT (slp_instances, i, instance)
- {
- slp_tree root = SLP_INSTANCE_TREE (instance);
- gimple store;
- unsigned int j;
- gimple_stmt_iterator gsi;
- /* Remove scalar call stmts. Do not do this for basic-block
- vectorization as not all uses may be vectorized.
- ??? Why should this be necessary? DCE should be able to
- remove the stmts itself.
- ??? For BB vectorization we can as well remove scalar
- stmts starting from the SLP tree root if they have no
- uses. */
- if (loop_vinfo)
- vect_remove_slp_scalar_calls (root);
- for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
- && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
- {
- if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
- break;
- if (is_pattern_stmt_p (vinfo_for_stmt (store)))
- store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
- /* Free the attached stmt_vec_info and remove the stmt. */
- gsi = gsi_for_stmt (store);
- unlink_stmt_vdef (store);
- gsi_remove (&gsi, true);
- release_defs (store);
- free_stmt_vec_info (store);
- }
- }
- return is_store;
- }
- /* Vectorize the basic block. */
- void
- vect_slp_transform_bb (basic_block bb)
- {
- bb_vec_info bb_vinfo = vec_info_for_bb (bb);
- gimple_stmt_iterator si;
- gcc_assert (bb_vinfo);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
- {
- gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "------>SLPing statement: ");
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- dump_printf (MSG_NOTE, "\n");
- }
- stmt_info = vinfo_for_stmt (stmt);
- gcc_assert (stmt_info);
- /* Schedule all the SLP instances when the first SLP stmt is reached. */
- if (STMT_SLP_TYPE (stmt_info))
- {
- vect_schedule_slp (NULL, bb_vinfo);
- break;
- }
- }
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "BASIC BLOCK VECTORIZED\n");
- destroy_bb_vec_info (bb_vinfo);
- }
|