sh-mem.cc 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. /* Helper routines for memory move and comparison insns.
  2. Copyright (C) 2013-2015 Free Software Foundation, Inc.
  3. This file is part of GCC.
  4. GCC is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 3, or (at your option)
  7. any later version.
  8. GCC is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GCC; see the file COPYING3. If not see
  14. <http://www.gnu.org/licenses/>. */
  15. #include "config.h"
  16. #include "system.h"
  17. #include "coretypes.h"
  18. #include "tm.h"
  19. #include "machmode.h"
  20. #include "rtl.h"
  21. #include "hash-set.h"
  22. #include "vec.h"
  23. #include "double-int.h"
  24. #include "input.h"
  25. #include "alias.h"
  26. #include "symtab.h"
  27. #include "wide-int.h"
  28. #include "inchash.h"
  29. #include "tree.h"
  30. #include "hashtab.h"
  31. #include "hard-reg-set.h"
  32. #include "function.h"
  33. #include "flags.h"
  34. #include "statistics.h"
  35. #include "real.h"
  36. #include "fixed-value.h"
  37. #include "insn-config.h"
  38. #include "expmed.h"
  39. #include "dojump.h"
  40. #include "explow.h"
  41. #include "calls.h"
  42. #include "emit-rtl.h"
  43. #include "varasm.h"
  44. #include "stmt.h"
  45. #include "expr.h"
  46. #include "tm_p.h"
  47. #include "predict.h"
  48. #include "dominance.h"
  49. #include "cfg.h"
  50. #include "cfgrtl.h"
  51. #include "cfganal.h"
  52. #include "lcm.h"
  53. #include "cfgbuild.h"
  54. #include "cfgcleanup.h"
  55. #include "basic-block.h"
  56. /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
  57. static void
  58. force_into (rtx value, rtx target)
  59. {
  60. value = force_operand (value, target);
  61. if (! rtx_equal_p (value, target))
  62. emit_insn (gen_move_insn (target, value));
  63. }
  64. /* Emit code to perform a block move. Choose the best method.
  65. OPERANDS[0] is the destination.
  66. OPERANDS[1] is the source.
  67. OPERANDS[2] is the size.
  68. OPERANDS[3] is the alignment safe to use. */
/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns true if code was emitted, false to let the caller fall back
   to a generic expansion.  */
bool
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (CONST_INT_P (operands[2]));
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* Only constant-size moves are handled here.  */
  if (! constp)
    return false;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);

      /* We could use different pseudos for each copied word, but
	 since movua can only load into r0, it's kind of
	 pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      /* One SImode word per iteration: unaligned load via movua into
	 TEMP, then a word store to the (word-aligned) destination.  */
      while (copied + 4 <= bytes)
	{
	  rtx to = adjust_address (dest, SImode, copied);
	  rtx from = adjust_automodify_address (src, BLKmode,
						src_addr, copied);

	  set_mem_size (from, 4);
	  emit_insn (gen_movua (temp, from));
	  emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
	  emit_move_insn (to, temp);
	  copied += 4;
	}

      /* Tail of fewer than 4 bytes.  */
      if (copied < bytes)
	move_by_pieces (adjust_address (dest, BLKmode, copied),
			adjust_automodify_address (src, BLKmode,
						   src_addr, copied),
			bytes - copied, align, 0);

      return true;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return false;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
	return false;
      else if (bytes == 12)
	{
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);

	  /* The library helper takes the two addresses in r4/r5.  */
	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);
	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
	  return true;
	}
      else if (! optimize_size)
	{
	  const char *entry_name;
	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
	  int dwords;
	  rtx r4 = gen_rtx_REG (SImode, 4);
	  rtx r5 = gen_rtx_REG (SImode, 5);
	  rtx r6 = gen_rtx_REG (SImode, 6);

	  /* Pick the odd/even entry point depending on whether a single
	     word remains after the double-word loop; the loop count
	     (double-words minus one) goes in r6.  */
	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
	  force_into (XEXP (operands[0], 0), r4);
	  force_into (XEXP (operands[1], 0), r5);

	  dwords = bytes >> 3;
	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
	  return true;
	}
      else
	return false;
    }

  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      /* Use the size-specialized helper, e.g. "__movmemSI32".  */
      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return true;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! optimize_size)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
	 for each 64 bytes moved.  Then the negative bit left over is used
	 as an index into a list of move instructions.  e.g., a 72 byte move
	 would be set up with size(r6) = 14, for one iteration through the
	 big while loop, and a switch of -2 for the last part.  */
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return true;
    }

  return false;
}
/* Branch probabilities (fractions of REG_BR_PROB_BASE) attached as
   REG_BR_PROB notes to the conditional jumps emitted below.  */
static const int prob_unlikely = REG_BR_PROB_BASE / 10;
static const int prob_likely = REG_BR_PROB_BASE / 4;
  189. /* Emit code to perform a strcmp.
  190. OPERANDS[0] is the destination.
  191. OPERANDS[1] is the first string.
  192. OPERANDS[2] is the second string.
  193. OPERANDS[3] is the known alignment. */
/* Emit code to perform a strcmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the known alignment.

   Strategy: compare a word at a time while both pointers are 4-byte
   aligned and no zero byte has been seen, then finish byte by byte.  */
bool
sh_expand_cmpstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx tmp3 = gen_reg_rtx (SImode);
  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (tmp0, const0_rtx);

  /* If either address might be unaligned, test (s1 | s2) & 3 at run
     time and fall back to the byte loop when nonzero.  */
  if (align < 4)
    {
      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

  /* tmp2 is aligned, OK to load.  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  /* start long loop.  */
  emit_label (L_loop_long);

  emit_move_insn (tmp2, tmp3);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));

  /* Is there a 0 byte ?  cmpstr_t sets T when any byte of the AND of
     the two words is zero.  */
  emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));

  emit_insn (gen_cmpstr_t (tmp0, tmp3));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));

  /* tmp2 is aligned, OK to load (next word, prefetched into tmp3).  */
  emit_move_insn (tmp3, addr2);
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));

  jump = emit_jump_insn (gen_branch_true (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  /* Fall through: the words differ; subtract them.  On little endian
     the bytes of each word must be reversed first so the subtraction
     has the same sign as a byte-wise comparison would.  */
  if (TARGET_LITTLE_ENDIAN)
    {
      rtx low_1 = gen_lowpart (HImode, tmp1);
      rtx low_2 = gen_lowpart (HImode, tmp2);

      /* Byte-swap each word: rotate low half by 8, whole word by 16,
	 then the new low half by 8 again.  */
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
      emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
      emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
      emit_insn (gen_rotlhi3_8 (low_1, low_1));
      emit_insn (gen_rotlhi3_8 (low_2, low_2));
    }

  jump = emit_jump_insn (gen_jump_compact (L_return));
  emit_barrier_after (jump);

  emit_label (L_end_loop_long);

  /* A zero byte was seen in the last word pair: back up and re-examine
     that word byte by byte.  */
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));

  /* start byte loop.  */
  addr1 = adjust_address (addr1, QImode, 0);
  addr2 = adjust_address (addr2, QImode, 0);

  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  /* With delayed branches, the zero-extension can sit in the delay
     slot of the following branch; otherwise it is done after the
     loop (see below).  */
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_true (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  emit_label (L_return);

  /* strcmp-style result: difference of the last compared (swapped)
     values.  */
  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  return true;
}
  285. /* Emit code to perform a strncmp.
  286. OPERANDS[0] is the destination.
  287. OPERANDS[1] is the first string.
  288. OPERANDS[2] is the second string.
  289. OPERANDS[3] is the length.
  290. OPERANDS[4] is the known alignment. */
/* Emit code to perform a strncmp.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the first string.
   OPERANDS[2] is the second string.
   OPERANDS[3] is the length.
   OPERANDS[4] is the known alignment.

   For a constant length the loops are counted/unrolled at expand
   time; for a runtime length a plain byte loop counts LEN down.  */
bool
sh_expand_cmpnstr (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx addr2 = operands[2];
  rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx tmp2 = gen_reg_rtx (SImode);
  rtx jump;
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();
  rtx_code_label *L_end_loop_byte = gen_label_rtx ();

  rtx len = force_reg (SImode, operands[3]);
  int constp = CONST_INT_P (operands[3]);

  /* Loop on a register count.  */
  if (constp)
    {
      rtx tmp0 = gen_reg_rtx (SImode);
      rtx tmp3 = gen_reg_rtx (SImode);
      rtx lenw = gen_reg_rtx (SImode);
      rtx_code_label *L_loop_long = gen_label_rtx ();
      rtx_code_label *L_end_loop_long = gen_label_rtx ();

      int align = INTVAL (operands[4]);
      int bytes = INTVAL (operands[3]);
      int witers = bytes / 4;

      if (witers > 1)
	{
	  addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
	  addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);

	  emit_move_insn (tmp0, const0_rtx);

	  /* If either address might be unaligned, test (s1 | s2) & 3
	     at run time and fall back to the byte loop when nonzero.  */
	  if (align < 4)
	    {
	      emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
	      emit_insn (gen_tstsi_t (tmp1, GEN_INT (3)));
	      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
	      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
	    }

	  /* word count.  Do we have iterations ?  */
	  emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));

	  /* start long loop.  */
	  emit_label (L_loop_long);

	  /* tmp2 is aligned, OK to load.  */
	  emit_move_insn (tmp2, addr2);
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  GET_MODE_SIZE (SImode)));

	  /* tmp1 is aligned, OK to load.  */
	  emit_move_insn (tmp1, addr1);
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  GET_MODE_SIZE (SImode)));

	  /* Is there a 0 byte ?  */
	  emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));

	  emit_insn (gen_cmpstr_t (tmp0, tmp3));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  /* Decrement the word counter; dect also sets T on SH2+.  */
	  if (TARGET_SH2)
	    emit_insn (gen_dect (lenw, lenw));
	  else
	    {
	      emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
	      emit_insn (gen_tstsi_t (lenw, lenw));
	    }

	  jump = emit_jump_insn (gen_branch_false (L_loop_long));
	  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

	  int sbytes = bytes % 4;

	  /* end loop.  Reached max iterations.  */
	  if (sbytes == 0)
	    {
	      /* All whole words compared equal; result is their
		 (zero) difference.  */
	      emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

	      jump = emit_jump_insn (gen_jump_compact (L_return));
	      emit_barrier_after (jump);
	    }
	  else
	    {
	      /* Remaining bytes to check.  */
	      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
	      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

	      while (sbytes--)
		{
		  emit_insn (gen_extendqisi2 (tmp1, addr1));
		  emit_insn (gen_extendqisi2 (tmp2, addr2));

		  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
		  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
		  /* Zero-extension goes in the delay slot of the next
		     branch when delayed branches are available.  */
		  if (flag_delayed_branch)
		    emit_insn (gen_zero_extendqisi2 (tmp2,
						     gen_lowpart (QImode,
								  tmp2)));
		  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
		  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

		  addr1 = adjust_address (addr1, QImode,
					  GET_MODE_SIZE (QImode));
		  addr2 = adjust_address (addr2, QImode,
					  GET_MODE_SIZE (QImode));
		}

	      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
	      emit_barrier_after (jump);
	    }

	  emit_label (L_end_loop_long);

	  /* Found last word.  Restart it byte per byte.  */
	  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
						  -GET_MODE_SIZE (SImode)));
	  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
						  -GET_MODE_SIZE (SImode)));

	  /* fall thru.  */
	}

      addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
      addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

      /* Fully unrolled byte comparison; a mismatch or NUL exits via
	 L_end_loop_byte.  NOTE(review): when falling through from the
	 word loop this unrolls BYTES iterations although at most the
	 last word needs re-checking — the extra compares are dead code
	 after the first hit, not a correctness issue.  */
      while (bytes--)
	{
	  emit_insn (gen_extendqisi2 (tmp1, addr1));
	  emit_insn (gen_extendqisi2 (tmp2, addr2));

	  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
	  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
	  if (flag_delayed_branch)
	    emit_insn (gen_zero_extendqisi2 (tmp2,
					     gen_lowpart (QImode, tmp2)));
	  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
	  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

	  addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
	  addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
	}

      jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
      emit_barrier_after (jump);
    }
  else
    {
      /* Runtime length: strncmp (s1, s2, 0) == 0, so preload a zero
	 result and return immediately when LEN is zero.  */
      emit_insn (gen_cmpeqsi_t (len, const0_rtx));
      emit_move_insn (operands[0], const0_rtx);
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
    }

  addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
  addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);

  /* Byte loop for the runtime-length case (and the unaligned fallback
     of the constant case): compare bytes while counting LEN down.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp2, addr2));
  emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
  jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
  if (flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);

  /* Decrement the byte counter.  */
  if (TARGET_SH2)
    emit_insn (gen_dect (len, len));
  else
    {
      emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
      emit_insn (gen_tstsi_t (len, len));
    }

  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end byte loop.  */

  emit_label (L_end_loop_byte);

  if (! flag_delayed_branch)
    emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
  emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));

  /* strncmp-style result: difference of the last bytes compared.  */
  emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));

  emit_label (L_return);

  return true;
}
  462. /* Emit code to perform a strlen.
  463. OPERANDS[0] is the destination.
  464. OPERANDS[1] is the string.
  465. OPERANDS[2] is the char to search.
  466. OPERANDS[3] is the alignment. */
/* Emit code to perform a strlen.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the string.
   OPERANDS[2] is the char to search.
   OPERANDS[3] is the alignment.

   Scans word-wise while the pointer is aligned, then finds the exact
   terminator position byte by byte.  */
bool
sh_expand_strlen (rtx *operands)
{
  rtx addr1 = operands[1];
  rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  rtx_code_label *L_return = gen_label_rtx ();
  rtx_code_label *L_loop_byte = gen_label_rtx ();

  rtx jump;
  rtx_code_label *L_loop_long = gen_label_rtx ();
  rtx_code_label *L_end_loop_long = gen_label_rtx ();

  int align = INTVAL (operands[3]);

  emit_move_insn (operands[0], GEN_INT (-1));

  /* remember start of string.  */
  emit_move_insn (start_addr, current_addr);

  /* If the start address may be unaligned, test it at run time and
     fall back to the byte loop.  */
  if (align < 4)
    {
      emit_insn (gen_tstsi_t (current_addr, GEN_INT (3)));
      jump = emit_jump_insn (gen_branch_false (L_loop_byte));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  emit_move_insn (tmp0, operands[2]);

  addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);

  /* start long loop.  */
  emit_label (L_loop_long);

  /* tmp1 is aligned, OK to load.  */
  emit_move_insn (tmp1, addr1);
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));

  /* Is there a 0 byte ?  cmpstr_t sets T when a byte of TMP1 matches
     a byte of TMP0.  */
  emit_insn (gen_cmpstr_t (tmp0, tmp1));

  jump = emit_jump_insn (gen_branch_false (L_loop_long));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  /* end loop.  */

  emit_label (L_end_loop_long);

  /* Back up to the word containing the match and re-scan it byte by
     byte to find the exact position.  */
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));

  addr1 = adjust_address (addr1, QImode, 0);

  /* unroll remaining bytes.  */
  for (int i = 0; i < 4; ++i)
    {
      emit_insn (gen_extendqisi2 (tmp1, addr1));
      emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
      emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
      jump = emit_jump_insn (gen_branch_true (L_return));
      add_int_reg_note (jump, REG_BR_PROB, prob_likely);
    }

  /* One of the four branches above always fires, so the fall-through
     here is unreachable.  */
  emit_barrier_after (jump);

  /* start byte loop.  */
  emit_label (L_loop_byte);

  emit_insn (gen_extendqisi2 (tmp1, addr1));
  emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));

  emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
  jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  add_int_reg_note (jump, REG_BR_PROB, prob_likely);

  /* end loop.  */

  emit_label (L_return);

  /* CURRENT_ADDR points one past the terminator, so the length is
     current_addr - (start_addr + 1).  */
  emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));

  emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));

  return true;
}
  528. /* Emit code to perform a memset.
  529. OPERANDS[0] is the destination.
  530. OPERANDS[1] is the size;
  531. OPERANDS[2] is the char to search.
  532. OPERANDS[3] is the alignment. */
  533. void
  534. sh_expand_setmem (rtx *operands)
  535. {
  536. rtx_code_label *L_loop_byte = gen_label_rtx ();
  537. rtx_code_label *L_loop_word = gen_label_rtx ();
  538. rtx_code_label *L_return = gen_label_rtx ();
  539. rtx jump;
  540. rtx dest = copy_rtx (operands[0]);
  541. rtx dest_addr = copy_addr_to_reg (XEXP (dest, 0));
  542. rtx val = force_reg (SImode, operands[2]);
  543. int align = INTVAL (operands[3]);
  544. rtx len = force_reg (SImode, operands[1]);
  545. if (! CONST_INT_P (operands[1]))
  546. return;
  547. int count = INTVAL (operands[1]);
  548. if (CONST_INT_P (operands[2])
  549. && (INTVAL (operands[2]) == 0 || INTVAL (operands[2]) == -1) && count > 8)
  550. {
  551. rtx lenw = gen_reg_rtx (SImode);
  552. if (align < 4)
  553. {
  554. emit_insn (gen_tstsi_t (dest_addr, GEN_INT (3)));
  555. jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  556. add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  557. }
  558. /* word count. Do we have iterations ? */
  559. emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
  560. dest = adjust_automodify_address (dest, SImode, dest_addr, 0);
  561. /* start loop. */
  562. emit_label (L_loop_word);
  563. if (TARGET_SH2)
  564. emit_insn (gen_dect (lenw, lenw));
  565. else
  566. {
  567. emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
  568. emit_insn (gen_tstsi_t (lenw, lenw));
  569. }
  570. emit_move_insn (dest, val);
  571. emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
  572. GET_MODE_SIZE (SImode)));
  573. jump = emit_jump_insn (gen_branch_false (L_loop_word));
  574. add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  575. count = count % 4;
  576. dest = adjust_address (dest, QImode, 0);
  577. val = gen_lowpart (QImode, val);
  578. while (count--)
  579. {
  580. emit_move_insn (dest, val);
  581. emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
  582. GET_MODE_SIZE (QImode)));
  583. }
  584. jump = emit_jump_insn (gen_jump_compact (L_return));
  585. emit_barrier_after (jump);
  586. }
  587. dest = adjust_automodify_address (dest, QImode, dest_addr, 0);
  588. /* start loop. */
  589. emit_label (L_loop_byte);
  590. if (TARGET_SH2)
  591. emit_insn (gen_dect (len, len));
  592. else
  593. {
  594. emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
  595. emit_insn (gen_tstsi_t (len, len));
  596. }
  597. val = gen_lowpart (QImode, val);
  598. emit_move_insn (dest, val);
  599. emit_move_insn (dest_addr, plus_constant (Pmode, dest_addr,
  600. GET_MODE_SIZE (QImode)));
  601. jump = emit_jump_insn (gen_branch_false (L_loop_byte));
  602. add_int_reg_note (jump, REG_BR_PROB, prob_likely);
  603. emit_label (L_return);
  604. }