enc_loop.c 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. static inline void
  2. enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
  3. {
  4. // Load 48 bytes and deinterleave:
  5. uint8x16x3_t src = vld3q_u8(*s);
  6. // Divide bits of three input bytes over four output bytes:
  7. uint8x16x4_t out = enc_reshuffle(src);
  8. // The bits have now been shifted to the right locations;
  9. // translate their values 0..63 to the Base64 alphabet.
  10. // Use a 64-byte table lookup:
  11. out.val[0] = vqtbl4q_u8(tbl_enc, out.val[0]);
  12. out.val[1] = vqtbl4q_u8(tbl_enc, out.val[1]);
  13. out.val[2] = vqtbl4q_u8(tbl_enc, out.val[2]);
  14. out.val[3] = vqtbl4q_u8(tbl_enc, out.val[3]);
  15. // Interleave and store output:
  16. vst4q_u8(*o, out);
  17. *s += 48;
  18. *o += 64;
  19. }
  20. static inline void
  21. enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
  22. {
  23. size_t rounds = *slen / 48;
  24. *slen -= rounds * 48; // 48 bytes consumed per round
  25. *olen += rounds * 64; // 64 bytes produced per round
  26. // Load the encoding table:
  27. const uint8x16x4_t tbl_enc = load_64byte_table(base64_table_enc_6bit);
  28. while (rounds > 0) {
  29. if (rounds >= 8) {
  30. enc_loop_neon64_inner(s, o, tbl_enc);
  31. enc_loop_neon64_inner(s, o, tbl_enc);
  32. enc_loop_neon64_inner(s, o, tbl_enc);
  33. enc_loop_neon64_inner(s, o, tbl_enc);
  34. enc_loop_neon64_inner(s, o, tbl_enc);
  35. enc_loop_neon64_inner(s, o, tbl_enc);
  36. enc_loop_neon64_inner(s, o, tbl_enc);
  37. enc_loop_neon64_inner(s, o, tbl_enc);
  38. rounds -= 8;
  39. continue;
  40. }
  41. if (rounds >= 4) {
  42. enc_loop_neon64_inner(s, o, tbl_enc);
  43. enc_loop_neon64_inner(s, o, tbl_enc);
  44. enc_loop_neon64_inner(s, o, tbl_enc);
  45. enc_loop_neon64_inner(s, o, tbl_enc);
  46. rounds -= 4;
  47. continue;
  48. }
  49. if (rounds >= 2) {
  50. enc_loop_neon64_inner(s, o, tbl_enc);
  51. enc_loop_neon64_inner(s, o, tbl_enc);
  52. rounds -= 2;
  53. continue;
  54. }
  55. enc_loop_neon64_inner(s, o, tbl_enc);
  56. break;
  57. }
  58. }