/* sse_vector.c */
  1. #include <stdio.h>
  2. #include "../c3dlas.h"
  3. #ifndef __SSE__
  4. #define c3dlas_sse3_vAdd c3dlas_pure_vAdd
  5. #else
  6. void c3dlas_sse3_vAdd(Vector3* a, Vector3* b, Vector3* out) {
  7. /*
  8. out->x = a->x + b->x;
  9. out->y = a->y + b->y;
  10. out->z = a->z + b->z;
  11. */
  12. asm (
  13. // pointer dereference
  14. "mov (%0), %%rax;"
  15. "mov (%1), %%rbx;"
  16. "mov (%2), %%rcx;"
  17. // move 3 floats into the xmm regs
  18. "movq (%%rax), %%xmm1;"
  19. "movlhps %%xmm1,%%xmm1;"
  20. "movd 0x8(%%rax),%%xmm1;"
  21. "movq (%%rbx), %%xmm2;"
  22. "movlhps %%xmm2,%%xmm2;"
  23. "movd 0x8(%%rbx),%%xmm2;"
  24. // THE MATH
  25. "addps %%xmm2, %%xmm1;"
  26. // move 3 floats to output
  27. "movd %%xmm1, 0x8(%%rcx);"
  28. "movhlps %%xmm1,%%xmm1;"
  29. "movq %%xmm1, (%%rax);"
  30. : // no outputs
  31. : "r" (a), "r" (b), "r" (out)
  32. : "rax", "rbx", "rcx", "memory"
  33. );
  34. }
  35. #endif
  36. #ifndef __SSE__
  37. #define c3dlas_sse3_vAdd4 c3dlas_pure_vAdd
  38. #else
  39. void c3dlas_sse3_vAdd4(Vector3* a, Vector3* b, Vector3* out) {
  40. /*
  41. out->x = a->x + b->x;
  42. out->y = a->y + b->y;
  43. out->z = a->z + b->z;
  44. out->w = a->w + b->w;
  45. */
  46. asm (
  47. "mov (%0), %%rax;"
  48. "mov (%1), %%rbx;"
  49. "mov (%2), %%rcx;"
  50. "movups (%%rax), %%xmm1;"
  51. "movups (%%rbx), %%xmm2;"
  52. "addps %%xmm2, %%xmm1;"
  53. "movups %%xmm1, (%%rcx);"
  54. : // no outputs
  55. : "r" (a), "r" (b), "r" (out)
  56. : "rax", "rbx", "rcx", "memory"
  57. );
  58. }