lib_openmp.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. // This code makes some assumptions on the implementation of
  2. // base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
  3. // Basically these assumptions boil down to that when breaking the src into
  4. // parts, out parts can be written without side effects.
  5. // This is met when:
  6. // 1) base64_stream_encode() and base64_stream_decode() don't use globals;
  7. // 2) the shared variables src and out are not read or written outside of the
  8. // bounds of their parts, i.e. when base64_stream_encode() reads a multiple
  9. // of 3 bytes, it must write no more than a multiple of 4 bytes, not even
  10. // temporarily;
  11. // 3) the state flag can be discarded after base64_stream_encode() and
  12. // base64_stream_decode() on the parts.
  13. static inline void
  14. base64_encode_openmp
  15. ( const char *src
  16. , size_t srclen
  17. , char *out
  18. , size_t *outlen
  19. , int flags
  20. )
  21. {
  22. size_t s;
  23. size_t t;
  24. size_t sum = 0, len, last_len;
  25. struct base64_state state, initial_state;
  26. int num_threads, i;
  27. // Request a number of threads but not necessarily get them:
  28. #pragma omp parallel
  29. {
  30. // Get the number of threads used from one thread only,
  31. // as num_threads is a shared var:
  32. #pragma omp single
  33. {
  34. num_threads = omp_get_num_threads();
  35. // Split the input string into num_threads parts, each
  36. // part a multiple of 3 bytes. The remaining bytes will
  37. // be done later:
  38. len = srclen / (num_threads * 3);
  39. len *= 3;
  40. last_len = srclen - num_threads * len;
  41. // Init the stream reader:
  42. base64_stream_encode_init(&state, flags);
  43. initial_state = state;
  44. }
  45. // Single has an implicit barrier for all threads to wait here
  46. // for the above to complete:
  47. #pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
  48. for (i = 0; i < num_threads; i++)
  49. {
  50. // Feed each part of the string to the stream reader:
  51. base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
  52. sum += s;
  53. }
  54. }
  55. // As encoding should never fail and we encode an exact multiple
  56. // of 3 bytes, we can discard state:
  57. state = initial_state;
  58. // Encode the remaining bytes:
  59. base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);
  60. // Finalize the stream by writing trailer if any:
  61. base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);
  62. // Final output length is stream length plus tail:
  63. sum += s + t;
  64. *outlen = sum;
  65. }
  66. static inline int
  67. base64_decode_openmp
  68. ( const char *src
  69. , size_t srclen
  70. , char *out
  71. , size_t *outlen
  72. , int flags
  73. )
  74. {
  75. int num_threads, result = 0, i;
  76. size_t sum = 0, len, last_len, s;
  77. struct base64_state state, initial_state;
  78. // Request a number of threads but not necessarily get them:
  79. #pragma omp parallel
  80. {
  81. // Get the number of threads used from one thread only,
  82. // as num_threads is a shared var:
  83. #pragma omp single
  84. {
  85. num_threads = omp_get_num_threads();
  86. // Split the input string into num_threads parts, each
  87. // part a multiple of 4 bytes. The remaining bytes will
  88. // be done later:
  89. len = srclen / (num_threads * 4);
  90. len *= 4;
  91. last_len = srclen - num_threads * len;
  92. // Init the stream reader:
  93. base64_stream_decode_init(&state, flags);
  94. initial_state = state;
  95. }
  96. // Single has an implicit barrier to wait here for the above to
  97. // complete:
  98. #pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
  99. for (i = 0; i < num_threads; i++)
  100. {
  101. int this_result;
  102. // Feed each part of the string to the stream reader:
  103. this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
  104. sum += s;
  105. result += this_result;
  106. }
  107. }
  108. // If `result' equals `-num_threads', then all threads returned -1,
  109. // indicating that the requested codec is not available:
  110. if (result == -num_threads) {
  111. return -1;
  112. }
  113. // If `result' does not equal `num_threads', then at least one of the
  114. // threads hit a decode error:
  115. if (result != num_threads) {
  116. return 0;
  117. }
  118. // So far so good, now decode whatever remains in the buffer. Reuse the
  119. // initial state, since we are at a 4-byte boundary:
  120. state = initial_state;
  121. result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
  122. sum += s;
  123. *outlen = sum;
  124. // If when decoding a whole block, we're still waiting for input then fail:
  125. if (result && (state.bytes == 0)) {
  126. return result;
  127. }
  128. return 0;
  129. }