audio_effect_pitch_shift.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. /**************************************************************************/
  2. /* audio_effect_pitch_shift.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "audio_effect_pitch_shift.h"
  31. #include "core/math/math_funcs.h"
  32. #include "servers/audio_server.h"
  33. /* Thirdparty code, so disable clang-format with Godot style */
  34. /* clang-format off */
  35. /****************************************************************************
  36. *
  37. * NAME: smbPitchShift.cpp
  38. * VERSION: 1.2
  39. * HOME URL: https://blogs.zynaptiq.com/bernsee
  40. * KNOWN BUGS: none
  41. *
  42. * SYNOPSIS: Routine for doing pitch shifting while maintaining
  43. * duration using the Short Time Fourier Transform.
  44. *
  45. * DESCRIPTION: The routine takes a pitchShift factor value which is between 0.5
  46. * (one octave down) and 2. (one octave up). A value of exactly 1 does not change
  47. * the pitch. numSampsToProcess tells the routine how many samples in indata[0...
  48. * numSampsToProcess-1] should be pitch shifted and moved to outdata[0 ...
  49. * numSampsToProcess-1]. The two buffers can be identical (ie. it can process the
  50. * data in-place). fftFrameSize defines the FFT frame size used for the
  51. * processing. Typical values are 1024, 2048 and 4096. It may be any value <=
  52. * MAX_FRAME_LENGTH but it MUST be a power of 2. osamp is the STFT
  53. * oversampling factor which also determines the overlap between adjacent STFT
  54. * frames. It should at least be 4 for moderate scaling ratios. A value of 32 is
  55. * recommended for best quality. sampleRate takes the sample rate for the signal
  56. * in unit Hz, ie. 44100 for 44.1 kHz audio. The data passed to the routine in
  57. * indata[] should be in the range [-1.0, 1.0), which is also the output range
  58. * for the data, make sure you scale the data accordingly (for 16bit signed integers
  59. * you would have to divide (and multiply) by 32768).
  60. *
  61. * COPYRIGHT 1999-2015 Stephan M. Bernsee <s.bernsee [AT] zynaptiq [DOT] com>
  62. *
  63. * The Wide Open License (WOL)
  64. *
  65. * Permission to use, copy, modify, distribute and sell this software and its
  66. * documentation for any purpose is hereby granted without fee, provided that
  67. * the above copyright notice and this license appear in all source copies.
  68. * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF
  69. * ANY KIND. See https://dspguru.com/wide-open-license/ for more information.
  70. *
  71. *****************************************************************************/
  72. void SMBPitchShift::PitchShift(float pitchShift, long numSampsToProcess, long fftFrameSize, long osamp, float sampleRate, float *indata, float *outdata,int stride) {
  73. /*
  74. Routine smbPitchShift(). See top of file for explanation
  75. Purpose: doing pitch shifting while maintaining duration using the Short
  76. Time Fourier Transform.
  77. Author: (c)1999-2015 Stephan M. Bernsee <s.bernsee [AT] zynaptiq [DOT] com>
  78. */
  79. double magn, phase, tmp, window, real, imag;
  80. double freqPerBin, expct;
  81. long i,k, qpd, index, inFifoLatency, stepSize, fftFrameSize2;
  82. /* set up some handy variables */
  83. fftFrameSize2 = fftFrameSize/2;
  84. stepSize = fftFrameSize/osamp;
  85. freqPerBin = sampleRate/(double)fftFrameSize;
  86. expct = 2.*Math_PI*(double)stepSize/(double)fftFrameSize;
  87. inFifoLatency = fftFrameSize-stepSize;
  88. if (gRover == 0) { gRover = inFifoLatency;
  89. }
  90. /* initialize our static arrays */
  91. /* main processing loop */
  92. for (i = 0; i < numSampsToProcess; i++){
  93. /* As long as we have not yet collected enough data just read in */
  94. gInFIFO[gRover] = indata[i*stride];
  95. outdata[i*stride] = gOutFIFO[gRover-inFifoLatency];
  96. gRover++;
  97. /* now we have enough data for processing */
  98. if (gRover >= fftFrameSize) {
  99. gRover = inFifoLatency;
  100. /* do windowing and re,im interleave */
  101. for (k = 0; k < fftFrameSize;k++) {
  102. window = -.5*cos(2.*Math_PI*(double)k/(double)fftFrameSize)+.5;
  103. gFFTworksp[2*k] = gInFIFO[k] * window;
  104. gFFTworksp[2*k+1] = 0.;
  105. }
  106. /* ***************** ANALYSIS ******************* */
  107. /* do transform */
  108. smbFft(gFFTworksp, fftFrameSize, -1);
  109. /* this is the analysis step */
  110. for (k = 0; k <= fftFrameSize2; k++) {
  111. /* de-interlace FFT buffer */
  112. real = gFFTworksp[2*k];
  113. imag = gFFTworksp[2*k+1];
  114. /* compute magnitude and phase */
  115. magn = 2.*sqrt(real*real + imag*imag);
  116. phase = atan2(imag,real);
  117. /* compute phase difference */
  118. tmp = phase - gLastPhase[k];
  119. gLastPhase[k] = phase;
  120. /* subtract expected phase difference */
  121. tmp -= (double)k*expct;
  122. /* map delta phase into +/- Pi interval */
  123. qpd = tmp/Math_PI;
  124. if (qpd >= 0) { qpd += qpd&1;
  125. } else { qpd -= qpd&1;
  126. }
  127. tmp -= Math_PI*(double)qpd;
  128. /* get deviation from bin frequency from the +/- Pi interval */
  129. tmp = osamp*tmp/(2.*Math_PI);
  130. /* compute the k-th partials' true frequency */
  131. tmp = (double)k*freqPerBin + tmp*freqPerBin;
  132. /* store magnitude and true frequency in analysis arrays */
  133. gAnaMagn[k] = magn;
  134. gAnaFreq[k] = tmp;
  135. }
  136. /* ***************** PROCESSING ******************* */
  137. /* this does the actual pitch shifting */
  138. memset(gSynMagn, 0, fftFrameSize*sizeof(float));
  139. memset(gSynFreq, 0, fftFrameSize*sizeof(float));
  140. for (k = 0; k <= fftFrameSize2; k++) {
  141. index = k*pitchShift;
  142. if (index <= fftFrameSize2) {
  143. gSynMagn[index] += gAnaMagn[k];
  144. gSynFreq[index] = gAnaFreq[k] * pitchShift;
  145. }
  146. }
  147. /* ***************** SYNTHESIS ******************* */
  148. /* this is the synthesis step */
  149. for (k = 0; k <= fftFrameSize2; k++) {
  150. /* get magnitude and true frequency from synthesis arrays */
  151. magn = gSynMagn[k];
  152. tmp = gSynFreq[k];
  153. /* subtract bin mid frequency */
  154. tmp -= (double)k*freqPerBin;
  155. /* get bin deviation from freq deviation */
  156. tmp /= freqPerBin;
  157. /* take osamp into account */
  158. tmp = 2.*Math_PI*tmp/osamp;
  159. /* add the overlap phase advance back in */
  160. tmp += (double)k*expct;
  161. /* accumulate delta phase to get bin phase */
  162. gSumPhase[k] += tmp;
  163. phase = gSumPhase[k];
  164. /* get real and imag part and re-interleave */
  165. gFFTworksp[2*k] = magn*cos(phase);
  166. gFFTworksp[2*k+1] = magn*sin(phase);
  167. }
  168. /* zero negative frequencies */
  169. for (k = fftFrameSize+2; k < 2*fftFrameSize; k++) { gFFTworksp[k] = 0.;
  170. }
  171. /* do inverse transform */
  172. smbFft(gFFTworksp, fftFrameSize, 1);
  173. /* do windowing and add to output accumulator */
  174. for(k=0; k < fftFrameSize; k++) {
  175. window = -.5*cos(2.*Math_PI*(double)k/(double)fftFrameSize)+.5;
  176. gOutputAccum[k] += 2.*window*gFFTworksp[2*k]/(fftFrameSize2*osamp);
  177. }
  178. for (k = 0; k < stepSize; k++) { gOutFIFO[k] = gOutputAccum[k];
  179. }
  180. /* shift accumulator */
  181. memmove(gOutputAccum, gOutputAccum+stepSize, fftFrameSize*sizeof(float));
  182. /* move input FIFO */
  183. for (k = 0; k < inFifoLatency; k++) { gInFIFO[k] = gInFIFO[k+stepSize];
  184. }
  185. }
  186. }
  187. }
  188. void SMBPitchShift::smbFft(float *fftBuffer, long fftFrameSize, long sign)
  189. /*
  190. FFT routine, (C)1996 S.M.Bernsee. Sign = -1 is FFT, 1 is iFFT (inverse)
  191. Fills fftBuffer[0...2*fftFrameSize-1] with the Fourier transform of the
  192. time domain data in fftBuffer[0...2*fftFrameSize-1]. The FFT array takes
  193. and returns the cosine and sine parts in an interleaved manner, ie.
  194. fftBuffer[0] = cosPart[0], fftBuffer[1] = sinPart[0], asf. fftFrameSize
  195. must be a power of 2. It expects a complex input signal (see footnote 2),
  196. ie. when working with 'common' audio signals our input signal has to be
  197. passed as {in[0],0.,in[1],0.,in[2],0.,...} asf. In that case, the transform
  198. of the frequencies of interest is in fftBuffer[0...fftFrameSize].
  199. */
  200. {
  201. float wr, wi, arg, *p1, *p2, temp;
  202. float tr, ti, ur, ui, *p1r, *p1i, *p2r, *p2i;
  203. long i, bitm, j, le, le2, k;
  204. for (i = 2; i < 2*fftFrameSize-2; i += 2) {
  205. for (bitm = 2, j = 0; bitm < 2*fftFrameSize; bitm <<= 1) {
  206. if (i & bitm) { j++;
  207. }
  208. j <<= 1;
  209. }
  210. if (i < j) {
  211. p1 = fftBuffer+i; p2 = fftBuffer+j;
  212. temp = *p1; *(p1++) = *p2;
  213. *(p2++) = temp; temp = *p1;
  214. *p1 = *p2; *p2 = temp;
  215. }
  216. }
  217. for (k = 0, le = 2; k < (long)(log((double)fftFrameSize)/log(2.)+.5); k++) {
  218. le <<= 1;
  219. le2 = le>>1;
  220. ur = 1.0;
  221. ui = 0.0;
  222. arg = Math_PI / (le2>>1);
  223. wr = cos(arg);
  224. wi = sign*sin(arg);
  225. for (j = 0; j < le2; j += 2) {
  226. p1r = fftBuffer+j; p1i = p1r+1;
  227. p2r = p1r+le2; p2i = p2r+1;
  228. for (i = j; i < 2*fftFrameSize; i += le) {
  229. tr = *p2r * ur - *p2i * ui;
  230. ti = *p2r * ui + *p2i * ur;
  231. *p2r = *p1r - tr; *p2i = *p1i - ti;
  232. *p1r += tr; *p1i += ti;
  233. p1r += le; p1i += le;
  234. p2r += le; p2i += le;
  235. }
  236. tr = ur*wr - ui*wi;
  237. ui = ur*wi + ui*wr;
  238. ur = tr;
  239. }
  240. }
  241. }
  242. /* Godot code again */
  243. /* clang-format on */
  244. void AudioEffectPitchShiftInstance::process(const AudioFrame *p_src_frames, AudioFrame *p_dst_frames, int p_frame_count) {
  245. // Avoid distortion by skipping processing if pitch_scale is 1.0.
  246. if (Math::is_equal_approx(base->pitch_scale, 1.0f)) {
  247. return;
  248. }
  249. float sample_rate = AudioServer::get_singleton()->get_mix_rate();
  250. float *in_l = (float *)p_src_frames;
  251. float *in_r = in_l + 1;
  252. float *out_l = (float *)p_dst_frames;
  253. float *out_r = out_l + 1;
  254. shift_l.PitchShift(base->pitch_scale, p_frame_count, fft_size, base->oversampling, sample_rate, in_l, out_l, 2);
  255. shift_r.PitchShift(base->pitch_scale, p_frame_count, fft_size, base->oversampling, sample_rate, in_r, out_r, 2);
  256. }
  257. Ref<AudioEffectInstance> AudioEffectPitchShift::instantiate() {
  258. Ref<AudioEffectPitchShiftInstance> ins;
  259. ins.instantiate();
  260. ins->base = Ref<AudioEffectPitchShift>(this);
  261. static const int fft_sizes[FFT_SIZE_MAX] = { 256, 512, 1024, 2048, 4096 };
  262. ins->fft_size = fft_sizes[fft_size];
  263. return ins;
  264. }
  265. void AudioEffectPitchShift::set_pitch_scale(float p_pitch_scale) {
  266. ERR_FAIL_COND(!(p_pitch_scale > 0.0));
  267. pitch_scale = p_pitch_scale;
  268. }
  269. float AudioEffectPitchShift::get_pitch_scale() const {
  270. return pitch_scale;
  271. }
  272. void AudioEffectPitchShift::set_oversampling(int p_oversampling) {
  273. ERR_FAIL_COND(p_oversampling < 4);
  274. oversampling = p_oversampling;
  275. }
  276. int AudioEffectPitchShift::get_oversampling() const {
  277. return oversampling;
  278. }
  279. void AudioEffectPitchShift::set_fft_size(FFTSize p_fft_size) {
  280. ERR_FAIL_INDEX(p_fft_size, FFT_SIZE_MAX);
  281. fft_size = p_fft_size;
  282. }
  283. AudioEffectPitchShift::FFTSize AudioEffectPitchShift::get_fft_size() const {
  284. return fft_size;
  285. }
  286. void AudioEffectPitchShift::_bind_methods() {
  287. ClassDB::bind_method(D_METHOD("set_pitch_scale", "rate"), &AudioEffectPitchShift::set_pitch_scale);
  288. ClassDB::bind_method(D_METHOD("get_pitch_scale"), &AudioEffectPitchShift::get_pitch_scale);
  289. ClassDB::bind_method(D_METHOD("set_oversampling", "amount"), &AudioEffectPitchShift::set_oversampling);
  290. ClassDB::bind_method(D_METHOD("get_oversampling"), &AudioEffectPitchShift::get_oversampling);
  291. ClassDB::bind_method(D_METHOD("set_fft_size", "size"), &AudioEffectPitchShift::set_fft_size);
  292. ClassDB::bind_method(D_METHOD("get_fft_size"), &AudioEffectPitchShift::get_fft_size);
  293. ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "pitch_scale", PROPERTY_HINT_RANGE, "0.01,16,0.01"), "set_pitch_scale", "get_pitch_scale");
  294. ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "oversampling", PROPERTY_HINT_RANGE, "4,32,1"), "set_oversampling", "get_oversampling");
  295. ADD_PROPERTY(PropertyInfo(Variant::INT, "fft_size", PROPERTY_HINT_ENUM, "256,512,1024,2048,4096"), "set_fft_size", "get_fft_size");
  296. BIND_ENUM_CONSTANT(FFT_SIZE_256);
  297. BIND_ENUM_CONSTANT(FFT_SIZE_512);
  298. BIND_ENUM_CONSTANT(FFT_SIZE_1024);
  299. BIND_ENUM_CONSTANT(FFT_SIZE_2048);
  300. BIND_ENUM_CONSTANT(FFT_SIZE_4096);
  301. BIND_ENUM_CONSTANT(FFT_SIZE_MAX);
  302. }