// AudioSegment.h
  1. /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  4. * You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #ifndef MOZILLA_AUDIOSEGMENT_H_
  6. #define MOZILLA_AUDIOSEGMENT_H_
  7. #include "MediaSegment.h"
  8. #include "AudioSampleFormat.h"
  9. #include "AudioChannelFormat.h"
  10. #include "SharedBuffer.h"
  11. #include "WebAudioUtils.h"
  12. #ifdef MOZILLA_INTERNAL_API
  13. #include "mozilla/TimeStamp.h"
  14. #endif
  15. #include <float.h>
  16. namespace mozilla {
  17. template<typename T>
  18. class SharedChannelArrayBuffer : public ThreadSharedObject {
  19. public:
  20. explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >* aBuffers)
  21. {
  22. mBuffers.SwapElements(*aBuffers);
  23. }
  24. size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override
  25. {
  26. size_t amount = 0;
  27. amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);
  28. for (size_t i = 0; i < mBuffers.Length(); i++) {
  29. amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
  30. }
  31. return amount;
  32. }
  33. size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
  34. {
  35. return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  36. }
  37. nsTArray<nsTArray<T> > mBuffers;
  38. };
class AudioMixer;

/**
 * For auto-arrays etc, guess this as the common number of channels.
 */
const int GUESS_AUDIO_CHANNELS = 2;

// We ensure that the graph advances in steps that are multiples of the Web
// Audio block size
const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
// 1 << 7 == 128 frames per Web Audio processing block.
const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
  48. template <typename SrcT, typename DestT>
  49. static void
  50. InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,
  51. uint32_t aLength, float aVolume,
  52. uint32_t aChannels,
  53. DestT* aOutput)
  54. {
  55. DestT* output = aOutput;
  56. for (size_t i = 0; i < aLength; ++i) {
  57. for (size_t channel = 0; channel < aChannels; ++channel) {
  58. float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
  59. *output = FloatToAudioSample<DestT>(v);
  60. ++output;
  61. }
  62. }
  63. }
  64. template <typename SrcT, typename DestT>
  65. static void
  66. DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
  67. uint32_t aFrames, uint32_t aChannels,
  68. DestT** aOutput)
  69. {
  70. for (size_t i = 0; i < aChannels; i++) {
  71. size_t interleavedIndex = i;
  72. for (size_t j = 0; j < aFrames; j++) {
  73. ConvertAudioSample(aSourceBuffer[interleavedIndex],
  74. aOutput[i][j]);
  75. interleavedIndex += aChannels;
  76. }
  77. }
  78. }
/**
 * Provides a shared read-only buffer of zeroed samples, so callers can
 * reference silence for any sample type without allocating.
 */
class SilentChannel
{
public:
  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
  static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES];
  // We take advantage of the fact that zero in float and zero in int have the
  // same all-zeros bit layout.
  template<typename T>
  static const T* ZeroChannel();
};
  89. /**
  90. * Given an array of input channels (aChannelData), downmix to aOutputChannels,
  91. * interleave the channel data. A total of aOutputChannels*aDuration
  92. * interleaved samples will be copied to a channel buffer in aOutput.
  93. */
  94. template <typename SrcT, typename DestT>
  95. void
  96. DownmixAndInterleave(const nsTArray<const SrcT*>& aChannelData,
  97. int32_t aDuration, float aVolume, uint32_t aOutputChannels,
  98. DestT* aOutput)
  99. {
  100. if (aChannelData.Length() == aOutputChannels) {
  101. InterleaveAndConvertBuffer(aChannelData.Elements(),
  102. aDuration, aVolume, aOutputChannels, aOutput);
  103. } else {
  104. AutoTArray<SrcT*,GUESS_AUDIO_CHANNELS> outputChannelData;
  105. AutoTArray<SrcT, SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS> outputBuffers;
  106. outputChannelData.SetLength(aOutputChannels);
  107. outputBuffers.SetLength(aDuration * aOutputChannels);
  108. for (uint32_t i = 0; i < aOutputChannels; i++) {
  109. outputChannelData[i] = outputBuffers.Elements() + aDuration * i;
  110. }
  111. AudioChannelsDownMix(aChannelData,
  112. outputChannelData.Elements(),
  113. aOutputChannels,
  114. aDuration);
  115. InterleaveAndConvertBuffer(outputChannelData.Elements(),
  116. aDuration, aVolume, aOutputChannels, aOutput);
  117. }
  118. }
  119. /**
  120. * An AudioChunk represents a multi-channel buffer of audio samples.
  121. * It references an underlying ThreadSharedObject which manages the lifetime
  122. * of the buffer. An AudioChunk maintains its own duration and channel data
  123. * pointers so it can represent a subinterval of a buffer without copying.
  124. * An AudioChunk can store its individual channels anywhere; it maintains
  125. * separate pointers to each channel's buffer.
  126. */
  127. struct AudioChunk {
  128. typedef mozilla::AudioSampleFormat SampleFormat;
  129. AudioChunk() : mPrincipalHandle(PRINCIPAL_HANDLE_NONE) {}
  130. // Generic methods
  131. void SliceTo(StreamTime aStart, StreamTime aEnd)
  132. {
  133. MOZ_ASSERT(aStart >= 0 && aStart < aEnd && aEnd <= mDuration,
  134. "Slice out of bounds");
  135. if (mBuffer) {
  136. MOZ_ASSERT(aStart < INT32_MAX, "Can't slice beyond 32-bit sample lengths");
  137. for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
  138. mChannelData[channel] = AddAudioSampleOffset(mChannelData[channel],
  139. mBufferFormat, int32_t(aStart));
  140. }
  141. }
  142. mDuration = aEnd - aStart;
  143. }
  144. StreamTime GetDuration() const { return mDuration; }
  145. bool CanCombineWithFollowing(const AudioChunk& aOther) const
  146. {
  147. if (aOther.mBuffer != mBuffer) {
  148. return false;
  149. }
  150. if (mBuffer) {
  151. NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
  152. "Wrong metadata about buffer");
  153. NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
  154. "Mismatched channel count");
  155. if (mDuration > INT32_MAX) {
  156. return false;
  157. }
  158. for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
  159. if (aOther.mChannelData[channel] != AddAudioSampleOffset(mChannelData[channel],
  160. mBufferFormat, int32_t(mDuration))) {
  161. return false;
  162. }
  163. }
  164. }
  165. return true;
  166. }
  167. bool IsNull() const { return mBuffer == nullptr; }
  168. void SetNull(StreamTime aDuration)
  169. {
  170. mBuffer = nullptr;
  171. mChannelData.Clear();
  172. mDuration = aDuration;
  173. mVolume = 1.0f;
  174. mBufferFormat = AUDIO_FORMAT_SILENCE;
  175. mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
  176. }
  177. size_t ChannelCount() const { return mChannelData.Length(); }
  178. bool IsMuted() const { return mVolume == 0.0f; }
  179. size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const
  180. {
  181. return SizeOfExcludingThis(aMallocSizeOf, true);
  182. }
  183. size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const
  184. {
  185. size_t amount = 0;
  186. // Possibly owned:
  187. // - mBuffer - Can hold data that is also in the decoded audio queue. If it
  188. // is not shared, or unshared == false it gets counted.
  189. if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {
  190. amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
  191. }
  192. // Memory in the array is owned by mBuffer.
  193. amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);
  194. return amount;
  195. }
  196. template<typename T>
  197. const nsTArray<const T*>& ChannelData()
  198. {
  199. MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
  200. return *reinterpret_cast<nsTArray<const T*>*>(&mChannelData);
  201. }
  202. PrincipalHandle GetPrincipalHandle() const { return mPrincipalHandle; }
  203. StreamTime mDuration; // in frames within the buffer
  204. RefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is managed; null means data is all zeroes
  205. nsTArray<const void*> mChannelData; // one pointer per channel; empty if and only if mBuffer is null
  206. float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
  207. SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
  208. #ifdef MOZILLA_INTERNAL_API
  209. mozilla::TimeStamp mTimeStamp; // time at which this has been fetched from the MediaEngine
  210. #endif
  211. // principalHandle for the data in this chunk.
  212. // This can be compared to an nsIPrincipal* when back on main thread.
  213. PrincipalHandle mPrincipalHandle;
  214. };
  215. /**
  216. * A list of audio samples consisting of a sequence of slices of SharedBuffers.
  217. * The audio rate is determined by the track, not stored in this class.
  218. */
class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
public:
  typedef mozilla::AudioSampleFormat SampleFormat;

  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}

  // Resample the whole segment in place.
  // T is the sample type of every chunk's data; each chunk's buffer is
  // replaced by a freshly allocated SharedChannelArrayBuffer holding the
  // resampled frames, and mDuration is recomputed from the new chunk lengths.
  template<typename T>
  void Resample(SpeexResamplerState* aResampler, uint32_t aInRate, uint32_t aOutRate)
  {
    mDuration = 0;
#ifdef DEBUG
    uint32_t segmentChannelCount = ChannelCount();
#endif

    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output;
      AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs;
      AudioChunk& c = *ci;
      // If this chunk is null, don't bother resampling, just alter its duration
      if (c.IsNull()) {
        c.mDuration = (c.mDuration * aOutRate) / aInRate;
        mDuration += c.mDuration;
        continue;
      }
      uint32_t channels = c.mChannelData.Length();
      // All non-null chunks in a segment are expected to share one channel count.
      MOZ_ASSERT(channels == segmentChannelCount);
      output.SetLength(channels);
      bufferPtrs.SetLength(channels);
      uint32_t inFrames = c.mDuration;
      // Round up to allocate; the last frame may not be used.
      // The assertion guards the uint32 multiplication below against overflow.
      NS_ASSERTION((UINT32_MAX - aInRate + 1) / c.mDuration >= aOutRate,
                   "Dropping samples");
      uint32_t outSize = (c.mDuration * aOutRate + aInRate - 1) / aInRate;
      for (uint32_t i = 0; i < channels; i++) {
        // AppendElements reserves outSize frames; the resampler reports how
        // many it actually produced via outFrames, and we trim afterwards.
        T* out = output[i].AppendElements(outSize);
        uint32_t outFrames = outSize;

        const T* in = static_cast<const T*>(c.mChannelData[i]);
        dom::WebAudioUtils::SpeexResamplerProcess(aResampler, i,
                                                  in, &inFrames,
                                                  out, &outFrames);
        MOZ_ASSERT(inFrames == c.mDuration);

        bufferPtrs[i] = out;
        output[i].SetLength(outFrames);
      }
      MOZ_ASSERT(channels > 0);
      c.mDuration = output[0].Length();
      // The new buffer takes ownership of the resampled arrays (by swap);
      // bufferPtrs still points at the moved storage, captured above.
      c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(&output);
      for (uint32_t i = 0; i < channels; i++) {
        c.mChannelData[i] = bufferPtrs[i];
      }
      mDuration += c.mDuration;
    }
  }

  void ResampleChunks(SpeexResamplerState* aResampler,
                      uint32_t aInRate,
                      uint32_t aOutRate);

  // Append a chunk of float32 data. Takes ownership of aBuffer; aChannelData
  // holds one pointer per channel into that buffer.
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const float*>& aChannelData,
                    int32_t aDuration, const PrincipalHandle& aPrincipalHandle)
  {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;
    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mVolume = 1.0f;
    chunk->mBufferFormat = AUDIO_FORMAT_FLOAT32;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aPrincipalHandle;
  }

  // Append a chunk of signed 16-bit data. Same contract as the float32
  // overload above.
  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
                    const nsTArray<const int16_t*>& aChannelData,
                    int32_t aDuration, const PrincipalHandle& aPrincipalHandle)
  {
    AudioChunk* chunk = AppendChunk(aDuration);
    chunk->mBuffer = aBuffer;
    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
      chunk->mChannelData.AppendElement(aChannelData[channel]);
    }
    chunk->mVolume = 1.0f;
    chunk->mBufferFormat = AUDIO_FORMAT_S16;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aPrincipalHandle;
  }

  // Consumes aChunk, and returns a pointer to the persistent copy of aChunk
  // in the segment.
  AudioChunk* AppendAndConsumeChunk(AudioChunk* aChunk)
  {
    AudioChunk* chunk = AppendChunk(aChunk->mDuration);
    // Steal the buffer and channel pointers rather than copying them.
    chunk->mBuffer = aChunk->mBuffer.forget();
    chunk->mChannelData.SwapElements(aChunk->mChannelData);
    chunk->mVolume = aChunk->mVolume;
    chunk->mBufferFormat = aChunk->mBufferFormat;
#ifdef MOZILLA_INTERNAL_API
    chunk->mTimeStamp = TimeStamp::Now();
#endif
    chunk->mPrincipalHandle = aChunk->mPrincipalHandle;
    return chunk;
  }

  void ApplyVolume(float aVolume);

  // Mix the segment into a mixer, interleaved. This is useful to output a
  // segment to a system audio callback. It up or down mixes to aChannelCount
  // channels.
  void WriteTo(uint64_t aID, AudioMixer& aMixer, uint32_t aChannelCount,
               uint32_t aSampleRate);

  // Mix the segment into a mixer, keeping it planar, up or down mixing to
  // aChannelCount channels.
  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);

  // Channel count of the first non-silent chunk, or 0 if all chunks are
  // silent (or the segment is empty).
  int ChannelCount() {
    NS_WARNING_ASSERTION(
      !mChunks.IsEmpty(),
      "Cannot query channel count on a AudioSegment with no chunks.");
    // Find the first chunk that has non-zero channels. A chunk that has zero
    // channels is just silence and we can simply discard it.
    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
      if (ci->ChannelCount()) {
        return ci->ChannelCount();
      }
    }
    return 0;
  }

  // True when every chunk is silence (or the segment is empty).
  bool IsNull() const {
    // ChunkIterator has no const variant, hence the const_cast; iteration
    // itself does not mutate the segment.
    for (ChunkIterator ci(*const_cast<AudioSegment*>(this)); !ci.IsEnded();
         ci.Next()) {
      if (!ci->IsNull()) {
        return false;
      }
    }
    return true;
  }

  static Type StaticType() { return AUDIO; }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
  {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }
};
  357. template<typename SrcT>
  358. void WriteChunk(AudioChunk& aChunk,
  359. uint32_t aOutputChannels,
  360. AudioDataValue* aOutputBuffer)
  361. {
  362. AutoTArray<const SrcT*,GUESS_AUDIO_CHANNELS> channelData;
  363. channelData = aChunk.ChannelData<SrcT>();
  364. if (channelData.Length() < aOutputChannels) {
  365. // Up-mix. Note that this might actually make channelData have more
  366. // than aOutputChannels temporarily.
  367. AudioChannelsUpMix(&channelData, aOutputChannels, SilentChannel::ZeroChannel<SrcT>());
  368. }
  369. if (channelData.Length() > aOutputChannels) {
  370. // Down-mix.
  371. DownmixAndInterleave(channelData, aChunk.mDuration,
  372. aChunk.mVolume, aOutputChannels, aOutputBuffer);
  373. } else {
  374. InterleaveAndConvertBuffer(channelData.Elements(),
  375. aChunk.mDuration, aChunk.mVolume,
  376. aOutputChannels,
  377. aOutputBuffer);
  378. }
  379. }
  380. } // namespace mozilla
  381. #endif /* MOZILLA_AUDIOSEGMENT_H_ */