mirror of
				https://github.com/PabloMK7/citra.git
				synced 2025-10-31 13:50:03 +00:00 
			
		
		
		
	time_stretch: Simplify audio stretcher
This commit is contained in:
		
							parent
							
								
									f34711219a
								
							
						
					
					
						commit
						eed55a813e
					
				
					 4 changed files with 88 additions and 162 deletions
				
			
		|  | @ -15,6 +15,7 @@ DspInterface::DspInterface() = default; | |||
| DspInterface::~DspInterface() = default; | ||||
| 
 | ||||
| void DspInterface::SetSink(const std::string& sink_id, const std::string& audio_device) { | ||||
|     sink.reset(); | ||||
|     const SinkDetails& sink_details = GetSinkDetails(sink_id); | ||||
|     sink = sink_details.factory(audio_device); | ||||
|     sink->SetCallback( | ||||
|  | @ -32,7 +33,7 @@ void DspInterface::EnableStretching(bool enable) { | |||
|         return; | ||||
| 
 | ||||
|     if (!enable) { | ||||
|         FlushResidualStretcherAudio(); | ||||
|         flushing_time_stretcher = true; | ||||
|     } | ||||
|     perform_time_stretching = enable; | ||||
| } | ||||
|  | @ -51,17 +52,27 @@ void DspInterface::OutputFrame(StereoFrame16& frame) { | |||
|     fifo.Push(frame.data(), frame.size()); | ||||
| } | ||||
| 
 | ||||
| void DspInterface::FlushResidualStretcherAudio() {} | ||||
| 
 | ||||
| void DspInterface::OutputCallback(s16* buffer, size_t num_frames) { | ||||
|     const size_t frames_written = fifo.Pop(buffer, num_frames); | ||||
| void DspInterface::OutputCallback(s16* buffer, std::size_t num_frames) { | ||||
|     std::size_t frames_written; | ||||
|     if (perform_time_stretching) { | ||||
|         const std::vector<s16> in{fifo.Pop()}; | ||||
|         const std::size_t num_in{in.size() / 2}; | ||||
|         frames_written = time_stretcher.Process(in.data(), num_in, buffer, num_frames); | ||||
|     } else if (flushing_time_stretcher) { | ||||
|         time_stretcher.Flush(); | ||||
|         frames_written = time_stretcher.Process(nullptr, 0, buffer, num_frames); | ||||
|         frames_written += fifo.Pop(buffer, num_frames - frames_written); | ||||
|         flushing_time_stretcher = false; | ||||
|     } else { | ||||
|         frames_written = fifo.Pop(buffer, num_frames); | ||||
|     } | ||||
| 
 | ||||
|     if (frames_written > 0) { | ||||
|         std::memcpy(&last_frame[0], buffer + 2 * (frames_written - 1), 2 * sizeof(s16)); | ||||
|     } | ||||
| 
 | ||||
|     // Hold last emitted frame; this prevents popping.
 | ||||
|     for (size_t i = frames_written; i < num_frames; i++) { | ||||
|     for (std::size_t i = frames_written; i < num_frames; i++) { | ||||
|         std::memcpy(buffer + 2 * i, &last_frame[0], 2 * sizeof(s16)); | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -85,7 +85,8 @@ private: | |||
|     void OutputCallback(s16* buffer, std::size_t num_frames); | ||||
| 
 | ||||
|     std::unique_ptr<Sink> sink; | ||||
|     bool perform_time_stretching = false; | ||||
|     std::atomic<bool> perform_time_stretching = false; | ||||
|     std::atomic<bool> flushing_time_stretcher = false; | ||||
|     Common::RingBuffer<s16, 0x2000, 2> fifo; | ||||
|     std::array<s16, 2> last_frame{}; | ||||
|     TimeStretcher time_stretcher; | ||||
|  |  | |||
|  | @ -3,143 +3,75 @@ | |||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <chrono> | ||||
| #include <cmath> | ||||
| #include <vector> | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| #include <SoundTouch.h> | ||||
| #include "audio_core/audio_types.h" | ||||
| #include "audio_core/time_stretch.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| 
 | ||||
| using steady_clock = std::chrono::steady_clock; | ||||
| 
 | ||||
| namespace AudioCore { | ||||
| 
 | ||||
| constexpr double MIN_RATIO = 0.1; | ||||
| constexpr double MAX_RATIO = 100.0; | ||||
| 
 | ||||
| static double ClampRatio(double ratio) { | ||||
|     return std::clamp(ratio, MIN_RATIO, MAX_RATIO); | ||||
| TimeStretcher::TimeStretcher() | ||||
|     : sample_rate(native_sample_rate), sound_touch(std::make_unique<soundtouch::SoundTouch>()) { | ||||
|     sound_touch->setChannels(2); | ||||
|     sound_touch->setSampleRate(native_sample_rate); | ||||
|     sound_touch->setPitch(1.0); | ||||
|     sound_touch->setTempo(1.0); | ||||
| } | ||||
| 
 | ||||
| constexpr double MIN_DELAY_TIME = 0.05;                 // Units: seconds
 | ||||
| constexpr double MAX_DELAY_TIME = 0.25;                 // Units: seconds
 | ||||
| constexpr std::size_t DROP_FRAMES_SAMPLE_DELAY = 16000; // Units: samples
 | ||||
| 
 | ||||
| constexpr double SMOOTHING_FACTOR = 0.007; | ||||
| 
 | ||||
| struct TimeStretcher::Impl { | ||||
|     soundtouch::SoundTouch soundtouch; | ||||
| 
 | ||||
|     steady_clock::time_point frame_timer = steady_clock::now(); | ||||
|     std::size_t samples_queued = 0; | ||||
| 
 | ||||
|     double smoothed_ratio = 1.0; | ||||
| 
 | ||||
|     double sample_rate = static_cast<double>(native_sample_rate); | ||||
| }; | ||||
| 
 | ||||
| std::vector<s16> TimeStretcher::Process(std::size_t samples_in_queue) { | ||||
|     // This is a very simple algorithm without any fancy control theory. It works and is stable.
 | ||||
| 
 | ||||
|     double ratio = CalculateCurrentRatio(); | ||||
|     ratio = CorrectForUnderAndOverflow(ratio, samples_in_queue); | ||||
|     impl->smoothed_ratio = | ||||
|         (1.0 - SMOOTHING_FACTOR) * impl->smoothed_ratio + SMOOTHING_FACTOR * ratio; | ||||
|     impl->smoothed_ratio = ClampRatio(impl->smoothed_ratio); | ||||
| 
 | ||||
|     // SoundTouch's tempo definition is the inverse of our ratio definition.
 | ||||
|     impl->soundtouch.setTempo(1.0 / impl->smoothed_ratio); | ||||
| 
 | ||||
|     std::vector<s16> samples = GetSamples(); | ||||
|     if (samples_in_queue >= DROP_FRAMES_SAMPLE_DELAY) { | ||||
|         samples.clear(); | ||||
|         LOG_DEBUG(Audio, "Dropping frames!"); | ||||
|     } | ||||
|     return samples; | ||||
| } | ||||
| 
 | ||||
| TimeStretcher::TimeStretcher() : impl(std::make_unique<Impl>()) { | ||||
|     impl->soundtouch.setPitch(1.0); | ||||
|     impl->soundtouch.setChannels(2); | ||||
|     impl->soundtouch.setSampleRate(native_sample_rate); | ||||
|     Reset(); | ||||
| } | ||||
| 
 | ||||
| TimeStretcher::~TimeStretcher() { | ||||
|     impl->soundtouch.clear(); | ||||
| } | ||||
| TimeStretcher::~TimeStretcher() = default; | ||||
| 
 | ||||
| void TimeStretcher::SetOutputSampleRate(unsigned int sample_rate) { | ||||
|     impl->sample_rate = static_cast<double>(sample_rate); | ||||
|     impl->soundtouch.setRate(static_cast<double>(native_sample_rate) / impl->sample_rate); | ||||
|     sound_touch->setSampleRate(sample_rate); | ||||
|     sample_rate = native_sample_rate; | ||||
| } | ||||
| 
 | ||||
| void TimeStretcher::AddSamples(const s16* buffer, std::size_t num_samples) { | ||||
|     impl->soundtouch.putSamples(buffer, static_cast<uint>(num_samples)); | ||||
|     impl->samples_queued += num_samples; | ||||
| std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out, | ||||
|                                    std::size_t num_out) { | ||||
|     const double time_delta = static_cast<double>(num_out) / sample_rate; // seconds
 | ||||
|     double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out); | ||||
| 
 | ||||
|     const double max_latency = 0.25; // seconds
 | ||||
|     const double max_backlog = sample_rate * max_latency; | ||||
|     const double backlog_fullness = sound_touch->numSamples() / max_backlog; | ||||
|     if (backlog_fullness > 4.0) { | ||||
|         // Too many samples in backlog: don't push any more in
 | ||||
|         num_in = 0; | ||||
|     } | ||||
| 
 | ||||
|     // We ideally want the backlog to be about 50% full.
 | ||||
|     // This gives some headroom both ways to prevent underflow and overflow.
 | ||||
|     // We tweak current_ratio to encourage this.
 | ||||
|     constexpr double tweak_time_scale = 0.050; // seconds
 | ||||
|     const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale); | ||||
|     current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0); | ||||
| 
 | ||||
|     // This low-pass filter smooths out variance in the calculated stretch ratio.
 | ||||
|     // The time-scale determines how responsive this filter is.
 | ||||
|     constexpr double lpf_time_scale = 0.712; // seconds
 | ||||
|     const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale); | ||||
|     stretch_ratio += lpf_gain * (current_ratio - stretch_ratio); | ||||
| 
 | ||||
|     // Place a lower limit of 5% speed.  When a game boots up, there will be
 | ||||
|     // many silence samples.  These do not need to be timestretched.
 | ||||
|     stretch_ratio = std::max(stretch_ratio, 0.05); | ||||
|     sound_touch->setTempo(stretch_ratio); | ||||
| 
 | ||||
|     LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, stretch_ratio, | ||||
|               backlog_fullness); | ||||
| 
 | ||||
|     sound_touch->putSamples(in, num_in); | ||||
|     return sound_touch->receiveSamples(out, num_out); | ||||
| } | ||||
| 
 | ||||
| void TimeStretcher::Clear() { | ||||
|     sound_touch->clear(); | ||||
| } | ||||
| 
 | ||||
| void TimeStretcher::Flush() { | ||||
|     impl->soundtouch.flush(); | ||||
| } | ||||
| 
 | ||||
| void TimeStretcher::Reset() { | ||||
|     impl->soundtouch.setTempo(1.0); | ||||
|     impl->soundtouch.clear(); | ||||
|     impl->smoothed_ratio = 1.0; | ||||
|     impl->frame_timer = steady_clock::now(); | ||||
|     impl->samples_queued = 0; | ||||
|     SetOutputSampleRate(native_sample_rate); | ||||
| } | ||||
| 
 | ||||
| double TimeStretcher::CalculateCurrentRatio() { | ||||
|     const steady_clock::time_point now = steady_clock::now(); | ||||
|     const std::chrono::duration<double> duration = now - impl->frame_timer; | ||||
| 
 | ||||
|     const double expected_time = | ||||
|         static_cast<double>(impl->samples_queued) / static_cast<double>(native_sample_rate); | ||||
|     const double actual_time = duration.count(); | ||||
| 
 | ||||
|     double ratio; | ||||
|     if (expected_time != 0) { | ||||
|         ratio = ClampRatio(actual_time / expected_time); | ||||
|     } else { | ||||
|         ratio = impl->smoothed_ratio; | ||||
|     } | ||||
| 
 | ||||
|     impl->frame_timer = now; | ||||
|     impl->samples_queued = 0; | ||||
| 
 | ||||
|     return ratio; | ||||
| } | ||||
| 
 | ||||
| double TimeStretcher::CorrectForUnderAndOverflow(double ratio, std::size_t sample_delay) const { | ||||
|     const std::size_t min_sample_delay = | ||||
|         static_cast<std::size_t>(MIN_DELAY_TIME * impl->sample_rate); | ||||
|     const std::size_t max_sample_delay = | ||||
|         static_cast<std::size_t>(MAX_DELAY_TIME * impl->sample_rate); | ||||
| 
 | ||||
|     if (sample_delay < min_sample_delay) { | ||||
|         // Make the ratio bigger.
 | ||||
|         ratio = ratio > 1.0 ? ratio * ratio : sqrt(ratio); | ||||
|     } else if (sample_delay > max_sample_delay) { | ||||
|         // Make the ratio smaller.
 | ||||
|         ratio = ratio > 1.0 ? sqrt(ratio) : ratio * ratio; | ||||
|     } | ||||
| 
 | ||||
|     return ClampRatio(ratio); | ||||
| } | ||||
| 
 | ||||
| std::vector<s16> TimeStretcher::GetSamples() { | ||||
|     uint available = impl->soundtouch.numSamples(); | ||||
| 
 | ||||
|     std::vector<s16> output(static_cast<std::size_t>(available) * 2); | ||||
| 
 | ||||
|     impl->soundtouch.receiveSamples(output.data(), available); | ||||
| 
 | ||||
|     return output; | ||||
|     sound_touch->flush(); | ||||
| } | ||||
| 
 | ||||
| } // namespace AudioCore
 | ||||
|  |  | |||
|  | @ -4,57 +4,39 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| #include <vector> | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace soundtouch { | ||||
| class SoundTouch; | ||||
| } | ||||
| 
 | ||||
| namespace AudioCore { | ||||
| 
 | ||||
| class TimeStretcher final { | ||||
| class TimeStretcher { | ||||
| public: | ||||
|     TimeStretcher(); | ||||
|     ~TimeStretcher(); | ||||
| 
 | ||||
|     /**
 | ||||
|      * Set sample rate for the samples that Process returns. | ||||
|      * @param sample_rate The sample rate. | ||||
|      */ | ||||
|     void SetOutputSampleRate(unsigned int sample_rate); | ||||
| 
 | ||||
|     /**
 | ||||
|      * Add samples to be processed. | ||||
|      * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format. | ||||
|      * @param num_samples Number of samples. | ||||
|      */ | ||||
|     void AddSamples(const s16* sample_buffer, std::size_t num_samples); | ||||
|     /// @param in       Input sample buffer
 | ||||
|     /// @param num_in   Number of input frames in `in`
 | ||||
|     /// @param out      Output sample buffer
 | ||||
|     /// @param num_out  Desired number of output frames in `out`
 | ||||
|     /// @returns Actual number of frames written to `out`
 | ||||
|     std::size_t Process(const s16* in, std::size_t num_in, s16* out, std::size_t num_out); | ||||
| 
 | ||||
|     void Clear(); | ||||
| 
 | ||||
|     /// Flush audio remaining in internal buffers.
 | ||||
|     void Flush(); | ||||
| 
 | ||||
|     /// Resets internal state and clears buffers.
 | ||||
|     void Reset(); | ||||
| 
 | ||||
|     /**
 | ||||
|      * Does audio stretching and produces the time-stretched samples. | ||||
|      * Timer calculations use sample_delay to determine how much of a margin we have. | ||||
|      * @param sample_delay How many samples are buffered downstream of this module and haven't been | ||||
|      * played yet. | ||||
|      * @return Samples to play in interleaved stereo PCM16 format. | ||||
|      */ | ||||
|     std::vector<s16> Process(std::size_t sample_delay); | ||||
| 
 | ||||
| private: | ||||
|     struct Impl; | ||||
|     std::unique_ptr<Impl> impl; | ||||
| 
 | ||||
|     /// INTERNAL: ratio = wallclock time / emulated time
 | ||||
|     double CalculateCurrentRatio(); | ||||
|     /// INTERNAL: If we have too many or too few samples downstream, nudge ratio in the appropriate
 | ||||
|     /// direction.
 | ||||
|     double CorrectForUnderAndOverflow(double ratio, std::size_t sample_delay) const; | ||||
|     /// INTERNAL: Gets the time-stretched samples from SoundTouch.
 | ||||
|     std::vector<s16> GetSamples(); | ||||
|     unsigned int sample_rate; | ||||
|     std::unique_ptr<soundtouch::SoundTouch> sound_touch; | ||||
|     double stretch_ratio = 1.0; | ||||
| }; | ||||
| 
 | ||||
| } // namespace AudioCore
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue