//
// ConverterNode.m
// Cog
//
// Created by Zaphod Beeblebrox on 8/2/05.
// Copyright 2005 __MyCompanyName__. All rights reserved.
//
#import <Accelerate/Accelerate.h>
#import <Foundation/Foundation.h>
#import "ConverterNode.h"
#import "BufferChain.h"
#import "OutputNode.h"
#import "Logging.h"
#import "lpc.h"
#import "util.h"
#ifdef _DEBUG
#import "BadSampleCleaner.h"
#endif
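// Debug helper: logs each field of a CoreAudio AudioStreamBasicDescription.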
void PrintStreamDesc(AudioStreamBasicDescription *inDesc) {
if(!inDesc) {
DLog(@"Can't print a NULL desc!\n");
return;
}
DLog(@"- - - - - - - - - - - - - - - - - - - -\n");
DLog(@" Sample Rate:%f\n", inDesc->mSampleRate);
DLog(@" Format ID:%s\n", (char *)&inDesc->mFormatID);
DLog(@" Format Flags:%X\n", inDesc->mFormatFlags);
DLog(@" Bytes per Packet:%d\n", inDesc->mBytesPerPacket);
DLog(@" Frames per Packet:%d\n", inDesc->mFramesPerPacket);
DLog(@" Bytes per Frame:%d\n", inDesc->mBytesPerFrame);
DLog(@" Channels per Frame:%d\n", inDesc->mChannelsPerFrame);
DLog(@" Bits per Channel:%d\n", inDesc->mBitsPerChannel);
DLog(@"- - - - - - - - - - - - - - - - - - - -\n");
}
@implementation ConverterNode
static void *kConverterNodeContext = &kConverterNodeContext;
@synthesize inputFormat;
- (id)initWithController:(id)c previous:(id)p {
self = [super initWithController:c previous:p];
if(self) {
rgInfo = nil;
soxr = 0;
inputBuffer = NULL;
inputBufferSize = 0;
floatBuffer = NULL;
floatBufferSize = 0;
stopping = NO;
convertEntered = NO;
paused = NO;
skipResampler = YES;
extrapolateBuffer = NULL;
extrapolateBufferSize = 0;
[[NSUserDefaultsController sharedUserDefaultsController] addObserver:self forKeyPath:@"values.volumeScaling" options:0 context:kConverterNodeContext];
}
return self;
}
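// Scales `count` floats in place by `volume`. The scalar loop peels off any
// leading floats before the next 16-byte boundary so that vDSP_vsmul runs on
// an aligned buffer; this is purely an optimization, since vDSP also accepts
// unaligned pointers.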
void scale_by_volume(float *buffer, size_t count, float volume) {
if(volume != 1.0) {
size_t unaligned = (uintptr_t)buffer & 15;
if(unaligned) {
// Peel up to the next 16-byte boundary, not by the misalignment itself
size_t count3 = MIN((16 - unaligned) >> 2, count);
while(count3 > 0) {
*buffer++ *= volume;
count3--;
count--;
}
}
vDSP_vsmul(buffer, 1, &volume, buffer, 1, count);
}
}
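// Example: a buffer starting 4 bytes past a boundary peels (16 - 4) / 4 = 3
// floats one at a time, leaving vDSP_vsmul an aligned run for the remainder.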
- (void)process {
// The endOfStream check was removed from here so the converter can be
// flushed once the end of stream is reached. The convert method instead
// processes what it can and returns nil when nothing remains to process.
while([self shouldContinue] == YES) {
AudioChunk *chunk = nil;
while(paused) {
usleep(500);
}
@autoreleasepool {
chunk = [self convert];
}
if(!chunk) {
if(paused) {
continue;
} else if(!streamFormatChanged) {
break;
}
} else {
@autoreleasepool {
[self writeChunk:chunk];
chunk = nil;
}
}
if(streamFormatChanged) {
@autoreleasepool {
[self cleanUp];
[self setupWithInputFormat:newInputFormat withInputConfig:newInputChannelConfig outputFormat:self->outputFormat isLossless:rememberedLossless];
}
}
}
}
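// Pulls input chunks, converts them to interleaved 32-bit float, optionally
// primes and runs the soxr resampler, trims the extrapolated pre/post roll,
// and returns one output AudioChunk, or nil if nothing is available yet.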
- (AudioChunk *)convert {
UInt32 ioNumberPackets;
if(stopping)
return nil;
convertEntered = YES;
if(stopping || [self shouldContinue] == NO) {
convertEntered = NO;
return nil;
}
while(inpOffset == inpSize) {
// Approximately the most we want on input
ioNumberPackets = 4096;
size_t newSize = ioNumberPackets * floatFormat.mBytesPerPacket;
if(!inputBuffer || inputBufferSize < newSize)
inputBuffer = realloc(inputBuffer, inputBufferSize = newSize);
ssize_t amountToWrite = ioNumberPackets * floatFormat.mBytesPerPacket;
ssize_t bytesReadFromInput = 0;
while(bytesReadFromInput < amountToWrite && !stopping && !paused && !streamFormatChanged && [self shouldContinue] == YES && [self endOfStream] == NO) {
AudioStreamBasicDescription inf;
uint32_t config;
if([self peekFormat:&inf channelConfig:&config]) {
if(config != inputChannelConfig || memcmp(&inf, &inputFormat, sizeof(inf)) != 0) {
if(inputChannelConfig == 0 && memcmp(&inf, &inputFormat, sizeof(inf)) == 0) {
inputChannelConfig = config;
continue;
} else {
newInputFormat = inf;
newInputChannelConfig = config;
streamFormatChanged = YES;
break;
}
}
}
AudioChunk *chunk = [self readChunkAsFloat32:((amountToWrite - bytesReadFromInput) / floatFormat.mBytesPerPacket)];
inf = [chunk format];
size_t frameCount = [chunk frameCount];
config = [chunk channelConfig];
size_t bytesRead = frameCount * inf.mBytesPerPacket;
if(frameCount) {
NSData *samples = [chunk removeSamples:frameCount];
memcpy(((uint8_t *)inputBuffer) + bytesReadFromInput, [samples bytes], bytesRead);
if([chunk isHDCD]) {
[controller sustainHDCD];
}
}
bytesReadFromInput += bytesRead;
if(!frameCount) {
usleep(500);
}
}
if(!bytesReadFromInput) {
convertEntered = NO;
return nil;
}
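// At a stream boundary (stop, pause, format change, or end of stream),
// schedule the resampler flush. is_postextrapolated_ states: 0 = no boundary
// yet, 1 = extrapolate forward on this pass, 2 = extrapolation done,
// 3 = resampler bypassed, so there is no latency to flush.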
if(stopping || paused || streamFormatChanged || [self shouldContinue] == NO || [self endOfStream] == YES) {
if(!skipResampler) {
if(!is_postextrapolated_) {
is_postextrapolated_ = 1;
}
} else {
is_postextrapolated_ = 3;
}
}
// Prime the resampler: LPC-extrapolate backwards from the first samples so
// the filter's startup transient lands in synthesized audio, which is
// dropped again below via latencyEaten.
if(!skipResampler && !is_preextrapolated_) {
size_t inputSamples = bytesReadFromInput / floatFormat.mBytesPerPacket;
size_t prime = MIN(inputSamples, PRIME_LEN_);
size_t _N_samples_to_add_ = N_samples_to_add_;
size_t newSize = _N_samples_to_add_ * floatFormat.mBytesPerPacket;
newSize += bytesReadFromInput;
if(newSize > inputBufferSize) {
inputBuffer = realloc(inputBuffer, inputBufferSize = newSize * 3);
}
memmove(inputBuffer + _N_samples_to_add_ * floatFormat.mBytesPerPacket, inputBuffer, bytesReadFromInput);
lpc_extrapolate_bkwd(inputBuffer + _N_samples_to_add_ * floatFormat.mBytesPerPacket, inputSamples, prime, floatFormat.mChannelsPerFrame, LPC_ORDER, _N_samples_to_add_, &extrapolateBuffer, &extrapolateBufferSize);
bytesReadFromInput += _N_samples_to_add_ * floatFormat.mBytesPerPacket;
latencyEaten = N_samples_to_drop_;
is_preextrapolated_ = YES;
}
if(is_postextrapolated_ == 1) {
size_t inputSamples = bytesReadFromInput / floatFormat.mBytesPerPacket;
size_t prime = MIN(inputSamples, PRIME_LEN_);
size_t _N_samples_to_add_ = N_samples_to_add_;
size_t newSize = bytesReadFromInput;
newSize += _N_samples_to_add_ * floatFormat.mBytesPerPacket;
if(newSize > inputBufferSize) {
inputBuffer = realloc(inputBuffer, inputBufferSize = newSize * 3);
}
lpc_extrapolate_fwd(inputBuffer, inputSamples, prime, floatFormat.mChannelsPerFrame, LPC_ORDER, _N_samples_to_add_, &extrapolateBuffer, &extrapolateBufferSize);
bytesReadFromInput += _N_samples_to_add_ * floatFormat.mBytesPerPacket;
latencyEatenPost = N_samples_to_drop_;
is_postextrapolated_ = 2;
} else if(is_postextrapolated_ == 3) {
latencyEatenPost = 0;
}
// inputBuffer now holds bytesReadFromInput bytes of floats at the input sample rate
inpSize = bytesReadFromInput;
inpOffset = 0;
}
ioNumberPackets = (UInt32)(inpSize - inpOffset);
ioNumberPackets -= ioNumberPackets % floatFormat.mBytesPerPacket;
if(ioNumberPackets) {
size_t inputSamples = ioNumberPackets / floatFormat.mBytesPerPacket;
ioNumberPackets = (UInt32)inputSamples;
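// Estimate the output frame count for this ratio, then round up to a
// multiple of 256 frames for headroom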
ioNumberPackets = (UInt32)ceil((float)ioNumberPackets * sampleRatio);
ioNumberPackets = (ioNumberPackets + 255) & ~255;
size_t newSize = ioNumberPackets * floatFormat.mBytesPerPacket;
if(!floatBuffer || floatBufferSize < newSize) {
floatBuffer = realloc(floatBuffer, floatBufferSize = newSize * 3);
}
if(stopping) {
convertEntered = NO;
return nil;
}
size_t inputDone = 0;
size_t outputDone = 0;
if(!skipResampler) {
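// soxr_delay() reports the resampler's current output-side delay in
// frames; budget extra room for them before processing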
ioNumberPackets += soxr_delay(soxr);
soxr_process(soxr, (float *)(((uint8_t *)inputBuffer) + inpOffset), inputSamples, &inputDone, floatBuffer, ioNumberPackets, &outputDone);
if(latencyEatenPost) {
// Flush the resampler after end of file or a format change
size_t idone = 0, odone = 0;
do {
soxr_process(soxr, NULL, 0, &idone, floatBuffer + outputDone * floatFormat.mBytesPerPacket, ioNumberPackets - outputDone, &odone);
outputDone += odone;
} while(odone > 0);
}
} else {
memcpy(floatBuffer, (((uint8_t *)inputBuffer) + inpOffset), inputSamples * floatFormat.mBytesPerPacket);
inputDone = inputSamples;
outputDone = inputSamples;
}
inpOffset += inputDone * floatFormat.mBytesPerPacket;
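// Drop the synthesized pre-roll (from the backward extrapolation above)
// from the front of the resampled output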
if(latencyEaten) {
if(outputDone > latencyEaten) {
outputDone -= latencyEaten;
memmove(floatBuffer, floatBuffer + latencyEaten * floatFormat.mBytesPerPacket, outputDone * floatFormat.mBytesPerPacket);
latencyEaten = 0;
} else {
latencyEaten -= outputDone;
outputDone = 0;
}
}
if(latencyEatenPost) {
if(outputDone > latencyEatenPost) {
outputDone -= latencyEatenPost;
} else {
outputDone = 0;
}
latencyEatenPost = 0;
}
ioNumberPackets = (UInt32)outputDone * floatFormat.mBytesPerPacket;
}
if(ioNumberPackets) {
AudioChunk *chunk = [[AudioChunk alloc] init];
[chunk setFormat:nodeFormat];
if(nodeChannelConfig) {
[chunk setChannelConfig:nodeChannelConfig];
}
scale_by_volume(floatBuffer, ioNumberPackets / sizeof(float), volumeScale);
[chunk assignSamples:floatBuffer frameCount:ioNumberPackets / floatFormat.mBytesPerPacket];
convertEntered = NO;
return chunk;
}
convertEntered = NO;
return nil;
}
- (void)observeValueForKeyPath:(NSString *)keyPath
ofObject:(id)object
change:(NSDictionary *)change
context:(void *)context {
if(context == kConverterNodeContext) {
DLog(@"SOMETHING CHANGED!");
if([keyPath isEqualToString:@"values.volumeScaling"]) {
// The user changed the volume scaling preference
[self refreshVolumeScaling];
}
} else {
[super observeValueForKeyPath:keyPath ofObject:object change:change context:context];
}
}
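// ReplayGain decibels map to a linear scale as 10^(dB / 20): for example,
// -6.0 dB gives 10^(-0.3) ≈ 0.501, and +3.0 dB gives roughly 1.413.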
static float db_to_scale(float db) {
return pow(10.0, db / 20);
}
- (void)refreshVolumeScaling {
if(rgInfo == nil) {
volumeScale = 1.0;
return;
}
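// The volumeScaling preference selects the gain source by name: a
// "trackGain" or "albumGain" prefix picks the ReplayGain tags (album values
// override track values when both are present), a "WithPeak" suffix enables
// peak limiting, and "volumeScale" applies only the file's own volume tag.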
NSString *scaling = [[NSUserDefaults standardUserDefaults] stringForKey:@"volumeScaling"];
BOOL useAlbum = [scaling hasPrefix:@"albumGain"];
BOOL useTrack = useAlbum || [scaling hasPrefix:@"trackGain"];
BOOL useVolume = useAlbum || useTrack || [scaling isEqualToString:@"volumeScale"];
BOOL usePeak = [scaling hasSuffix:@"WithPeak"];
float scale = 1.0;
float peak = 0.0;
if(useVolume) {
id pVolumeScale = [rgInfo objectForKey:@"volume"];
if(pVolumeScale != nil)
scale = [pVolumeScale floatValue];
}
if(useTrack) {
id trackGain = [rgInfo objectForKey:@"replayGainTrackGain"];
id trackPeak = [rgInfo objectForKey:@"replayGainTrackPeak"];
if(trackGain != nil)
scale = db_to_scale([trackGain floatValue]);
if(trackPeak != nil)
peak = [trackPeak floatValue];
}
if(useAlbum) {
id albumGain = [rgInfo objectForKey:@"replayGainAlbumGain"];
id albumPeak = [rgInfo objectForKey:@"replayGainAlbumPeak"];
if(albumGain != nil)
scale = db_to_scale([albumGain floatValue]);
if(albumPeak != nil)
peak = [albumPeak floatValue];
}
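// Example: a +3 dB gain (scale ≈ 1.41) with a stored peak of 0.9 would clip,
// since 1.41 * 0.9 > 1.0, so the scale is limited to 1 / 0.9 ≈ 1.11.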
if(usePeak) {
if(scale * peak > 1.0)
scale = 1.0 / peak;
}
volumeScale = scale;
}
- (BOOL)setupWithInputFormat:(AudioStreamBasicDescription)inf withInputConfig:(uint32_t)inputConfig outputFormat:(AudioStreamBasicDescription)outf isLossless:(BOOL)lossless {
// Make the converter
inputFormat = inf;
outputFormat = outf;
inputChannelConfig = inputConfig;
rememberedLossless = lossless;
// These are the only sample formats we support translating
BOOL isFloat = !!(inputFormat.mFormatFlags & kAudioFormatFlagIsFloat);
if((!isFloat && !(inputFormat.mBitsPerChannel >= 1 && inputFormat.mBitsPerChannel <= 32)) || (isFloat && !(inputFormat.mBitsPerChannel == 32 || inputFormat.mBitsPerChannel == 64)))
return NO;
floatFormat = inputFormat;
floatFormat.mFormatFlags = kAudioFormatFlagsNativeFloatPacked;
floatFormat.mBitsPerChannel = 32;
floatFormat.mBytesPerFrame = (32 / 8) * floatFormat.mChannelsPerFrame;
floatFormat.mBytesPerPacket = floatFormat.mBytesPerFrame * floatFormat.mFramesPerPacket;
#if DSD_DECIMATE
if(inputFormat.mBitsPerChannel == 1) {
// Decimate this for speed
floatFormat.mSampleRate *= 1.0 / 8.0;
}
#endif
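// (With DSD_DECIMATE, 1-bit input is reduced 8:1 during float conversion, so
// e.g. DSD64's 2.8224 MHz stream is handled here as 352.8 kHz float.)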
inpOffset = 0;
inpSize = 0;
// This is the post-resampler format
nodeFormat = floatFormat;
nodeFormat.mSampleRate = outputFormat.mSampleRate;
nodeChannelConfig = inputChannelConfig;
sampleRatio = (double)outputFormat.mSampleRate / (double)floatFormat.mSampleRate;
skipResampler = fabs(sampleRatio - 1.0) < 1e-7;
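// Example: 44100 Hz input to a 96000 Hz output gives sampleRatio ≈ 2.1769;
// equal rates leave the ratio at 1.0 (within 1e-7) and bypass soxr entirely.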
if(!skipResampler) {
soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_HQ, 0);
soxr_io_spec_t io_spec = soxr_io_spec(SOXR_FLOAT32_I, SOXR_FLOAT32_I);
soxr_runtime_spec_t runtime_spec = soxr_runtime_spec(0);
soxr_error_t error;
soxr = soxr_create(floatFormat.mSampleRate, outputFormat.mSampleRate, floatFormat.mChannelsPerFrame, &error, &io_spec, &q_spec, &runtime_spec);
if(error)
return NO;
PRIME_LEN_ = MAX(floatFormat.mSampleRate / 20, 1024u);
PRIME_LEN_ = MIN(PRIME_LEN_, 16384u);
PRIME_LEN_ = MAX(PRIME_LEN_, (unsigned int)(2 * LPC_ORDER + 1));
N_samples_to_add_ = floatFormat.mSampleRate;
N_samples_to_drop_ = outputFormat.mSampleRate;
samples_len(&N_samples_to_add_, &N_samples_to_drop_, 20, 8192u);
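// The extrapolation lengths start as one second at each sample rate;
// samples_len (from util.h) presumably scales both down by the divisor (20)
// and caps them at 8192 frames, so roughly 50 ms of synthetic signal is
// added on either side and dropped again after resampling.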
is_preextrapolated_ = NO;
is_postextrapolated_ = 0;
}
latencyEaten = 0;
latencyEatenPost = 0;
PrintStreamDesc(&inf);
PrintStreamDesc(&nodeFormat);
[self refreshVolumeScaling];
// Reset these flags last so the process loop cannot run the resampler before it is allocated
stopping = NO;
convertEntered = NO;
streamFormatChanged = NO;
paused = NO;
return YES;
}
- (void)dealloc {
DLog(@"Decoder dealloc");
[[NSUserDefaultsController sharedUserDefaultsController] removeObserver:self forKeyPath:@"values.volumeScaling" context:kConverterNodeContext];
paused = NO;
[self cleanUp];
}
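// Pauses the conversion thread, waits for any in-flight convert call to
// finish, then rebuilds the converter for the new input format.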
- (void)inputFormatDidChange:(AudioStreamBasicDescription)format inputConfig:(uint32_t)inputConfig {
DLog(@"FORMAT CHANGED");
paused = YES;
while(convertEntered) {
usleep(500);
}
[self cleanUp];
[self setupWithInputFormat:format withInputConfig:inputConfig outputFormat:self->outputFormat isLossless:rememberedLossless];
}
- (void)setRGInfo:(NSDictionary *)rgi {
DLog(@"Setting ReplayGain info");
rgInfo = rgi;
[self refreshVolumeScaling];
}
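// Tears down the converter: raises the stopping flag, spins until convert
// has exited, then frees the resampler and all scratch buffers.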
- (void)cleanUp {
stopping = YES;
while(convertEntered) {
usleep(500);
}
if(soxr) {
soxr_delete(soxr);
soxr = NULL;
}
if(extrapolateBuffer) {
free(extrapolateBuffer);
extrapolateBuffer = NULL;
extrapolateBufferSize = 0;
}
if(floatBuffer) {
free(floatBuffer);
floatBuffer = NULL;
floatBufferSize = 0;
}
if(inputBuffer) {
free(inputBuffer);
inputBuffer = NULL;
inputBufferSize = 0;
}
inpOffset = 0;
inpSize = 0;
}
- (double)secondsBuffered {
return [buffer listDuration];
}
@end