From d0d3917ba1ba513a0269499f93530a995699485e Mon Sep 17 00:00:00 2001 From: Stephen Saunders Date: Thu, 10 Feb 2022 01:28:08 -0500 Subject: [PATCH] Simplify ffmpeg audio code, sync ffmpeg audio frames to video, enable Bink cinematic audio, ensure audio buffers are released (cherry picked from commit 62853bfc0307d1507903f0454bd062f387f775e2) --- neo/renderer/Cinematic.cpp | 132 ++++++++++++++++------- neo/sound/OpenAL/AL_CinematicAudio.cpp | 46 +++++--- neo/sound/XAudio2/XA2_CinematicAudio.cpp | 48 +++++++-- 3 files changed, 172 insertions(+), 54 deletions(-) diff --git a/neo/renderer/Cinematic.cpp b/neo/renderer/Cinematic.cpp index d82b5268..b5152650 100644 --- a/neo/renderer/Cinematic.cpp +++ b/neo/renderer/Cinematic.cpp @@ -74,18 +74,19 @@ extern "C" // SRS - For handling cinematic audio packets #include #define NUM_PACKETS 4 +#define NUM_LAG_FRAMES 15 // SRS - Lag cinematic audio by 15 frames (~1/2 sec at 30 fps) to sync with FFMPEG video bool hasplanar = true; #endif #ifdef USE_BINKDEC - // DG: not sure how to use FFMPEG and BINKDEC at the same time.. it might be useful if someone wants to - // use binkdec for bink and FFMPEG for other formats in custom code so I didn't just rip FFMPEG out - // But right now it's unsupported, if you need this adjust the video loading code etc yourself - #ifdef USE_FFMPEG - #error "Currently, only one of FFMPEG and BINKDEC is supported at a time!" - #endif +// DG: not sure how to use FFMPEG and BINKDEC at the same time.. it might be useful if someone wants to +// use binkdec for bink and FFMPEG for other formats in custom code so I didn't just rip FFMPEG out +// But right now it's unsupported, if you need this adjust the video loading code etc yourself +#ifdef USE_FFMPEG +#error "Currently, only one of FFMPEG and BINKDEC is supported at a time!" +#endif - #include +#include #endif // USE_BINKDEC class idCinematicLocal : public idCinematic @@ -131,6 +132,9 @@ private: bool InitFromFFMPEGFile( const char* qpath, bool looping ); void FFMPEGReset(); std::queue packets[NUM_PACKETS]; + uint8_t* lagBuffer[NUM_LAG_FRAMES] = {}; + int lagBufSize[NUM_LAG_FRAMES] = {}; + int lagIndex; #endif #ifdef USE_BINKDEC BinkHandle binkHandle; @@ -145,6 +149,9 @@ private: idImage* imgY; idImage* imgCr; idImage* imgCb; + uint32_t audioTracks; + uint32_t trackIndex; + AudioInfo binkInfo; #endif idImage* img; bool isRoQ; @@ -447,6 +454,7 @@ idCinematicLocal::idCinematicLocal() img_convert_ctx = NULL; hasFrame = false; framePos = -1; + lagIndex = 0; #endif #ifdef USE_BINKDEC @@ -455,6 +463,9 @@ idCinematicLocal::idCinematicLocal() hasFrame = false; framePos = -1; numFrames = 0; + audioTracks = 0; + trackIndex = -1; + binkInfo = {}; imgY = globalImages->AllocStandaloneImage( "_cinematicY" ); imgCr = globalImages->AllocStandaloneImage( "_cinematicCr" ); @@ -537,6 +548,12 @@ idCinematicLocal::~idCinematicLocal() av_freep( &frame2 ); av_freep( &frame3 ); #endif + + // SRS - Free any lagged cinematic audio buffers + for( int i = 0; i < NUM_LAG_FRAMES; i++ ) + { + av_freep( &lagBuffer[ i ] ); + } if( fmt_ctx ) { @@ -681,7 +698,7 @@ bool idCinematicLocal::InitFromFFMPEGFile( const char* qpath, bool amilooping ) { char* error = new char[256]; av_strerror( ret, error, 256 ); - common->Warning( "idCinematic: Failed to create codec context from codec parameters with error: %s\n", error ); + common->Warning( "idCinematic: Failed to create video codec context from codec parameters with error: %s\n", error ); } dec_ctx->time_base = fmt_ctx->streams[video_stream_index]->time_base; dec_ctx->framerate = fmt_ctx->streams[video_stream_index]->avg_frame_rate; @@ -691,7 +708,7 @@ bool idCinematicLocal::InitFromFFMPEGFile( const char* qpath, bool amilooping ) { char* error = new char[256]; av_strerror( ret, error, 256 ); - common->Warning( "idCinematic: Cannot open video decoder for: '%s', %d, with message: %s\n", qpath, looping, error ); + common->Warning( "idCinematic: Cannot open video decoder for: '%s', %d, with error: %s\n", qpath, looping, error ); return false; } //GK:Begin @@ -705,7 +722,7 @@ bool idCinematicLocal::InitFromFFMPEGFile( const char* qpath, bool amilooping ) { char* error = new char[256]; av_strerror( ret2, error, 256 ); - common->Warning( "idCinematic: Failed to create codec context from codec parameters with error: %s\n", error ); + common->Warning( "idCinematic: Failed to create audio codec context from codec parameters with error: %s\n", error ); } dec_ctx2->time_base = fmt_ctx->streams[audio_stream_index]->time_base; dec_ctx2->framerate = fmt_ctx->streams[audio_stream_index]->avg_frame_rate; @@ -715,15 +732,16 @@ bool idCinematicLocal::InitFromFFMPEGFile( const char* qpath, bool amilooping ) common->Warning( "idCinematic: Cannot open audio decoder for: '%s', %d\n", qpath, looping ); //return false; } - if( dec_ctx2->sample_fmt >= 5 ) + if( dec_ctx2->sample_fmt >= AV_SAMPLE_FMT_U8P ) // SRS - Planar formats start at AV_SAMPLE_FMT_U8P { - dst_smp = static_cast( dec_ctx2->sample_fmt - 5 ); + dst_smp = static_cast( dec_ctx2->sample_fmt - AV_SAMPLE_FMT_U8P ); // SRS - Setup context to convert from planar to packed swr_ctx = swr_alloc_set_opts( NULL, dec_ctx2->channel_layout, dst_smp, dec_ctx2->sample_rate, dec_ctx2->channel_layout, dec_ctx2->sample_fmt, dec_ctx2->sample_rate, 0, NULL ); int res = swr_init( swr_ctx ); hasplanar = true; } else { + dst_smp = dec_ctx2->sample_fmt; // SRS - Must always define the destination format hasplanar = false; } common->Printf( "Cinematic audio stream found: Sample Rate=%d Hz, Channels=%d, Format=%s, Planar=%d\n", dec_ctx2->sample_rate, dec_ctx2->channels, GetSampleFormat( dec_ctx2->sample_fmt ), hasplanar ); @@ -754,7 +772,8 @@ bool idCinematicLocal::InitFromFFMPEGFile( const char* qpath, bool amilooping ) * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2. */ int ticksPerFrame = dec_ctx->ticks_per_frame; - float durationSec = static_cast( fmt_ctx->streams[video_stream_index]->duration ) * static_cast( ticksPerFrame ) / static_cast( avr.den ); + // SRS - In addition to ticks, must also use time_base numerator (not always 1) and denominator in the duration calculation + float durationSec = static_cast( fmt_ctx->streams[video_stream_index]->duration ) * static_cast( ticksPerFrame ) * static_cast( avr.num ) / static_cast( avr.den ); //GK: No duration is given. Check if we get at least bitrate to calculate the length, otherwise set it to a fixed 100 seconds (should it be lower ?) if( durationSec < 0 ) { @@ -863,6 +882,16 @@ bool idCinematicLocal::InitFromBinkDecFile( const char* qpath, bool amilooping ) CIN_HEIGHT = h; } + // SRS - Support Bink Audio for cinematic playback + audioTracks = Bink_GetNumAudioTracks( binkHandle ); + if( audioTracks > 0 ) + { + trackIndex = 0; // SRS - Use the first audio track - is this reasonable? + binkInfo = Bink_GetAudioTrackDetails( binkHandle, trackIndex ); + common->Printf( "Cinematic audio stream found: Sample Rate=%d Hz, Channels=%d\n", binkInfo.sampleRate, binkInfo.nChannels ); + cinematicAudio->InitAudio( &binkInfo ); + } + frameRate = Bink_GetFrameRate( binkHandle ); numFrames = Bink_GetNumFrames( binkHandle ); float durationSec = numFrames / frameRate; // SRS - fixed Bink durationSec calculation @@ -1221,7 +1250,7 @@ idCinematicLocal::ImageForTimeFFMPEG cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) { cinData_t cinData; - uint8_t** tBuffer2 = NULL; + uint8_t* audioBuffer = NULL; int num_bytes = 0; if( thisTime <= 0 ) @@ -1314,7 +1343,7 @@ cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) { char* error = new char[256]; av_strerror( res, error, 256 ); - common->Warning( "idCinematic: Failed to send packet for decoding with message: %s\n", error ); + common->Warning( "idCinematic: Failed to send video packet for decoding with error: %s\n", error ); } else { @@ -1322,12 +1351,12 @@ cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) { char* error = new char[256]; av_strerror( frameFinished, error, 256 ); - common->Warning( "idCinematic: Failed to receive frame from decoding with message: %s\n", error ); + common->Warning( "idCinematic: Failed to receive video frame from decoding with error: %s\n", error ); } } } //GK:Begin - if( packet.stream_index == audio_stream_index ) //Check if it found any audio data + else if( packet.stream_index == audio_stream_index ) //Check if it found any audio data { packets->push( packet ); res = avcodec_send_packet( dec_ctx2, &packets->front() ); @@ -1335,7 +1364,7 @@ cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) { char* error = new char[256]; av_strerror( res, error, 256 ); - common->Warning( "idCinematic: Failed to send packet for decoding with message: %s\n", error ); + common->Warning( "idCinematic: Failed to send audio packet for decoding with error: %s\n", error ); } else { @@ -1345,31 +1374,31 @@ cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) { char* error = new char[256]; av_strerror( frameFinished1, error, 256 ); - common->Warning( "idCinematic: Failed to receive frame from decoding with message: %s\n", error ); + common->Warning( "idCinematic: Failed to receive audio frame from decoding with error: %s\n", error ); } else { - int bufflinesize; + // SRS - Since destination sample format is packed (non-planar), returned bufflinesize equals num_bytes + res = av_samples_alloc( &audioBuffer, &num_bytes, frame3->channels, frame3->nb_samples, dst_smp, 0 ); + if( res < 0 || res != num_bytes ) + { + common->Warning( "idCinematic: Failed to allocate audio buffer with result: %d\n", res ); + } if( hasplanar ) { - av_samples_alloc_array_and_samples( &tBuffer2, - &bufflinesize, - frame3->channels, - av_rescale_rnd( frame3->nb_samples, frame3->sample_rate, frame3->sample_rate, AV_ROUND_UP ), - dst_smp, - 0 ); - int res = swr_convert( swr_ctx, tBuffer2, bufflinesize, ( const uint8_t** )frame3->extended_data, frame3->nb_samples ); - num_bytes = av_samples_get_buffer_size( &bufflinesize, frame3->channels, - res, dst_smp, 1 ); + // SRS - Convert from planar to packed format keeping sample count the same + res = swr_convert( swr_ctx, &audioBuffer, frame3->nb_samples, ( const uint8_t** )frame3->extended_data, frame3->nb_samples ); + if( res < 0 || res != frame3->nb_samples ) + { + common->Warning( "idCinematic: Failed to convert planar audio data to packed format with result: %d\n", res ); + } } else { - num_bytes = frame3->linesize[0]; - tBuffer2 = ( uint8_t** )malloc( sizeof( frame3->extended_data ) / sizeof( uint8_t* ) ); - tBuffer2[0] = ( uint8_t* )malloc( num_bytes ); + // SRS - Since audio is already in packed format, just copy into audio buffer if( num_bytes > 0 ) { - memcpy( tBuffer2[0], frame3->extended_data[0], num_bytes ); + memcpy( audioBuffer, frame3->extended_data[0], num_bytes ); } } } @@ -1393,10 +1422,21 @@ cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) hasFrame = true; cinData.image = img; - // SRS - If we have cinematic audio data, start playing it now - if( tBuffer2 ) + // SRS - If we have cinematic audio data, play a lagged frame (for FFMPEG video sync) and save the current frame + if( num_bytes > 0 ) { - cinematicAudio->PlayAudio( tBuffer2[0], num_bytes ); + // SRS - If we have a lagged cinematic audio frame, then play it now + if( lagBufSize[ lagIndex ] > 0 ) + { + // SRS - Note that PlayAudio() is responsible for releasing any audio buffers sent to it + cinematicAudio->PlayAudio( lagBuffer[ lagIndex ], lagBufSize[ lagIndex ] ); + } + + // SRS - Save the current (new) audio buffer and its size to play NUM_LAG_FRAMES in the future + lagBuffer[ lagIndex ] = audioBuffer; + lagBufSize[ lagIndex ] = num_bytes; + + lagIndex = ( lagIndex + 1 ) % NUM_LAG_FRAMES; } return cinData; @@ -1408,6 +1448,8 @@ cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) { cinData_t cinData; + int16_t* audioBuffer = NULL; + uint32_t num_bytes = 0; if( thisTime <= 0 ) { @@ -1542,6 +1584,24 @@ cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) cinData.imageCr = imgCr; cinData.imageCb = imgCb; + if( audioTracks > 0 ) + { + audioBuffer = ( int16_t* )malloc( binkInfo.idealBufferSize ); + num_bytes = Bink_GetAudioData( binkHandle, trackIndex, audioBuffer ); + + // SRS - If we have cinematic audio data, start playing it now + if( num_bytes > 0 ) + { + // SRS - Note that PlayAudio() is responsible for releasing any audio buffers sent to it + cinematicAudio->PlayAudio( ( uint8_t* )audioBuffer, num_bytes ); + } + else + { + // SRS - Even though we have no audio data to play, still need to free the audio buffer + free( audioBuffer ); + } + } + return cinData; } #endif diff --git a/neo/sound/OpenAL/AL_CinematicAudio.cpp b/neo/sound/OpenAL/AL_CinematicAudio.cpp index e8dd8509..29cbf5ad 100644 --- a/neo/sound/OpenAL/AL_CinematicAudio.cpp +++ b/neo/sound/OpenAL/AL_CinematicAudio.cpp @@ -33,6 +33,10 @@ extern "C" } #endif +#if defined(USE_BINKDEC) +#include +#endif + extern idCVar s_noSound; extern idCVar s_volume_dB; @@ -53,7 +57,6 @@ CinematicAudio_OpenAL::CinematicAudio_OpenAL(): void CinematicAudio_OpenAL::InitAudio( void* audioContext ) { -//SRS - This InitAudio() implementation is FFMPEG-only until we have a BinkDec solution as well #if defined(USE_FFMPEG) AVCodecContext* dec_ctx2 = ( AVCodecContext* )audioContext; av_rate_cin = dec_ctx2->sample_rate; @@ -84,12 +87,16 @@ void CinematicAudio_OpenAL::InitAudio( void* audioContext ) return; } } - +#elif defined(USE_BINKDEC) + AudioInfo* binkInfo = ( AudioInfo* )audioContext; + av_rate_cin = binkInfo->sampleRate; + av_sample_cin = binkInfo->nChannels == 2 ? AL_FORMAT_STEREO16 : AL_FORMAT_MONO16; +#endif + alSourceRewind( alMusicSourceVoicecin ); alSourcei( alMusicSourceVoicecin, AL_BUFFER, 0 ); offset = 0; trigger = false; -#endif } void CinematicAudio_OpenAL::PlayAudio( uint8_t* data, int size ) @@ -118,6 +125,12 @@ void CinematicAudio_OpenAL::PlayAudio( uint8_t* data, int size ) if( tempSize > 0 ) { alBufferData( bufid, av_sample_cin, tempdata, tempSize, av_rate_cin ); + // SRS - We must free the audio buffer once it has been copied into an alBuffer +#if defined(USE_FFMPEG) + av_freep( &tempdata ); +#elif defined(USE_BINKDEC) + free( tempdata ); +#endif alSourceQueueBuffers( alMusicSourceVoicecin, 1, &bufid ); ALenum error = alGetError(); if( error != AL_NO_ERROR ) @@ -126,17 +139,18 @@ void CinematicAudio_OpenAL::PlayAudio( uint8_t* data, int size ) return; } } - offset++; - if( offset == NUM_BUFFERS ) - { - offset = 0; - } } } } else { alBufferData( alMusicBuffercin[offset], av_sample_cin, data, size, av_rate_cin ); + // SRS - We must free the audio buffer once it has been copied into an alBuffer +#if defined(USE_FFMPEG) + av_freep( &data ); +#elif defined(USE_BINKDEC) + free( data ); +#endif offset++; if( offset == NUM_BUFFERS ) { @@ -148,7 +162,6 @@ void CinematicAudio_OpenAL::PlayAudio( uint8_t* data, int size ) return; } trigger = true; - offset = 0; } } @@ -178,10 +191,12 @@ void CinematicAudio_OpenAL::ShutdownAudio() if( alIsSource( alMusicSourceVoicecin ) ) { alSourceStop( alMusicSourceVoicecin ); - // SRS - Make sure we don't try to unqueue buffers that were never queued in the first place - if( !tBuffer->empty() ) + // SRS - Make sure we don't try to unqueue buffers that were never processed + ALint processed; + alGetSourcei( alMusicSourceVoicecin, AL_BUFFERS_PROCESSED, &processed ); + if( processed > 0 ) { - alSourceUnqueueBuffers( alMusicSourceVoicecin, NUM_BUFFERS, alMusicBuffercin ); + alSourceUnqueueBuffers( alMusicSourceVoicecin, processed, alMusicBuffercin ); } alSourcei( alMusicSourceVoicecin, AL_BUFFER, 0 ); alDeleteSources( 1, &alMusicSourceVoicecin ); @@ -200,7 +215,14 @@ void CinematicAudio_OpenAL::ShutdownAudio() int buffersize = tBuffer->size(); while( buffersize > 0 ) { + uint8_t* tempdata = tBuffer->front(); tBuffer->pop(); + // SRS - We must free any audio buffers that have not been copied into an alBuffer +#if defined(USE_FFMPEG) + av_freep( &tempdata ); +#elif defined(USE_BINKDEC) + free( tempdata ); +#endif buffersize--; } } diff --git a/neo/sound/XAudio2/XA2_CinematicAudio.cpp b/neo/sound/XAudio2/XA2_CinematicAudio.cpp index 32c49a28..54d64d48 100644 --- a/neo/sound/XAudio2/XA2_CinematicAudio.cpp +++ b/neo/sound/XAudio2/XA2_CinematicAudio.cpp @@ -33,14 +33,42 @@ extern "C" } #endif +#if defined(USE_BINKDEC) +#include +#endif + CinematicAudio_XAudio2::CinematicAudio_XAudio2(): pMusicSourceVoice1(NULL) { } +// SRS - Implement the voice callback interface to determine when audio buffers can be freed +class VoiceCallback : public IXAudio2VoiceCallback +{ +public: + // SRS - We must free the audio buffer once it has finished playing + void OnBufferEnd( void* data ) + { +#if defined(USE_FFMPEG) + av_freep( &data ); +#elif defined(USE_BINKDEC) + free( data ); +#endif + } + //Unused methods are stubs + void OnBufferStart( void* pBufferContext ) { } + void OnLoopEnd( void* pBufferContext ) { } + void OnStreamEnd( ) { } + void OnVoiceError( void* pBufferContext, HRESULT Error) { } + void OnVoiceProcessingPassEnd( ) { } + void OnVoiceProcessingPassStart( UINT32 BytesRequired ) { } +}; + +VoiceCallback voiceCallback; +// SRS end + void CinematicAudio_XAudio2::InitAudio( void* audioContext ) { -//SRS - This InitAudio() implementation is FFMPEG-only until we have a BinkDec solution as well #if defined(USE_FFMPEG) AVCodecContext* dec_ctx2 = ( AVCodecContext* )audioContext; int format_byte = 0; @@ -79,11 +107,18 @@ void CinematicAudio_XAudio2::InitAudio( void* audioContext ) return; } } + voiceFormatcine.nChannels = dec_ctx2->channels; //fixed + voiceFormatcine.nSamplesPerSec = dec_ctx2->sample_rate; //fixed +#elif defined(USE_BINKDEC) + AudioInfo* binkInfo = ( AudioInfo* )audioContext; + int format_byte = 2; + bool use_ext = false; + voiceFormatcine.nChannels = binkInfo->nChannels; //fixed + voiceFormatcine.nSamplesPerSec = binkInfo->sampleRate; //fixed +#endif WAVEFORMATEXTENSIBLE exvoice = { 0 }; voiceFormatcine.wFormatTag = WAVE_FORMAT_EXTENSIBLE; //Use extensible wave format in order to handle properly the audio - voiceFormatcine.nChannels = dec_ctx2->channels; //fixed - voiceFormatcine.nSamplesPerSec = dec_ctx2->sample_rate; //fixed voiceFormatcine.wBitsPerSample = format_byte * 8; //fixed voiceFormatcine.nBlockAlign = format_byte * voiceFormatcine.nChannels; //fixed voiceFormatcine.nAvgBytesPerSec = voiceFormatcine.nSamplesPerSec * voiceFormatcine.nBlockAlign; //fixed @@ -114,8 +149,9 @@ void CinematicAudio_XAudio2::InitAudio( void* audioContext ) exvoice.Samples.wValidBitsPerSample = voiceFormatcine.wBitsPerSample; exvoice.Samples.wSamplesPerBlock = voiceFormatcine.wBitsPerSample; exvoice.SubFormat = use_ext ? KSDATAFORMAT_SUBTYPE_IEEE_FLOAT : KSDATAFORMAT_SUBTYPE_PCM; - ( ( IXAudio2* )soundSystemLocal.GetIXAudio2() )->CreateSourceVoice( &pMusicSourceVoice1, ( WAVEFORMATEX* )&exvoice, XAUDIO2_VOICE_USEFILTER ); // Use the XAudio2 that the game has initialized instead of making our own -#endif + // Use the XAudio2 that the game has initialized instead of making our own + // SRS - Hook up the voice callback interface to get notice when audio buffers can be freed + ( ( IXAudio2* )soundSystemLocal.GetIXAudio2() )->CreateSourceVoice( &pMusicSourceVoice1, ( WAVEFORMATEX* )&exvoice, XAUDIO2_VOICE_USEFILTER, XAUDIO2_DEFAULT_FREQ_RATIO, &voiceCallback ); } void CinematicAudio_XAudio2::PlayAudio( uint8_t* data, int size ) @@ -129,7 +165,7 @@ void CinematicAudio_XAudio2::PlayAudio( uint8_t* data, int size ) Packet.LoopBegin = 0; Packet.LoopLength = 0; Packet.LoopCount = 0; - Packet.pContext = NULL; + Packet.pContext = ( BYTE* )data; // SRS - Pass the audio buffer pointer to the voice callback methods so it can be freed when buffer playback is finished HRESULT hr; if( FAILED( hr = pMusicSourceVoice1->SubmitSourceBuffer( &Packet ) ) ) {