diff --git a/engine/Makefile b/engine/Makefile index fdb3f31b..53a7fec3 100644 --- a/engine/Makefile +++ b/engine/Makefile @@ -190,6 +190,10 @@ ifndef USE_CODEC_OPUS USE_CODEC_OPUS=1 endif +ifndef USE_CODEC_THEORA +USE_CODEC_THEORA=1 +endif + ifndef USE_MUMBLE USE_MUMBLE=1 endif @@ -218,6 +222,10 @@ ifndef USE_INTERNAL_OPUS USE_INTERNAL_OPUS=$(USE_INTERNAL_LIBS) endif +ifndef USE_INTERNAL_THEORA +USE_INTERNAL_THEORA=$(USE_INTERNAL_LIBS) +endif + ifndef USE_INTERNAL_ZLIB USE_INTERNAL_ZLIB=$(USE_INTERNAL_LIBS) endif @@ -270,6 +278,7 @@ OGGDIR=$(MOUNT_DIR)/libogg-1.3.3 VORBISDIR=$(MOUNT_DIR)/libvorbis-1.3.6 OPUSDIR=$(MOUNT_DIR)/opus-1.2.1 OPUSFILEDIR=$(MOUNT_DIR)/opusfile-0.9 +THEORADIR=$(MOUNT_DIR)/libtheora-1.1.1 ZDIR=$(MOUNT_DIR)/zlib TOOLSDIR=$(MOUNT_DIR)/tools Q3ASMDIR=$(MOUNT_DIR)/tools/asm @@ -326,21 +335,15 @@ ifeq ($(SDL_CFLAGS),) endif endif -# Add svn version info -USE_SVN= -ifeq ($(wildcard ../.svn),../.svn) - SVN_REV=$(shell LANG=C svnversion .) - ifneq ($(SVN_REV),) - VERSION:=$(VERSION)_r$(SVN_REV) - USE_SVN=1 + +# Add git version info +USE_GIT= +ifeq ($(wildcard .git),.git) + GIT_REV=$(shell git show -s --pretty=format:%ad+%h --date=short | tr -d '-') + ifneq ($(GIT_REV),) + VERSION:=$(VERSION)+$(GIT_REV) + USE_GIT=1 endif -else -ifeq ($(wildcard ../.git/svn/.metadata),../.git/svn/.metadata) - SVN_REV=$(shell LANG=C git svn info | awk '$$1 == "Revision:" {print $$2; exit 0}') - ifneq ($(SVN_REV),) - VERSION:=$(VERSION)_r$(SVN_REV) - endif -endif endif ifdef IOQ3_REVISION VERSION:=$(VERSION)_IOQ3r$(IOQ3_REVISION) @@ -931,7 +934,7 @@ ifeq ($(PLATFORM),sunos) CC=gcc INSTALL=ginstall MKDIR=gmkdir -p - COPYDIR="/usr/local/share/games/quake3" + COPYDIR="/usr/local/share/games/q3rally" ifneq ($(ARCH),x86) ifneq ($(ARCH),sparc) @@ -1099,6 +1102,19 @@ ifeq ($(USE_CURL),1) endif endif + +ifeq ($(USE_CODEC_THEORA),1) + CLIENT_CFLAGS += -DUSE_CIN_THEORA + ifeq ($(USE_INTERNAL_THEORA),1) + THEORA_CFLAGS=-I$(THEORADIR)/include + else + THEORA_CFLAGS ?= $(shell 
$(PKG_CONFIG) --silence-errors --cflags theoradec || true) + THEORA_LIBS ?= $(shell $(PKG_CONFIG) --silence-errors --libs theoradec || echo -ltheoradec) + endif + CLIENT_CFLAGS += $(THEORA_CFLAGS) + CLIENT_LIBS += $(THEORA_LIBS) + NEED_OGG=1 +endif ifeq ($(USE_VOIP),1) CLIENT_CFLAGS += -DUSE_VOIP SERVER_CFLAGS += -DUSE_VOIP @@ -1475,6 +1491,7 @@ endif makedirs: @$(MKDIR) $(B)/autoupdater @$(MKDIR) $(B)/client/opus + @$(MKDIR) $(B)/client/theora @$(MKDIR) $(B)/client/vorbis @$(MKDIR) $(B)/renderergl1 @$(MKDIR) $(B)/renderergl2 @@ -1721,6 +1738,7 @@ $(B)/$(AUTOUPDATER_BIN): $(Q3AUTOUPDATEROBJ) Q3OBJ = \ $(B)/client/cl_cgame.o \ $(B)/client/cl_cin.o \ + $(B)/client/cl_cin_ogm.o \ $(B)/client/cl_console.o \ $(B)/client/cl_input.o \ $(B)/client/cl_keys.o \ @@ -2013,6 +2031,38 @@ ifeq ($(ARCH),x86_64) $(B)/client/ftola.o endif + +ifeq ($(USE_CODEC_THEORA),1) +ifeq ($(USE_INTERNAL_THEORA),1) +Q3OBJ += \ + $(B)/client/theora/apiwrapper.o \ + $(B)/client/theora/bitpack.o \ + $(B)/client/theora/decapiwrapper.o \ + $(B)/client/theora/decinfo.o \ + $(B)/client/theora/decode.o \ + $(B)/client/theora/dequant.o \ + $(B)/client/theora/fragment.o \ + $(B)/client/theora/huffdec.o \ + $(B)/client/theora/idct.o \ + $(B)/client/theora/info.o \ + $(B)/client/theora/internal.o \ + $(B)/client/theora/quant.o \ + $(B)/client/theora/state.o + +THEORA_OBJ_X86 = \ + $(B)/client/theora/mmxidct.o \ + $(B)/client/theora/mmxfrag.o \ + $(B)/client/theora/mmxstate.o \ + $(B)/client/theora/x86state.o + +ifeq ($(ARCH),x86) + Q3OBJ += $(THEORA_OBJ_X86) +endif +ifeq ($(ARCH),x86_64) + Q3OBJ += $(THEORA_OBJ_X86) +endif +endif +endif ifeq ($(NEED_OPUS),1) ifeq ($(USE_INTERNAL_OPUS),1) Q3OBJ += \ @@ -2765,6 +2815,12 @@ $(B)/client/%.o: $(OGGDIR)/src/%.c $(B)/client/vorbis/%.o: $(VORBISDIR)/lib/%.c $(DO_CC) +$(B)/client/theora/%.o: $(THEORADIR)/lib/%.c + $(DO_CC) + +$(B)/client/theora/%.o: $(THEORADIR)/lib/x86/%.c + $(DO_CC) + $(B)/client/opus/%.o: $(OPUSDIR)/src/%.c $(DO_CC) diff --git 
a/engine/code/cgame/cg_local.h b/engine/code/cgame/cg_local.h index c7af11b0..8ae07e5b 100644 --- a/engine/code/cgame/cg_local.h +++ b/engine/code/cgame/cg_local.h @@ -1520,12 +1520,8 @@ screenPlacement_e CG_GetScreenVerticalPlacement(void); void CG_AdjustFrom640( float *x, float *y, float *w, float *h ); void CG_FillRect( float x, float y, float width, float height, const float *color ); void CG_DrawPic( float x, float y, float width, float height, qhandle_t hShader ); -void CG_DrawString( float x, float y, const char *string, - float charWidth, float charHeight, const float *modulate ); - - -void CG_DrawStringExt( int x, int y, const char *string, const float *setColor, - qboolean forceColor, qboolean shadow, int charWidth, int charHeight, int maxChars ); +void CG_DrawString( float x, float y, const char *string, float charWidth, float charHeight, const float *modulate ); +void CG_DrawStringExt( int x, int y, const char *string, const float *setColor, qboolean forceColor, qboolean shadow, int charWidth, int charHeight, int maxChars ); void CG_DrawBigString( int x, int y, const char *s, float alpha ); void CG_DrawBigStringColor( int x, int y, const char *s, vec4_t color ); void CG_DrawSmallString( int x, int y, const char *s, float alpha ); @@ -1555,15 +1551,10 @@ extern char systemChat[256]; extern char teamChat1[256]; extern char teamChat2[256]; -// Q3Rally Code Start float CG_DrawScores( float x, float y ); -// Q3Rally Code END void CG_AddLagometerFrameInfo( void ); void CG_AddLagometerSnapshotInfo( snapshot_t *snap ); void CG_CenterPrint( const char *str, int y, int charWidth ); -// Q3Rally Code (removed function) -// void CG_DrawHead( float x, float y, float w, float h, int clientNum, vec3_t headAngles ); -// Q3Rally Code END void CG_DrawActive( stereoFrame_t stereoView ); void CG_DrawFlagModel( float x, float y, float w, float h, int team, qboolean force2D ); void CG_DrawTeamBackground( int x, int y, int w, int h, float alpha, int team ); @@ -1876,7 +1867,7 
@@ float CG_DrawUpperRightHUD( float y ); float CG_DrawLowerRightHUD( float y ); float CG_DrawLowerLeftHUD( float y ); void CG_DrawMMap( float x, float y, float w, float h ); -// void CG_DrawHUD_DerbyList(float x, float y); +void CG_DrawHUD_DerbyList(float x, float y); // diff --git a/engine/code/cgame/cg_rally_hud.c b/engine/code/cgame/cg_rally_hud.c index f24f8f08..ac506262 100644 --- a/engine/code/cgame/cg_rally_hud.c +++ b/engine/code/cgame/cg_rally_hud.c @@ -352,9 +352,8 @@ static float CG_DrawArrowToCheckpoint( float y ) { CG_Draw3DLine( cent->currentState.origin, cg.snap->ps.origin ); */ - - CG_DrawStringExt( x, SCREEN_HEIGHT * .30, "WRONG WAY!", color, qfalse, qtrue, - BIGCHAR_WIDTH, (int)(BIGCHAR_WIDTH * 1.5), 0 ); +// CG_SetScreenPlacement(PLACE_CENTER, PLACE_CENTER); + CG_DrawStringExt( x, SCREEN_HEIGHT * .30, "WRONG WAY!", color, qfalse, qtrue, BIGCHAR_WIDTH, (int)(BIGCHAR_WIDTH * 1.5), 0 ); return y; } diff --git a/engine/code/cgame/cg_rally_hud2.c b/engine/code/cgame/cg_rally_hud2.c index b949fb7b..58bedc7f 100644 --- a/engine/code/cgame/cg_rally_hud2.c +++ b/engine/code/cgame/cg_rally_hud2.c @@ -467,7 +467,7 @@ qboolean CG_DrawHUD( void ) { break; case GT_DERBY: - CG_DrawHUD_DerbyList(44, 130); +// CG_DrawHUD_DerbyList(44, 130); break; } diff --git a/engine/code/client/cl_cin.c b/engine/code/client/cl_cin.c index cdc3ab3e..4e0dbbdf 100644 --- a/engine/code/client/cl_cin.c +++ b/engine/code/client/cl_cin.c @@ -73,6 +73,10 @@ static unsigned short vq2[256*16*4]; static unsigned short vq4[256*64*4]; static unsigned short vq8[256*256*4]; +typedef enum { + FT_ROQ = 0, // normal roq (vq3 stuff) + FT_OGM // ogm(ogg wrapper, vorbis audio, xvid/theora video) for WoP +} filetype_t; typedef struct { byte linbuf[DEFAULT_CIN_WIDTH*DEFAULT_CIN_HEIGHT*4*2]; @@ -125,6 +129,7 @@ typedef struct { int playonwalls; byte* buf; long drawX, drawY; + filetype_t fileType; } cin_cache; static cinematics_t cin; @@ -387,8 +392,9 @@ static void blit8_32( byte *src, byte *dst, 
int spl ) ******************************************************************************/ static void blit4_32( byte *src, byte *dst, int spl ) { - int i; + int i; + for(i = 0; i < 4; ++i) { memmove(dst, src, 16); @@ -505,7 +511,7 @@ int spl; * ******************************************************************************/ -static void ROQ_GenYUVTables( void ) +void ROQ_GenYUVTables( void ) { float t_ub,t_vr,t_ug,t_vg; long i; @@ -612,6 +618,51 @@ static unsigned int yuv_to_rgb24( long y, long u, long v ) return LittleLong ((unsigned long)((r)|(g<<8)|(b<<16))|(255UL<<24)); } +/****************************************************************************** +* +* Function: Frame_yuv_to_rgb24 +* +* Description: Used by the Theora(ogm) code +* moved the convertion into one function, to reduce the number of function-calls +* +******************************************************************************/ +void Frame_yuv_to_rgb24(const unsigned char *y, const unsigned char *u, const unsigned char *v, + int width, int height, int y_stride, int uv_stride, + int yWShift, int uvWShift, int yHShift, int uvHShift, unsigned int *output) +{ + int i, j, uvI; + long r, g, b, YY; + + for(j = 0; j < height; ++j) + { + for(i = 0; i < width; ++i) + { + YY = (long)(ROQ_YY_tab[(y[(i >> yWShift) + (j >> yHShift) * y_stride])]); + uvI = (i >> uvWShift) + (j >> uvHShift) * uv_stride; + + r = (YY + ROQ_VR_tab[v[uvI]]) >> 6; + g = (YY + ROQ_UG_tab[u[uvI]] + ROQ_VG_tab[v[uvI]]) >> 6; + b = (YY + ROQ_UB_tab[u[uvI]]) >> 6; + + if(r < 0) + r = 0; + if(g < 0) + g = 0; + if(b < 0) + b = 0; + if(r > 255) + r = 255; + if(g > 255) + g = 255; + if(b > 255) + b = 255; + + *output = LittleLong((r) | (g << 8) | (b << 16) | (255 << 24)); + ++output; + } + } +} + /****************************************************************************** * * Function: @@ -1262,10 +1313,12 @@ static void RoQ_init( void ) * ******************************************************************************/ +//FIXME: this 
isn't realy a "roq-shutdown" (it's more a CIN-shutdown, beside the file-closing) static void RoQShutdown( void ) { const char *s; if (!cinTable[currentHandle].buf) { + //FIXME: there could be something that should be "shutdowned" even if we don't have a output frame (at least in the ogm code) return; } @@ -1294,6 +1347,11 @@ static void RoQShutdown( void ) { CL_handle = -1; } cinTable[currentHandle].fileName[0] = 0; + if (cinTable[currentHandle].fileType == FT_OGM) + { + Cin_OGM_Shutdown(); + cinTable[currentHandle].buf = NULL; + } currentHandle = -1; } @@ -1364,6 +1422,78 @@ e_status CIN_RunCinematic (int handle) return cinTable[currentHandle].status; } +if (cinTable[currentHandle].fileType == FT_OGM) + { + if (Cin_OGM_Run(cinTable[currentHandle].startTime == 0 ? 0 : CL_ScaledMilliseconds() - cinTable[currentHandle].startTime)) + cinTable[currentHandle].status = FMV_EOF; + else + { + int newW, newH; + qboolean resolutionChange = qfalse; + + cinTable[currentHandle].buf = Cin_OGM_GetOutput(&newW, &newH); + + if (newW != cinTable[currentHandle].CIN_WIDTH) + { + cinTable[currentHandle].CIN_WIDTH = newW; + resolutionChange = qtrue; + } + if (newH != cinTable[currentHandle].CIN_HEIGHT) + { + cinTable[currentHandle].CIN_HEIGHT = newH; + resolutionChange = qtrue; + } + + if (resolutionChange) + { + cinTable[currentHandle].drawX = cinTable[currentHandle].CIN_WIDTH; + cinTable[currentHandle].drawY = cinTable[currentHandle].CIN_HEIGHT; + + // some old drivers can't do it at all + if (cls.glconfig.maxTextureSize <= 256) { + if (cinTable[currentHandle].drawX>256) { + cinTable[currentHandle].drawX = 256; + } + if (cinTable[currentHandle].drawY>256) { + cinTable[currentHandle].drawY = 256; + } + if (cinTable[currentHandle].CIN_WIDTH != 256 || cinTable[currentHandle].CIN_HEIGHT != 256) { + Com_Printf("HACK: approxmimating cinematic to 256x256 from %dx%d\n", cinTable[currentHandle].CIN_WIDTH, cinTable[currentHandle].CIN_HEIGHT); + } + } + } + + cinTable[currentHandle].status = 
FMV_PLAY; + cinTable[currentHandle].dirty = qtrue; + } + + if (!cinTable[currentHandle].startTime) + cinTable[currentHandle].startTime = CL_ScaledMilliseconds(); + + if (cinTable[currentHandle].status == FMV_EOF) + { + if (cinTable[currentHandle].holdAtEnd) + { + cinTable[currentHandle].status = FMV_IDLE; + } + else if (cinTable[currentHandle].looping) + { + Cin_OGM_Shutdown(); + Cin_OGM_Init(cinTable[currentHandle].fileName); + cinTable[currentHandle].buf = NULL; + cinTable[currentHandle].startTime = 0; + cinTable[currentHandle].status = FMV_PLAY; + } + else + { + RoQShutdown(); /* RoQShutdown() may reset currentHandle to -1 */ + return FMV_EOF; /* so never index cinTable[] with the handle after it */ + } + } + + return cinTable[currentHandle].status; + } + thisTime = CL_ScaledMilliseconds(); if (cinTable[currentHandle].shader && (abs(thisTime - cinTable[currentHandle].lastTime))>100) { cinTable[currentHandle].startTime += thisTime - cinTable[currentHandle].lastTime; @@ -1399,6 +1529,97 @@ e_status CIN_RunCinematic (int handle) return cinTable[currentHandle].status; } +// Also see S_TheCheckExtension +qboolean CIN_TheCheckExtension(char *filename) +{ + enum + { + CIN_RoQ, + CIN_roq, +#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA)) + CIN_ogm, + CIN_ogv, +#endif + CIN_MAX + }; + const char cin_ext[CIN_MAX][4] = { "RoQ\0", "roq\0" +#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA)) + , "ogm\0", "ogv\0" +#endif + }; + qboolean skipCin[CIN_MAX] = { qfalse, qfalse +#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA)) + , qfalse, qfalse +#endif + }; + fileHandle_t hnd; + char fn[MAX_OSPATH]; /* sized like the MAX_OSPATH buffer CIN_PlayCinematic passes in */ + int stringlen = strlen(filename); + char *extptr; + int i; + if (stringlen + 5 > (int)sizeof(fn)) return qfalse; /* keep room for an appended .RoQ suffix and the terminator */ + strncpy(fn, filename, stringlen+1); + extptr = strrchr(fn, '.'); + + if(!extptr) + { + extptr = &fn[stringlen]; + + extptr[0] = '.'; + extptr[1] = 'R'; + extptr[2] = 'o'; + extptr[3] = 'Q'; + extptr[4] = '\0'; + + stringlen += 4; + + skipCin[CIN_RoQ] = qtrue; + } + + FS_FOpenFileRead(fn, &hnd, qtrue); + + if (!hnd) + { +
extptr++; + + for (i = 0; i < CIN_MAX; i++) + { + if (!strcmp(extptr, cin_ext[i])) + { + skipCin[i] = qtrue; + break; + } + } + + for (i = 0; i < CIN_MAX; i++) + { + if (skipCin[i]) { + continue; + } + + extptr[0] = cin_ext[i][0]; + extptr[1] = cin_ext[i][1]; + extptr[2] = cin_ext[i][2]; + extptr[3] = '\0'; + + FS_FOpenFileRead(fn, &hnd, qtrue); + + if (hnd) { + break; + } + } + + if(!hnd) { + return qfalse; + } + } + + FS_FCloseFile(hnd); + strcpy(filename, fn); + + return qtrue; +} + /* ================== CIN_PlayCinematic @@ -1407,6 +1628,9 @@ CIN_PlayCinematic int CIN_PlayCinematic( const char *arg, int x, int y, int w, int h, int systemBits ) { unsigned short RoQID; char name[MAX_OSPATH]; +#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA)) + const char *ext; +#endif int i; if (strstr(arg, "/") == NULL && strstr(arg, "\\") == NULL) { @@ -1415,6 +1639,12 @@ int CIN_PlayCinematic( const char *arg, int x, int y, int w, int h, int systemBi Com_sprintf (name, sizeof(name), "%s", arg); } + if (!CIN_TheCheckExtension(name)) + { + // Can't find video + return -1; + } + if (!(systemBits & CIN_system)) { for ( i = 0 ; i < MAX_VIDEO_HANDLES ; i++ ) { if (!strcmp(cinTable[i].fileName, name) ) { @@ -1428,10 +1658,56 @@ int CIN_PlayCinematic( const char *arg, int x, int y, int w, int h, int systemBi Com_Memset(&cin, 0, sizeof(cinematics_t) ); currentHandle = CIN_HandleForVideo(); + Com_Memset(&cinTable[currentHandle], 0, sizeof(cin_cache)); + cin.currentHandle = currentHandle; strcpy(cinTable[currentHandle].fileName, name); +#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA)) + ext = COM_GetExtension(name); + if (!Q_stricmp(ext, "ogm") || !Q_stricmp(ext, "ogv")) + { + if (Cin_OGM_Init(name)) + { + Com_DPrintf("starting ogm-playback failed(%s)\n", arg); + cinTable[currentHandle].fileName[0] = 0; + Cin_OGM_Shutdown(); + return -1; + } + + cinTable[currentHandle].fileType = FT_OGM; + + 
CIN_SetExtents(currentHandle, x, y, w, h); + CIN_SetLooping(currentHandle, (systemBits & CIN_loop) != 0); + + cinTable[currentHandle].holdAtEnd = (systemBits & CIN_hold) != 0; + cinTable[currentHandle].alterGameState = (systemBits & CIN_system) != 0; + cinTable[currentHandle].playonwalls = 1; + cinTable[currentHandle].silent = (systemBits & CIN_silent) != 0; + cinTable[currentHandle].shader = (systemBits & CIN_shader) != 0; + +/* we will set this info after the first xvid-frame + cinTable[currentHandle].CIN_HEIGHT = DEFAULT_CIN_HEIGHT; + cinTable[currentHandle].CIN_WIDTH = DEFAULT_CIN_WIDTH; +*/ + + if (cinTable[currentHandle].alterGameState) { + CL_ShowMainMenu(); + } else { + cinTable[currentHandle].playonwalls = cl_inGameVideo->integer; + } + + if (cinTable[currentHandle].alterGameState) { + clc.state = CA_CINEMATIC; + } + + cinTable[currentHandle].status = FMV_PLAY; + + return currentHandle; + } +#endif + cinTable[currentHandle].ROQSize = 0; cinTable[currentHandle].ROQSize = FS_FOpenFileRead (cinTable[currentHandle].fileName, &cinTable[currentHandle].iFile, qtrue); diff --git a/engine/code/client/cl_cin_ogm.c b/engine/code/client/cl_cin_ogm.c new file mode 100644 index 00000000..c40b42d3 --- /dev/null +++ b/engine/code/client/cl_cin_ogm.c @@ -0,0 +1,999 @@ +/* +=========================================================================== +Copyright (C) 2008 Stefan Langer + +This file is part of Spearmint Source Code. + +Spearmint Source Code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 3 of the License, +or (at your option) any later version. + +Spearmint Source Code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with Spearmint Source Code. If not, see <http://www.gnu.org/licenses/>. + +In addition, Spearmint Source Code is also subject to certain additional terms. +You should have received a copy of these additional terms immediately following +the terms and conditions of the GNU General Public License. If not, please +request a copy in writing from id Software at the address below. + +If you have questions concerning this license or the applicable additional +terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., +Suite 120, Rockville, Maryland 20850 USA. +=========================================================================== +*/ + +/* + + This is a "ogm"-decoder to use a "better"(smaller files,higher resolutions) Cinematic-Format than roq + + In this code "ogm" is only: ogg wrapper, vorbis audio, xvid video (or theora video) + (ogm(Ogg Media) in general is ogg wrapper with all kind of audio/video/subtitle/...) + +... infos used for this src: +xvid: + * examples/xvid_decraw.c + * xvid.h +ogg/vorbis: + * decoder_example.c (libvorbis src) + * libogg Documentation ( http://www.xiph.org/ogg/doc/libogg/ ) + * VLC ogg demux ( http://trac.videolan.org/vlc/browser/trunk/modules/demux/ogg.c ) +theora: + * theora doxygen docs (1.0beta1) +*/ + +#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA)) + +#include <ogg/ogg.h> +#include <vorbis/codec.h> + +#ifdef USE_CIN_XVID +#include <xvid.h> +#endif +#ifdef USE_CIN_THEORA +#include <theora/theora.h> +#endif + +#include "client.h" +#include "snd_local.h" + +#define OGG_BUFFER_SIZE 8*1024 //4096 + +typedef struct +{ + fileHandle_t ogmFile; + + ogg_sync_state oy; /* sync and verify incoming physical bitstream */ + //ogg_stream_state os; /* take physical pages, weld into a logical stream of packets */ + ogg_stream_state os_audio; + ogg_stream_state os_video; + + vorbis_dsp_state vd; /* central working state for the packet->PCM decoder */ + vorbis_info vi; /* struct that stores all the static 
vorbis bitstream settings */ + vorbis_comment vc; /* struct that stores all the bitstream user comments */ + + qboolean videoStreamIsXvid; //FIXME: atm there isn't realy a check for this (all "video" streams are handelt as xvid, because xvid support more than one "subtype") +#ifdef USE_CIN_XVID + xvid_dec_stats_t xvid_dec_stats; + void *xvid_dec_handle; +#endif + qboolean videoStreamIsTheora; +#ifdef USE_CIN_THEORA + theora_info th_info; // dump_video.c(example decoder): ti + theora_comment th_comment; // dump_video.c(example decoder): tc + theora_state th_state; // dump_video.c(example decoder): td + + yuv_buffer th_yuvbuffer; +#endif + + unsigned char *outputBuffer; + int outputWidht; + int outputHeight; + int outputBufferSize; // in Pixel (so "real Bytesize" = outputBufferSize*4) + int VFrameCount; // output video-stream + ogg_int64_t Vtime_unit; + int currentTime; // input from Run-function +} cin_ogm_t; + +static cin_ogm_t g_ogm; + +int nextNeededVFrame(void); + + +/* ####################### ####################### + + XVID + +*/ +#ifdef USE_CIN_XVID + +#define BPP 4 + +static int init_xvid(void) +{ + int ret; + + xvid_gbl_init_t xvid_gbl_init; + xvid_dec_create_t xvid_dec_create; + + /* Reset the structure with zeros */ + memset(&xvid_gbl_init, 0, sizeof(xvid_gbl_init_t)); + memset(&xvid_dec_create, 0, sizeof(xvid_dec_create_t)); + + /* Version */ + xvid_gbl_init.version = XVID_VERSION; + + xvid_gbl_init.cpu_flags = 0; + xvid_gbl_init.debug = 0; + + xvid_global(NULL, 0, &xvid_gbl_init, NULL); + + /* Version */ + xvid_dec_create.version = XVID_VERSION; + + /* + * Image dimensions -- set to 0, xvidcore will resize when ever it is + * needed + */ + xvid_dec_create.width = 0; + xvid_dec_create.height = 0; + + ret = xvid_decore(NULL, XVID_DEC_CREATE, &xvid_dec_create, NULL); + + g_ogm.xvid_dec_handle = xvid_dec_create.handle; + + return (ret); +} + +static int dec_xvid(unsigned char *input, int input_size) +{ + int ret; + + xvid_dec_frame_t xvid_dec_frame; + + /* 
Reset all structures */ + memset(&xvid_dec_frame, 0, sizeof(xvid_dec_frame_t)); + memset(&g_ogm.xvid_dec_stats, 0, sizeof(xvid_dec_stats_t)); + + /* Set version */ + xvid_dec_frame.version = XVID_VERSION; + g_ogm.xvid_dec_stats.version = XVID_VERSION; + + /* No general flags to set */ + xvid_dec_frame.general = XVID_LOWDELAY; //0; + + /* Input stream */ + xvid_dec_frame.bitstream = input; + xvid_dec_frame.length = input_size; + + /* Output frame structure */ + xvid_dec_frame.output.plane[0] = g_ogm.outputBuffer; + xvid_dec_frame.output.stride[0] = g_ogm.outputWidht * BPP; + if(g_ogm.outputBuffer == NULL) + xvid_dec_frame.output.csp = XVID_CSP_NULL; + else + xvid_dec_frame.output.csp = XVID_CSP_RGBA; // example was with XVID_CSP_I420 + + ret = xvid_decore(g_ogm.xvid_dec_handle, XVID_DEC_DECODE, &xvid_dec_frame, &g_ogm.xvid_dec_stats); + + return (ret); +} + +static int shutdown_xvid(void) +{ + int ret = 0; + + if(g_ogm.xvid_dec_handle) + ret = xvid_decore(g_ogm.xvid_dec_handle, XVID_DEC_DESTROY, NULL, NULL); + + return (ret); +} +#endif + +/* ####################### ####################### + + OGG/OGM + ... 
also calls to vorbis/theora-libs + +*/ + +/* + loadBlockToSync + + return: + !0 -> no data transferred +*/ +static int loadBlockToSync(void) +{ + int r = -1; + char *buffer; + int bytes; + + if(g_ogm.ogmFile) + { + buffer = ogg_sync_buffer(&g_ogm.oy, OGG_BUFFER_SIZE); + bytes = FS_Read(buffer, OGG_BUFFER_SIZE, g_ogm.ogmFile); + ogg_sync_wrote(&g_ogm.oy, bytes); + + r = (bytes == 0); + } + + return r; +} + +/* + loadPagesToStreams + + return: + !0 -> no data transferred (or not for all Streams) +*/ +static int loadPagesToStreams(void) +{ + int r = -1; + int AudioPages = 0; + int VideoPages = 0; + ogg_stream_state *osptr = NULL; + ogg_page og; + + while(!AudioPages || !VideoPages) + { + if(ogg_sync_pageout(&g_ogm.oy, &og) != 1) + break; + + if(g_ogm.os_audio.serialno == ogg_page_serialno(&og)) + { + osptr = &g_ogm.os_audio; + ++AudioPages; + } + if(g_ogm.os_video.serialno == ogg_page_serialno(&og)) + { + osptr = &g_ogm.os_video; + ++VideoPages; + } + + if(osptr != NULL) + { + ogg_stream_pagein(osptr, &og); + } + } + + if(AudioPages && VideoPages) + r = 0; + + return r; +} + +#define SIZEOF_RAWBUFF 4*1024 +static byte rawBuffer[SIZEOF_RAWBUFF]; + +#define MIN_AUDIO_PRELOAD 400 // in ms +#define MAX_AUDIO_PRELOAD 500 // in ms + + +/* + + return: audio wants more packets +*/ +static qboolean loadAudio(void) +{ + qboolean anyDataTransferred = qtrue; + float **pcm; + float *right, *left; + int samples, samplesNeeded; + int i; + short *ptr; + ogg_packet op; + vorbis_block vb; + + memset(&op, 0, sizeof(op)); + memset(&vb, 0, sizeof(vb)); + vorbis_block_init(&g_ogm.vd, &vb); + + while(anyDataTransferred && g_ogm.currentTime + MAX_AUDIO_PRELOAD > (int)(g_ogm.vd.granulepos * 1000 / g_ogm.vi.rate)) + { + anyDataTransferred = qfalse; + + if((samples = vorbis_synthesis_pcmout(&g_ogm.vd, &pcm)) > 0) + { + // vorbis -> raw + ptr = (short *)rawBuffer; + samplesNeeded = (SIZEOF_RAWBUFF) / (2 * 2); // (width*channel) + if(samples < samplesNeeded) + samplesNeeded = samples; + + left = 
pcm[0]; + right = (g_ogm.vi.channels > 1) ? pcm[1] : pcm[0]; + for(i = 0; i < samplesNeeded; ++i) + { + ptr[0] = (left[i] >= -1.0f && + left[i] <= 1.0f) ? left[i] * 32767.f : 32767 * ((left[i] > 0.0f) - (left[i] < 0.0f)); + ptr[1] = (right[i] >= -1.0f && + right[i] <= 1.0f) ? right[i] * 32767.f : 32767 * ((right[i] > 0.0f) - (right[i] < 0.0f)); + ptr += 2; //numChans; + } + + if(i > 0) + { + // tell libvorbis how many samples we actually consumed + vorbis_synthesis_read(&g_ogm.vd, i); + +// S_RawSamples(ssize, 22050, 2, 2, (byte *)sbuf, 1.0f, -1); + S_RawSamples(0, i, g_ogm.vi.rate, 2, 2, rawBuffer, 1.0f, -1); + + anyDataTransferred = qtrue; + } + } + + if(!anyDataTransferred) + { + // op -> vorbis + if(ogg_stream_packetout(&g_ogm.os_audio, &op)) + { + if(vorbis_synthesis(&vb, &op) == 0) + vorbis_synthesis_blockin(&g_ogm.vd, &vb); + anyDataTransferred = qtrue; + } + } + } + + vorbis_block_clear(&vb); + + if(g_ogm.currentTime + MIN_AUDIO_PRELOAD > (int)(g_ogm.vd.granulepos * 1000 / g_ogm.vi.rate)) + return qtrue; + else + return qfalse; +} + +/* + + return: 1 -> loaded a new Frame ( g_ogm.outputBuffer points to the actual frame ) + 0 -> no new Frame + <0 -> error +*/ +#ifdef USE_CIN_XVID +static int loadVideoFrameXvid(void) +{ + int r = 0; + ogg_packet op; + int used_bytes = 0; + + memset(&op, 0, sizeof(op)); + + while(!r && (ogg_stream_packetout(&g_ogm.os_video, &op))) + { + used_bytes = dec_xvid(op.packet, op.bytes); + if(g_ogm.xvid_dec_stats.type == XVID_TYPE_VOL) + { + if(g_ogm.outputWidht != g_ogm.xvid_dec_stats.data.vol.width || + g_ogm.outputHeight != g_ogm.xvid_dec_stats.data.vol.height) + { + g_ogm.outputWidht = g_ogm.xvid_dec_stats.data.vol.width; + g_ogm.outputHeight = g_ogm.xvid_dec_stats.data.vol.height; + Com_DPrintf("[XVID]new resolution %dx%d\n", g_ogm.outputWidht, g_ogm.outputHeight); + } + + if(g_ogm.outputBufferSize < g_ogm.xvid_dec_stats.data.vol.width * g_ogm.xvid_dec_stats.data.vol.height) + { + + g_ogm.outputBufferSize = 
g_ogm.xvid_dec_stats.data.vol.width * g_ogm.xvid_dec_stats.data.vol.height; + + /* Free old output buffer */ + if(g_ogm.outputBuffer) + free(g_ogm.outputBuffer); + + /* Allocate the new buffer */ + g_ogm.outputBuffer = (unsigned char *)malloc(g_ogm.outputBufferSize * 4); //FIXME? should the 4 stay for BPP? + if(g_ogm.outputBuffer == NULL) + { + g_ogm.outputBufferSize = 0; + r = -2; + break; + } + } + + // use the rest of this packet + used_bytes += dec_xvid(op.packet + used_bytes, op.bytes - used_bytes); + } + + // we got a real output frame ... + if(g_ogm.xvid_dec_stats.type > 0) + { + r = 1; + + ++g_ogm.VFrameCount; +// Com_Printf("frame infos: %d %d %d\n", xvid_dec_stats.data.vop.general, xvid_dec_stats.data.vop.time_base, xvid_dec_stats.data.vop.time_increment); +// Com_Printf("frame info time: %d (Frame# %d, %d)\n", xvid_dec_stats.data.vop.time_base, VFrameCount, (int)(VFrameCount*Vtime_unit/10000000)); + } + +// if((op.bytes-used_bytes)>0) +// Com_Printf("unused: %d(firstChar: %X)\n",(op.bytes-used_bytes),(int)(op.packet[used_bytes])); + } + + return r; +} +#endif + +/* + + return: 1 -> loaded a new Frame ( g_ogm.outputBuffer points to the actual frame ) + 0 -> no new Frame + <0 -> error +*/ +#ifdef USE_CIN_THEORA +/* +how many >> are needed to make y==x (shifting y>>i) +return: -1 -> no match + >=0 -> number of shifts +*/ +static int findSizeShift(int x, int y) +{ + int i; + + for(i = 0; (y >> i); ++i) + if(x == (y >> i)) + return i; + + return -1; +} + +static int loadVideoFrameTheora(void) +{ + int r = 0; + ogg_packet op; + + memset(&op, 0, sizeof(op)); + + while(!r && (ogg_stream_packetout(&g_ogm.os_video, &op))) + { + ogg_int64_t th_frame; + + theora_decode_packetin(&g_ogm.th_state, &op); + + th_frame = theora_granule_frame(&g_ogm.th_state, g_ogm.th_state.granulepos); + + if((g_ogm.VFrameCount < th_frame && th_frame >= nextNeededVFrame()) || !g_ogm.outputBuffer) + { +// int i,j; + int yWShift, uvWShift; + int yHShift, uvHShift; + + 
if(theora_decode_YUVout(&g_ogm.th_state, &g_ogm.th_yuvbuffer)) + continue; + + if(g_ogm.outputWidht != g_ogm.th_info.width || g_ogm.outputHeight != g_ogm.th_info.height) + { + g_ogm.outputWidht = g_ogm.th_info.width; + g_ogm.outputHeight = g_ogm.th_info.height; + Com_DPrintf("[Theora(ogg)]new resolution %dx%d\n", g_ogm.outputWidht, g_ogm.outputHeight); + } + + if(g_ogm.outputBufferSize < g_ogm.th_info.width * g_ogm.th_info.height) + { + + g_ogm.outputBufferSize = g_ogm.th_info.width * g_ogm.th_info.height; + + /* Free old output buffer */ + if(g_ogm.outputBuffer) + free(g_ogm.outputBuffer); + + /* Allocate the new buffer */ + g_ogm.outputBuffer = (unsigned char *)malloc(g_ogm.outputBufferSize * 4); + if(g_ogm.outputBuffer == NULL) + { + g_ogm.outputBufferSize = 0; + r = -2; + break; + } + } + + yWShift = findSizeShift(g_ogm.th_yuvbuffer.y_width, g_ogm.th_info.width); + uvWShift = findSizeShift(g_ogm.th_yuvbuffer.uv_width, g_ogm.th_info.width); + yHShift = findSizeShift(g_ogm.th_yuvbuffer.y_height, g_ogm.th_info.height); + uvHShift = findSizeShift(g_ogm.th_yuvbuffer.uv_height, g_ogm.th_info.height); + + if(yWShift < 0 || uvWShift < 0 || yHShift < 0 || uvHShift < 0) + { + Com_Printf("[Theora] unexpected resolution in a yuv-Frame\n"); + r = -1; + } + else + { + + Frame_yuv_to_rgb24(g_ogm.th_yuvbuffer.y, g_ogm.th_yuvbuffer.u, g_ogm.th_yuvbuffer.v, + g_ogm.th_info.width, g_ogm.th_info.height, g_ogm.th_yuvbuffer.y_stride, + g_ogm.th_yuvbuffer.uv_stride, yWShift, uvWShift, yHShift, uvHShift, + (unsigned int *)g_ogm.outputBuffer); + +/* unsigned char* pixelPtr = g_ogm.outputBuffer; + unsigned int* pixPtr; + pixPtr = (unsigned int*)g_ogm.outputBuffer; + + //TODO: use one yuv->rgb funktion for the hole frame (the big amout of stack movement(yuv->rgb calls) couldn't be good ;) ) + for(j=0;jrgb code + *pixPtr++ = yuv_to_rgb24( g_ogm.th_yuvbuffer.y[(i>>yWShift)+(j>>yHShift)*g_ogm.th_yuvbuffer.y_stride], + 
g_ogm.th_yuvbuffer.u[(i>>uvWShift)+(j>>uvHShift)*g_ogm.th_yuvbuffer.uv_stride], + g_ogm.th_yuvbuffer.v[(i>>uvWShift)+(j>>uvHShift)*g_ogm.th_yuvbuffer.uv_stride]); +#endif + } + } +*/ + + r = 1; + g_ogm.VFrameCount = th_frame; + } + } + + + } + + return r; +} +#endif + + +/* + + return: 1 -> loaded a new Frame ( g_ogm.outputBuffer points to the actual frame ) + 0 -> no new Frame + <0 -> error +*/ +static int loadVideoFrame(void) +{ +#ifdef USE_CIN_XVID + if(g_ogm.videoStreamIsXvid) + return loadVideoFrameXvid(); +#endif +#ifdef USE_CIN_THEORA + if(g_ogm.videoStreamIsTheora) + return loadVideoFrameTheora(); +#endif + + // if we come to this point, there will be no codec that use the stream content ... + if(g_ogm.os_video.serialno) + { + ogg_packet op; + + while(ogg_stream_packetout(&g_ogm.os_video, &op)); + } + + return 1; +} + +/* + + return: qtrue => noDataTransferred +*/ +static qboolean loadFrame(void) +{ + qboolean anyDataTransferred = qtrue; + qboolean needVOutputData = qtrue; + +// qboolean audioSDone = qfalse; +// qboolean videoSDone = qfalse; + qboolean audioWantsMoreData = qfalse; + int status; + + while(anyDataTransferred && (needVOutputData || audioWantsMoreData)) + { + anyDataTransferred = qfalse; + +// xvid -> "gl" ? videoDone : needPacket +// vorbis -> raw sound ? audioDone : needPacket +// anyDataTransferred = videoDone && audioDone; +// needVOutputData = videoDone && audioDone; +// if needPacket + { +// videoStream -> xvid ? videoStreamDone : needPage +// audioSteam -> vorbis ? 
audioStreamDone : needPage
+//			anyDataTransferred = audioStreamDone && audioStreamDone;
+
+			if(needVOutputData && (status = loadVideoFrame()))
+			{
+				needVOutputData = qfalse;
+				if(status > 0)
+					anyDataTransferred = qtrue;
+				else
+					anyDataTransferred = qfalse;	// error (we don't need any videodata and we had no transferred)
+			}
+
+//			if needPage
+			if(needVOutputData || audioWantsMoreData)
+			{
+				// try to transfer Pages to the audio- and video-Stream
+				if(loadPagesToStreams())
+				{
+					// try to load a datablock from file
+					anyDataTransferred |= !loadBlockToSync();
+				}
+				else
+					anyDataTransferred = qtrue;	// successful loadPagesToStreams()
+			}
+
+			// load all Audio after loading new pages ...
+			if(g_ogm.VFrameCount > 1)	// wait some videoframes (it's better to have some delay, than a lagy sound)
+				audioWantsMoreData = loadAudio();
+		}
+	}
+
+//	ogg_packet_clear(&op);
+
+	return !anyDataTransferred;
+}
+
+//from VLC ogg.c ( http://trac.videolan.org/vlc/browser/trunk/modules/demux/ogg.c )
+// Layout of an OGM stream header; Cin_OGM_Init overlays it on a page body
+// (starting at body + 1) to read the video size and frame duration.
+typedef struct
+{
+	char            streamtype[8];
+	char            subtype[4];
+
+	ogg_int32_t     size;	/* size of the structure */
+
+	ogg_int64_t     time_unit;	/* in reference time */// in 10^-7 seconds (dT between frames)
+	ogg_int64_t     samples_per_unit;
+	ogg_int32_t     default_len;	/* in media time */
+
+	ogg_int32_t     buffersize;
+	ogg_int16_t     bits_per_sample;
+
+	union
+	{
+		struct
+		{
+			ogg_int32_t     width;
+			ogg_int32_t     height;
+		} stream_header_video;
+
+		struct
+		{
+			ogg_int16_t     channels;
+			ogg_int16_t     blockalign;
+			ogg_int32_t     avgbytespersec;
+		} stream_header_audio;
+	} sh;
+} stream_header_t;
+
+/*
+  Tests whether at most one bit is set in x.
+
+  x ~> value to test (only its bit pattern matters)
+  return:	qtrue	=> zero or exactly one bit set (so 0 is accepted too)
+			qfalse	=> two or more bits set
+*/
+qboolean isPowerOf2(int x)
+{
+	// Work on the unsigned bit pattern: the former loop shifted 1 into the
+	// sign bit of an int, which is undefined behaviour in C.
+	unsigned int    bits = (unsigned int)x;
+
+	// Clearing the lowest set bit leaves 0 exactly when no second bit is set.
+	return ((bits & (bits - 1u)) == 0);
+}
+
+/*
+
+  return: 0 -> no problem
+*/
+//TODO: vorbis/theora-header&init in sub-functions
+//TODO: "clean" error-returns ... 
+int Cin_OGM_Init(const char *filename)
+{
+	/*
+	   Opens the ogm-file, locates one vorbis audio stream and one video
+	   stream (theora and/or xvid, depending on the build), reads the codec
+	   headers and initialises the decoders.
+
+	   filename ~> file to open through the engine file system
+	   return:	0	-> no problem
+				-1	-> file could not be opened
+				-2	-> both a "video" and a "theora" stream, or no audio stream
+				-3	-> no video stream
+				-4	-> xvid decoder init failed
+				-5/-6	-> video width/height rejected by isPowerOf2()
+				-8	-> corrupt ogg packet while reading codec headers
+				-9	-> first header packet is not vorbis/theora
+				-10	-> end of file before all headers were read
+				-11	-> theora header carries no usable frame rate
+
+	   NOTE(review): the error returns below leave the file and the ogg/codec
+	   state as they are; they are only released by Cin_OGM_Shutdown(), e.g.
+	   via the "already running" check on the next call. Confirm that callers
+	   shut down after a failed init.
+	 */
+	int             status;
+	ogg_page        og;
+	ogg_packet      op;
+	int             i;
+
+	if(g_ogm.ogmFile)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: it seams there was already a ogm running, it will be killed to start %s\n", filename);
+		Cin_OGM_Shutdown();
+	}
+
+	memset(&g_ogm, 0, sizeof(cin_ogm_t));
+
+	FS_FOpenFileRead(filename, &g_ogm.ogmFile, qtrue);
+	if(!g_ogm.ogmFile)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Can't open ogm-file for reading (%s)\n", filename);
+		return -1;
+	}
+
+	ogg_sync_init(&g_ogm.oy);	/* Now we can read pages */
+
+	//FIXME? can serialno be 0 in ogg? (better way to check inited?)
+	//TODO: support for more than one audio stream? / detect files with one stream(or without correct ones)
+	//FIXME(review): the strstr() calls below scan the page body as if it were a
+	//               NUL-terminated string; nothing here bounds them by the body length.
+	while(!g_ogm.os_audio.serialno || !g_ogm.os_video.serialno)
+	{
+		if(ogg_sync_pageout(&g_ogm.oy, &og) == 1)
+		{
+			if(strstr((char *)(og.body + 1), "vorbis"))
+			{
+				//FIXME? better way to find audio stream
+				if(g_ogm.os_audio.serialno)
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: more than one audio stream, in ogm-file(%s) ... we will stay at the first one\n", filename);
+				}
+				else
+				{
+					ogg_stream_init(&g_ogm.os_audio, ogg_page_serialno(&og));
+					ogg_stream_pagein(&g_ogm.os_audio, &og);
+				}
+			}
+#ifdef USE_CIN_THEORA
+			if(strstr((char *)(og.body + 1), "theora"))
+			{
+				if(g_ogm.os_video.serialno)
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: more than one video stream, in ogm-file(%s) ... we will stay at the first one\n", filename);
+				}
+				else
+				{
+					g_ogm.videoStreamIsTheora = qtrue;
+					ogg_stream_init(&g_ogm.os_video, ogg_page_serialno(&og));
+					ogg_stream_pagein(&g_ogm.os_video, &og);
+				}
+			}
+#endif
+#ifdef USE_CIN_XVID
+			if(strstr((char *)(og.body + 1), "video"))
+			{					//FIXME? better way to find video stream
+				if(g_ogm.os_video.serialno)
+				{
+					Com_Printf("more than one video stream, in ogm-file(%s) ... we will stay at the first one\n", filename);
+				}
+				else
+				{
+					stream_header_t *sh;
+
+					g_ogm.videoStreamIsXvid = qtrue;
+
+					sh = (stream_header_t *) (og.body + 1);
+					//TODO: one solution for checking xvid and theora
+					if(!isPowerOf2(sh->sh.stream_header_video.width))
+					{
+						Com_Printf("VideoWidth of the ogm-file isn't a power of 2 value (%s)\n", filename);
+
+						return -5;
+					}
+					if(!isPowerOf2(sh->sh.stream_header_video.height))
+					{
+						Com_Printf("VideoHeight of the ogm-file isn't a power of 2 value (%s)\n", filename);
+
+						return -6;
+					}
+
+					g_ogm.Vtime_unit = sh->time_unit;
+
+					ogg_stream_init(&g_ogm.os_video, ogg_page_serialno(&og));
+					ogg_stream_pagein(&g_ogm.os_video, &og);
+				}
+			}
+#endif
+		}
+		else if(loadBlockToSync())
+			break;				// no more data could be loaded; the checks below report what is missing
+	}
+
+	if(g_ogm.videoStreamIsXvid && g_ogm.videoStreamIsTheora)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Found \"video\"- and \"theora\"-stream ,ogm-file (%s)\n", filename);
+		return -2;
+	}
+
+#if 1
+	if(!g_ogm.os_audio.serialno)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Haven't found a audio(vorbis) stream in ogm-file (%s)\n", filename);
+		return -2;
+	}
+#endif
+	if(!g_ogm.os_video.serialno)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Haven't found a video stream in ogm-file (%s)\n", filename);
+		return -3;
+	}
+
+	//load vorbis header
+	vorbis_info_init(&g_ogm.vi);
+	vorbis_comment_init(&g_ogm.vc);
+	i = 0;
+	while(i < 3)				// three header packets are consumed
+	{
+		status = ogg_stream_packetout(&g_ogm.os_audio, &op);
+		if(status < 0)
+		{
+			Com_Printf(S_COLOR_YELLOW "WARNING: Corrupt ogg packet while loading vorbis-headers, ogm-file(%s)\n", filename);
+			return -8;
+		}
+		if(status > 0)
+		{
+			status = vorbis_synthesis_headerin(&g_ogm.vi, &g_ogm.vc, &op);
+			if(i == 0 && status < 0)
+			{
+				Com_Printf(S_COLOR_YELLOW "WARNING: This Ogg bitstream does not contain Vorbis audio data, ogm-file(%s)\n", filename);
+				return -9;
+			}
+			++i;
+		}
+		else if(loadPagesToStreams())
+		{
+			// no packet available and no page could be handed over: read more of the file
+			if(loadBlockToSync())
+			{
+				Com_Printf(S_COLOR_YELLOW "WARNING: Couldn't find all vorbis headers before end of ogm-file (%s)\n", filename);
+				return -10;
+			}
+		}
+	}
+
+	vorbis_synthesis_init(&g_ogm.vd, &g_ogm.vi);
+
+#ifdef USE_CIN_XVID
+	status = init_xvid();
+	if(status)
+	{
+		Com_Printf("[Xvid]Decore INIT problem, return value %d(ogm-file: %s)\n", status, filename);
+
+		return -4;
+	}
+#endif
+
+#ifdef USE_CIN_THEORA
+	if(g_ogm.videoStreamIsTheora)
+	{
+		ROQ_GenYUVTables();
+
+		theora_info_init(&g_ogm.th_info);
+		theora_comment_init(&g_ogm.th_comment);
+
+		i = 0;
+		while(i < 3)			// three header packets are consumed
+		{
+			status = ogg_stream_packetout(&g_ogm.os_video, &op);
+			if(status < 0)
+			{
+				Com_Printf(S_COLOR_YELLOW "WARNING: Corrupt ogg packet while loading theora-headers, ogm-file(%s)\n", filename);
+				return -8;
+			}
+			if(status > 0)
+			{
+				status = theora_decode_header(&g_ogm.th_info, &g_ogm.th_comment, &op);
+				if(i == 0 && status != 0)
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: This Ogg bitstream does not contain theora data, ogm-file(%s)\n", filename);
+					return -9;
+				}
+				++i;
+			}
+			else if(loadPagesToStreams())
+			{
+				if(loadBlockToSync())
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: Couldn't find all theora headers before end of ogm-file (%s)\n", filename);
+					return -10;
+				}
+			}
+		}
+
+		theora_decode_init(&g_ogm.th_state, &g_ogm.th_info);
+
+		if(!isPowerOf2(g_ogm.th_info.width))
+		{
+			Com_Printf(S_COLOR_YELLOW "WARNING: VideoWidth of the ogm-file isn't a power of 2 value (%s)\n", filename);
+			return -5;
+		}
+		if(!isPowerOf2(g_ogm.th_info.height))
+		{
+			Com_Printf(S_COLOR_YELLOW "WARNING: VideoHeight of the ogm-file isn't a power of 2 value (%s)\n", filename);
+			return -6;
+		}
+
+		// A zero numerator or denominator means the stream declares no frame
+		// rate: the division below would trap on a zero numerator, and a zero
+		// denominator would yield a zero frame duration.
+		if(!g_ogm.th_info.fps_numerator || !g_ogm.th_info.fps_denominator)
+		{
+			Com_Printf(S_COLOR_YELLOW "WARNING: Undefined frame rate in theora header, ogm-file (%s)\n", filename);
+			return -11;
+		}
+
+		// frame duration in 10^-7 s units: (den / num) seconds * 10^7
+		g_ogm.Vtime_unit = ((ogg_int64_t) g_ogm.th_info.fps_denominator * 1000 * 10000 / g_ogm.th_info.fps_numerator);
+	}
+#endif
+
+	Com_DPrintf("OGM-Init done (%s)\n", filename);
+
+	return 0;
+}
+
+/*
+  Index of the video frame that belongs to g_ogm.currentTime.
+
+  return: currentTime (ms, scaled by 10000 to 10^-7 s units) / Vtime_unit,
+          or 0 while no frame duration is known
+*/
+int nextNeededVFrame(void)
+{
+	// Vtime_unit comes straight from the stream header; never divide by zero.
+	if(!g_ogm.Vtime_unit)
+		return 0;
+
+	return (int)(g_ogm.currentTime * (ogg_int64_t) 10000 / g_ogm.Vtime_unit);
+}
+
+/*
+
+  time ~> time in ms to which the movie should run
+  return:	0 => nothing special
+			1 => eof
+*/
+int Cin_OGM_Run(int 
time)
+{
+
+	g_ogm.currentTime = time;
+
+	// Decode until at least one frame exists and the end of the newest decoded
+	// frame (VFrameCount * Vtime_unit, converted from 10^-7 s to ms) lies more
+	// than 20 ms past the requested time.
+	while(!g_ogm.VFrameCount || time + 20 >= (int)(g_ogm.VFrameCount * g_ogm.Vtime_unit / 10000))
+	{
+		// loadFrame() reports qtrue when no data could be transferred any more
+		if(loadFrame())
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+  Gives a Pointer to the current Output-Buffer
+  and the Resolution
+
+  outWidth, outHeight ~> receive the frame size; either may be NULL
+  return: g_ogm.outputBuffer (NULL after Cin_OGM_Shutdown)
+*/
+unsigned char  *Cin_OGM_GetOutput(int *outWidth, int *outHeight)
+{
+	if(outWidth != NULL)
+		*outWidth = g_ogm.outputWidht;
+	if(outHeight != NULL)
+		*outHeight = g_ogm.outputHeight;
+
+	return g_ogm.outputBuffer;
+}
+
+/*
+  Releases the decoders, the output buffer, the ogg state and the file.
+*/
+void Cin_OGM_Shutdown(void)
+{
+#ifdef USE_CIN_XVID
+	int             status;
+
+	status = shutdown_xvid();
+	if(status)
+		Com_Printf("[Xvid]Decore RELEASE problem, return value %d\n", status);
+#endif
+
+#ifdef USE_CIN_THEORA
+	theora_clear(&g_ogm.th_state);
+	theora_comment_clear(&g_ogm.th_comment);
+	theora_info_clear(&g_ogm.th_info);
+#endif
+
+	if(g_ogm.outputBuffer)
+		free(g_ogm.outputBuffer);
+	g_ogm.outputBuffer = NULL;
+
+	vorbis_dsp_clear(&g_ogm.vd);
+	vorbis_comment_clear(&g_ogm.vc);
+	vorbis_info_clear(&g_ogm.vi);	/* must be called last (comment from vorbis example code) */
+
+	ogg_stream_clear(&g_ogm.os_audio);
+	ogg_stream_clear(&g_ogm.os_video);
+
+	ogg_sync_clear(&g_ogm.oy);
+
+	// 0 is this module's "no file open" marker; only close a real handle
+	if(g_ogm.ogmFile)
+		FS_FCloseFile(g_ogm.ogmFile);
+	g_ogm.ogmFile = 0;
+}
+
+#else
+// Stubs used when this file is built without ogm support: init and run
+// report a non-zero status, there is never an output frame.
+int Cin_OGM_Init(const char *filename)
+{
+	return 1;
+}
+int Cin_OGM_Run(int time)
+{
+	return 1;
+}
+unsigned char  *Cin_OGM_GetOutput(int *outWidth, int *outHeight)
+{
+	return 0;
+}
+
+void Cin_OGM_Shutdown(void)
+{
+	return;
+}
+#endif
+
diff --git a/engine/code/client/client.h b/engine/code/client/client.h
index 9bc03068..70420f72 100644
--- a/engine/code/client/client.h
+++ b/engine/code/client/client.h
@@ -599,6 +599,21 @@ void CIN_SetLooping (int handle, qboolean loop);
 void CIN_UploadCinematic(int handle);
 void CIN_CloseAllVideos(void);
 
+// yuv->rgb will be used for Theora(ogm)
+void ROQ_GenYUVTables(void);
+void Frame_yuv_to_rgb24(const unsigned char *y, const unsigned char *u, const unsigned char *v,
+						int width, int height, int 
y_stride, int uv_stride, + int yWShift, int uvWShift, int yHShift, int uvHShift, unsigned int *output); + +// +// cl_cin_ogm.c +// + +int Cin_OGM_Init(const char *filename); +int Cin_OGM_Run(int time); +unsigned char *Cin_OGM_GetOutput(int *outWidth, int *outHeight); +void Cin_OGM_Shutdown(void); + // // cl_cgame.c // diff --git a/engine/code/libtheora-1.1.1/include/theora/codec.h b/engine/code/libtheora-1.1.1/include/theora/codec.h new file mode 100644 index 00000000..5c266963 --- /dev/null +++ b/engine/code/libtheora-1.1.1/include/theora/codec.h @@ -0,0 +1,591 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ + + ********************************************************************/ + +/**\mainpage + * + * \section intro Introduction + * + * This is the documentation for libtheora C API. + * The current reference + * implementation for Theora, a free, + * patent-unencumbered video codec. + * Theora is derived from On2's VP3 codec with additional features and + * integration with Ogg multimedia formats by + * the Xiph.Org Foundation. + * Complete documentation of the format itself is available in + * the Theora + * specification. + * + * \subsection Organization + * + * The functions documented here are actually subdivided into three + * separate libraries: + * - libtheoraenc contains the encoder interface, + * described in \ref encfuncs. 
+ * - libtheoradec contains the decoder interface and + * routines shared with the encoder. + * You must also link to this if you link to libtheoraenc. + * The routines in this library are described in \ref decfuncs and + * \ref basefuncs. + * - libtheora contains the \ref oldfuncs. + * + * New code should link to libtheoradec and, if using encoder + * features, libtheoraenc. Together these two export both + * the standard and the legacy API, so this is all that is needed by + * any code. The older libtheora library is provided just for + * compatibility with older build configurations. + * + * In general the recommended 1.x API symbols can be distinguished + * by their th_ or TH_ namespace prefix. + * The older, legacy API uses theora_ or OC_ + * prefixes instead. + */ + +/**\file + * The shared libtheoradec and libtheoraenc C API. + * You don't need to include this directly.*/ + +#if !defined(_O_THEORA_CODEC_H_) +# define _O_THEORA_CODEC_H_ (1) +# include <ogg/ogg.h> + +#if defined(__cplusplus) +extern "C" { +#endif + + + +/**\name Return codes*/ +/*@{*/ +/**An invalid pointer was provided.*/ +#define TH_EFAULT (-1) +/**An invalid argument was provided.*/ +#define TH_EINVAL (-10) +/**The contents of the header were incomplete, invalid, or unexpected.*/ +#define TH_EBADHEADER (-20) +/**The header does not belong to a Theora stream.*/ +#define TH_ENOTFORMAT (-21) +/**The bitstream version is too high.*/ +#define TH_EVERSION (-22) +/**The specified function is not implemented.*/ +#define TH_EIMPL (-23) +/**There were errors in the video data packet.*/ +#define TH_EBADPACKET (-24) +/**The decoded packet represented a dropped frame. + The player can continue to display the current frame, as the contents of the + decoded frame buffer have not changed.*/ +#define TH_DUPFRAME (1) +/*@}*/ + +/**The currently defined color space tags. 
+ * See the Theora + * specification, Chapter 4, for exact details on the meaning + * of each of these color spaces.*/ +typedef enum{ + /**The color space was not specified at the encoder. + It may be conveyed by an external means.*/ + TH_CS_UNSPECIFIED, + /**A color space designed for NTSC content.*/ + TH_CS_ITU_REC_470M, + /**A color space designed for PAL/SECAM content.*/ + TH_CS_ITU_REC_470BG, + /**The total number of currently defined color spaces.*/ + TH_CS_NSPACES +}th_colorspace; + +/**The currently defined pixel format tags. + * See the Theora + * specification, Section 4.4, for details on the precise sample + * locations.*/ +typedef enum{ + /**Chroma decimation by 2 in both the X and Y directions (4:2:0). + The Cb and Cr chroma planes are half the width and half the + height of the luma plane.*/ + TH_PF_420, + /**Currently reserved.*/ + TH_PF_RSVD, + /**Chroma decimation by 2 in the X direction (4:2:2). + The Cb and Cr chroma planes are half the width of the luma plane, but full + height.*/ + TH_PF_422, + /**No chroma decimation (4:4:4). + The Cb and Cr chroma planes are full width and full height.*/ + TH_PF_444, + /**The total number of currently defined pixel formats.*/ + TH_PF_NFORMATS +}th_pixel_fmt; + + + +/**A buffer for a single color plane in an uncompressed image. + * This contains the image data in a left-to-right, top-down format. + * Each row of pixels is stored contiguously in memory, but successive + * rows need not be. + * Use \a stride to compute the offset of the next row. + * The encoder accepts both positive \a stride values (top-down in memory) + * and negative (bottom-up in memory). 
+ * The decoder currently always generates images with positive strides.*/ +typedef struct{ + /**The width of this plane.*/ + int width; + /**The height of this plane.*/ + int height; + /**The offset in bytes between successive rows.*/ + int stride; + /**A pointer to the beginning of the first row.*/ + unsigned char *data; +}th_img_plane; + +/**A complete image buffer for an uncompressed frame. + * The chroma planes may be decimated by a factor of two in either + * direction, as indicated by th_info#pixel_fmt. + * The width and height of the Y' plane must be multiples of 16. + * They may need to be cropped for display, using the rectangle + * specified by th_info#pic_x, th_info#pic_y, th_info#pic_width, + * and th_info#pic_height. + * All samples are 8 bits. + * \note The term YUV often used to describe a colorspace is ambiguous. + * The exact parameters of the RGB to YUV conversion process aside, in + * many contexts the U and V channels actually have opposite meanings. + * To avoid this confusion, we are explicit: the name of the color + * channels are Y'CbCr, and they appear in that order, always. + * The prime symbol denotes that the Y channel is non-linear. + * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/ +typedef th_img_plane th_ycbcr_buffer[3]; + +/**Theora bitstream information. + * This contains the basic playback parameters for a stream, and corresponds to + * the initial 'info' header packet. + * To initialize an encoder, the application fills in this structure and + * passes it to th_encode_alloc(). + * A default encoding mode is chosen based on the values of the #quality and + * #target_bitrate fields. + * On decode, it is filled in by th_decode_headerin(), and then passed to + * th_decode_alloc(). + * + * Encoded Theora frames must be a multiple of 16 in size; + * this is what the #frame_width and #frame_height members represent. 
+ * To handle arbitrary picture sizes, a crop rectangle is specified in the + * #pic_x, #pic_y, #pic_width and #pic_height members. + * + * All frame buffers contain pointers to the full, padded frame. + * However, the current encoder will not reference pixels outside of + * the cropped picture region, and the application does not need to fill them + * in. + * The decoder will allocate storage for a full frame, but the + * application should not rely on the padding containing sensible + * data. + * + * It is also generally recommended that the offsets and sizes should still be + * multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled. + * See the Theora + * specification, Section 4.4, for more details. + * + * Frame rate, in frames per second, is stored as a rational fraction, as is + * the pixel aspect ratio. + * Note that this refers to the aspect ratio of the individual pixels, not of + * the overall frame itself. + * The frame aspect ratio can be computed from pixel aspect ratio using the + * image dimensions.*/ +typedef struct{ + /**\name Theora version + * Bitstream version information.*/ + /*@{*/ + unsigned char version_major; + unsigned char version_minor; + unsigned char version_subminor; + /*@}*/ + /**The encoded frame width. + * This must be a multiple of 16, and less than 1048576.*/ + ogg_uint32_t frame_width; + /**The encoded frame height. + * This must be a multiple of 16, and less than 1048576.*/ + ogg_uint32_t frame_height; + /**The displayed picture width. + * This must be no larger than width.*/ + ogg_uint32_t pic_width; + /**The displayed picture height. + * This must be no larger than height.*/ + ogg_uint32_t pic_height; + /**The X offset of the displayed picture. + * This must be no larger than #frame_width-#pic_width or 255, whichever is + * smaller.*/ + ogg_uint32_t pic_x; + /**The Y offset of the displayed picture. 
+ * This must be no larger than #frame_height-#pic_height, and + * #frame_height-#pic_height-#pic_y must be no larger than 255. + * This slightly funny restriction is due to the fact that the offset is + * specified from the top of the image for consistency with the standard + * graphics left-handed coordinate system used throughout this API, while + * it is stored in the encoded stream as an offset from the bottom.*/ + ogg_uint32_t pic_y; + /**\name Frame rate + * The frame rate, as a fraction. + * If either is 0, the frame rate is undefined.*/ + /*@{*/ + ogg_uint32_t fps_numerator; + ogg_uint32_t fps_denominator; + /*@}*/ + /**\name Aspect ratio + * The aspect ratio of the pixels. + * If either value is zero, the aspect ratio is undefined. + * If not specified by any external means, 1:1 should be assumed. + * The aspect ratio of the full picture can be computed as + * \code + * aspect_numerator*pic_width/(aspect_denominator*pic_height). + * \endcode */ + /*@{*/ + ogg_uint32_t aspect_numerator; + ogg_uint32_t aspect_denominator; + /*@}*/ + /**The color space.*/ + th_colorspace colorspace; + /**The pixel format.*/ + th_pixel_fmt pixel_fmt; + /**The target bit-rate in bits per second. + If initializing an encoder with this struct, set this field to a non-zero + value to activate CBR encoding by default.*/ + int target_bitrate; + /**The target quality level. + Valid values range from 0 to 63, inclusive, with higher values giving + higher quality. + If initializing an encoder with this struct, and #target_bitrate is set + to zero, VBR encoding at this quality will be activated by default.*/ + /*Currently this is set so that a qi of 0 corresponds to distortions of 24 + times the JND, and each increase by 16 halves that value. + This gives us fine discrimination at low qualities, yet effective rate + control at high qualities. + The qi value 63 is special, however. + For this, the highest quality, we use one half of a JND for our threshold. 
+ Due to the lower bounds placed on allowable quantizers in Theora, we will + not actually be able to achieve quality this good, but this should + provide as close to visually lossless quality as Theora is capable of. + We could lift the quantizer restrictions without breaking VP3.1 + compatibility, but this would result in quantized coefficients that are + too large for the current bitstream to be able to store. + We'd have to redesign the token syntax to store these large coefficients, + which would make transcoding complex.*/ + int quality; + /**The amount to shift to extract the last keyframe number from the granule + * position. + * This can be at most 31. + * th_info_init() will set this to a default value (currently 6, + * which is good for streaming applications), but you can set it to 0 to + * make every frame a keyframe. + * The maximum distance between key frames is + * 1<<#keyframe_granule_shift. + * The keyframe frequency can be more finely controlled with + * #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted + * during encoding (for example, to force the next frame to be a keyframe), + * but it cannot be set larger than the amount permitted by this field after + * the headers have been output.*/ + int keyframe_granule_shift; +}th_info; + +/**The comment information. + * + * This structure holds the in-stream metadata corresponding to + * the 'comment' header packet. + * The comment header is meant to be used much like someone jotting a quick + * note on the label of a video. + * It should be a short, to the point text note that can be more than a couple + * words, but not more than a short paragraph. + * + * The metadata is stored as a series of (tag, value) pairs, in + * length-encoded string vectors. + * The first occurrence of the '=' character delimits the tag and value. + * A particular tag may occur more than once, and order is significant. 
+ * The character set encoding for the strings is always UTF-8, but the tag + * names are limited to ASCII, and treated as case-insensitive. + * See the Theora + * specification, Section 6.3.3 for details. + * + * In filling in this structure, th_decode_headerin() will null-terminate + * the user_comment strings for safety. + * However, the bitstream format itself treats them as 8-bit clean vectors, + * possibly containing null characters, and so the length array should be + * treated as their authoritative length. + */ +typedef struct th_comment{ + /**The array of comment string vectors.*/ + char **user_comments; + /**An array of the corresponding length of each vector, in bytes.*/ + int *comment_lengths; + /**The total number of comment strings.*/ + int comments; + /**The null-terminated vendor string. + This identifies the software used to encode the stream.*/ + char *vendor; +}th_comment; + + + +/**A single base matrix.*/ +typedef unsigned char th_quant_base[64]; + +/**A set of \a qi ranges.*/ +typedef struct{ + /**The number of ranges in the set.*/ + int nranges; + /**The size of each of the #nranges ranges. + These must sum to 63.*/ + const int *sizes; + /**#nranges +1 base matrices. + Matrices \a i and i+1 form the endpoints of range \a i.*/ + const th_quant_base *base_matrices; +}th_quant_ranges; + +/**A complete set of quantization parameters. + The quantizer for each coefficient is calculated as: + \code + Q=MAX(MIN(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100), + 1024). + \endcode + + \a qti is the quantization type index: 0 for intra, 1 for inter. + ci!=0 is 0 for the DC coefficient and 1 for AC coefficients. + \a qi is the quality index, ranging between 0 (low quality) and 63 (high + quality). + \a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr. + \a ci is the DCT coefficient index. + Coefficient indices correspond to the normal 2D DCT block + ordering--row-major with low frequencies first--\em not zig-zag order. 
+ + Minimum quantizers are constant, and are given by: + \code + qmin[2][2]={{4,2},{8,4}}. + \endcode + + Parameters that can be stored in the bitstream are as follows: + - The two scale matrices ac_scale and dc_scale. + \code + scale[2][64]={dc_scale,ac_scale}. + \endcode + - The base matrices for each \a qi, \a qti and \a pli (up to 384 in all). + In order to avoid storing a full 384 base matrices, only a sparse set of + matrices are stored, and the rest are linearly interpolated. + This is done as follows. + For each \a qti and \a pli, a series of \a n \a qi ranges is defined. + The size of each \a qi range can vary arbitrarily, but they must sum to + 63. + Then, n+1 matrices are specified, one for each endpoint of the + ranges. + For interpolation purposes, each range's endpoints are the first \a qi + value it contains and one past the last \a qi value it contains. + Fractional values are rounded to the nearest integer, with ties rounded + away from zero. + + Base matrices are stored by reference, so if the same matrices are used + multiple times, they will only appear once in the bitstream. + The bitstream is also capable of omitting an entire set of ranges and + its associated matrices if they are the same as either the previous + set (indexed in row-major order) or if the inter set is the same as the + intra set. + + - Loop filter limit values. + The same limits are used for the loop filter in all color planes, despite + potentially differing levels of quantization in each. + + For the current encoder, scale[ci!=0][qi] must be no greater + than scale[ci!=0][qi-1] and base[qti][pli][qi][ci] must + be no greater than base[qti][pli][qi-1][ci]. + These two conditions ensure that the actual quantizer for a given \a qti, + \a pli, and \a ci does not increase as \a qi increases. 
+ This is not required by the decoder.*/ +typedef struct{ + /**The DC scaling factors.*/ + ogg_uint16_t dc_scale[64]; + /**The AC scaling factors.*/ + ogg_uint16_t ac_scale[64]; + /**The loop filter limit values.*/ + unsigned char loop_filter_limits[64]; + /**The \a qi ranges for each \a ci and \a pli.*/ + th_quant_ranges qi_ranges[2][3]; +}th_quant_info; + + + +/**The number of Huffman tables used by Theora.*/ +#define TH_NHUFFMAN_TABLES (80) +/**The number of DCT token values in each table.*/ +#define TH_NDCT_TOKENS (32) + +/**A Huffman code for a Theora DCT token. + * Each set of Huffman codes in a given table must form a complete, prefix-free + * code. + * There is no requirement that all the tokens in a table have a valid code, + * but the current encoder is not optimized to take advantage of this. + * If each of the five grouops of 16 tables does not contain at least one table + * with a code for every token, then the encoder may fail to encode certain + * frames. + * The complete table in the first group of 16 does not have to be in the same + * place as the complete table in the other groups, but the complete tables in + * the remaining four groups must all be in the same place.*/ +typedef struct{ + /**The bit pattern for the code, with the LSbit of the pattern aligned in + * the LSbit of the word.*/ + ogg_uint32_t pattern; + /**The number of bits in the code. + * This must be between 0 and 32, inclusive.*/ + int nbits; +}th_huff_code; + + + +/**\defgroup basefuncs Functions Shared by Encode and Decode*/ +/*@{*/ +/**\name Basic shared functions*/ +/*@{*/ +/**Retrieves a human-readable string to identify the library vendor and + * version. + * \return the version string.*/ +extern const char *th_version_string(void); +/**Retrieves the library version number. + * This is the highest bitstream version that the encoder library will produce, + * or that the decoder library can decode. 
+ * This number is composed of a 16-bit major version, 8-bit minor version + * and 8 bit sub-version, composed as follows: + * \code + * (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR) + * \endcode + * \return the version number.*/ +extern ogg_uint32_t th_version_number(void); +/**Converts a granule position to an absolute frame index, starting at + * 0. + * The granule position is interpreted in the context of a given + * #th_enc_ctx or #th_dec_ctx handle (either will suffice). + * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx + * handle. + * \param _granpos The granule position to convert. + * \returns The absolute frame index corresponding to \a _granpos. + * \retval -1 The given granule position was invalid (i.e. negative).*/ +extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos); +/**Converts a granule position to an absolute time in seconds. + * The granule position is interpreted in the context of a given + * #th_enc_ctx or #th_dec_ctx handle (either will suffice). + * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx + * handle. + * \param _granpos The granule position to convert. + * \return The absolute time in seconds corresponding to \a _granpos. + * This is the "end time" for the frame, or the latest time it should + * be displayed. + * It is not the presentation time. + * \retval -1 The given granule position was invalid (i.e. negative).*/ +extern double th_granule_time(void *_encdec,ogg_int64_t _granpos); +/**Determines whether a Theora packet is a header or not. + * This function does no verification beyond checking the packet type bit, so + * it should not be used for bitstream identification; use + * th_decode_headerin() for that. + * As per the Theora specification, an empty (0-byte) packet is treated as a + * data packet (a delta frame with no coded blocks). + * \param _op An ogg_packet containing encoded Theora data. 
+ * \retval 1 The packet is a header packet + * \retval 0 The packet is a video data packet.*/ +extern int th_packet_isheader(ogg_packet *_op); +/**Determines whether a theora packet is a key frame or not. + * This function does no verification beyond checking the packet type and + * key frame bits, so it should not be used for bitstream identification; use + * th_decode_headerin() for that. + * As per the Theora specification, an empty (0-byte) packet is treated as a + * delta frame (with no coded blocks). + * \param _op An ogg_packet containing encoded Theora data. + * \retval 1 The packet contains a key frame. + * \retval 0 The packet contains a delta frame. + * \retval -1 The packet is not a video data packet.*/ +extern int th_packet_iskeyframe(ogg_packet *_op); +/*@}*/ + + +/**\name Functions for manipulating header data*/ +/*@{*/ +/**Initializes a th_info structure. + * This should be called on a freshly allocated #th_info structure before + * attempting to use it. + * \param _info The #th_info struct to initialize.*/ +extern void th_info_init(th_info *_info); +/**Clears a #th_info structure. + * This should be called on a #th_info structure after it is no longer + * needed. + * \param _info The #th_info struct to clear.*/ +extern void th_info_clear(th_info *_info); + +/**Initialize a #th_comment structure. + * This should be called on a freshly allocated #th_comment structure + * before attempting to use it. + * \param _tc The #th_comment struct to initialize.*/ +extern void th_comment_init(th_comment *_tc); +/**Add a comment to an initialized #th_comment structure. + * \note Neither th_comment_add() nor th_comment_add_tag() support + * comments containing null values, although the bitstream format does + * support them. + * To add such comments you will need to manipulate the #th_comment + * structure directly. + * \param _tc The #th_comment struct to add the comment to. 
+ * \param _comment Must be a null-terminated UTF-8 string containing the + * comment in "TAG=the value" form.*/ +extern void th_comment_add(th_comment *_tc, char *_comment); +/**Add a comment to an initialized #th_comment structure. + * \note Neither th_comment_add() nor th_comment_add_tag() support + * comments containing null values, although the bitstream format does + * support them. + * To add such comments you will need to manipulate the #th_comment + * structure directly. + * \param _tc The #th_comment struct to add the comment to. + * \param _tag A null-terminated string containing the tag associated with + * the comment. + * \param _val The corresponding value as a null-terminated string.*/ +extern void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val); +/**Look up a comment value by its tag. + * \param _tc An initialized #th_comment structure. + * \param _tag The tag to look up. + * \param _count The instance of the tag. + * The same tag can appear multiple times, each with a distinct + * value, so an index is required to retrieve them all. + * The order in which these values appear is significant and + * should be preserved. + * Use th_comment_query_count() to get the legal range for + * the \a _count parameter. + * \return A pointer to the queried tag's value. + * This points directly to data in the #th_comment structure. + * It should not be modified or freed by the application, and + * modifications to the structure may invalidate the pointer. + * \retval NULL If no matching tag is found.*/ +extern char *th_comment_query(th_comment *_tc,char *_tag,int _count); +/**Look up the number of instances of a tag. + * Call this first when querying for a specific tag and then iterate over the + * number of instances with separate calls to th_comment_query() to + * retrieve all the values for that tag in order. + * \param _tc An initialized #th_comment structure. + * \param _tag The tag to look up. 
+ * \return The number of instances of this particular tag.*/ +extern int th_comment_query_count(th_comment *_tc,char *_tag); +/**Clears a #th_comment structure. + * This should be called on a #th_comment structure after it is no longer + * needed. + * It will free all memory used by the structure members. + * \param _tc The #th_comment struct to clear.*/ +extern void th_comment_clear(th_comment *_tc); +/*@}*/ +/*@}*/ + + + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/engine/code/libtheora-1.1.1/include/theora/theora.h b/engine/code/libtheora-1.1.1/include/theora/theora.h new file mode 100644 index 00000000..af6eb6f3 --- /dev/null +++ b/engine/code/libtheora-1.1.1/include/theora/theora.h @@ -0,0 +1,784 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.17 2003/12/06 18:06:19 arc Exp $ + + ********************************************************************/ + +#ifndef _O_THEORA_H_ +#define _O_THEORA_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +#include <stddef.h> /* for size_t */ + +#include <ogg/ogg.h> + +/** \file + * The libtheora pre-1.0 legacy C API. + * + * \ingroup oldfuncs + * + * \section intro Introduction + * + * This is the documentation for the libtheora legacy C API, declared in + * the theora.h header, which describes the old interface used before + * the 1.0 release. 
This API was widely deployed for several years and + * remains supported, but for new code we recommend the cleaner API + * declared in theoradec.h and theoraenc.h. + * + * libtheora is the reference implementation for + * Theora, a free video codec. + * Theora is derived from On2's VP3 codec with improved integration with + * Ogg multimedia formats by Xiph.Org. + * + * \section overview Overview + * + * This library will both decode and encode theora packets to/from raw YUV + * frames. In either case, the packets will most likely either come from or + * need to be embedded in an Ogg stream. Use + * libogg or + * liboggz + * to extract/package these packets. + * + * \section decoding Decoding Process + * + * Decoding can be separated into the following steps: + * -# initialise theora_info and theora_comment structures using + * theora_info_init() and theora_comment_init(): + \verbatim + theora_info info; + theora_comment comment; + + theora_info_init(&info); + theora_comment_init(&comment); + \endverbatim + * -# retrieve header packets from Ogg stream (there should be 3) and decode + * into theora_info and theora_comment structures using + * theora_decode_header(). See \ref identification for more information on + * identifying which packets are theora packets. + \verbatim + int i; + for (i = 0; i < 3; i++) + { + (get a theora packet "op" from the Ogg stream) + theora_decode_header(&info, &comment, op); + } + \endverbatim + * -# initialise the decoder based on the information retrieved into the + * theora_info struct by theora_decode_header(). You will need a + * theora_state struct. + \verbatim + theora_state state; + + theora_decode_init(&state, &info); + \endverbatim + * -# pass in packets and retrieve decoded frames! See the yuv_buffer + * documentation for information on how to retrieve raw YUV data. 
+ \verbatim + yuv_buffer buffer; + while (last packet was not e_o_s) { + (get a theora packet "op" from the Ogg stream) + theora_decode_packetin(&state, op); + theora_decode_YUVout(&state, &buffer); + } + \endverbatim + * + * + * \subsection identification Identifying Theora Packets + * + * All streams inside an Ogg file have a unique serial_no attached to the + * stream. Typically, you will want to + * - retrieve the serial_no for each b_o_s (beginning of stream) page + * encountered within the Ogg file; + * - test the first (only) packet on that page to determine if it is a theora + * packet; + * - once you have found a theora b_o_s page then use the retrieved serial_no + * to identify future packets belonging to the same theora stream. + * + * Note that you \e cannot use theora_packet_isheader() to determine if a + * packet is a theora packet or not, as this function does not perform any + * checking beyond whether a header bit is present. Instead, use the + * theora_decode_header() function and check the return value; or examine the + * header bytes at the beginning of the Ogg page. + */ + + +/** \defgroup oldfuncs Legacy pre-1.0 C API */ +/* @{ */ + +/** + * A YUV buffer for passing uncompressed frames to and from the codec. + * This holds a Y'CbCr frame in planar format. The CbCr planes can be + * subsampled and have their own separate dimensions and row stride + * offsets. Note that the strides may be negative in some + * configurations. For theora the width and height of the largest plane + * must be a multiple of 16. The actual meaningful picture size and + * offset are stored in the theora_info structure; frames returned by + * the decoder may need to be cropped for display. + * + * All samples are 8 bits. Within each plane samples are ordered by + * row from the top of the frame to the bottom. Within each row samples + * are ordered from left to right. 
+ * + * During decode, the yuv_buffer struct is allocated by the user, but all + * fields (including luma and chroma pointers) are filled by the library. + * These pointers address library-internal memory and their contents should + * not be modified. + * + * Conversely, during encode the user allocates the struct and fills out all + * fields. The user also manages the data addressed by the luma and chroma + * pointers. See the encoder_example.c and dump_video.c example files in + * theora/examples/ for more information. + */ +typedef struct { + int y_width; /**< Width of the Y' luminance plane */ + int y_height; /**< Height of the luminance plane */ + int y_stride; /**< Offset in bytes between successive rows */ + + int uv_width; /**< Width of the Cb and Cr chroma planes */ + int uv_height; /**< Height of the chroma planes */ + int uv_stride; /**< Offset between successive chroma rows */ + unsigned char *y; /**< Pointer to start of luminance data */ + unsigned char *u; /**< Pointer to start of Cb data */ + unsigned char *v; /**< Pointer to start of Cr data */ + +} yuv_buffer; + +/** + * A Colorspace. + */ +typedef enum { + OC_CS_UNSPECIFIED, /**< The colorspace is unknown or unspecified */ + OC_CS_ITU_REC_470M, /**< This is the best option for 'NTSC' content */ + OC_CS_ITU_REC_470BG, /**< This is the best option for 'PAL' content */ + OC_CS_NSPACES /**< This marks the end of the defined colorspaces */ +} theora_colorspace; + +/** + * A Chroma subsampling + * + * These enumerate the available chroma subsampling options supported + * by the theora format. See Section 4.4 of the specification for + * exact definitions. + */ +typedef enum { + OC_PF_420, /**< Chroma subsampling by 2 in each direction (4:2:0) */ + OC_PF_RSVD, /**< Reserved value */ + OC_PF_422, /**< Horizontal chroma subsampling by 2 (4:2:2) */ + OC_PF_444, /**< No chroma subsampling at all (4:4:4) */ +} theora_pixelformat; + +/** + * Theora bitstream info. 
+ * Contains the basic playback parameters for a stream, + * corresponding to the initial 'info' header packet. + * + * Encoded theora frames must be a multiple of 16 in width and height. + * To handle other frame sizes, a crop rectangle is specified in + * frame_height and frame_width, offset_x and offset_y. The offset + * and size should still be a multiple of 2 to avoid chroma sampling + * shifts. Offset values in this structure are measured from the + * upper left of the image. + * + * Frame rate, in frames per second, is stored as a rational + * fraction. Aspect ratio is also stored as a rational fraction, and + * refers to the aspect ratio of the frame pixels, not of the + * overall frame itself. + * + * See + * examples/encoder_example.c for usage examples of the + * other parameters and good default settings for the encoder parameters. + */ +typedef struct { + ogg_uint32_t width; /**< encoded frame width */ + ogg_uint32_t height; /**< encoded frame height */ + ogg_uint32_t frame_width; /**< display frame width */ + ogg_uint32_t frame_height; /**< display frame height */ + ogg_uint32_t offset_x; /**< horizontal offset of the displayed frame */ + ogg_uint32_t offset_y; /**< vertical offset of the displayed frame */ + ogg_uint32_t fps_numerator; /**< frame rate numerator **/ + ogg_uint32_t fps_denominator; /**< frame rate denominator **/ + ogg_uint32_t aspect_numerator; /**< pixel aspect ratio numerator */ + ogg_uint32_t aspect_denominator; /**< pixel aspect ratio denominator */ + theora_colorspace colorspace; /**< colorspace */ + int target_bitrate; /**< nominal bitrate in bits per second */ + int quality; /**< Nominal quality setting, 0-63 */ + int quick_p; /**< Quick encode/decode */ + + /* decode only */ + unsigned char version_major; + unsigned char version_minor; + unsigned char version_subminor; + + void *codec_setup; + + /* encode only */ + int dropframes_p; + int keyframe_auto_p; + ogg_uint32_t keyframe_frequency; + ogg_uint32_t 
keyframe_frequency_force; /* also used for decode init to + get granpos shift correct */ + ogg_uint32_t keyframe_data_target_bitrate; + ogg_int32_t keyframe_auto_threshold; + ogg_uint32_t keyframe_mindistance; + ogg_int32_t noise_sensitivity; + ogg_int32_t sharpness; + + theora_pixelformat pixelformat; /**< chroma subsampling mode to expect */ + +} theora_info; + +/** Codec internal state and context. + */ +typedef struct{ + theora_info *i; + ogg_int64_t granulepos; + + void *internal_encode; + void *internal_decode; + +} theora_state; + +/** + * Comment header metadata. + * + * This structure holds the in-stream metadata corresponding to + * the 'comment' header packet. + * + * Meta data is stored as a series of (tag, value) pairs, in + * length-encoded string vectors. The first occurrence of the + * '=' character delimits the tag and value. A particular tag + * may occur more than once. The character set encoding for + * the strings is always UTF-8, but the tag names are limited + * to case-insensitive ASCII. See the spec for details. + * + * In filling in this structure, theora_decode_header() will + * null-terminate the user_comment strings for safety. However, + * the bitstream format itself treats them as 8-bit clean, + * and so the length array should be treated as authoritative + * for their length. + */ +typedef struct theora_comment{ + char **user_comments; /**< An array of comment string vectors */ + int *comment_lengths; /**< An array of corresponding string vector lengths in bytes */ + int comments; /**< The total number of comment string vectors */ + char *vendor; /**< The vendor string identifying the encoder, null terminated */ + +} theora_comment; + + +/**\name theora_control() codes */ +/* \anchor decctlcodes_old + * These are the available request codes for theora_control() + * when called with a decoder instance. 
+ * By convention decoder control codes are odd, to distinguish + * them from \ref encctlcodes_old "encoder control codes" which + * are even. + * + * Note that since the 1.0 release, both the legacy and the final + * implementation accept all the same control codes, but only the + * final API declares the newer codes. + * + * Keep any experimental or vendor-specific values above \c 0x8000.*/ + +/*@{*/ + +/**Get the maximum post-processing level. + * The decoder supports a post-processing filter that can improve + * the appearance of the decoded images. This returns the highest + * level setting for this post-processor, corresponding to maximum + * improvement and computational expense. + */ +#define TH_DECCTL_GET_PPLEVEL_MAX (1) + +/**Set the post-processing level. + * Sets the level of post-processing to use when decoding the + * compressed stream. This must be a value between zero (off) + * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX. + */ +#define TH_DECCTL_SET_PPLEVEL (3) + +/**Sets the maximum distance between key frames. + * This can be changed during an encode, but will be bounded by + * <tt>1<<th_info#keyframe_granule_shift</tt>. + * If it is set before encoding begins, th_info#keyframe_granule_shift will + * be enlarged appropriately. + * + * \param[in] buf ogg_uint32_t: The maximum distance between key + * frames. + * \param[out] buf ogg_uint32_t: The actual maximum distance set. + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(ogg_uint32_t). + * \retval OC_IMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) + +/**Set the granule position. + * Call this after a seek, to update the internal granulepos + * in the decoder, to ensure that subsequent frames are marked + * properly. If you track timestamps yourself and do not use + * the granule position returned by the decoder, then you do + * not need to use this control. 
+ */ +#define TH_DECCTL_SET_GRANPOS (5) + +/**\anchor encctlcodes_old */ + +/**Sets the quantization parameters to use. + * The parameters are copied, not stored by reference, so they can be freed + * after this call. + * NULL may be specified to revert to the default parameters. + * + * \param[in] buf #th_quant_info + * \retval OC_FAULT \a theora_state is NULL. + * \retval OC_EINVAL Encoding has already begun, the quantization parameters + * are not acceptable to this version of the encoder, + * \a buf is NULL and \a buf_sz is not zero, + * or \a buf is non-NULL and \a buf_sz is + * not sizeof(#th_quant_info). + * \retval OC_IMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_QUANT_PARAMS (2) + +/**Disables any encoder features that would prevent lossless transcoding back + * to VP3. + * This primarily means disabling block-level QI values and not using 4MV mode + * when any of the luma blocks in a macro block are not coded. + * It also includes using the VP3 quantization tables and Huffman codes; if you + * set them explicitly after calling this function, the resulting stream will + * not be VP3-compatible. + * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source + * material, or when using a picture region smaller than the full frame (e.g. + * a non-multiple-of-16 width or height), then non-VP3 bitstream features will + * still be disabled, but the stream will still not be VP3-compatible, as VP3 + * was not capable of encoding such formats. + * If you call this after encoding has already begun, then the quantization + * tables and codebooks cannot be changed, but the frame-level features will + * be enabled or disabled as requested. + * + * \param[in] buf int: a non-zero value to enable VP3 compatibility, + * or 0 to disable it (the default). + * \param[out] buf int: 1 if all bitstream features required for + * VP3-compatibility could be set, and 0 otherwise. 
+ * The latter will be returned if the pixel format is not + * 4:2:0, the picture region is smaller than the full frame, + * or if encoding has begun, preventing the quantization + * tables and codebooks from being set. + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(int). + * \retval OC_IMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_VP3_COMPATIBLE (10) + +/**Gets the maximum speed level. + * Higher speed levels favor quicker encoding over better quality per bit. + * Depending on the encoding mode, and the internal algorithms used, quality + * may actually improve, but in this case bitrate will also likely increase. + * In any case, overall rate/distortion performance will probably decrease. + * The maximum value, and the meaning of each value, may change depending on + * the current encoding mode (VBR vs. CQI, etc.). + * + * \param[out] buf int: The maximum encoding speed level. + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(int). + * \retval OC_IMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_GET_SPLEVEL_MAX (12) + +/**Sets the speed level. + * By default a speed value of 1 is used. + * + * \param[in] buf int: The new encoding speed level. + * 0 is slowest, larger values use less CPU. + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(int), or the + * encoding speed level is out of bounds. + * The maximum encoding speed level may be + * implementation- and encoding mode-specific, and can be + * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. 
+ * \retval OC_IMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_SPLEVEL (14) + +/*@}*/ + +#define OC_FAULT -1 /**< General failure */ +#define OC_EINVAL -10 /**< Library encountered invalid internal data */ +#define OC_DISABLED -11 /**< Requested action is disabled */ +#define OC_BADHEADER -20 /**< Header packet was corrupt/invalid */ +#define OC_NOTFORMAT -21 /**< Packet is not a theora packet */ +#define OC_VERSION -22 /**< Bitstream version is not handled */ +#define OC_IMPL -23 /**< Feature or action not implemented */ +#define OC_BADPACKET -24 /**< Packet is corrupt */ +#define OC_NEWPACKET -25 /**< Packet is an (ignorable) unhandled extension */ +#define OC_DUPFRAME 1 /**< Packet is a dropped frame */ + +/** + * Retrieve a human-readable string to identify the encoder vendor and version. + * \returns A version string. + */ +extern const char *theora_version_string(void); + +/** + * Retrieve a 32-bit version number. + * This number is composed of a 16-bit major version, 8-bit minor version + * and 8 bit sub-version, composed as follows: +<pre>
+   (VERSION_MAJOR<<16) + (VERSION_MINOR<<8) + (VERSION_SUB)
+</pre>
+* \returns The version number. +*/ +extern ogg_uint32_t theora_version_number(void); + +/** + * Initialize the theora encoder. + * \param th The theora_state handle to initialize for encoding. + * \param ti A theora_info struct filled with the desired encoding parameters. + * \retval 0 Success + */ +extern int theora_encode_init(theora_state *th, theora_info *ti); + +/** + * Submit a YUV buffer to the theora encoder. + * \param t A theora_state handle previously initialized for encoding. + * \param yuv A buffer of YUV data to encode. Note that both the yuv_buffer + * struct and the luma/chroma buffers within should be allocated by + * the user. + * \retval OC_EINVAL Encoder is not ready, or is finished. + * \retval -1 The size of the given frame differs from those previously input + * \retval 0 Success + */ +extern int theora_encode_YUVin(theora_state *t, yuv_buffer *yuv); + +/** + * Request the next packet of encoded video. + * The encoded data is placed in a user-provided ogg_packet structure. + * \param t A theora_state handle previously initialized for encoding. + * \param last_p whether this is the last packet the encoder should produce. + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to encoded + * data. The memory for the encoded data is owned by libtheora. + * \retval 0 No internal storage exists OR no packet is ready + * \retval -1 The encoding process has completed + * \retval 1 Success + */ +extern int theora_encode_packetout( theora_state *t, int last_p, + ogg_packet *op); + +/** + * Request a packet containing the initial header. + * A pointer to the header data is placed in a user-provided ogg_packet + * structure. + * \param t A theora_state handle previously initialized for encoding. + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to the header + * data. 
The memory for the header data is owned by libtheora. + * \retval 0 Success + */ +extern int theora_encode_header(theora_state *t, ogg_packet *op); + +/** + * Request a comment header packet from provided metadata. + * A pointer to the comment data is placed in a user-provided ogg_packet + * structure. + * \param tc A theora_comment structure filled with the desired metadata + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to the encoded + * comment data. The memory for the comment data is owned by + * libtheora. + * \retval 0 Success + */ +extern int theora_encode_comment(theora_comment *tc, ogg_packet *op); + +/** + * Request a packet containing the codebook tables for the stream. + * A pointer to the codebook data is placed in a user-provided ogg_packet + * structure. + * \param t A theora_state handle previously initialized for encoding. + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to the codebook + * data. The memory for the header data is owned by libtheora. + * \retval 0 Success + */ +extern int theora_encode_tables(theora_state *t, ogg_packet *op); + +/** + * Decode an Ogg packet, with the expectation that the packet contains + * an initial header, comment data or codebook tables. + * + * \param ci A theora_info structure to fill. This must have been previously + * initialized with theora_info_init(). If \a op contains an initial + * header, theora_decode_header() will fill \a ci with the + * parsed header values. If \a op contains codebook tables, + * theora_decode_header() will parse these and attach an internal + * representation to \a ci->codec_setup. + * \param cc A theora_comment structure to fill. If \a op contains comment + * data, theora_decode_header() will fill \a cc with the parsed + * comments. 
+ * \param op An ogg_packet structure which you expect contains an initial + * header, comment data or codebook tables. + * + * \retval OC_BADHEADER \a op is NULL; OR the first byte of \a op->packet + * has the signature of an initial packet, but op is + * not a b_o_s packet; OR this packet has the signature + * of an initial header packet, but an initial header + * packet has already been seen; OR this packet has the + * signature of a comment packet, but the initial header + * has not yet been seen; OR this packet has the signature + * of a comment packet, but contains invalid data; OR + * this packet has the signature of codebook tables, + * but the initial header or comments have not yet + * been seen; OR this packet has the signature of codebook + * tables, but contains invalid data; + * OR the stream being decoded has a compatible version + * but this packet does not have the signature of a + * theora initial header, comments, or codebook packet + * \retval OC_VERSION The packet data of \a op is an initial header with + * a version which is incompatible with this version of + * libtheora. + * \retval OC_NEWPACKET the stream being decoded has an incompatible (future) + * version and contains an unknown signature. + * \retval 0 Success + * + * \note The normal usage is that theora_decode_header() be called on the + * first three packets of a theora logical bitstream in succession. + */ +extern int theora_decode_header(theora_info *ci, theora_comment *cc, + ogg_packet *op); + +/** + * Initialize a theora_state handle for decoding. + * \param th The theora_state handle to initialize. + * \param c A theora_info struct filled with the desired decoding parameters. + * This is of course usually obtained from a previous call to + * theora_decode_header(). + * \retval 0 Success + */ +extern int theora_decode_init(theora_state *th, theora_info *c); + +/** + * Input a packet containing encoded data into the theora decoder. 
+ * \param th A theora_state handle previously initialized for decoding. + * \param op An ogg_packet containing encoded theora data. + * \retval 0 Success + * \retval OC_BADPACKET \a op does not contain encoded video data + */ +extern int theora_decode_packetin(theora_state *th,ogg_packet *op); + +/** + * Output the next available frame of decoded YUV data. + * \param th A theora_state handle previously initialized for decoding. + * \param yuv A yuv_buffer in which libtheora should place the decoded data. + * Note that the buffer struct itself is allocated by the user, but + * that the luma and chroma pointers will be filled in by the + * library. Also note that these luma and chroma regions should be + * considered read-only by the user. + * \retval 0 Success + */ +extern int theora_decode_YUVout(theora_state *th,yuv_buffer *yuv); + +/** + * Report whether a theora packet is a header or not + * This function does no verification beyond checking the header + * flag bit so it should not be used for bitstream identification; + * use theora_decode_header() for that. + * + * \param op An ogg_packet containing encoded theora data. + * \retval 1 The packet is a header packet + * \retval 0 The packet is not a header packet (and so contains frame data) + * + * This function was added in the 1.0alpha4 release. + */ +extern int theora_packet_isheader(ogg_packet *op); + +/** + * Report whether a theora packet is a keyframe or not + * + * \param op An ogg_packet containing encoded theora data. + * \retval 1 The packet contains a keyframe image + * \retval 0 The packet contains an interframe delta + * \retval -1 The packet is not an image data packet at all + * + * This function was added in the 1.0alpha4 release. + */ +extern int theora_packet_iskeyframe(ogg_packet *op); + +/** + * Report the granulepos shift radix + * + * When embedded in Ogg, Theora uses a two-part granulepos, + * splitting the 64-bit field into two pieces. 
The more-significant + * section represents the frame count at the last keyframe, + * and the less-significant section represents the count of + * frames since the last keyframe. In this way the overall + * field is still non-decreasing with time, but usefully encodes + * a pointer to the last keyframe, which is necessary for + * correctly restarting decode after a seek. + * + * This function reports the number of bits used to represent + * the distance to the last keyframe, and thus how the granulepos + * field must be shifted or masked to obtain the two parts. + * + * Since libtheora returns compressed data in an ogg_packet + * structure, this may be generally useful even if the Theora + * packets are not being used in an Ogg container. + * + * \param ti A previously initialized theora_info struct + * \returns The bit shift dividing the two granulepos fields + * + * This function was added in the 1.0alpha5 release. + */ +int theora_granule_shift(theora_info *ti); + +/** + * Convert a granulepos to an absolute frame index, starting at 0. + * The granulepos is interpreted in the context of a given theora_state handle. + * + * Note that while the granulepos encodes the frame count (i.e. starting + * from 1) this call returns the frame index, starting from zero. Thus + * one can calculate the presentation time by multiplying the index by + * the rate. + * + * \param th A previously initialized theora_state handle (encode or decode) + * \param granulepos The granulepos to convert. + * \returns The frame index corresponding to \a granulepos. + * \retval -1 The given granulepos is undefined (i.e. negative) + * + * This function was added in the 1.0alpha4 release. + */ +extern ogg_int64_t theora_granule_frame(theora_state *th,ogg_int64_t granulepos); + +/** + * Convert a granulepos to absolute time in seconds. The granulepos is + * interpreted in the context of a given theora_state handle, and gives + * the end time of a frame's presentation as used in Ogg mux ordering. 
+ * + * \param th A previously initialized theora_state handle (encode or decode) + * \param granulepos The granulepos to convert. + * \returns The absolute time in seconds corresponding to \a granulepos. + * This is the "end time" for the frame, or the latest time it should + * be displayed. + * It is not the presentation time. + * \retval -1. The given granulepos is undefined (i.e. negative), or + * \retval -1. The function has been disabled because floating + * point support is not available. + */ +extern double theora_granule_time(theora_state *th,ogg_int64_t granulepos); + +/** + * Initialize a theora_info structure. All values within the given theora_info + * structure are initialized, and space is allocated within libtheora for + * internal codec setup data. + * \param c A theora_info struct to initialize. + */ +extern void theora_info_init(theora_info *c); + +/** + * Clear a theora_info structure. All values within the given theora_info + * structure are cleared, and associated internal codec setup data is freed. + * \param c A theora_info struct to clear. + */ +extern void theora_info_clear(theora_info *c); + +/** + * Free all internal data associated with a theora_state handle. + * \param t A theora_state handle. + */ +extern void theora_clear(theora_state *t); + +/** + * Initialize an allocated theora_comment structure + * \param tc An allocated theora_comment structure + **/ +extern void theora_comment_init(theora_comment *tc); + +/** + * Add a comment to an initialized theora_comment structure + * \param tc A previously initialized theora comment structure + * \param comment A null-terminated string encoding the comment in the form + * "TAG=the value" + * + * Neither theora_comment_add() nor theora_comment_add_tag() support + * comments containing null values, although the bitstream format + * supports this. To add such comments you will need to manipulate + * the theora_comment structure directly. 
+ **/ + +extern void theora_comment_add(theora_comment *tc, char *comment); + +/** + * Add a comment to an initialized theora_comment structure. + * \param tc A previously initialized theora comment structure + * \param tag A null-terminated string containing the tag + * associated with the comment. + * \param value The corresponding value as a null-terminated string + * + * Neither theora_comment_add() nor theora_comment_add_tag() support + * comments containing null values, although the bitstream format + * supports this. To add such comments you will need to manipulate + * the theora_comment structure directly. + **/ +extern void theora_comment_add_tag(theora_comment *tc, + char *tag, char *value); + +/** + * Look up a comment value by tag. + * \param tc An initialized theora_comment structure + * \param tag The tag to look up + * \param count The instance of the tag. The same tag can appear multiple + * times, each with a distinct and ordered value, so an index + * is required to retrieve them all. + * \returns A pointer to the queried tag's value + * \retval NULL No matching tag is found + * + * \note Use theora_comment_query_count() to get the legal range for the + * count parameter. + **/ + +extern char *theora_comment_query(theora_comment *tc, char *tag, int count); + +/** Look up the number of instances of a tag. + * \param tc An initialized theora_comment structure + * \param tag The tag to look up + * \returns The number of instances of a particular tag. + * + * Call this first when querying for a specific tag and then iterate + * over the number of instances with separate calls to + * theora_comment_query() to retrieve all instances in order. + **/ +extern int theora_comment_query_count(theora_comment *tc, char *tag); + +/** + * Clear an allocated theora_comment struct so that it can be freed. + * \param tc An allocated theora_comment structure. + **/ +extern void theora_comment_clear(theora_comment *tc); + +/**Encoder control function. 
+ * This is used to provide advanced control of the encoding process. + * \param th A #theora_state handle. + * \param req The control code to process. + * See \ref encctlcodes_old "the list of available + * control codes" for details. + * \param buf The parameters for this control code. + * \param buf_sz The size of the parameter buffer.*/ +extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz); + +/* @} */ /* end oldfuncs doxygen group */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _O_THEORA_H_ */ diff --git a/engine/code/libtheora-1.1.1/include/theora/theoradec.h b/engine/code/libtheora-1.1.1/include/theora/theoradec.h new file mode 100644 index 00000000..b20f0e3a --- /dev/null +++ b/engine/code/libtheora-1.1.1/include/theora/theoradec.h @@ -0,0 +1,325 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ + + ********************************************************************/ + +/**\file + * The libtheoradec C decoding API.*/ + +#if !defined(_O_THEORA_THEORADEC_H_) +# define _O_THEORA_THEORADEC_H_ (1) +# include <stddef.h> +# include <ogg/ogg.h> +# include "codec.h" + +#if defined(__cplusplus) +extern "C" { +#endif + + + +/**\name th_decode_ctl() codes + * \anchor decctlcodes + * These are the available request codes for th_decode_ctl(). + * By convention, these are odd, to distinguish them from the + * \ref encctlcodes "encoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/ +/*@{*/ +/**Gets the maximum post-processing level. + * The decoder supports a post-processing filter that can improve + * the appearance of the decoded images. This returns the highest + * level setting for this post-processor, corresponding to maximum + * improvement and computational expense. + * + * \param[out] _buf int: The maximum post-processing level. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_DECCTL_GET_PPLEVEL_MAX (1) +/**Sets the post-processing level. + * By default, post-processing is disabled. + * + * Sets the level of post-processing to use when decoding the + * compressed stream. This must be a value between zero (off) + * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX. + * + * \param[in] _buf int: The new post-processing level. + * 0 to disable; larger values use more CPU. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the + * post-processing level is out of bounds. + * The maximum post-processing level may be + * implementation-specific, and can be obtained via + * #TH_DECCTL_GET_PPLEVEL_MAX. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_DECCTL_SET_PPLEVEL (3) +/**Sets the granule position. + * Call this after a seek, before decoding the first frame, to ensure that the + * proper granule position is returned for all subsequent frames. + * If you track timestamps yourself and do not use the granule position + * returned by the decoder, then you need not call this function. + * + * \param[in] _buf ogg_int64_t: The granule position of the next + * frame. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. 
+ * \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_int64_t), or the + * granule position is negative.*/ +#define TH_DECCTL_SET_GRANPOS (5) +/**Sets the striped decode callback function. + * If set, this function will be called as each piece of a frame is fully + * decoded in th_decode_packetin(). + * You can pass in a #th_stripe_callback with + * th_stripe_callback#stripe_decoded set to NULL to disable the + * callbacks at any point. + * Enabling striped decode does not prevent you from calling + * th_decode_ycbcr_out() after the frame is fully decoded. + * + * \param[in] _buf #th_stripe_callback: The callback parameters. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not + * sizeof(th_stripe_callback).*/ +#define TH_DECCTL_SET_STRIPE_CB (7) + +/**Enables telemetry and sets the macroblock display mode */ +#define TH_DECCTL_SET_TELEMETRY_MBMODE (9) +/**Enables telemetry and sets the motion vector display mode */ +#define TH_DECCTL_SET_TELEMETRY_MV (11) +/**Enables telemetry and sets the adaptive quantization display mode */ +#define TH_DECCTL_SET_TELEMETRY_QI (13) +/**Enables telemetry and sets the bitstream breakdown visualization mode */ +#define TH_DECCTL_SET_TELEMETRY_BITS (15) +/*@}*/ + + + +/**A callback function for striped decode. + * This is a function pointer to an application-provided function that will be + * called each time a section of the image is fully decoded in + * th_decode_packetin(). + * This allows the application to process the section immediately, while it is + * still in cache. + * Note that the frame is decoded bottom to top, so \a _yfrag0 will steadily + * decrease with each call until it reaches 0, at which point the full frame + * is decoded. + * The number of fragment rows made available in each call depends on the pixel + * format and the number of post-processing filters enabled, and may not even + * be constant for the entire frame. 
+ * If a non-NULL \a _granpos pointer is passed to + * th_decode_packetin(), the granule position for the frame will be stored + * in it before the first callback is made. + * If an entire frame is dropped (a 0-byte packet), then no callbacks will be + * made at all for that frame. + * \param _ctx An application-provided context pointer. + * \param _buf The image buffer for the decoded frame. + * \param _yfrag0 The Y coordinate of the first row of 8x8 fragments + * decoded. + * Multiply this by 8 to obtain the pixel row number in the + * luma plane. + * If the chroma planes are subsampled in the Y direction, + * this will always be divisible by two. + * \param _yfrag_end The Y coordinate of the first row of 8x8 fragments past + * the newly decoded section. + * If the chroma planes are subsampled in the Y direction, + * this will always be divisible by two. + * I.e., this section contains fragment rows + * \a _yfrag0 ...\a _yfrag_end -1.*/ +typedef void (*th_stripe_decoded_func)(void *_ctx,th_ycbcr_buffer _buf, + int _yfrag0,int _yfrag_end); + +/**The striped decode callback data to pass to #TH_DECCTL_SET_STRIPE_CB.*/ +typedef struct{ + /**An application-provided context pointer. + * This will be passed back verbatim to the application.*/ + void *ctx; + /**The callback function pointer.*/ + th_stripe_decoded_func stripe_decoded; +}th_stripe_callback; + + + +/**\name Decoder state + The following data structures are opaque, and their contents are not + publicly defined by this API. + Referring to their internals directly is unsupported, and may break without + warning.*/ +/*@{*/ +/**The decoder context.*/ +typedef struct th_dec_ctx th_dec_ctx; +/**Setup information. + This contains auxiliary information (Huffman tables and quantization + parameters) decoded from the setup header by th_decode_headerin() to be + passed to th_decode_alloc(). 
+ It can be re-used to initialize any number of decoders, and can be freed + via th_setup_free() at any time.*/ +typedef struct th_setup_info th_setup_info; +/*@}*/ + + + +/**\defgroup decfuncs Functions for Decoding*/ +/*@{*/ +/**\name Functions for decoding + * You must link to libtheoradec if you use any of the + * functions in this section. + * + * The functions are listed in the order they are used in a typical decode. + * The basic steps are: + * - Parse the header packets by repeatedly calling th_decode_headerin(). + * - Allocate a #th_dec_ctx handle with th_decode_alloc(). + * - Call th_setup_free() to free any memory used for codec setup + * information. + * - Perform any additional decoder configuration with th_decode_ctl(). + * - For each video data packet: + * - Submit the packet to the decoder via th_decode_packetin(). + * - Retrieve the uncompressed video data via th_decode_ycbcr_out(). + * - Call th_decode_free() to release all decoder memory.*/ +/*@{*/ +/**Decodes the header packets of a Theora stream. + * This should be called on the initial packets of the stream, in succession, + * until it returns 0, indicating that all headers have been + * processed, or an error is encountered. + * At least three header packets are required, and additional optional header + * packets may follow. + * This can be used on the first packet of any logical stream to determine if + * that stream is a Theora stream. + * \param _info A #th_info structure to fill in. + * This must have been previously initialized with + * th_info_init(). + * The application may immediately begin using the contents of + * this structure after the first header is decoded, though it + * must continue to be passed in on all subsequent calls. + * \param _tc A #th_comment structure to fill in. + * The application may immediately begin using the contents of + * this structure after the second header is decoded, though it + * must continue to be passed in on all subsequent calls. 
+ * \param _setup Returns a pointer to additional, private setup information + * needed by the decoder. + * The contents of this pointer must be initialized to + * NULL on the first call, and the returned value must + * continue to be passed in on all subsequent calls. + * \param _op An ogg_packet structure which contains one of the + * initial packets of an Ogg logical stream. + * \return A positive value indicates that a Theora header was successfully + * processed. + * \retval 0 The first video data packet was encountered after all + * required header packets were parsed. + * The packet just passed in on this call should be saved + * and fed to th_decode_packetin() to begin decoding + * video data. + * \retval TH_EFAULT One of \a _info, \a _tc, or \a _setup was + * NULL. + * \retval TH_EBADHEADER \a _op was NULL, the packet was not the next + * header packet in the expected sequence, or the format + * of the header data was invalid. + * \retval TH_EVERSION The packet data was a Theora info header, but for a + * bitstream version not decodable with this version of + * libtheoradec. + * \retval TH_ENOTFORMAT The packet was not a Theora header. + */ +extern int th_decode_headerin(th_info *_info,th_comment *_tc, + th_setup_info **_setup,ogg_packet *_op); +/**Allocates a decoder instance. + * + * Security Warning: The Theora format supports very large frame sizes, + * potentially even larger than the address space of a 32-bit machine, and + * creating a decoder context allocates the space for several frames of data. + * If the allocation fails here, your program will crash, possibly at some + * future point because the OS kernel returned a valid memory range and will + * only fail when it tries to map the pages in it the first time they are + * used. + * Even if it succeeds, you may experience a denial of service if the frame + * size is large enough to cause excessive paging. 
+ * If you are integrating libtheora in a larger application where such things + * are undesirable, it is highly recommended that you check the frame size in + * \a _info before calling this function and refuse to decode streams where it + * is larger than some reasonable maximum. + * libtheora will not check this for you, because there may be machines that + * can handle such streams and applications that wish to. + * \param _info A #th_info struct filled via th_decode_headerin(). + * \param _setup A #th_setup_info handle returned via + * th_decode_headerin(). + * \return The initialized #th_dec_ctx handle. + * \retval NULL If the decoding parameters were invalid.*/ +extern th_dec_ctx *th_decode_alloc(const th_info *_info, + const th_setup_info *_setup); +/**Releases all storage used for the decoder setup information. + * This should be called after you no longer want to create any decoders for + * a stream whose headers you have parsed with th_decode_headerin(). + * \param _setup The setup information to free. + * This can safely be NULL.*/ +extern void th_setup_free(th_setup_info *_setup); +/**Decoder control function. + * This is used to provide advanced control of the decoding process. + * \param _dec A #th_dec_ctx handle. + * \param _req The control code to process. + * See \ref decctlcodes "the list of available control codes" + * for details. + * \param _buf The parameters for this control code. + * \param _buf_sz The size of the parameter buffer.*/ +extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, + size_t _buf_sz); +/**Submits a packet containing encoded video data to the decoder. + * \param _dec A #th_dec_ctx handle. + * \param _op An ogg_packet containing encoded video data. + * \param _granpos Returns the granule position of the decoded packet. + * If non-NULL, the granule position for this specific + * packet is stored in this location. + * This is computed incrementally from previously decoded + * packets. 
+ * After a seek, the correct granule position must be set via + * #TH_DECCTL_SET_GRANPOS for this to work properly. + * \retval 0 Success. + * A new decoded frame can be retrieved by calling + * th_decode_ycbcr_out(). + * \retval TH_DUPFRAME The packet represented a dropped (0-byte) frame. + * The player can skip the call to th_decode_ycbcr_out(), + * as the contents of the decoded frame buffer have not + * changed. + * \retval TH_EFAULT \a _dec or \a _op was NULL. + * \retval TH_EBADPACKET \a _op does not contain encoded video data. + * \retval TH_EIMPL The video data uses bitstream features which this + * library does not support.*/ +extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, + ogg_int64_t *_granpos); +/**Outputs the next available frame of decoded Y'CbCr data. + * If a striped decode callback has been set with #TH_DECCTL_SET_STRIPE_CB, + * then the application does not need to call this function. + * \param _dec A #th_dec_ctx handle. + * \param _ycbcr A video buffer structure to fill in. + * libtheoradec will fill in all the members of this + * structure, including the pointers to the uncompressed video + * data. + * The memory for this video data is owned by + * libtheoradec. + * It may be freed or overwritten without notification when + * subsequent frames are decoded. + * \retval 0 Success + * \retval TH_EFAULT \a _dec or \a _ycbcr was NULL. + */ +extern int th_decode_ycbcr_out(th_dec_ctx *_dec, + th_ycbcr_buffer _ycbcr); +/**Frees an allocated decoder instance. 
+ * \param _dec A #th_dec_ctx handle.*/ +extern void th_decode_free(th_dec_ctx *_dec); +/*@}*/ +/*@}*/ + + + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/engine/code/libtheora-1.1.1/include/theora/theoraenc.h b/engine/code/libtheora-1.1.1/include/theora/theoraenc.h new file mode 100644 index 00000000..fdf2ab21 --- /dev/null +++ b/engine/code/libtheora-1.1.1/include/theora/theoraenc.h @@ -0,0 +1,486 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ + + ********************************************************************/ + +/**\file + * The libtheoraenc C encoding API.*/ + +#if !defined(_O_THEORA_THEORAENC_H_) +# define _O_THEORA_THEORAENC_H_ (1) +# include <stddef.h> +# include <ogg/ogg.h> +# include "codec.h" + +#if defined(__cplusplus) +extern "C" { +#endif + + + +/**\name th_encode_ctl() codes + * \anchor encctlcodes + * These are the available request codes for th_encode_ctl(). + * By convention, these are even, to distinguish them from the + * \ref decctlcodes "decoder control codes". + * Keep any experimental or vendor-specific values above \c 0x8000.*/ +/*@{*/ +/**Sets the Huffman tables to use. + * The tables are copied, not stored by reference, so they can be freed after + * this call. + * NULL may be specified to revert to the default tables. + * + * \param[in] _buf #th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS] + * \retval TH_EFAULT \a _enc_ctx is NULL.
+ * \retval TH_EINVAL Encoding has already begun or one or more of the given + * tables is not full or prefix-free, \a _buf is + * NULL and \a _buf_sz is not zero, or \a _buf is + * non-NULL and \a _buf_sz is not + * sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_HUFFMAN_CODES (0) +/**Sets the quantization parameters to use. + * The parameters are copied, not stored by reference, so they can be freed + * after this call. + * NULL may be specified to revert to the default parameters. + * + * \param[in] _buf #th_quant_info + * \retval TH_EFAULT \a _enc_ctx is NULL. + * \retval TH_EINVAL Encoding has already begun, \a _buf is + * NULL and \a _buf_sz is not zero, + * or \a _buf is non-NULL and + * \a _buf_sz is not sizeof(#th_quant_info). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_QUANT_PARAMS (2) +/**Sets the maximum distance between key frames. + * This can be changed during an encode, but will be bounded by + * 1<<th_info#keyframe_granule_shift. + * If it is set before encoding begins, th_info#keyframe_granule_shift will + * be enlarged appropriately. + * + * \param[in] _buf ogg_uint32_t: The maximum distance between key + * frames. + * \param[out] _buf ogg_uint32_t: The actual maximum distance set. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_uint32_t). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) +/**Disables any encoder features that would prevent lossless transcoding back + * to VP3. + * This primarily means disabling block-adaptive quantization and always coding + * all four luma blocks in a macro block when 4MV is used. + * It also includes using the VP3 quantization tables and Huffman codes; if you + * set them explicitly after calling this function, the resulting stream will + * not be VP3-compatible.
+ * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source + * material, or when using a picture region smaller than the full frame (e.g. + * a non-multiple-of-16 width or height), then non-VP3 bitstream features will + * still be disabled, but the stream will still not be VP3-compatible, as VP3 + * was not capable of encoding such formats. + * If you call this after encoding has already begun, then the quantization + * tables and codebooks cannot be changed, but the frame-level features will + * be enabled or disabled as requested. + * + * \param[in] _buf int: a non-zero value to enable VP3 compatibility, + * or 0 to disable it (the default). + * \param[out] _buf int: 1 if all bitstream features required for + * VP3-compatibility could be set, and 0 otherwise. + * The latter will be returned if the pixel format is not + * 4:2:0, the picture region is smaller than the full frame, + * or if encoding has begun, preventing the quantization + * tables and codebooks from being set. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_VP3_COMPATIBLE (10) +/**Gets the maximum speed level. + * Higher speed levels favor quicker encoding over better quality per bit. + * Depending on the encoding mode, and the internal algorithms used, quality + * may actually improve, but in this case bitrate will also likely increase. + * In any case, overall rate/distortion performance will probably decrease. + * The maximum value, and the meaning of each value, may change depending on + * the current encoding mode (VBR vs. constant quality, etc.). + * + * \param[out] _buf int: The maximum encoding speed level. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). 
+ * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_GET_SPLEVEL_MAX (12) +/**Sets the speed level. + * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL. + * + * \param[in] _buf int: The new encoding speed level. + * 0 is slowest, larger values use less CPU. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the + * encoding speed level is out of bounds. + * The maximum encoding speed level may be + * implementation- and encoding mode-specific, and can be + * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_SPLEVEL (14) +/**Gets the current speed level. + * The default speed level may vary according to encoder implementation, but if + * this control code is not supported (it returns #TH_EIMPL), the default may + * be assumed to be the slowest available speed (0). + * The maximum encoding speed level may be implementation- and encoding + * mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. + * + * \param[out] _buf int: The current encoding speed level. + * 0 is slowest, larger values use less CPU. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_GET_SPLEVEL (16) +/**Sets the number of duplicates of the next frame to produce. + * Although libtheora can encode duplicate frames very cheaply, it costs some + * amount of CPU to detect them, and a run of duplicates cannot span a + * keyframe boundary. + * This control code tells the encoder to produce the specified number of extra + * duplicates of the next frame. 
+ * This allows the encoder to make smarter keyframe placement decisions and + * rate control decisions, and reduces CPU usage as well, when compared to + * just submitting the same frame for encoding multiple times. + * This setting only applies to the next frame submitted for encoding. + * You MUST call th_encode_packetout() repeatedly until it returns 0, or the + * extra duplicate frames will be lost. + * + * \param[in] _buf int: The number of duplicates to produce. + * If this is negative or zero, no duplicates will be produced. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the + * number of duplicates is greater than or equal to the + * maximum keyframe interval. + * In the latter case, NO duplicate frames will be produced. + * You must ensure that the maximum keyframe interval is set + * larger than the maximum number of duplicates you will + * ever wish to insert prior to encoding. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_DUP_COUNT (18) +/**Modifies the default bitrate management behavior. + * Use to allow or disallow frame dropping, and to enable or disable capping + * bit reservoir overflows and underflows. + * See \ref ratectlflags "the list of available flags". + * The flags are set by default to + * #TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW. + * + * \param[in] _buf int: Any combination of + * \ref ratectlflags "the available flags": + * - #TH_RATECTL_DROP_FRAMES: Enable frame dropping. + * - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later + * use. + * - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls + * later. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int) or rate control + * is not enabled.
+ * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_RATE_FLAGS (20) +/**Sets the size of the bitrate management bit reservoir as a function + * of number of frames. + * The reservoir size affects how quickly bitrate management reacts to + * instantaneous changes in the video complexity. + * Larger reservoirs react more slowly, and provide better overall quality, but + * require more buffering by a client, adding more latency to live streams. + * By default, libtheora sets the reservoir to the maximum distance between + * keyframes, subject to a minimum and maximum limit. + * This call may be used to increase or decrease the reservoir, increasing or + * decreasing the allowed temporary variance in bitrate. + * An implementation may impose some limits on the size of a reservoir it can + * handle, in which case the actual reservoir size may not be exactly what was + * requested. + * The actual value set will be returned. + * + * \param[in] _buf int: Requested size of the reservoir measured in + * frames. + * \param[out] _buf int: The actual size of the reservoir set. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or rate control + * is not enabled. The buffer has an implementation + * defined minimum and maximum size and the value in _buf + * will be adjusted to match the actual value set. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_RATE_BUFFER (22) +/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics. + * Pass 1 mode must be enabled before the first frame is encoded, and a target + * bitrate must have already been specified to the encoder. + * Although this does not have to be the exact rate that will be used in the + * second pass, closer values may produce better results. 
+ * The first call returns the size of the two-pass header data, along with some + * placeholder content, and sets the encoder into pass 1 mode implicitly. + * This call sets the encoder to pass 1 mode implicitly. + * Then, a subsequent call must be made after each call to + * th_encode_ycbcr_in() to retrieve the metrics for that frame. + * An additional, final call must be made to retrieve the summary data, + * containing such information as the total number of frames, etc. + * This must be stored in place of the placeholder data that was returned + * in the first call, before the frame metrics data. + * All of this data must be presented back to the encoder during pass 2 using + * #TH_ENCCTL_2PASS_IN. + * + * \param[out] char *_buf: Returns a pointer to internal storage + * containing the two pass metrics data. + * This storage is only valid until the next call, or until the + * encoder context is freed, and must be copied by the + * application. + * \retval >=0 The number of bytes of metric data available in the + * returned buffer. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(char *), no target + * bitrate has been set, or the first call was made after + * the first frame was submitted for encoding. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_2PASS_OUT (24) +/**Submits two-pass encoding metric data collected during the first encoding pass to + * the second pass. + * The first call must be made before the first frame is encoded, and a target + * bitrate must have already been specified to the encoder. + * It sets the encoder to pass 2 mode implicitly; this cannot be disabled. + * The encoder may require reading data from some or all of the frames in + * advance, depending on, e.g., the reservoir size used in the second pass.
+ * You must call this function repeatedly before each frame to provide data + * until either a) it fails to consume all of the data presented or b) all of + * the pass 1 data has been consumed. + * In the first case, you must save the remaining data to be presented after + * the next frame. + * You can call this function with a NULL argument to get an upper bound on + * the number of bytes that will be required before the next frame. + * + * When pass 2 is first enabled, the default bit reservoir is set to the entire + * file; this gives maximum flexibility but can lead to very high peak rates. + * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER + * (e.g., to set it to the keyframe interval for non-live streaming), however, + * you may then need to provide more data before the next frame. + * + * \param[in] _buf char[]: A buffer containing the data returned by + * #TH_ENCCTL_2PASS_OUT in pass 1. + * You may pass NULL for \a _buf to return an upper + * bound on the number of additional bytes needed before the + * next frame. + * The summary data returned at the end of pass 1 must be at + * the head of the buffer on the first call with a + * non-NULL \a _buf, and the placeholder data + * returned at the start of pass 1 should be omitted. + * After each call you should advance this buffer by the number + * of bytes consumed. + * \retval >0 The number of bytes of metric data required/consumed. + * \retval 0 No more data is required before the next frame. + * \retval TH_EFAULT \a _enc_ctx is NULL. + * \retval TH_EINVAL No target bitrate has been set, or the first call was + * made after the first frame was submitted for + * encoding. + * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible + * implementation of this library. + * \retval TH_EBADHEADER The data was invalid; this may be returned when + * attempting to read an aborted pass 1 file that still + * has the placeholder data in place of the summary + * data. 
+ * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_2PASS_IN (26) +/**Sets the current encoding quality. + * This is only valid so long as no bitrate has been specified, either through + * the #th_info struct used to initialize the encoder or through + * #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future + * version). + * If it is set before the headers are emitted, the target quality encoded in + * them will be updated. + * + * \param[in] _buf int: The new target quality, in the range 0...63, + * inclusive. + * \retval 0 Success. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL A target bitrate has already been specified, or the + * quality index was not in the range 0...63. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_QUALITY (28) +/**Sets the current encoding bitrate. + * Once a bitrate is set, the encoder must use a rate-controlled mode for all + * future frames (this restriction may be relaxed in a future version). + * If it is set before the headers are emitted, the target bitrate encoded in + * them will be updated. + * Due to the buffer delay, the exact bitrate of each section of the encode is + * not guaranteed. + * The encoder may have already used more bits than allowed for the frames it + * has encoded, expecting to make them up in future frames, or it may have + * used fewer, holding the excess in reserve. + * The exact transition between the two bitrates is not well-defined by this + * API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS. + * After a number of frames equal to the buffer delay, one may expect further + * output to average at the target bitrate. + * + * \param[in] _buf long: The new target bitrate, in bits per second. + * \retval 0 Success. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL The target bitrate was not positive. 
+ * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_BITRATE (30) + +/*@}*/ + + +/**\name TH_ENCCTL_SET_RATE_FLAGS flags + * \anchor ratectlflags + * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/ +/*@{*/ +/**Drop frames to keep within bitrate buffer constraints. + * This can have a severe impact on quality, but is the only way to ensure that + * bitrate targets are met at low rates during sudden bursts of activity.*/ +#define TH_RATECTL_DROP_FRAMES (0x1) +/**Ignore bitrate buffer overflows. + * If the encoder uses so few bits that the reservoir of available bits + * overflows, ignore the excess. + * The encoder will not try to use these extra bits in future frames. + * At high rates this may cause the result to be undersized, but allows a + * client to play the stream using a finite buffer; it should normally be + * enabled.*/ +#define TH_RATECTL_CAP_OVERFLOW (0x2) +/**Ignore bitrate buffer underflows. + * If the encoder uses so many bits that the reservoir of available bits + * underflows, ignore the deficit. + * The encoder will not try to make up these extra bits in future frames. + * At low rates this may cause the result to be oversized; it should normally + * be disabled.*/ +#define TH_RATECTL_CAP_UNDERFLOW (0x4) +/*@}*/ + + + +/**The quantization parameters used by VP3.*/ +extern const th_quant_info TH_VP31_QUANT_INFO; + +/**The Huffman tables used by VP3.*/ +extern const th_huff_code + TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]; + + + +/**\name Encoder state + The following data structure is opaque, and its contents are not publicly + defined by this API. 
+ Referring to its internals directly is unsupported, and may break without + warning.*/ +/*@{*/ +/**The encoder context.*/ +typedef struct th_enc_ctx th_enc_ctx; +/*@}*/ + + + +/**\defgroup encfuncs Functions for Encoding*/ +/*@{*/ +/**\name Functions for encoding + * You must link to libtheoraenc and libtheoradec + * if you use any of the functions in this section. + * + * The functions are listed in the order they are used in a typical encode. + * The basic steps are: + * - Fill in a #th_info structure with details on the format of the video you + * wish to encode. + * - Allocate a #th_enc_ctx handle with th_encode_alloc(). + * - Perform any additional encoder configuration required with + * th_encode_ctl(). + * - Repeatedly call th_encode_flushheader() to retrieve all the header + * packets. + * - For each uncompressed frame: + * - Submit the uncompressed frame via th_encode_ycbcr_in() + * - Repeatedly call th_encode_packetout() to retrieve any video data packets + * that are ready. + * - Call th_encode_free() to release all encoder memory.*/ +/*@{*/ +/**Allocates an encoder instance. + * \param _info A #th_info struct filled with the desired encoding parameters. + * \return The initialized #th_enc_ctx handle. + * \retval NULL If the encoding parameters were invalid.*/ +extern th_enc_ctx *th_encode_alloc(const th_info *_info); +/**Encoder control function. + * This is used to provide advanced control of the encoding process. + * \param _enc A #th_enc_ctx handle. + * \param _req The control code to process. + * See \ref encctlcodes "the list of available control codes" + * for details. + * \param _buf The parameters for this control code. + * \param _buf_sz The size of the parameter buffer.*/ +extern int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz); +/**Outputs the next header packet. 
+ * This should be called repeatedly after encoder initialization until it + * returns 0 in order to get all of the header packets, in order, before + * encoding actual video data. + * \param _enc A #th_enc_ctx handle. + * \param _comments The metadata to place in the comment header, when it is + * encoded. + * \param _op An ogg_packet structure to fill. + * All of the elements of this structure will be set, + * including a pointer to the header data. + * The memory for the header data is owned by + * libtheoraenc, and may be invalidated when the + * next encoder function is called. + * \return A positive value indicates that a header packet was successfully + * produced. + * \retval 0 No packet was produced, and no more header packets remain. + * \retval TH_EFAULT \a _enc, \a _comments, or \a _op was NULL.*/ +extern int th_encode_flushheader(th_enc_ctx *_enc, + th_comment *_comments,ogg_packet *_op); +/**Submits an uncompressed frame to the encoder. + * \param _enc A #th_enc_ctx handle. + * \param _ycbcr A buffer of Y'CbCr data to encode. + * \retval 0 Success. + * \retval TH_EFAULT \a _enc or \a _ycbcr is NULL. + * \retval TH_EINVAL The buffer size does not match the frame size the encoder + * was initialized with, or encoding has already + * completed.*/ +extern int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr); +/**Retrieves encoded video data packets. + * This should be called repeatedly after each frame is submitted to flush any + * encoded packets, until it returns 0. + * The encoder will not buffer these packets as subsequent frames are + * compressed, so a failure to do so will result in lost video data. + * \note Currently the encoder operates in a one-frame-in, one-packet-out + * manner. + * However, this may be changed in the future. + * \param _enc A #th_enc_ctx handle. + * \param _last Set this flag to a non-zero value if no more uncompressed + * frames will be submitted. + * This ensures that a proper EOS flag is set on the last packet. 
+ * \param _op An ogg_packet structure to fill. + * All of the elements of this structure will be set, including a + * pointer to the video data. + * The memory for the video data is owned by + * libtheoraenc, and may be invalidated when the next + * encoder function is called. + * \return A positive value indicates that a video data packet was successfully + * produced. + * \retval 0 No packet was produced, and no more encoded video data + * remains. + * \retval TH_EFAULT \a _enc or \a _op was NULL.*/ +extern int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op); +/**Frees an allocated encoder instance. + * \param _enc A #th_enc_ctx handle.*/ +extern void th_encode_free(th_enc_ctx *_enc); +/*@}*/ +/*@}*/ + + + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/apiwrapper.c b/engine/code/libtheora-1.1.1/lib/apiwrapper.c new file mode 100644 index 00000000..dc959b8d --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/apiwrapper.c @@ -0,0 +1,166 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: apiwrapper.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "apiwrapper.h" + + + +const char *theora_version_string(void){ + return th_version_string(); +} + +ogg_uint32_t theora_version_number(void){ + return th_version_number(); +} + +void theora_info_init(theora_info *_ci){ + memset(_ci,0,sizeof(*_ci)); +} + +void theora_info_clear(theora_info *_ci){ + th_api_wrapper *api; + api=(th_api_wrapper *)_ci->codec_setup; + memset(_ci,0,sizeof(*_ci)); + if(api!=NULL){ + if(api->clear!=NULL)(*api->clear)(api); + _ogg_free(api); + } +} + +void theora_clear(theora_state *_th){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th); + } + if(_th->internal_encode!=NULL){ + (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th); + } + if(_th->i!=NULL)theora_info_clear(_th->i); + memset(_th,0,sizeof(*_th)); +} + +int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th, + _req,_buf,_buf_sz); + } + else if(_th->internal_encode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th, + _req,_buf,_buf_sz); + } + else return TH_EINVAL; +} + +ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + return (*((oc_state_dispatch_vtable 
*)_th->internal_decode)->granule_frame)( + _th,_gp); + } + else if(_th->internal_encode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)( + _th,_gp); + } + else return -1; +} + +double theora_granule_time(theora_state *_th, ogg_int64_t _gp){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)( + _th,_gp); + } + else if(_th->internal_encode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)( + _th,_gp); + } + else return -1; +} + +void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){ + _info->version_major=_ci->version_major; + _info->version_minor=_ci->version_minor; + _info->version_subminor=_ci->version_subminor; + _info->frame_width=_ci->width; + _info->frame_height=_ci->height; + _info->pic_width=_ci->frame_width; + _info->pic_height=_ci->frame_height; + _info->pic_x=_ci->offset_x; + _info->pic_y=_ci->offset_y; + _info->fps_numerator=_ci->fps_numerator; + _info->fps_denominator=_ci->fps_denominator; + _info->aspect_numerator=_ci->aspect_numerator; + _info->aspect_denominator=_ci->aspect_denominator; + switch(_ci->colorspace){ + case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break; + case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break; + default:_info->colorspace=TH_CS_UNSPECIFIED;break; + } + switch(_ci->pixelformat){ + case OC_PF_420:_info->pixel_fmt=TH_PF_420;break; + case OC_PF_422:_info->pixel_fmt=TH_PF_422;break; + case OC_PF_444:_info->pixel_fmt=TH_PF_444;break; + default:_info->pixel_fmt=TH_PF_RSVD; + } + _info->target_bitrate=_ci->target_bitrate; + _info->quality=_ci->quality; + _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0? 
+ OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0; +} + +int theora_packet_isheader(ogg_packet *_op){ + return th_packet_isheader(_op); +} + +int theora_packet_iskeyframe(ogg_packet *_op){ + return th_packet_iskeyframe(_op); +} + +int theora_granule_shift(theora_info *_ci){ + /*This breaks when keyframe_frequency_force is not positive or is larger than + 2**31 (if your int is more than 32 bits), but that's what the original + function does.*/ + return oc_ilog(_ci->keyframe_frequency_force-1); +} + +void theora_comment_init(theora_comment *_tc){ + th_comment_init((th_comment *)_tc); +} + +char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){ + return th_comment_query((th_comment *)_tc,_tag,_count); +} + +int theora_comment_query_count(theora_comment *_tc,char *_tag){ + return th_comment_query_count((th_comment *)_tc,_tag); +} + +void theora_comment_clear(theora_comment *_tc){ + th_comment_clear((th_comment *)_tc); +} + +void theora_comment_add(theora_comment *_tc,char *_comment){ + th_comment_add((th_comment *)_tc,_comment); +} + +void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){ + th_comment_add_tag((th_comment *)_tc,_tag,_value); +} diff --git a/engine/code/libtheora-1.1.1/lib/apiwrapper.h b/engine/code/libtheora-1.1.1/lib/apiwrapper.h new file mode 100644 index 00000000..93454d7b --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/apiwrapper.h @@ -0,0 +1,54 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $ + + ********************************************************************/ + +#if !defined(_apiwrapper_H) +# define _apiwrapper_H (1) +# include +# include +# include "theora/theoradec.h" +# include "theora/theoraenc.h" +# include "internal.h" + +typedef struct th_api_wrapper th_api_wrapper; +typedef struct th_api_info th_api_info; + +/*Provide an entry point for the codec setup to clear itself in case we ever + want to break pieces off into a common base library shared by encoder and + decoder. + In addition, this makes several other pieces of the API wrapper cleaner.*/ +typedef void (*oc_setup_clear_func)(void *_ts); + +/*Generally only one of these pointers will be non-NULL in any given instance. + Technically we do not even really need this struct, since we should be able + to figure out which one from "context", but doing it this way makes sure we + don't flub it up.*/ +struct th_api_wrapper{ + oc_setup_clear_func clear; + th_setup_info *setup; + th_dec_ctx *decode; + th_enc_ctx *encode; +}; + +struct th_api_info{ + th_api_wrapper api; + theora_info info; +}; + + +void oc_theora_info2th_info(th_info *_info,const theora_info *_ci); + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/bitpack.c b/engine/code/libtheora-1.1.1/lib/bitpack.c new file mode 100644 index 00000000..8195003b --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/bitpack.c @@ -0,0 +1,111 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. 
PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: packing variable sized words into an octet stream + last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include <string.h> +#include <stdlib.h> +#include "bitpack.h" + +/*We're 'MSb' endian; if we write a word but read individual bits, + then we'll read the MSb first.*/ + +void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){ + memset(_b,0,sizeof(*_b)); + _b->ptr=_buf; + _b->stop=_buf+_bytes; +} + +static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ + const unsigned char *ptr; + const unsigned char *stop; + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + ptr=_b->ptr; + stop=_b->stop; + while(available<=OC_PB_WINDOW_SIZE-8&&ptr<stop){ + available+=8; + window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available; + } + _b->ptr=ptr; + if(_bits>available){ + if(ptr>=stop){ + _b->eof=1; + available=OC_LOTS_OF_BITS; + } + else window|=*ptr>>(available&7); + } + _b->bits=available; + return window; +} + +int oc_pack_look1(oc_pack_buf *_b){ + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + if(available<1)_b->window=window=oc_pack_refill(_b,1); + return window>>OC_PB_WINDOW_SIZE-1; +} + +void oc_pack_adv1(oc_pack_buf *_b){ + _b->window<<=1; + _b->bits--; +} + +/*Here we assume that 0<=_bits&&_bits<=32.*/ +long oc_pack_read(oc_pack_buf *_b,int _bits){ + oc_pb_window window; + int available; + long result; + window=_b->window; + available=_b->bits; + if(_bits==0)return 0; + if(available<_bits){ + window=oc_pack_refill(_b,_bits); + available=_b->bits; + } + result=window>>OC_PB_WINDOW_SIZE-_bits; + available-=_bits; + window<<=1; + window<<=_bits-1; + _b->bits=available; + _b->window=window; + return result; +} + +int oc_pack_read1(oc_pack_buf 
*_b){ + oc_pb_window window; + int available; + int result; + window=_b->window; + available=_b->bits; + if(available<1){ + window=oc_pack_refill(_b,1); + available=_b->bits; + } + result=window>>OC_PB_WINDOW_SIZE-1; + available--; + window<<=1; + _b->bits=available; + _b->window=window; + return result; +} + +long oc_pack_bytes_left(oc_pack_buf *_b){ + if(_b->eof)return -1; + return _b->stop-_b->ptr+(_b->bits>>3); +} diff --git a/engine/code/libtheora-1.1.1/lib/bitpack.h b/engine/code/libtheora-1.1.1/lib/bitpack.h new file mode 100644 index 00000000..a020a292 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/bitpack.h @@ -0,0 +1,59 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: packing variable sized words into an octet stream + last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $ + + ********************************************************************/ +#if !defined(_bitpack_H) +# define _bitpack_H (1) +# include <limits.h> + + +typedef unsigned long oc_pb_window; +typedef struct oc_pack_buf oc_pack_buf; + + + +# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT) +/*This is meant to be a large, positive constant that can still be efficiently + loaded as an immediate (on platforms like ARM, for example). 
+ Even relatively modest values like 100 would work fine.*/ +# define OC_LOTS_OF_BITS (0x40000000) + + + +struct oc_pack_buf{ + oc_pb_window window; + const unsigned char *ptr; + const unsigned char *stop; + int bits; + int eof; +}; + +void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes); +int oc_pack_look1(oc_pack_buf *_b); +void oc_pack_adv1(oc_pack_buf *_b); +/*Here we assume 0<=_bits&&_bits<=32.*/ +long oc_pack_read(oc_pack_buf *_b,int _bits); +int oc_pack_read1(oc_pack_buf *_b); +/* returns -1 for read beyond EOF, or the number of whole bytes available */ +long oc_pack_bytes_left(oc_pack_buf *_b); + +/*These two functions are implemented locally in huffdec.c*/ +/*Read in bits without advancing the bitptr. + Here we assume 0<=_bits&&_bits<=32.*/ +/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/ +/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/ + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/dct.h b/engine/code/libtheora-1.1.1/lib/dct.h new file mode 100644 index 00000000..24ba6f11 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/dct.h @@ -0,0 +1,31 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*Definitions shared by the forward and inverse DCT transforms.*/ +#if !defined(_dct_H) +# define _dct_H (1) + +/*cos(n*pi/16) (resp. 
sin(m*pi/16)) scaled by 65536.*/ +#define OC_C1S7 ((ogg_int32_t)64277) +#define OC_C2S6 ((ogg_int32_t)60547) +#define OC_C3S5 ((ogg_int32_t)54491) +#define OC_C4S4 ((ogg_int32_t)46341) +#define OC_C5S3 ((ogg_int32_t)36410) +#define OC_C6S2 ((ogg_int32_t)25080) +#define OC_C7S1 ((ogg_int32_t)12785) + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/decapiwrapper.c b/engine/code/libtheora-1.1.1/lib/decapiwrapper.c new file mode 100644 index 00000000..12ea475d --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/decapiwrapper.c @@ -0,0 +1,193 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $ + + ********************************************************************/ + +#include +#include +#include +#include "apiwrapper.h" +#include "decint.h" +#include "theora/theoradec.h" + +static void th_dec_api_clear(th_api_wrapper *_api){ + if(_api->setup)th_setup_free(_api->setup); + if(_api->decode)th_decode_free(_api->decode); + memset(_api,0,sizeof(*_api)); +} + +static void theora_decode_clear(theora_state *_td){ + if(_td->i!=NULL)theora_info_clear(_td->i); + memset(_td,0,sizeof(*_td)); +} + +static int theora_decode_control(theora_state *_td,int _req, + void *_buf,size_t _buf_sz){ + return th_decode_ctl(((th_api_wrapper *)_td->i->codec_setup)->decode, + _req,_buf,_buf_sz); +} + +static ogg_int64_t theora_decode_granule_frame(theora_state *_td, + ogg_int64_t _gp){ + return 
th_granule_frame(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp); +} + +static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){ + return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp); +} + +static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={ + (oc_state_clear_func)theora_decode_clear, + (oc_state_control_func)theora_decode_control, + (oc_state_granule_frame_func)theora_decode_granule_frame, + (oc_state_granule_time_func)theora_decode_granule_time, +}; + +static void th_info2theora_info(theora_info *_ci,const th_info *_info){ + _ci->version_major=_info->version_major; + _ci->version_minor=_info->version_minor; + _ci->version_subminor=_info->version_subminor; + _ci->width=_info->frame_width; + _ci->height=_info->frame_height; + _ci->frame_width=_info->pic_width; + _ci->frame_height=_info->pic_height; + _ci->offset_x=_info->pic_x; + _ci->offset_y=_info->pic_y; + _ci->fps_numerator=_info->fps_numerator; + _ci->fps_denominator=_info->fps_denominator; + _ci->aspect_numerator=_info->aspect_numerator; + _ci->aspect_denominator=_info->aspect_denominator; + switch(_info->colorspace){ + case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break; + case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break; + default:_ci->colorspace=OC_CS_UNSPECIFIED;break; + } + switch(_info->pixel_fmt){ + case TH_PF_420:_ci->pixelformat=OC_PF_420;break; + case TH_PF_422:_ci->pixelformat=OC_PF_422;break; + case TH_PF_444:_ci->pixelformat=OC_PF_444;break; + default:_ci->pixelformat=OC_PF_RSVD; + } + _ci->target_bitrate=_info->target_bitrate; + _ci->quality=_info->quality; + _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift; +} + +int theora_decode_init(theora_state *_td,theora_info *_ci){ + th_api_info *apiinfo; + th_api_wrapper *api; + th_info info; + api=(th_api_wrapper *)_ci->codec_setup; + /*Allocate our own combined API wrapper/theora_info struct. 
+ We put them both in one malloc'd block so that when the API wrapper is + freed, the info struct goes with it. + This avoids having to figure out whether or not we need to free the info + struct in either theora_info_clear() or theora_clear().*/ + apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo)); + if(apiinfo==NULL)return OC_FAULT; + /*Make our own copy of the info struct, since its lifetime should be + independent of the one we were passed in.*/ + *&apiinfo->info=*_ci; + /*Convert the info struct now instead of saving the one we decoded with + theora_decode_header(), since the user might have modified values (i.e., + color space, aspect ratio, etc. can be specified from a higher level). + The user also might be doing something "clever" with the header packets if + they are not using an Ogg encapsulation.*/ + oc_theora_info2th_info(&info,_ci); + /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy + of the stuff it needs.*/ + apiinfo->api.decode=th_decode_alloc(&info,api->setup); + if(apiinfo->api.decode==NULL){ + _ogg_free(apiinfo); + return OC_EINVAL; + } + apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear; + _td->internal_encode=NULL; + /*Provide entry points for ABI compatibility with old decoder shared libs.*/ + _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL; + _td->granulepos=0; + _td->i=&apiinfo->info; + _td->i->codec_setup=&apiinfo->api; + return 0; +} + +int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){ + th_api_wrapper *api; + th_info info; + int ret; + api=(th_api_wrapper *)_ci->codec_setup; + /*Allocate an API wrapper struct on demand, since it will not also include a + theora_info struct like the ones that are used in a theora_state struct.*/ + if(api==NULL){ + _ci->codec_setup=_ogg_calloc(1,sizeof(*api)); + if(_ci->codec_setup==NULL)return OC_FAULT; + api=(th_api_wrapper *)_ci->codec_setup; + api->clear=(oc_setup_clear_func)th_dec_api_clear; + } + /*Convert from the theora_info 
struct instead of saving our own th_info + struct between calls. + The user might be doing something "clever" with the header packets if they + are not using an Ogg encapsulation, and we don't want to break this.*/ + oc_theora_info2th_info(&info,_ci); + /*We rely on the fact that theora_comment and th_comment structures are + actually identical. + Take care not to change this fact unless you change the code here as + well!*/ + ret=th_decode_headerin(&info,(th_comment *)_cc,&api->setup,_op); + /*We also rely on the fact that the error return code values are the same, + and that the implementations of these two functions return the same set of + them. + Note that theora_decode_header() really can return OC_NOTFORMAT, even + though it is not currently documented to do so.*/ + if(ret<0)return ret; + th_info2theora_info(_ci,&info); + return 0; +} + +int theora_decode_packetin(theora_state *_td,ogg_packet *_op){ + th_api_wrapper *api; + ogg_int64_t gp; + int ret; + if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; + api=(th_api_wrapper *)_td->i->codec_setup; + ret=th_decode_packetin(api->decode,_op,&gp); + if(ret<0)return OC_BADPACKET; + _td->granulepos=gp; + return 0; +} + +int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){ + th_api_wrapper *api; + th_dec_ctx *decode; + th_ycbcr_buffer buf; + int ret; + if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; + api=(th_api_wrapper *)_td->i->codec_setup; + decode=(th_dec_ctx *)api->decode; + if(!decode)return OC_FAULT; + ret=th_decode_ycbcr_out(decode,buf); + if(ret>=0){ + _yuv->y_width=buf[0].width; + _yuv->y_height=buf[0].height; + _yuv->y_stride=buf[0].stride; + _yuv->uv_width=buf[1].width; + _yuv->uv_height=buf[1].height; + _yuv->uv_stride=buf[1].stride; + _yuv->y=buf[0].data; + _yuv->u=buf[1].data; + _yuv->v=buf[2].data; + } + return ret; +} diff --git a/engine/code/libtheora-1.1.1/lib/decinfo.c b/engine/code/libtheora-1.1.1/lib/decinfo.c new file mode 100644 index 00000000..845eb136 --- /dev/null 
+++ b/engine/code/libtheora-1.1.1/lib/decinfo.c @@ -0,0 +1,253 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include "decint.h" + + + +/*Unpacks a series of octets from the pack buffer into a given byte array. + No checking is done to ensure the buffer contains enough data. + _opb: The pack buffer to read the octets from. + _buf: The byte array to store the unpacked bytes in. + _len: The number of octets to unpack.*/ +static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){ + while(_len-->0){ + long val; + val=oc_pack_read(_opb,8); + *_buf++=(char)val; + } +} + +/*Unpacks a 32-bit integer encoded by octets in little-endian form.*/ +static long oc_unpack_length(oc_pack_buf *_opb){ + long ret[4]; + int i; + for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8); + return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24; +} + +static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){ + long val; + /*Check the codec bitstream version.*/ + val=oc_pack_read(_opb,8); + _info->version_major=(unsigned char)val; + val=oc_pack_read(_opb,8); + _info->version_minor=(unsigned char)val; + val=oc_pack_read(_opb,8); + _info->version_subminor=(unsigned char)val; + /*verify we can parse this bitstream version. 
+ We accept earlier minors and all subminors, by spec*/ + if(_info->version_major>TH_VERSION_MAJOR|| + _info->version_major==TH_VERSION_MAJOR&& + _info->version_minor>TH_VERSION_MINOR){ + return TH_EVERSION; + } + /*Read the encoded frame description.*/ + val=oc_pack_read(_opb,16); + _info->frame_width=(ogg_uint32_t)val<<4; + val=oc_pack_read(_opb,16); + _info->frame_height=(ogg_uint32_t)val<<4; + val=oc_pack_read(_opb,24); + _info->pic_width=(ogg_uint32_t)val; + val=oc_pack_read(_opb,24); + _info->pic_height=(ogg_uint32_t)val; + val=oc_pack_read(_opb,8); + _info->pic_x=(ogg_uint32_t)val; + val=oc_pack_read(_opb,8); + _info->pic_y=(ogg_uint32_t)val; + val=oc_pack_read(_opb,32); + _info->fps_numerator=(ogg_uint32_t)val; + val=oc_pack_read(_opb,32); + _info->fps_denominator=(ogg_uint32_t)val; + if(_info->frame_width==0||_info->frame_height==0|| + _info->pic_width+_info->pic_x>_info->frame_width|| + _info->pic_height+_info->pic_y>_info->frame_height|| + _info->fps_numerator==0||_info->fps_denominator==0){ + return TH_EBADHEADER; + } + /*Note: The sense of pic_y is inverted in what we pass back to the + application compared to how it is stored in the bitstream. 
+ This is because the bitstream uses a right-handed coordinate system, while + applications expect a left-handed one.*/ + _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y; + val=oc_pack_read(_opb,24); + _info->aspect_numerator=(ogg_uint32_t)val; + val=oc_pack_read(_opb,24); + _info->aspect_denominator=(ogg_uint32_t)val; + val=oc_pack_read(_opb,8); + _info->colorspace=(th_colorspace)val; + val=oc_pack_read(_opb,24); + _info->target_bitrate=(int)val; + val=oc_pack_read(_opb,6); + _info->quality=(int)val; + val=oc_pack_read(_opb,5); + _info->keyframe_granule_shift=(int)val; + val=oc_pack_read(_opb,2); + _info->pixel_fmt=(th_pixel_fmt)val; + if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER; + val=oc_pack_read(_opb,3); + if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + return 0; +} + +static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){ + long len; + int i; + /*Read the vendor string.*/ + len=oc_unpack_length(_opb); + if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER; + _tc->vendor=_ogg_malloc((size_t)len+1); + if(_tc->vendor==NULL)return TH_EFAULT; + oc_unpack_octets(_opb,_tc->vendor,len); + _tc->vendor[len]='\0'; + /*Read the user comments.*/ + _tc->comments=(int)oc_unpack_length(_opb); + len=_tc->comments; + if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){ + _tc->comments=0; + return TH_EBADHEADER; + } + _tc->comment_lengths=(int *)_ogg_malloc( + _tc->comments*sizeof(_tc->comment_lengths[0])); + _tc->user_comments=(char **)_ogg_malloc( + _tc->comments*sizeof(_tc->user_comments[0])); + /*An allocation failed: stop here rather than write through NULL below. + Zero the count so that cleanup does not walk a missing table.*/ + if(_tc->comments>0&& + (_tc->comment_lengths==NULL||_tc->user_comments==NULL)){ + _tc->comments=0; + return TH_EFAULT; + } + for(i=0;i<_tc->comments;i++){ + len=oc_unpack_length(_opb); + if(len<0||len>oc_pack_bytes_left(_opb)){ + _tc->comments=i; + return TH_EBADHEADER; + } + _tc->comment_lengths[i]=len; + _tc->user_comments[i]=_ogg_malloc((size_t)len+1); + if(_tc->user_comments[i]==NULL){ + _tc->comments=i; + return TH_EFAULT; + } + oc_unpack_octets(_opb,_tc->user_comments[i],len); + _tc->user_comments[i][len]='\0'; + } + return 
oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0; +} + +static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){ + int ret; + /*Read the quantizer tables.*/ + ret=oc_quant_params_unpack(_opb,&_setup->qinfo); + if(ret<0)return ret; + /*Read the Huffman trees.*/ + return oc_huff_trees_unpack(_opb,_setup->huff_tables); +} + +static void oc_setup_clear(th_setup_info *_setup){ + oc_quant_params_clear(&_setup->qinfo); + oc_huff_trees_clear(_setup->huff_tables); +} + +static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info, + th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){ + char buffer[6]; + long val; + int packtype; + int ret; + val=oc_pack_read(_opb,8); + packtype=(int)val; + /*If we're at a data packet and we have received all three headers, we're + done.*/ + if(!(packtype&0x80)&&_info->frame_width>0&&_tc->vendor!=NULL&&*_setup!=NULL){ + return 0; + } + /*Check the codec string.*/ + oc_unpack_octets(_opb,buffer,6); + if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT; + switch(packtype){ + /*Codec info header.*/ + case 0x80:{ + /*This should be the first packet, and we should not already be + initialized.*/ + if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER; + ret=oc_info_unpack(_opb,_info); + if(ret<0)th_info_clear(_info); + else ret=3; + }break; + /*Comment header.*/ + case 0x81:{ + if(_tc==NULL)return TH_EFAULT; + /*We shoud have already decoded the info header, and should not yet have + decoded the comment header.*/ + if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER; + ret=oc_comment_unpack(_opb,_tc); + if(ret<0)th_comment_clear(_tc); + else ret=2; + }break; + /*Codec setup header.*/ + case 0x82:{ + oc_setup_info *setup; + if(_tc==NULL||_setup==NULL)return TH_EFAULT; + /*We should have already decoded the info header and the comment header, + and should not yet have decoded the setup header.*/ + if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){ + return TH_EBADHEADER; + } + setup=(oc_setup_info 
*)_ogg_calloc(1,sizeof(*setup)); + if(setup==NULL)return TH_EFAULT; + ret=oc_setup_unpack(_opb,setup); + if(ret<0){ + oc_setup_clear(setup); + _ogg_free(setup); + } + else{ + *_setup=setup; + ret=1; + } + }break; + default:{ + /*We don't know what this header is.*/ + return TH_EBADHEADER; + }break; + } + return ret; +} + + +/*Decodes one header packet. + This should be called repeatedly with the packets at the beginning of the + stream until it returns 0.*/ +int th_decode_headerin(th_info *_info,th_comment *_tc, + th_setup_info **_setup,ogg_packet *_op){ + oc_pack_buf opb; + if(_op==NULL)return TH_EBADHEADER; + if(_info==NULL)return TH_EFAULT; + oc_pack_readinit(&opb,_op->packet,_op->bytes); + return oc_dec_headerin(&opb,_info,_tc,_setup,_op); +} + +void th_setup_free(th_setup_info *_setup){ + if(_setup!=NULL){ + oc_setup_clear(_setup); + _ogg_free(_setup); + } +} diff --git a/engine/code/libtheora-1.1.1/lib/decint.h b/engine/code/libtheora-1.1.1/lib/decint.h new file mode 100644 index 00000000..261b6763 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/decint.h @@ -0,0 +1,107 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#if !defined(_decint_H) +# define _decint_H (1) +# include "theora/theoradec.h" +# include "internal.h" +# include "bitpack.h" + +typedef struct th_setup_info oc_setup_info; +typedef struct th_dec_ctx oc_dec_ctx; + +# include "huffdec.h" +# include "dequant.h" + +/*Constants for the packet-in state machine specific to the decoder.*/ + +/*Next packet to read: Data packet.*/ +#define OC_PACKET_DATA (0) + + + +struct th_setup_info{ + /*The Huffman codes.*/ + oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; + /*The quantization parameters.*/ + th_quant_info qinfo; +}; + + + +struct th_dec_ctx{ + /*Shared encoder/decoder state.*/ + oc_theora_state state; + /*Whether or not packets are ready to be emitted. 
+ This takes on negative values while there are remaining header packets to + be emitted, reaches 0 when the codec is ready for input, and goes to 1 + when a frame has been processed and a data packet is ready.*/ + int packet_state; + /*Buffer in which to assemble packets.*/ + oc_pack_buf opb; + /*Huffman decode trees.*/ + oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; + /*The index of the first token in each plane for each coefficient.*/ + ptrdiff_t ti0[3][64]; + /*The number of outstanding EOB runs at the start of each coefficient in each + plane.*/ + ptrdiff_t eob_runs[3][64]; + /*The DCT token lists.*/ + unsigned char *dct_tokens; + /*The extra bits associated with DCT tokens.*/ + unsigned char *extra_bits; + /*The number of dct tokens unpacked so far.*/ + int dct_tokens_count; + /*The out-of-loop post-processing level.*/ + int pp_level; + /*The DC scale used for out-of-loop deblocking.*/ + int pp_dc_scale[64]; + /*The sharpen modifier used for out-of-loop deringing.*/ + int pp_sharp_mod[64]; + /*The DC quantization index of each block.*/ + unsigned char *dc_qis; + /*The variance of each block.*/ + int *variances; + /*The storage for the post-processed frame buffer.*/ + unsigned char *pp_frame_data; + /*Whether or not the post-processsed frame buffer has space for chroma.*/ + int pp_frame_state; + /*The buffer used for the post-processed frame. 
+ Note that this is _not_ guaranteed to have the same strides and offsets as + the reference frame buffers.*/ + th_ycbcr_buffer pp_frame_buf; + /*The striped decode callback function.*/ + th_stripe_callback stripe_cb; +# if defined(HAVE_CAIRO) + /*Output metrics for debugging.*/ + int telemetry; + int telemetry_mbmode; + int telemetry_mv; + int telemetry_qi; + int telemetry_bits; + int telemetry_frame_bytes; + int telemetry_coding_bytes; + int telemetry_mode_bytes; + int telemetry_mv_bytes; + int telemetry_qi_bytes; + int telemetry_dc_bytes; + unsigned char *telemetry_frame_data; +# endif +}; + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/decode.c b/engine/code/libtheora-1.1.1/lib/decode.c new file mode 100644 index 00000000..7be66463 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/decode.c @@ -0,0 +1,2943 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $ + + ********************************************************************/ + +#include +#include +#include +#include "decint.h" +#if defined(OC_DUMP_IMAGES) +# include +# include "png.h" +#endif +#if defined(HAVE_CAIRO) +# include +#endif + + +/*No post-processing.*/ +#define OC_PP_LEVEL_DISABLED (0) +/*Keep track of DC qi for each block only.*/ +#define OC_PP_LEVEL_TRACKDCQI (1) +/*Deblock the luma plane.*/ +#define OC_PP_LEVEL_DEBLOCKY (2) +/*Dering the luma plane.*/ +#define OC_PP_LEVEL_DERINGY (3) +/*Stronger luma plane deringing.*/ +#define OC_PP_LEVEL_SDERINGY (4) +/*Deblock the chroma planes.*/ +#define OC_PP_LEVEL_DEBLOCKC (5) +/*Dering the chroma planes.*/ +#define OC_PP_LEVEL_DERINGC (6) +/*Stronger chroma plane deringing.*/ +#define OC_PP_LEVEL_SDERINGC (7) +/*Maximum valid post-processing level.*/ +#define OC_PP_LEVEL_MAX (7) + + + +/*The mode alphabets for the various mode coding schemes. 
+ Scheme 0 uses a custom alphabet, which is not stored in this table.*/ +static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={ + /*Last MV dominates */ + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV, + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV, + OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2, + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV, + OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV, + OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR + }, + /*No MV dominates.*/ + { + OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2, + OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST, + OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + /*Default ordering.*/ + { + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST, + OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + } +}; + + +/*The original DCT tokens are extended and reordered during the construction of + the Huffman tables. + The extension means more bits can be read with fewer calls to the bitpacker + during the Huffman decoding process (at the cost of larger Huffman tables), + and fewer tokens require additional extra bits (reducing the average storage + per decoded token). + The revised ordering reveals essential information in the token value + itself; specifically, whether or not there are additional extra bits to read + and the parameter to which those extra bits are applied. 
+ The token is used to fetch a code word from the OC_DCT_CODE_WORD table below. + The extra bits are added into code word at the bit position inferred from the + token value, giving the final code word from which all required parameters + are derived. + The number of EOBs and the leading zero run length can be extracted directly. + The coefficient magnitude is optionally negated before extraction, according + to a 'flip' bit.*/ + +/*The number of additional extra bits that are decoded with each of the + internal DCT tokens.*/ +static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={ + 12,4,3,3,4,4,5,5,8,8,8,8,3,3,6 +}; + +/*Whether or not an internal token needs any additional extra bits.*/ +#define OC_DCT_TOKEN_NEEDS_MORE(token) \ + (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \ + sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS))) + +/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/ +#define OC_DCT_TOKEN_FAT_EOB (0) + +/*The number of EOBs to use for an end-of-frame token. + Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which + is not yet available everywhere; this should be equivalent.*/ +#define OC_DCT_EOB_FINISH (~(size_t)0>>1) + +/*The location of the (6) run legth bits in the code word. + These are placed at index 0 and given 8 bits (even though 6 would suffice) + because it may be faster to extract the lower byte on some platforms.*/ +#define OC_DCT_CW_RLEN_SHIFT (0) +/*The location of the (12) EOB bits in the code word.*/ +#define OC_DCT_CW_EOB_SHIFT (8) +/*The location of the (1) flip bit in the code word. + This must be right under the magnitude bits.*/ +#define OC_DCT_CW_FLIP_BIT (20) +/*The location of the (11) token magnitude bits in the code word. 
+ These must be last, and rely on a sign-extending right shift.*/ +#define OC_DCT_CW_MAG_SHIFT (21) + +/*Pack the given fields into a code word.*/ +#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \ + ((_eobs)<state,_info,3); + if(ret<0)return ret; + ret=oc_huff_trees_copy(_dec->huff_tables, + (const oc_huff_node *const *)_setup->huff_tables); + if(ret<0){ + oc_state_clear(&_dec->state); + return ret; + } + /*For each fragment, allocate one byte for every DCT coefficient token, plus + one byte for extra-bits for each token, plus one more byte for the long + EOB run, just in case it's the very last token and has a run length of + one.*/ + _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)* + _dec->state.nfrags*sizeof(_dec->dct_tokens[0])); + if(_dec->dct_tokens==NULL){ + oc_huff_trees_clear(_dec->huff_tables); + oc_state_clear(&_dec->state); + return TH_EFAULT; + } + for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){ + _dec->state.dequant_tables[qi][pli][qti]= + _dec->state.dequant_table_data[qi][pli][qti]; + } + oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale, + &_setup->qinfo); + for(qi=0;qi<64;qi++){ + int qsum; + qsum=0; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + qsum+=_dec->state.dequant_tables[qti][pli][qi][12]+ + _dec->state.dequant_tables[qti][pli][qi][17]+ + _dec->state.dequant_tables[qti][pli][qi][18]+ + _dec->state.dequant_tables[qti][pli][qi][24]<<(pli==0); + } + _dec->pp_sharp_mod[qi]=-(qsum>>11); + } + memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits, + sizeof(_dec->state.loop_filter_limits)); + _dec->pp_level=OC_PP_LEVEL_DISABLED; + _dec->dc_qis=NULL; + _dec->variances=NULL; + _dec->pp_frame_data=NULL; + _dec->stripe_cb.ctx=NULL; + _dec->stripe_cb.stripe_decoded=NULL; +#if defined(HAVE_CAIRO) + _dec->telemetry=0; + _dec->telemetry_bits=0; + _dec->telemetry_qi=0; + _dec->telemetry_mbmode=0; + _dec->telemetry_mv=0; + _dec->telemetry_frame_data=NULL; +#endif + return 0; +} + +static void 
oc_dec_clear(oc_dec_ctx *_dec){ +#if defined(HAVE_CAIRO) + _ogg_free(_dec->telemetry_frame_data); +#endif + _ogg_free(_dec->pp_frame_data); + _ogg_free(_dec->variances); + _ogg_free(_dec->dc_qis); + _ogg_free(_dec->dct_tokens); + oc_huff_trees_clear(_dec->huff_tables); + oc_state_clear(&_dec->state); +} + + +static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){ + long val; + /*Check to make sure this is a data packet.*/ + val=oc_pack_read1(&_dec->opb); + if(val!=0)return TH_EBADPACKET; + /*Read in the frame type (I or P).*/ + val=oc_pack_read1(&_dec->opb); + _dec->state.frame_type=(int)val; + /*Read in the qi list.*/ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[0]=(unsigned char)val; + val=oc_pack_read1(&_dec->opb); + if(!val)_dec->state.nqis=1; + else{ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[1]=(unsigned char)val; + val=oc_pack_read1(&_dec->opb); + if(!val)_dec->state.nqis=2; + else{ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[2]=(unsigned char)val; + _dec->state.nqis=3; + } + } + if(_dec->state.frame_type==OC_INTRA_FRAME){ + /*Keyframes have 3 unused configuration bits, holdovers from VP3 days. + Most of the other unused bits in the VP3 headers were eliminated. + I don't know why these remain.*/ + /*I wanted to eliminate wasted bits, but not all config wiggle room + --Monty.*/ + val=oc_pack_read(&_dec->opb,3); + if(val!=0)return TH_EIMPL; + } + return 0; +} + +/*Mark all fragments as coded and in OC_MODE_INTRA. + This also builds up the coded fragment list (in coded order), and clears the + uncoded fragment list. 
+ It does not update the coded macro block list nor the super block flags, as + those are not used when decoding INTRA frames.*/ +static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){ + const oc_sb_map *sb_maps; + const oc_sb_flags *sb_flags; + oc_fragment *frags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t prev_ncoded_fragis; + unsigned nsbs; + unsigned sbi; + int pli; + coded_fragis=_dec->state.coded_fragis; + prev_ncoded_fragis=ncoded_fragis=0; + sb_maps=(const oc_sb_map *)_dec->state.sb_maps; + sb_flags=_dec->state.sb_flags; + frags=_dec->state.frags; + sbi=nsbs=0; + for(pli=0;pli<3;pli++){ + nsbs+=_dec->state.fplanes[pli].nsbs; + for(;sbi=0){ + frags[fragi].coded=1; + frags[fragi].mb_mode=OC_MODE_INTRA; + coded_fragis[ncoded_fragis++]=fragi; + } + } + } + } + _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; + prev_ncoded_fragis=ncoded_fragis; + } + _dec->state.ntotal_coded_fragis=ncoded_fragis; +} + +/*Decodes the bit flags indicating whether each super block is partially coded + or not. + Return: The number of partially coded super blocks.*/ +static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ + oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned npartial; + unsigned run_count; + long val; + int flag; + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + sb_flags=_dec->state.sb_flags; + nsbs=_dec->state.nsbs; + sbi=npartial=0; + while(sbiopb); + full_run=run_count>=4129; + do{ + sb_flags[sbi].coded_partially=flag; + sb_flags[sbi].coded_fully=0; + npartial+=flag; + sbi++; + } + while(--run_count>0&&sbiopb); + flag=(int)val; + } + else flag=!flag; + } + /*TODO: run_count should be 0 here. + If it's not, we should issue a warning of some kind.*/ + return npartial; +} + +/*Decodes the bit flags for whether or not each non-partially-coded super + block is fully coded or not. + This function should only be called if there is at least one + non-partially-coded super block. 
+ Return: The number of partially coded super blocks.*/ +static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){ + oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned run_count; + long val; + int flag; + sb_flags=_dec->state.sb_flags; + nsbs=_dec->state.nsbs; + /*Skip partially coded super blocks.*/ + for(sbi=0;sb_flags[sbi].coded_partially;sbi++); + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + do{ + int full_run; + run_count=oc_sb_run_unpack(&_dec->opb); + full_run=run_count>=4129; + for(;sbiopb); + flag=(int)val; + } + else flag=!flag; + } + while(sbistate.nsbs)oc_dec_coded_sb_flags_unpack(_dec); + if(npartial>0){ + val=oc_pack_read1(&_dec->opb); + flag=!(int)val; + } + else flag=0; + sb_maps=(const oc_sb_map *)_dec->state.sb_maps; + sb_flags=_dec->state.sb_flags; + frags=_dec->state.frags; + sbi=nsbs=run_count=0; + coded_fragis=_dec->state.coded_fragis; + uncoded_fragis=coded_fragis+_dec->state.nfrags; + prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0; + for(pli=0;pli<3;pli++){ + nsbs+=_dec->state.fplanes[pli].nsbs; + for(;sbi=0){ + int coded; + if(sb_flags[sbi].coded_fully)coded=1; + else if(!sb_flags[sbi].coded_partially)coded=0; + else{ + if(run_count<=0){ + run_count=oc_block_run_unpack(&_dec->opb); + flag=!flag; + } + run_count--; + coded=flag; + } + if(coded)coded_fragis[ncoded_fragis++]=fragi; + else *(uncoded_fragis-++nuncoded_fragis)=fragi; + frags[fragi].coded=coded; + } + } + } + } + _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; + prev_ncoded_fragis=ncoded_fragis; + } + _dec->state.ntotal_coded_fragis=ncoded_fragis; + /*TODO: run_count should be 0 here. 
+ If it's not, we should issue a warning of some kind.*/ +} + + + +typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb); + +static int oc_vlc_mode_unpack(oc_pack_buf *_opb){ + long val; + int i; + for(i=0;i<7;i++){ + val=oc_pack_read1(_opb); + if(!val)break; + } + return i; +} + +static int oc_clc_mode_unpack(oc_pack_buf *_opb){ + long val; + val=oc_pack_read(_opb,3); + return (int)val; +} + +/*Unpacks the list of macro block modes for INTER frames.*/ +static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){ + const oc_mb_map *mb_maps; + signed char *mb_modes; + const oc_fragment *frags; + const unsigned char *alphabet; + unsigned char scheme0_alphabet[8]; + oc_mode_unpack_func mode_unpack; + size_t nmbs; + size_t mbi; + long val; + int mode_scheme; + val=oc_pack_read(&_dec->opb,3); + mode_scheme=(int)val; + if(mode_scheme==0){ + int mi; + /*Just in case, initialize the modes to something. + If the bitstream doesn't contain each index exactly once, it's likely + corrupt and the rest of the packet is garbage anyway, but this way we + won't crash, and we'll decode SOMETHING.*/ + /*LOOP VECTORIZES*/ + for(mi=0;miopb,3); + scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi]; + } + alphabet=scheme0_alphabet; + } + else alphabet=OC_MODE_ALPHABETS[mode_scheme-1]; + if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack; + else mode_unpack=oc_vlc_mode_unpack; + mb_modes=_dec->state.mb_modes; + mb_maps=(const oc_mb_map *)_dec->state.mb_maps; + nmbs=_dec->state.nmbs; + frags=_dec->state.frags; + for(mbi=0;mbiopb)]; + /*There were none: INTER_NOMV is forced.*/ + else mb_modes[mbi]=OC_MODE_INTER_NOMV; + } + } +} + + + +typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb); + +static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){ + long bits; + int mask; + int mv; + bits=oc_pack_read(_opb,3); + switch(bits){ + case 0:return 0; + case 1:return 1; + case 2:return -1; + case 3: + case 4:{ + mv=(int)(bits-1); + bits=oc_pack_read1(_opb); + }break; + /*case 5: + case 6: + case 7:*/ + default:{ 
+ mv=1<>1); + bits&=1; + }break; + } + mask=-(int)bits; + return mv+mask^mask; +} + +static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){ + long bits; + int mask; + int mv; + bits=oc_pack_read(_opb,6); + mv=(int)bits>>1; + mask=-((int)bits&1); + return mv+mask^mask; +} + +/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro + block modes and motion vectors to the individual fragments.*/ +static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ + const oc_mb_map *mb_maps; + const signed char *mb_modes; + oc_set_chroma_mvs_func set_chroma_mvs; + oc_mv_comp_unpack_func mv_comp_unpack; + oc_fragment *frags; + oc_mv *frag_mvs; + const unsigned char *map_idxs; + int map_nidxs; + oc_mv last_mv[2]; + oc_mv cbmvs[4]; + size_t nmbs; + size_t mbi; + long val; + set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt]; + val=oc_pack_read1(&_dec->opb); + mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack; + map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt]; + map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt]; + memset(last_mv,0,sizeof(last_mv)); + frags=_dec->state.frags; + frag_mvs=_dec->state.frag_mvs; + mb_maps=(const oc_mb_map *)_dec->state.mb_maps; + mb_modes=_dec->state.mb_modes; + nmbs=_dec->state.nmbs; + for(mbi=0;mbi>2][mapi&3]; + if(frags[fragi].coded)coded[ncoded++]=mapi; + } + while(++mapiiopb); + lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi])); + } + else lbmvs[bi][0]=lbmvs[bi][1]=0; + } + if(codedi>0){ + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0])); + } + if(codedi>2][bi]; + frags[fragi].mb_mode=mb_mode; + memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi])); + } + } + }break; + case OC_MODE_INTER_MV:{ + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + mbmv[1]=last_mv[0][1]=(signed 
char)(*mv_comp_unpack)(&_dec->opb); + }break; + case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break; + case OC_MODE_INTER_MV_LAST2:{ + memcpy(mbmv,last_mv[1],sizeof(mbmv)); + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + memcpy(last_mv[0],mbmv,sizeof(last_mv[0])); + }break; + case OC_MODE_GOLDEN_MV:{ + mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + }break; + default:memset(mbmv,0,sizeof(mbmv));break; + } + /*4MV mode fills in the fragments itself. + For all other modes we can use this common code.*/ + if(mb_mode!=OC_MODE_INTER_MV_FOUR){ + for(codedi=0;codedi>2][mapi&3]; + frags[fragi].mb_mode=mb_mode; + memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv)); + } + } + } + } +} + +static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ + oc_fragment *frags; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + ptrdiff_t fragi; + ncoded_fragis=_dec->state.ntotal_coded_fragis; + if(ncoded_fragis<=0)return; + frags=_dec->state.frags; + coded_fragis=_dec->state.coded_fragis; + if(_dec->state.nqis==1){ + /*If this frame has only a single qi value, then just use it for all coded + fragments.*/ + for(fragii=0;fragiiopb); + flag=(int)val; + nqi1=0; + fragii=0; + while(fragiiopb); + full_run=run_count>=4129; + do{ + frags[coded_fragis[fragii++]].qii=flag; + nqi1+=flag; + } + while(--run_count>0&&fragiiopb); + flag=(int)val; + } + else flag=!flag; + } + /*TODO: run_count should be 0 here. 
+ If it's not, we should issue a warning of some kind.*/ + /*If we have 3 different qi's for this frame, and there was at least one + fragment with a non-zero qi, make the second pass.*/ + if(_dec->state.nqis==3&&nqi1>0){ + /*Skip qii==0 fragments.*/ + for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++); + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + do{ + int full_run; + run_count=oc_sb_run_unpack(&_dec->opb); + full_run=run_count>=4129; + for(;fragiiopb); + flag=(int)val; + } + else flag=!flag; + } + while(fragiidct_tokens; + frags=_dec->state.frags; + coded_fragis=_dec->state.coded_fragis; + ncoded_fragis=fragii=eobs=ti=0; + for(pli=0;pli<3;pli++){ + ptrdiff_t run_counts[64]; + ptrdiff_t eob_count; + ptrdiff_t eobi; + int rli; + ncoded_fragis+=_dec->state.ncoded_fragis[pli]; + memset(run_counts,0,sizeof(run_counts)); + _dec->eob_runs[pli][0]=eobs; + _dec->ti0[pli][0]=ti; + /*Continue any previous EOB run, if there was one.*/ + eobi=eobs; + if(ncoded_fragis-fragii0)frags[coded_fragis[fragii++]].dc=0; + while(fragiiopb, + _dec->huff_tables[_huff_idxs[pli+1>>1]]); + dct_tokens[ti++]=(unsigned char)token; + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + eb=(int)oc_pack_read(&_dec->opb, + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); + dct_tokens[ti++]=(unsigned char)eb; + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); + eb<<=OC_DCT_TOKEN_EB_POS(token); + } + else eb=0; + cw=OC_DCT_CODE_WORD[token]+eb; + eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH; + if(eobs){ + eobi=OC_MINI(eobs,ncoded_fragis-fragii); + eob_count+=eobi; + eobs-=eobi; + while(eobi-->0)frags[coded_fragis[fragii++]].dc=0; + } + else{ + int coeff; + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; + if(skip)coeff=0; + run_counts[skip]++; + frags[coded_fragis[fragii++]].dc=coeff; + } + } + /*Add the total EOB count to the longest run length.*/ + run_counts[63]+=eob_count; + /*And convert the run_counts array 
to a moment table.*/ + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; + /*Finally, subtract off the number of coefficients that have been + accounted for by runs started in this coefficient.*/ + for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli]; + } + _dec->dct_tokens_count=ti; + return eobs; +} + +/*Unpacks the AC coefficient tokens. + This can completely discard coefficient values while unpacking, and so is + somewhat simpler than unpacking the DC coefficient tokens. + _huff_idx: The index of the Huffman table to use for each color plane. + _ntoks_left: The number of tokens left to be decoded in each color plane for + each coefficient. + This is updated as EOB tokens and zero run tokens are decoded. + _eobs: The length of any outstanding EOB run from previous + coefficients. + Return: The length of any outstanding EOB run.*/ +static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2], + ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){ + unsigned char *dct_tokens; + ptrdiff_t ti; + int pli; + dct_tokens=_dec->dct_tokens; + ti=_dec->dct_tokens_count; + for(pli=0;pli<3;pli++){ + ptrdiff_t run_counts[64]; + ptrdiff_t eob_count; + size_t ntoks_left; + size_t ntoks; + int rli; + _dec->eob_runs[pli][_zzi]=_eobs; + _dec->ti0[pli][_zzi]=ti; + ntoks_left=_ntoks_left[pli][_zzi]; + memset(run_counts,0,sizeof(run_counts)); + eob_count=0; + ntoks=0; + while(ntoks+_eobsopb, + _dec->huff_tables[_huff_idxs[pli+1>>1]]); + dct_tokens[ti++]=(unsigned char)token; + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + eb=(int)oc_pack_read(&_dec->opb, + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); + dct_tokens[ti++]=(unsigned char)eb; + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); + eb<<=OC_DCT_TOKEN_EB_POS(token); + } + else eb=0; + cw=OC_DCT_CODE_WORD[token]+eb; + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH; + if(_eobs==0){ + run_counts[skip]++; + 
ntoks++; + } + } + /*Add the portion of the last EOB run actually used by this coefficient.*/ + eob_count+=ntoks_left-ntoks; + /*And remove it from the remaining EOB count.*/ + _eobs-=ntoks_left-ntoks; + /*Add the total EOB count to the longest run length.*/ + run_counts[63]+=eob_count; + /*And convert the run_counts array to a moment table.*/ + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; + /*Finally, subtract off the number of coefficients that have been + accounted for by runs started in this coefficient.*/ + for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli]; + } + _dec->dct_tokens_count=ti; + return _eobs; +} + +/*Tokens describing the DCT coefficients that belong to each fragment are + stored in the bitstream grouped by coefficient, not by fragment. + + This means that we either decode all the tokens in order, building up a + separate coefficient list for each fragment as we go, and then go back and + do the iDCT on each fragment, or we have to create separate lists of tokens + for each coefficient, so that we can pull the next token required off the + head of the appropriate list when decoding a specific fragment. + + The former was VP3's choice, and it meant 2*w*h extra storage for all the + decoded coefficient values. + + We take the second option, which lets us store just one to three bytes per + token (generally far fewer than the number of coefficients, due to EOB + tokens and zero runs), and which requires us to only maintain a counter for + each of the 64 coefficients, instead of a counter for every fragment to + determine where the next token goes. + + We actually use 3 counters per coefficient, one for each color plane, so we + can decode all color planes simultaneously. 
+ This lets color conversion, etc., be done as soon as a full MCU (one or + two super block rows) is decoded, while the image data is still in cache.*/ + +static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){ + static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64}; + ptrdiff_t ntoks_left[3][64]; + int huff_idxs[2]; + ptrdiff_t eobs; + long val; + int pli; + int zzi; + int hgi; + for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ + ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli]; + } + val=oc_pack_read(&_dec->opb,4); + huff_idxs[0]=(int)val; + val=oc_pack_read(&_dec->opb,4); + huff_idxs[1]=(int)val; + _dec->eob_runs[0][0]=0; + eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left); +#if defined(HAVE_CAIRO) + _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + val=oc_pack_read(&_dec->opb,4); + huff_idxs[0]=(int)val; + val=oc_pack_read(&_dec->opb,4); + huff_idxs[1]=(int)val; + zzi=1; + for(hgi=1;hgi<5;hgi++){ + huff_idxs[0]+=16; + huff_idxs[1]+=16; + for(;zzipp_level<=OC_PP_LEVEL_DISABLED){ + if(_dec->dc_qis!=NULL){ + _ogg_free(_dec->dc_qis); + _dec->dc_qis=NULL; + _ogg_free(_dec->variances); + _dec->variances=NULL; + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + } + return 1; + } + if(_dec->dc_qis==NULL){ + /*If we haven't been tracking DC quantization indices, there's no point in + starting now.*/ + if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1; + _dec->dc_qis=(unsigned char *)_ogg_malloc( + _dec->state.nfrags*sizeof(_dec->dc_qis[0])); + if(_dec->dc_qis==NULL)return 1; + memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags); + } + else{ + unsigned char *dc_qis; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + unsigned char qi0; + /*Update the DC quantization index of each coded block.*/ + dc_qis=_dec->dc_qis; + coded_fragis=_dec->state.coded_fragis; + ncoded_fragis=_dec->state.ncoded_fragis[0]+ + _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2]; + qi0=(unsigned 
char)_dec->state.qis[0]; + for(fragii=0;fragiipp_level<=OC_PP_LEVEL_TRACKDCQI){ + if(_dec->variances!=NULL){ + _ogg_free(_dec->variances); + _dec->variances=NULL; + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + } + return 1; + } + if(_dec->variances==NULL){ + size_t frame_sz; + size_t c_sz; + int c_w; + int c_h; + frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; + c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); + c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); + c_sz=c_w*(size_t)c_h; + /*Allocate space for the chroma planes, even if we're not going to use + them; this simplifies allocation state management, though it may waste + memory on the few systems that don't overcommit pages.*/ + frame_sz+=c_sz<<1; + _dec->pp_frame_data=(unsigned char *)_ogg_malloc( + frame_sz*sizeof(_dec->pp_frame_data[0])); + _dec->variances=(int *)_ogg_malloc( + _dec->state.nfrags*sizeof(_dec->variances[0])); + if(_dec->variances==NULL||_dec->pp_frame_data==NULL){ + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + _ogg_free(_dec->variances); + _dec->variances=NULL; + return 1; + } + /*Force an update of the PP buffer pointers.*/ + _dec->pp_frame_state=0; + } + /*Update the PP buffer pointers if necessary.*/ + if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){ + if(_dec->pp_levelpp_frame_buf[0].width=_dec->state.info.frame_width; + _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; + _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width; + _dec->pp_frame_buf[0].data=_dec->pp_frame_data+ + (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride; + } + else{ + size_t y_sz; + size_t c_sz; + int c_w; + int c_h; + /*Otherwise, set up pointers to all three PP planes.*/ + y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; + c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); + 
c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); + c_sz=c_w*(size_t)c_h; + _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; + _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; + _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width; + _dec->pp_frame_buf[0].data=_dec->pp_frame_data; + _dec->pp_frame_buf[1].width=c_w; + _dec->pp_frame_buf[1].height=c_h; + _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width; + _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz; + _dec->pp_frame_buf[2].width=c_w; + _dec->pp_frame_buf[2].height=c_h; + _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width; + _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz; + oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf); + } + _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC); + } + /*If we're not processing chroma, copy the reference frame's chroma planes.*/ + if(_dec->pp_levelpp_frame_buf+1, + _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1, + sizeof(_dec->pp_frame_buf[1])*2); + } + return 0; +} + + + +typedef struct{ + int bounding_values[256]; + ptrdiff_t ti[3][64]; + ptrdiff_t eob_runs[3][64]; + const ptrdiff_t *coded_fragis[3]; + const ptrdiff_t *uncoded_fragis[3]; + ptrdiff_t ncoded_fragis[3]; + ptrdiff_t nuncoded_fragis[3]; + const ogg_uint16_t *dequant[3][3][2]; + int fragy0[3]; + int fragy_end[3]; + int pred_last[3][3]; + int mcu_nvfrags; + int loop_filter; + int pp_level; +}oc_dec_pipeline_state; + + + +/*Initialize the main decoding pipeline.*/ +static void oc_dec_pipeline_init(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe){ + const ptrdiff_t *coded_fragis; + const ptrdiff_t *uncoded_fragis; + int pli; + int qii; + int qti; + /*If chroma is sub-sampled in the vertical direction, we have to decode two + super block rows of Y' for each super block row of Cb and Cr.*/ + _pipe->mcu_nvfrags=4<state.info.pixel_fmt&2); + /*Initialize the token and extra bits indices for each plane 
and + coefficient.*/ + memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti)); + /*Also copy over the initial the EOB run counts.*/ + memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs)); + /*Set up per-plane pointers to the coded and uncoded fragments lists.*/ + coded_fragis=_dec->state.coded_fragis; + uncoded_fragis=coded_fragis+_dec->state.nfrags; + for(pli=0;pli<3;pli++){ + ptrdiff_t ncoded_fragis; + _pipe->coded_fragis[pli]=coded_fragis; + _pipe->uncoded_fragis[pli]=uncoded_fragis; + ncoded_fragis=_dec->state.ncoded_fragis[pli]; + coded_fragis+=ncoded_fragis; + uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags; + } + /*Set up condensed quantizer tables.*/ + for(pli=0;pli<3;pli++){ + for(qii=0;qii<_dec->state.nqis;qii++){ + for(qti=0;qti<2;qti++){ + _pipe->dequant[pli][qii][qti]= + _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti]; + } + } + } + /*Set the previous DC predictor to 0 for all color planes and frame types.*/ + memset(_pipe->pred_last,0,sizeof(_pipe->pred_last)); + /*Initialize the bounding value array for the loop filter.*/ + _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state, + _pipe->bounding_values); + /*Initialize any buffers needed for post-processing. + We also save the current post-processing level, to guard against the user + changing it from a callback.*/ + if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level; + /*If we don't have enough information to post-process, disable it, regardless + of the user-requested level.*/ + else{ + _pipe->pp_level=OC_PP_LEVEL_DISABLED; + memcpy(_dec->pp_frame_buf, + _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]], + sizeof(_dec->pp_frame_buf[0])*3); + } +} + +/*Undo the DC prediction in a single plane of an MCU (one or two super block + rows). 
+ As a side effect, the number of coded and uncoded fragments in this plane of + the MCU is also computed.*/ +static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe,int _pli){ + const oc_fragment_plane *fplane; + oc_fragment *frags; + int *pred_last; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragi; + int fragx; + int fragy; + int fragy0; + int fragy_end; + int nhfrags; + /*Compute the first and last fragment row of the current MCU for this + plane.*/ + fplane=_dec->state.fplanes+_pli; + fragy0=_pipe->fragy0[_pli]; + fragy_end=_pipe->fragy_end[_pli]; + nhfrags=fplane->nhfrags; + pred_last=_pipe->pred_last[_pli]; + frags=_dec->state.frags; + ncoded_fragis=0; + fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags; + for(fragy=fragy0;fragy=nhfrags)ur_ref=-1; + else{ + ur_ref=u_frags[fragi+1].coded? + OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; + } + if(frags[fragi].coded){ + int pred; + int ref; + ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); + /*We break out a separate case based on which of our neighbors use + the same reference frames. 
+ This is somewhat faster than trying to make a generic case which + handles all of them, since it reduces lots of poorly predicted + jumps to one switch statement, and also lets a number of the + multiplications be optimized out by strength reduction.*/ + switch((l_ref==ref)|(ul_ref==ref)<<1| + (u_ref==ref)<<2|(ur_ref==ref)<<3){ + default:pred=pred_last[ref];break; + case 1: + case 3:pred=frags[fragi-1].dc;break; + case 2:pred=u_frags[fragi-1].dc;break; + case 4: + case 6: + case 12:pred=u_frags[fragi].dc;break; + case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; + case 8:pred=u_frags[fragi+1].dc;break; + case 9: + case 11: + case 13:{ + pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; + }break; + case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; + case 14:{ + pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) + +10*u_frags[fragi].dc)/16; + }break; + case 7: + case 15:{ + int p0; + int p1; + int p2; + p0=frags[fragi-1].dc; + p1=u_frags[fragi-1].dc; + p2=u_frags[fragi].dc; + pred=(29*(p0+p2)-26*p1)/32; + if(abs(pred-p2)>128)pred=p2; + else if(abs(pred-p0)>128)pred=p0; + else if(abs(pred-p1)>128)pred=p1; + }break; + } + pred_last[ref]=frags[fragi].dc+=pred; + ncoded_fragis++; + l_ref=ref; + } + else l_ref=-1; + ul_ref=u_ref; + u_ref=ur_ref; + } + } + } + _pipe->ncoded_fragis[_pli]=ncoded_fragis; + /*Also save the number of uncoded fragments so we know how many to copy.*/ + _pipe->nuncoded_fragis[_pli]= + (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis; +} + +/*Reconstructs all coded fragments in a single MCU (one or two super block + rows). + This requires that each coded fragment have a proper macro block mode and + motion vector (if not in INTRA mode), and have it's DC value decoded, with + the DC prediction process reversed, and the number of coded and uncoded + fragments in this plane of the MCU be counted. 
+ The token lists for each color plane and coefficient should also be filled + in, along with initial token offsets, extra bits offsets, and EOB run + counts.*/ +static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe,int _pli){ + unsigned char *dct_tokens; + const unsigned char *dct_fzig_zag; + ogg_uint16_t dc_quant[2]; + const oc_fragment *frags; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + ptrdiff_t *ti; + ptrdiff_t *eob_runs; + int qti; + dct_tokens=_dec->dct_tokens; + dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag; + frags=_dec->state.frags; + coded_fragis=_pipe->coded_fragis[_pli]; + ncoded_fragis=_pipe->ncoded_fragis[_pli]; + ti=_pipe->ti[_pli]; + eob_runs=_pipe->eob_runs[_pli]; + for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0]; + for(fragii=0;fragiidequant[_pli][frags[fragi].qii][qti]; + /*Decode the AC coefficients.*/ + for(zzi=0;zzi<64;){ + int token; + last_zzi=zzi; + if(eob_runs[zzi]){ + eob_runs[zzi]--; + break; + } + else{ + ptrdiff_t eob; + int cw; + int rlen; + int coeff; + int lti; + lti=ti[zzi]; + token=dct_tokens[lti++]; + cw=OC_DCT_CODE_WORD[token]; + /*These parts could be done branchless, but the branches are fairly + predictable and the C code translates into more than a few + instructions, so it's worth it to avoid them.*/ + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + cw+=dct_tokens[lti++]<>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(token==OC_DCT_TOKEN_FAT_EOB){ + eob+=dct_tokens[lti++]<<8; + if(eob==0)eob=OC_DCT_EOB_FINISH; + } + rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; + eob_runs[zzi]=eob; + ti[zzi]=lti; + zzi+=rlen; + dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]); + zzi+=!eob; + } + } + /*TODO: zzi should be exactly 64 here. + If it's not, we should report some kind of warning.*/ + zzi=OC_MINI(zzi,64); + dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc; + /*last_zzi is always initialized. 
+ If your compiler thinks otherwise, it is dumb.*/ + oc_state_frag_recon(&_dec->state,fragi,_pli, + dct_coeffs,last_zzi,dc_quant[qti]); + } + _pipe->coded_fragis[_pli]+=ncoded_fragis; + /*Right now the reconstructed MCU has only the coded blocks in it.*/ + /*TODO: We make the decision here to always copy the uncoded blocks into it + from the reference frame. + We could also copy the coded blocks back over the reference frame, if we + wait for an additional MCU to be decoded, which might be faster if only a + small number of blocks are coded. + However, this introduces more latency, creating a larger cache footprint. + It's unknown which decision is better, but this one results in simpler + code, and the hard case (high bitrate, high resolution) is handled + correctly.*/ + /*Copy the uncoded blocks from the previous reference frame.*/ + _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; + oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli], + _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); +} + +/*Filter a horizontal block edge.*/ +static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src,int _src_ystride,int _qstep,int _flimit, + int *_variance0,int *_variance1){ + unsigned char *rdst; + const unsigned char *rsrc; + unsigned char *cdst; + const unsigned char *csrc; + int r[10]; + int sum0; + int sum1; + int bx; + int by; + rdst=_dst; + rsrc=_src; + for(bx=0;bx<8;bx++){ + cdst=rdst; + csrc=rsrc; + for(by=0;by<10;by++){ + r[by]=*csrc; + csrc+=_src_ystride; + } + sum0=sum1=0; + for(by=0;by<4;by++){ + sum0+=abs(r[by+1]-r[by]); + sum1+=abs(r[by+5]-r[by+6]); + } + *_variance0+=OC_MINI(255,sum0); + *_variance1+=OC_MINI(255,sum1); + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ + *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); + cdst+=_dst_ystride; + *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); + cdst+=_dst_ystride; + for(by=0;by<4;by++){ + 
*cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+ + r[by+4]+r[by+5]+r[by+6]+4>>3); + cdst+=_dst_ystride; + } + *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); + cdst+=_dst_ystride; + *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); + } + else{ + for(by=1;by<=8;by++){ + *cdst=(unsigned char)r[by]; + cdst+=_dst_ystride; + } + } + rdst++; + rsrc++; + } +} + +/*Filter a vertical block edge.*/ +static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride, + int _qstep,int _flimit,int *_variances){ + unsigned char *rdst; + const unsigned char *rsrc; + unsigned char *cdst; + int r[10]; + int sum0; + int sum1; + int bx; + int by; + cdst=_dst; + for(by=0;by<8;by++){ + rsrc=cdst-1; + rdst=cdst; + for(bx=0;bx<10;bx++)r[bx]=*rsrc++; + sum0=sum1=0; + for(bx=0;bx<4;bx++){ + sum0+=abs(r[bx+1]-r[bx]); + sum1+=abs(r[bx+5]-r[bx+6]); + } + _variances[0]+=OC_MINI(255,sum0); + _variances[1]+=OC_MINI(255,sum1); + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ + *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); + *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); + for(bx=0;bx<4;bx++){ + *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+ + r[bx+4]+r[bx+5]+r[bx+6]+4>>3); + } + *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); + *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); + } + cdst+=_dst_ystride; + } +} + +static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec, + th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0, + int _fragy_end){ + oc_fragment_plane *fplane; + int *variance; + unsigned char *dc_qi; + unsigned char *dst; + const unsigned char *src; + ptrdiff_t froffset; + int dst_ystride; + int src_ystride; + int nhfrags; + int width; + int notstart; + int notdone; + int flimit; + int qstep; + int y_end; + int y; + int x; + _dst+=_pli; + _src+=_pli; + fplane=_dec->state.fplanes+_pli; + nhfrags=fplane->nhfrags; + froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; + 
variance=_dec->variances+froffset; + dc_qi=_dec->dc_qis+froffset; + notstart=_fragy0>0; + notdone=_fragy_endnvfrags; + /*We want to clear an extra row of variances, except at the end.*/ + memset(variance+(nhfrags&-notstart),0, + (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0]))); + /*Except for the first time, we want to point to the middle of the row.*/ + y=(_fragy0<<3)+(notstart<<2); + dst_ystride=_dst->stride; + src_ystride=_src->stride; + dst=_dst->data+y*(ptrdiff_t)dst_ystride; + src=_src->data+y*(ptrdiff_t)src_ystride; + width=_dst->width; + for(;y<4;y++){ + memcpy(dst,src,width*sizeof(dst[0])); + dst+=dst_ystride; + src+=src_ystride; + } + /*We also want to skip the last row in the frame for this loop.*/ + y_end=_fragy_end-!notdone<<3; + for(;ypp_dc_scale[*dc_qi]; + flimit=(qstep*3)>>2; + oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride, + qstep,flimit,variance,variance+nhfrags); + variance++; + dc_qi++; + for(x=8;xpp_dc_scale[*dc_qi]; + flimit=(qstep*3)>>2; + oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride, + qstep,flimit,variance,variance+nhfrags); + oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride, + qstep,flimit,variance-1); + variance++; + dc_qi++; + } + dst+=dst_ystride<<3; + src+=src_ystride<<3; + } + /*And finally, handle the last row in the frame, if it's in the range.*/ + if(!notdone){ + int height; + height=_dst->height; + for(;ypp_dc_scale[*dc_qi++]; + flimit=(qstep*3)>>2; + oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride, + qstep,flimit,variance++); + } + } +} + +static void oc_dering_block(unsigned char *_idata,int _ystride,int _b, + int _dc_scale,int _sharp_mod,int _strong){ + static const unsigned char OC_MOD_MAX[2]={24,32}; + static const unsigned char OC_MOD_SHIFT[2]={1,0}; + const unsigned char *psrc; + const unsigned char *src; + const unsigned char *nsrc; + unsigned char *dst; + int vmod[72]; + int hmod[72]; + int mod_hi; + int by; + int bx; + 
mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]); + dst=_idata; + src=dst; + psrc=src-(_ystride&-!(_b&4)); + for(by=0;by<9;by++){ + for(bx=0;bx<8;bx++){ + int mod; + mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<>7); + for(bx=1;bx<7;bx++){ + a=128; + b=64; + w=hmod[(bx<<3)+by]; + a-=w; + b+=w*src[bx-1]; + w=vmod[(by<<3)+bx]; + a-=w; + b+=w*psrc[bx]; + w=vmod[(by+1<<3)+bx]; + a-=w; + b+=w*nsrc[bx]; + w=hmod[(bx+1<<3)+by]; + a-=w; + b+=w*src[bx+1]; + dst[bx]=OC_CLAMP255(a*src[bx]+b>>7); + } + a=128; + b=64; + w=hmod[(7<<3)+by]; + a-=w; + b+=w*src[6]; + w=vmod[(by<<3)+7]; + a-=w; + b+=w*psrc[7]; + w=vmod[(by+1<<3)+7]; + a-=w; + b+=w*nsrc[7]; + w=hmod[(8<<3)+by]; + a-=w; + b+=w*src[7+!(_b&2)]; + dst[7]=OC_CLAMP255(a*src[7]+b>>7); + dst+=_ystride; + psrc=src; + src=nsrc; + nsrc+=_ystride&-(!(_b&8)|by<6); + } +} + +#define OC_DERING_THRESH1 (384) +#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) +#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) +#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) + +static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img, + int _pli,int _fragy0,int _fragy_end){ + th_img_plane *iplane; + oc_fragment_plane *fplane; + oc_fragment *frag; + int *variance; + unsigned char *idata; + ptrdiff_t froffset; + int ystride; + int nhfrags; + int sthresh; + int strong; + int y_end; + int width; + int height; + int y; + int x; + iplane=_img+_pli; + fplane=_dec->state.fplanes+_pli; + nhfrags=fplane->nhfrags; + froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; + variance=_dec->variances+froffset; + frag=_dec->state.frags+froffset; + strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY); + sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3; + y=_fragy0<<3; + ystride=iplane->stride; + idata=iplane->data+y*(ptrdiff_t)ystride; + y_end=_fragy_end<<3; + width=iplane->width; + height=iplane->height; + for(;ystate.qis[frag->qii]; + var=*variance; + b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3; + if(strong&&var>sthresh){ + 
oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4|| + !(b&2)&&variance[1]>OC_DERING_THRESH4|| + !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4|| + !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + } + } + else if(var>OC_DERING_THRESH2){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + } + else if(var>OC_DERING_THRESH1){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0); + } + frag++; + variance++; + } + idata+=ystride<<3; + } +} + + + +th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){ + oc_dec_ctx *dec; + if(_info==NULL||_setup==NULL)return NULL; + dec=_ogg_malloc(sizeof(*dec)); + if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){ + _ogg_free(dec); + return NULL; + } + dec->state.curframe_num=0; + return dec; +} + +void th_decode_free(th_dec_ctx *_dec){ + if(_dec!=NULL){ + oc_dec_clear(_dec); + _ogg_free(_dec); + } +} + +int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, + size_t _buf_sz){ + switch(_req){ + case TH_DECCTL_GET_PPLEVEL_MAX:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + (*(int *)_buf)=OC_PP_LEVEL_MAX; + return 0; + }break; + case TH_DECCTL_SET_PPLEVEL:{ + int pp_level; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + pp_level=*(int *)_buf; + if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL; + _dec->pp_level=pp_level; + return 0; + }break; + case TH_DECCTL_SET_GRANPOS:{ + ogg_int64_t granpos; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL; + granpos=*(ogg_int64_t *)_buf; + if(granpos<0)return TH_EINVAL; + _dec->state.granpos=granpos; + 
_dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift) + -_dec->state.granpos_bias; + _dec->state.curframe_num=_dec->state.keyframe_num + +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1); + return 0; + }break; + case TH_DECCTL_SET_STRIPE_CB:{ + th_stripe_callback *cb; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL; + cb=(th_stripe_callback *)_buf; + _dec->stripe_cb.ctx=cb->ctx; + _dec->stripe_cb.stripe_decoded=cb->stripe_decoded; + return 0; + }break; +#ifdef HAVE_CAIRO + case TH_DECCTL_SET_TELEMETRY_MBMODE:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_mbmode=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_MV:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_mv=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_QI:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_qi=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_BITS:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_bits=*(int *)_buf; + return 0; + }break; +#endif + default:return TH_EIMPL; + } +} + +/*We're decoding an INTER frame, but have no initialized reference + buffers (i.e., decoding did not start on a key frame). 
+ We initialize them to a solid gray here.*/ +static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){ + th_info *info; + size_t yplane_sz; + size_t cplane_sz; + int yhstride; + int yheight; + int chstride; + int cheight; + _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0; + _dec->state.ref_frame_idx[OC_FRAME_PREV]=0; + _dec->state.ref_frame_idx[OC_FRAME_SELF]=1; + info=&_dec->state.info; + yhstride=info->frame_width+2*OC_UMV_PADDING; + yheight=info->frame_height+2*OC_UMV_PADDING; + chstride=yhstride>>!(info->pixel_fmt&1); + cheight=yheight>>!(info->pixel_fmt&2); + yplane_sz=yhstride*(size_t)yheight; + cplane_sz=chstride*(size_t)cheight; + memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz); +} + +int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, + ogg_int64_t *_granpos){ + int ret; + if(_dec==NULL||_op==NULL)return TH_EFAULT; + /*A completely empty packet indicates a dropped frame and is treated exactly + like an inter frame with no coded blocks. + Only proceed if we have a non-empty packet.*/ + if(_op->bytes!=0){ + oc_dec_pipeline_state pipe; + th_ycbcr_buffer stripe_buf; + int stripe_fragy; + int refi; + int pli; + int notstart; + int notdone; + oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes); +#if defined(HAVE_CAIRO) + _dec->telemetry_frame_bytes=_op->bytes; +#endif + ret=oc_dec_frame_header_unpack(_dec); + if(ret<0)return ret; + /*Select a free buffer to use for the reconstructed version of this + frame.*/ + if(_dec->state.frame_type!=OC_INTRA_FRAME&& + (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| + _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){ + /*No reference frames yet!*/ + oc_dec_init_dummy_frame(_dec); + refi=_dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + else{ + for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]|| + refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++); + _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; + } + if(_dec->state.frame_type==OC_INTRA_FRAME){ + oc_dec_mark_all_intra(_dec); + 
_dec->state.keyframe_num=_dec->state.curframe_num; +#if defined(HAVE_CAIRO) + _dec->telemetry_coding_bytes= + _dec->telemetry_mode_bytes= + _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + } + else{ + oc_dec_coded_flags_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_mb_modes_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_mv_unpack_and_frag_modes_fill(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + } + oc_dec_block_qis_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_residual_tokens_unpack(_dec); + /*Update granule position. + This must be done before the striped decode callbacks so that the + application knows what to do with the frame data.*/ + _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< + _dec->state.info.keyframe_granule_shift) + +(_dec->state.curframe_num-_dec->state.keyframe_num); + _dec->state.curframe_num++; + if(_granpos!=NULL)*_granpos=_dec->state.granpos; + /*All of the rest of the operations -- DC prediction reversal, + reconstructing coded fragments, copying uncoded fragments, loop + filtering, extending borders, and out-of-loop post-processing -- should + be pipelined. + I.e., DC prediction reversal, reconstruction, and uncoded fragment + copying are done for one or two super block rows, then loop filtering is + run as far as it can, then bordering copying, then post-processing. + For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super + block rows, and one chroma. + Otherwise, an MCU consists of one super block row from each plane. + Inside each MCU, we perform all of the steps on one color plane before + moving on to the next. 
+ After reconstruction, the additional filtering stages introduce a delay + since they need some pixels from the next fragment row. + Thus the actual number of decoded rows available is slightly smaller for + the first MCU, and slightly larger for the last. + + This entire process allows us to operate on the data while it is still in + cache, resulting in big performance improvements. + An application callback allows further application processing (blitting + to video memory, color conversion, etc.) to also use the data while it's + in cache.*/ + oc_dec_pipeline_init(_dec,&pipe); + oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf); + notstart=0; + notdone=1; + for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){ + int avail_fragy0; + int avail_fragy_end; + avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags; + notdone=stripe_fragy+pipe.mcu_nvfragsstate.fplanes+pli; + /*Compute the first and last fragment row of the current MCU for this + plane.*/ + frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2); + pipe.fragy0[pli]=stripe_fragy>>frag_shift; + pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags, + pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift)); + oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli); + oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli); + sdelay=edelay=0; + if(pipe.loop_filter){ + sdelay+=notstart; + edelay+=notdone; + oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values, + refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + } + /*To fill the borders, we have an additional two pixel delay, since a + fragment in the next row could filter its top edge, using two pixels + from a fragment in this row. 
+ But there's no reason to delay a full fragment between the two.*/ + oc_state_borders_fill_rows(&_dec->state,refi,pli, + (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1), + (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1)); + /*Out-of-loop post-processing.*/ + pp_offset=3*(pli!=0); + if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){ + /*Perform de-blocking in one plane.*/ + sdelay+=notstart; + edelay+=notdone; + oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf, + _dec->state.ref_frame_bufs[refi],pli, + pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){ + /*Perform de-ringing in one plane.*/ + sdelay+=notstart; + edelay+=notdone; + oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli, + pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + } + } + /*If no post-processing is done, we still need to delay a row for the + loop filter, thanks to the strange filtering order VP3 chose.*/ + else if(pipe.loop_filter){ + sdelay+=notstart; + edelay+=notdone; + } + /*Compute the intersection of the available rows in all planes. + If chroma is sub-sampled, the effect of each of its delays is + doubled, but luma might have more post-processing filters enabled + than chroma, so we don't know up front which one is the limiting + factor.*/ + avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<stripe_cb.stripe_decoded!=NULL){ + /*The callback might want to use the FPU, so let's make sure they can. 
+ We violate all kinds of ABI restrictions by not doing this until + now, but none of them actually matter since we don't use floating + point ourselves.*/ + oc_restore_fpu(&_dec->state); + /*Make the callback, ensuring we flip the sense of the "start" and + "end" of the available region upside down.*/ + (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf, + _dec->state.fplanes[0].nvfrags-avail_fragy_end, + _dec->state.fplanes[0].nvfrags-avail_fragy0); + } + notstart=1; + } + /*Finish filling in the reference frame borders.*/ + for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli); + /*Update the reference frame indices.*/ + if(_dec->state.frame_type==OC_INTRA_FRAME){ + /*The new frame becomes both the previous and gold reference frames.*/ + _dec->state.ref_frame_idx[OC_FRAME_GOLD]= + _dec->state.ref_frame_idx[OC_FRAME_PREV]= + _dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + else{ + /*Otherwise, just replace the previous reference frame.*/ + _dec->state.ref_frame_idx[OC_FRAME_PREV]= + _dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG + gamma values, if nothing else).*/ + oc_restore_fpu(&_dec->state); +#if defined(OC_DUMP_IMAGES) + /*Don't dump images for dropped frames.*/ + oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec"); +#endif + return 0; + } + else{ + if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| + _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){ + int refi; + /*No reference frames yet!*/ + oc_dec_init_dummy_frame(_dec); + refi=_dec->state.ref_frame_idx[OC_FRAME_PREV]; + _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; + memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi], + sizeof(_dec->pp_frame_buf[0])*3); + } + /*Just update the granule position and return.*/ + _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< + _dec->state.info.keyframe_granule_shift) + +(_dec->state.curframe_num-_dec->state.keyframe_num); + 
_dec->state.curframe_num++; + if(_granpos!=NULL)*_granpos=_dec->state.granpos; + return TH_DUPFRAME; + } +} + +int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){ + if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT; + oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf); +#if defined(HAVE_CAIRO) + /*If telemetry ioctls are active, we need to draw to the output buffer. + Stuff the plane into cairo.*/ + if(_dec->telemetry){ + cairo_surface_t *cs; + unsigned char *data; + unsigned char *y_row; + unsigned char *u_row; + unsigned char *v_row; + unsigned char *rgb_row; + int cstride; + int w; + int h; + int x; + int y; + int hdec; + int vdec; + w=_ycbcr[0].width; + h=_ycbcr[0].height; + hdec=!(_dec->state.info.pixel_fmt&1); + vdec=!(_dec->state.info.pixel_fmt&2); + /*Lazy data buffer init. + We could try to re-use the post-processing buffer, which would save + memory, but complicate the allocation logic there. + I don't think anyone cares about memory usage when using telemetry; it is + not meant for embedded devices.*/ + if(_dec->telemetry_frame_data==NULL){ + _dec->telemetry_frame_data=_ogg_malloc( + (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data)); + if(_dec->telemetry_frame_data==NULL)return 0; + } + cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h); + /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/ + data=cairo_image_surface_get_data(cs); + if(data==NULL){ + cairo_surface_destroy(cs); + return 0; + } + cstride=cairo_image_surface_get_stride(cs); + y_row=_ycbcr[0].data; + u_row=_ycbcr[1].data; + v_row=_ycbcr[2].data; + rgb_row=data; + for(y=0;y>hdec]-363703744)/1635200; + g=(3827562*y_row[x]-1287801*u_row[x>>hdec] + -2672387*v_row[x>>hdec]+447306710)/3287200; + b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600; + rgb_row[4*x+0]=OC_CLAMP255(b); + rgb_row[4*x+1]=OC_CLAMP255(g); + rgb_row[4*x+2]=OC_CLAMP255(r); + } + y_row+=_ycbcr[0].stride; + u_row+=_ycbcr[1].stride&-((y&1)|!vdec); + 
v_row+=_ycbcr[2].stride&-((y&1)|!vdec); + rgb_row+=cstride; + } + /*Draw coded identifier for each macroblock (stored in Hilbert order).*/ + { + cairo_t *c; + const oc_fragment *frags; + oc_mv *frag_mvs; + const signed char *mb_modes; + oc_mb_map *mb_maps; + size_t nmbs; + size_t mbi; + int row2; + int col2; + int qim[3]={0,0,0}; + if(_dec->state.nqis==2){ + int bqi; + bqi=_dec->state.qis[0]; + if(_dec->state.qis[1]>bqi)qim[1]=1; + if(_dec->state.qis[1]state.nqis==3){ + int bqi; + int cqi; + int dqi; + bqi=_dec->state.qis[0]; + cqi=_dec->state.qis[1]; + dqi=_dec->state.qis[2]; + if(cqi>bqi&&dqi>bqi){ + if(dqi>cqi){ + qim[1]=1; + qim[2]=2; + } + else{ + qim[1]=2; + qim[2]=1; + } + } + else if(cqistate.frags; + frag_mvs=_dec->state.frag_mvs; + mb_modes=_dec->state.mb_modes; + mb_maps=_dec->state.mb_maps; + nmbs=_dec->state.nmbs; + row2=0; + col2=0; + for(mbi=0;mbi>1)&1))*16-16; + x=(col2>>1)*16; + cairo_set_line_width(c,1.); + /*Keyframe (all intra) red box.*/ + if(_dec->state.frame_type==OC_INTRA_FRAME){ + if(_dec->telemetry_mbmode&0x02){ + cairo_set_source_rgba(c,1.,0,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,0,0,.25); + cairo_fill(c); + } + } + else{ + const signed char *frag_mv; + ptrdiff_t fragi; + for(bi=0;bi<4;bi++){ + fragi=mb_maps[mbi][0][bi]; + if(fragi>=0&&frags[fragi].coded){ + frag_mv=frag_mvs[fragi]; + break; + } + } + if(bi<4){ + switch(mb_modes[mbi]){ + case OC_MODE_INTRA:{ + if(_dec->telemetry_mbmode&0x02){ + cairo_set_source_rgba(c,1.,0,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,0,0,.25); + cairo_fill(c); + } + }break; + case OC_MODE_INTER_NOMV:{ + if(_dec->telemetry_mbmode&0x01){ + cairo_set_source_rgba(c,0,0,1.,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0,0,1.,.25); + cairo_fill(c); + } + }break; + case OC_MODE_INTER_MV:{ + if(_dec->telemetry_mbmode&0x04){ + 
cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x04){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_LAST:{ + if(_dec->telemetry_mbmode&0x08){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_move_to(c,x+13.5,y+2.5); + cairo_line_to(c,x+2.5,y+8); + cairo_line_to(c,x+13.5,y+13.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x08){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_LAST2:{ + if(_dec->telemetry_mbmode&0x10){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_move_to(c,x+8,y+2.5); + cairo_line_to(c,x+2.5,y+8); + cairo_line_to(c,x+8,y+13.5); + cairo_move_to(c,x+13.5,y+2.5); + cairo_line_to(c,x+8,y+8); + cairo_line_to(c,x+13.5,y+13.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x10){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + 
cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_GOLDEN_NOMV:{ + if(_dec->telemetry_mbmode&0x20){ + cairo_set_source_rgba(c,1.,1.,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,1.,0,.25); + cairo_fill(c); + } + }break; + case OC_MODE_GOLDEN_MV:{ + if(_dec->telemetry_mbmode&0x40){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,1.,1.,0,.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x40){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_FOUR:{ + if(_dec->telemetry_mbmode&0x80){ + cairo_rectangle(c,x+2.5,y+2.5,4,4); + cairo_rectangle(c,x+9.5,y+2.5,4,4); + cairo_rectangle(c,x+2.5,y+9.5,4,4); + cairo_rectangle(c,x+9.5,y+9.5,4,4); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_stroke(c); + } + /*4mv is odd, coded in raster order.*/ + fragi=mb_maps[mbi][0][0]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+4,y+12); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][1]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + 
cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+12,y+12); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][2]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+4,y+4); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][3]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+12,y+4); + cairo_stroke(c); + } + }break; + } + } + } + /*qii illustration.*/ + if(_dec->telemetry_qi&0x2){ + cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE); + for(bi=0;bi<4;bi++){ + ptrdiff_t fragi; + int qiv; + int xp; + int yp; + xp=x+(bi&1)*8; + yp=y+8-(bi&2)*4; + fragi=mb_maps[mbi][0][bi]; + if(fragi>=0&&frags[fragi].coded){ + qiv=qim[frags[fragi].qii]; + cairo_set_line_width(c,3.); + cairo_set_source_rgba(c,0.,0.,0.,.5); + switch(qiv){ + /*Double plus:*/ + case 2:{ + if((bi&1)^((bi&2)>>1)){ + cairo_move_to(c,xp+2.5,yp+1.5); + cairo_line_to(c,xp+2.5,yp+3.5); + cairo_move_to(c,xp+1.5,yp+2.5); + cairo_line_to(c,xp+3.5,yp+2.5); + cairo_move_to(c,xp+5.5,yp+4.5); + cairo_line_to(c,xp+5.5,yp+6.5); + 
cairo_move_to(c,xp+4.5,yp+5.5); + cairo_line_to(c,xp+6.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0.,1.,1.,1.); + } + else{ + cairo_move_to(c,xp+5.5,yp+1.5); + cairo_line_to(c,xp+5.5,yp+3.5); + cairo_move_to(c,xp+4.5,yp+2.5); + cairo_line_to(c,xp+6.5,yp+2.5); + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+2.5,yp+6.5); + cairo_move_to(c,xp+1.5,yp+5.5); + cairo_line_to(c,xp+3.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0.,1.,1.,1.); + } + }break; + /*Double minus:*/ + case -2:{ + cairo_move_to(c,xp+2.5,yp+2.5); + cairo_line_to(c,xp+5.5,yp+2.5); + cairo_move_to(c,xp+2.5,yp+5.5); + cairo_line_to(c,xp+5.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,1.,1.,1.); + }break; + /*Plus:*/ + case 1:{ + /*== binds tighter than &, so the bit tests need explicit parentheses: + the unparenthesized bi&2==0 parsed as bi&(2==0), which is always 0, + so neither offset below was ever applied.*/ + if((bi&2)==0)yp-=2; + if((bi&1)==0)xp-=2; + cairo_move_to(c,xp+4.5,yp+2.5); + cairo_line_to(c,xp+4.5,yp+6.5); + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+6.5,yp+4.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,.1,1.,.3,1.); + break; + } + /*Fall through.*/ + /*Minus:*/ + case -1:{ + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+6.5,yp+4.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,.3,.1,1.); + }break; + default:continue; + } + cairo_set_line_width(c,1.); + cairo_stroke(c); + } + } + } + col2++; + if((col2>>1)>=_dec->state.nhmbs){ + col2=0; + row2+=2; + } + } + /*Bit usage indicator[s]:*/ + if(_dec->telemetry_bits){ + int widths[6]; + int fpsn; + int fpsd; + int mult; + int fullw; + int padw; + int i; + fpsn=_dec->state.info.fps_numerator; + fpsd=_dec->state.info.fps_denominator; + mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits); + fullw=250.f*h*fpsd*mult/fpsn; + padw=w-24; + /*Header and coded block bits.*/ + if(_dec->telemetry_frame_bytes<0|| + _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){ + _dec->telemetry_frame_bytes=0; + } + if(_dec->telemetry_coding_bytes<0|| + _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_coding_bytes=0; + 
} + if(_dec->telemetry_mode_bytes<0|| + _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_mode_bytes=0; + } + if(_dec->telemetry_mv_bytes<0|| + _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_mv_bytes=0; + } + if(_dec->telemetry_qi_bytes<0|| + _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_qi_bytes=0; + } + if(_dec->telemetry_dc_bytes<0|| + _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_dc_bytes=0; + } + widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw; + widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw; + widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw; + widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw; + widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw; + widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw; + for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w; + cairo_set_source_rgba(c,.0,.0,.0,.6); + cairo_rectangle(c,10,h-33,widths[0]+1,5); + cairo_rectangle(c,10,h-29,widths[1]+1,5); + cairo_rectangle(c,10,h-25,widths[2]+1,5); + cairo_rectangle(c,10,h-21,widths[3]+1,5); + cairo_rectangle(c,10,h-17,widths[4]+1,5); + cairo_rectangle(c,10,h-13,widths[5]+1,5); + cairo_fill(c); + cairo_set_source_rgb(c,1,0,0); + cairo_rectangle(c,10.5,h-32.5,widths[0],4); + cairo_fill(c); + cairo_set_source_rgb(c,0,1,0); + cairo_rectangle(c,10.5,h-28.5,widths[1],4); + cairo_fill(c); + cairo_set_source_rgb(c,0,0,1); + cairo_rectangle(c,10.5,h-24.5,widths[2],4); + cairo_fill(c); + cairo_set_source_rgb(c,.6,.4,.0); + cairo_rectangle(c,10.5,h-20.5,widths[3],4); + cairo_fill(c); + cairo_set_source_rgb(c,.3,.3,.3); + cairo_rectangle(c,10.5,h-16.5,widths[4],4); + cairo_fill(c); + cairo_set_source_rgb(c,.5,.5,.8); + cairo_rectangle(c,10.5,h-12.5,widths[5],4); + cairo_fill(c); + } + /*Master qi indicator[s]:*/ + if(_dec->telemetry_qi&0x1){ + cairo_text_extents_t 
extents; + char buffer[10]; + int p; + int y; + p=0; + y=h-7.5; + if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10; + buffer[p++]=48+_dec->state.qis[0]%10; + if(_dec->state.nqis>=2){ + buffer[p++]=' '; + if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10; + buffer[p++]=48+_dec->state.qis[1]%10; + } + if(_dec->state.nqis==3){ + buffer[p++]=' '; + if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10; + buffer[p++]=48+_dec->state.qis[2]%10; + } + buffer[p++]='\0'; + cairo_select_font_face(c,"sans", + CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD); + cairo_set_font_size(c,18); + cairo_text_extents(c,buffer,&extents); + cairo_set_source_rgb(c,1,1,1); + cairo_move_to(c,w-extents.x_advance-10,y); + cairo_show_text(c,buffer); + cairo_set_source_rgb(c,0,0,0); + cairo_move_to(c,w-extents.x_advance-10,y); + cairo_text_path(c,buffer); + cairo_set_line_width(c,.8); + cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND); + cairo_stroke(c); + } + cairo_destroy(c); + } + /*Out of the Cairo plane into the telemetry YUV buffer.*/ + _ycbcr[0].data=_dec->telemetry_frame_data; + _ycbcr[0].stride=_ycbcr[0].width; + _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride; + _ycbcr[1].stride=_ycbcr[1].width; + _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride; + _ycbcr[2].stride=_ycbcr[2].width; + y_row=_ycbcr[0].data; + u_row=_ycbcr[1].data; + v_row=_ycbcr[2].data; + rgb_row=data; + /*This is one of the few places it's worth handling chroma on a + case-by-case basis.*/ + switch(_dec->state.info.pixel_fmt){ + case TH_PF_420:{ + for(y=0;y>1]=OC_CLAMP255(u); + v_row[x>>1]=OC_CLAMP255(v); + } + y_row+=_ycbcr[0].stride<<1; + u_row+=_ycbcr[1].stride; + v_row+=_ycbcr[2].stride; + rgb_row+=cstride<<1; + } + }break; + case TH_PF_422:{ + for(y=0;y>1]=OC_CLAMP255(u); + v_row[x>>1]=OC_CLAMP255(v); + } + y_row+=_ycbcr[0].stride; + u_row+=_ycbcr[1].stride; + v_row+=_ycbcr[2].stride; + rgb_row+=cstride; + } + }break; + /*case TH_PF_444:*/ + default:{ + for(y=0;y +#include 
+#include +#include "dequant.h" +#include "decint.h" + +int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){ + th_quant_base *base_mats; + long val; + int nbase_mats; + int sizes[64]; + int indices[64]; + int nbits; + int bmi; + int ci; + int qti; + int pli; + int qri; + int qi; + int i; + val=oc_pack_read(_opb,3); + nbits=(int)val; + for(qi=0;qi<64;qi++){ + val=oc_pack_read(_opb,nbits); + _qinfo->loop_filter_limits[qi]=(unsigned char)val; + } + val=oc_pack_read(_opb,4); + nbits=(int)val+1; + for(qi=0;qi<64;qi++){ + val=oc_pack_read(_opb,nbits); + _qinfo->ac_scale[qi]=(ogg_uint16_t)val; + } + val=oc_pack_read(_opb,4); + nbits=(int)val+1; + for(qi=0;qi<64;qi++){ + val=oc_pack_read(_opb,nbits); + _qinfo->dc_scale[qi]=(ogg_uint16_t)val; + } + val=oc_pack_read(_opb,9); + nbase_mats=(int)val+1; + base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0])); + if(base_mats==NULL)return TH_EFAULT; + for(bmi=0;bmiqi_ranges[qti]+pli; + if(i>0){ + val=oc_pack_read1(_opb); + if(!val){ + int qtj; + int plj; + if(qti>0){ + val=oc_pack_read1(_opb); + if(val){ + qtj=qti-1; + plj=pli; + } + else{ + qtj=(i-1)/3; + plj=(i-1)%3; + } + } + else{ + qtj=(i-1)/3; + plj=(i-1)%3; + } + *qranges=*(_qinfo->qi_ranges[qtj]+plj); + continue; + } + } + val=oc_pack_read(_opb,nbits); + indices[0]=(int)val; + for(qi=qri=0;qi<63;){ + val=oc_pack_read(_opb,oc_ilog(62-qi)); + sizes[qri]=(int)val+1; + qi+=(int)val+1; + val=oc_pack_read(_opb,nbits); + indices[++qri]=(int)val; + } + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + if(qi>63){ + _ogg_free(base_mats); + return TH_EBADHEADER; + } + qranges->nranges=qri; + qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0])); + if(qranges->sizes==NULL){ + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + _ogg_free(base_mats); + return TH_EFAULT; + } + memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0])); + qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0])); 
+ if(qrbms==NULL){ + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + _ogg_free(base_mats); + return TH_EFAULT; + } + qranges->base_matrices=(const th_quant_base *)qrbms; + do{ + bmi=indices[qri]; + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + if(bmi>=nbase_mats){ + _ogg_free(base_mats); + return TH_EBADHEADER; + } + memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri])); + } + while(qri-->0); + } + _ogg_free(base_mats); + return 0; +} + /*Frees the sizes and base_matrices arrays of the six qi_ranges entries, + visiting i=5...0 with qti=i/3 and pli=i%3. + Before freeing, a pointer equal to the one in entry i-1, or (for qti>0) + to the one in qi_ranges[0][pli], is set to NULL so that storage shared + between entries is handed to _ogg_free() only once. + NOTE(review): relies on _ogg_free() accepting NULL; it is defined outside + this chunk -- confirm. + _qinfo: The quantization info whose range storage is released.*/ +void oc_quant_params_clear(th_quant_info *_qinfo){ + int i; + for(i=6;i-->0;){ + int qti; + int pli; + qti=i/3; + pli=i%3; + /*Clear any duplicate pointer references.*/ + if(i>0){ + int qtj; + int plj; + qtj=(i-1)/3; + plj=(i-1)%3; + if(_qinfo->qi_ranges[qti][pli].sizes== + _qinfo->qi_ranges[qtj][plj].sizes){ + _qinfo->qi_ranges[qti][pli].sizes=NULL; + } + if(_qinfo->qi_ranges[qti][pli].base_matrices== + _qinfo->qi_ranges[qtj][plj].base_matrices){ + _qinfo->qi_ranges[qti][pli].base_matrices=NULL; + } + } + if(qti>0){ + if(_qinfo->qi_ranges[1][pli].sizes== + _qinfo->qi_ranges[0][pli].sizes){ + _qinfo->qi_ranges[1][pli].sizes=NULL; + } + if(_qinfo->qi_ranges[1][pli].base_matrices== + _qinfo->qi_ranges[0][pli].base_matrices){ + _qinfo->qi_ranges[1][pli].base_matrices=NULL; + } + } + /*Now free all the non-duplicate storage.*/ + _ogg_free((void *)_qinfo->qi_ranges[qti][pli].sizes); + _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices); + } +} diff --git a/engine/code/libtheora-1.1.1/lib/dequant.h b/engine/code/libtheora-1.1.1/lib/dequant.h new file mode 100644 index 00000000..ef25838e --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/dequant.h @@ -0,0 +1,27 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_dequant_H) +# define _dequant_H (1) +# include "quant.h" +# include "bitpack.h" + +int oc_quant_params_unpack(oc_pack_buf *_opb, + th_quant_info *_qinfo); +void oc_quant_params_clear(th_quant_info *_qinfo); + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/fragment.c b/engine/code/libtheora-1.1.1/lib/fragment.c new file mode 100644 index 00000000..15372e9d --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/fragment.c @@ -0,0 +1,87 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: fragment.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include "internal.h" + /*Forwards to the frag_copy entry of _state->opt_vtable, passing _dst, + _src and _ystride through unchanged.*/ +void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride){ + (*_state->opt_vtable.frag_copy)(_dst,_src,_ystride); +} + /*Copies 8 rows of 8 bytes from _src to _dst. + Both pointers advance by _ystride bytes after each row.*/ +void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){ + int i; + for(i=8;i-->0;){ + memcpy(_dst,_src,8*sizeof(*_dst)); + _dst+=_ystride; + _src+=_ystride; + } +} + /*Forwards to the frag_recon_intra entry of _state->opt_vtable.*/ +void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst, + int _ystride,const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_intra(_dst,_ystride,_residue); +} + /*Writes an 8x8 block to _dst, one row every _ystride bytes. + Output byte (i,j) is OC_CLAMP255(_residue[i*8+j]+128), so _residue is read + in row-major order. + OC_CLAMP255 is defined outside this chunk; presumably it clamps to 0...255 + -- confirm.*/ +void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+128); + _dst+=_ystride; + } +} + /*Forwards to the frag_recon_inter entry of _state->opt_vtable.*/ +void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue); +} + /*Writes an 8x8 block to _dst: output byte (i,j) is OC_CLAMP255 of + _residue[i*8+j] plus the byte at the same position in _src. + _dst and _src both advance by _ystride bytes after each row.*/ +void oc_frag_recon_inter_c(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+_src[j]); + _dst+=_ystride; + _src+=_ystride; + } +} + /*Forwards to the frag_recon_inter2 entry of _state->opt_vtable.*/ +void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride, + const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_inter2(_dst,_src1,_src2,_ystride,_residue); +} + /*Writes an 8x8 block to _dst: output byte (i,j) is OC_CLAMP255 of + _residue[i*8+j] plus (_src1[j]+_src2[j])>>1, the truncated average of + the two source bytes. + All three pointers advance by _ystride bytes after each row.*/ +void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned 
char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+(_src1[j]+_src2[j]>>1)); + _dst+=_ystride; + _src1+=_ystride; + _src2+=_ystride; + } +} + +void oc_restore_fpu(const oc_theora_state *_state){ + _state->opt_vtable.restore_fpu(); +} + +void oc_restore_fpu_c(void){} diff --git a/engine/code/libtheora-1.1.1/lib/huffdec.c b/engine/code/libtheora-1.1.1/lib/huffdec.c new file mode 100644 index 00000000..8cf27f03 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/huffdec.c @@ -0,0 +1,489 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: huffdec.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "huffdec.h" +#include "decint.h" + + +/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/ +#define _ogg_offsetof(_type,_field)\ + ((size_t)((char *)&((_type *)0)->_field-(char *)0)) + +/*The number of internal tokens associated with each of the spec tokens.*/ +static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={ + 1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8 +}; + +/*The map from external spec-defined tokens to internal tokens. 
+ This is constructed so that any extra bits read with the original token value + can be masked off the least significant bits of its internal token index. + In addition, all of the tokens which require additional extra bits are placed + at the start of the list, and grouped by type. + OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so + giving it index 0 may simplify comparisons on some architectures. + These requirements require some substantial reordering.*/ +static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={ + /*OC_DCT_EOB1_TOKEN (0 extra bits)*/ + 15, + /*OC_DCT_EOB2_TOKEN (0 extra bits)*/ + 16, + /*OC_DCT_EOB3_TOKEN (0 extra bits)*/ + 17, + /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/ + 88, + /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/ + 80, + /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/ + 1, + /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/ + 0, + /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/ + 48, + /*OC_DCT_ZRL_TOKEN (6 extra bits)*/ + 14, + /*OC_ONE_TOKEN (0 extra bits)*/ + 56, + /*OC_MINUS_ONE_TOKEN (0 extra bits)*/ + 57, + /*OC_TWO_TOKEN (0 extra bits)*/ + 58, + /*OC_MINUS_TWO_TOKEN (0 extra bits)*/ + 59, + /*OC_DCT_VAL_CAT2 (1 extra bit)*/ + 60, + 62, + 64, + 66, + /*OC_DCT_VAL_CAT3 (2 extra bits)*/ + 68, + /*OC_DCT_VAL_CAT4 (3 extra bits)*/ + 72, + /*OC_DCT_VAL_CAT5 (4 extra bits)*/ + 2, + /*OC_DCT_VAL_CAT6 (5 extra bits)*/ + 4, + /*OC_DCT_VAL_CAT7 (6 extra bits)*/ + 6, + /*OC_DCT_VAL_CAT8 (10 extra bits)*/ + 8, + /*OC_DCT_RUN_CAT1A (1 extra bit)*/ + 18, + 20, + 22, + 24, + 26, + /*OC_DCT_RUN_CAT1B (3 extra bits)*/ + 32, + /*OC_DCT_RUN_CAT1C (4 extra bits)*/ + 12, + /*OC_DCT_RUN_CAT2A (2 extra bits)*/ + 28, + /*OC_DCT_RUN_CAT2B (3 extra bits)*/ + 40 +}; + +/*These three functions are really part of the bitpack.c module, but + they are only used here. 
+ Declaring local static versions so they can be inlined saves considerable + function call overhead.*/ + +static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ + const unsigned char *ptr; + const unsigned char *stop; + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + ptr=_b->ptr; + stop=_b->stop; + /*This version of _refill() doesn't bother setting eof because we won't + check for it after we've started decoding DCT tokens.*/ + if(ptr>=stop)available=OC_LOTS_OF_BITS; + while(available<=OC_PB_WINDOW_SIZE-8){ + available+=8; + window|=(oc_pb_window)*ptr++<=stop)available=OC_LOTS_OF_BITS; + } + _b->ptr=ptr; + if(_bits>available)window|=*ptr>>(available&7); + _b->bits=available; + return window; +} + + +/*Read in bits without advancing the bit pointer. + Here we assume 0<=_bits&&_bits<=32.*/ +static long oc_pack_look(oc_pack_buf *_b,int _bits){ + oc_pb_window window; + int available; + long result; + window=_b->window; + available=_b->bits; + if(_bits==0)return 0; + /*Refill, and store the refilled window back, only when fewer than _bits bits are buffered.*/ if(_bits>available)_b->window=window=oc_pack_refill(_b,_bits); + /*- binds tighter than >>, so this shifts right by OC_PB_WINDOW_SIZE-_bits.*/ result=window>>OC_PB_WINDOW_SIZE-_bits; + return result; +} + +/*Advance the bit pointer.*/ +static void oc_pack_adv(oc_pack_buf *_b,int _bits){ + /*We ignore the special cases for _bits==0 and _bits==32 here, since they are + never actually used. + OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read + 32 bits in a single go, and would require a 32 GB lookup table (assuming + 8 byte pointers, since 4 byte pointers couldn't fit such a table).*/ + _b->window<<=_bits; + _b->bits-=_bits; +} + + +/*The log_2 of the size of a lookup table is allowed to grow to relative to + the number of unique nodes it contains. + E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is + wasted (each node will have an amortized cost of at most 20 bytes when using + 4-byte pointers). 
+ Larger numbers can decode tokens with fewer read operations, while smaller + numbers may save more space (requiring as little as 8 bytes amortized per + node, though there will be more nodes). + With a sample file: + 32233473 read calls are required when no tree collapsing is done (100.0%). + 19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%). + 11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%). + 10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%). + 10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%). + Since a value of 1 gets us the vast majority of the speed-up with only a + small amount of wasted memory, this is what we use.*/ +#define OC_HUFF_SLUSH (1) + + +/*Determines the size in bytes of a Huffman tree node that represents a + subtree of depth _nbits. + _nbits: The depth of the subtree. + If this is 0, the node is a leaf node. + Otherwise 1<<_nbits pointers are allocated for children. + Return: The number of bytes required to store the node.*/ +static size_t oc_huff_node_size(int _nbits){ + size_t size; + /*Start from the byte offset of the nodes member, then add room for the child pointers.*/ size=_ogg_offsetof(oc_huff_node,nodes); + if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits); + return size; +} + /*Places a node at the current *_storage position, records _nbits in its + nbits field, and advances *_storage by _size bytes. + No other field of the node is written here. + _storage: In/out cursor into caller-provided memory. + _size: The number of bytes to advance the cursor by. + _nbits: The value stored in the node's nbits field. + Return: The node, i.e. the value *_storage had on entry.*/ +static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){ + oc_huff_node *ret; + ret=(oc_huff_node *)*_storage; + ret->nbits=(unsigned char)_nbits; + (*_storage)+=_size; + return ret; +} + + +/*Determines the size in bytes of a Huffman tree. + _nbits: The depth of the subtree. + If this is 0, the node is a leaf node. + Otherwise storage for 1<<_nbits pointers are added for children. 
+ Return: The number of bytes required to store the tree.*/ +static size_t oc_huff_tree_size(const oc_huff_node *_node){ + size_t size; + size=oc_huff_node_size(_node->nbits); + if(_node->nbits){ + int nchildren; + int i; + nchildren=1<<_node->nbits; + for(i=0;inbits-_node->nodes[i]->depth){ + size+=oc_huff_tree_size(_node->nodes[i]); + } + } + return size; +} + + +/*Unpacks a sub-tree from the given buffer. + _opb: The buffer to unpack from. + _binodes: The nodes to store the sub-tree in. + _nbinodes: The number of nodes available for the sub-tree. + Return: 0 on success, or a negative value on error.*/ +static int oc_huff_tree_unpack(oc_pack_buf *_opb, + oc_huff_node *_binodes,int _nbinodes){ + oc_huff_node *binode; + long bits; + int nused; + if(_nbinodes<1)return TH_EBADHEADER; + binode=_binodes; + nused=0; + bits=oc_pack_read1(_opb); + if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + /*Read an internal node:*/ + if(!bits){ + int ret; + nused++; + binode->nbits=1; + binode->depth=1; + binode->nodes[0]=_binodes+nused; + ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); + if(ret>=0){ + nused+=ret; + binode->nodes[1]=_binodes+nused; + ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); + } + if(ret<0)return ret; + nused+=ret; + } + /*Read a leaf node:*/ + else{ + int ntokens; + int token; + int i; + bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS); + if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + /*Find out how many internal tokens we translate this external token into.*/ + ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits]; + if(_nbinodes<2*ntokens-1)return TH_EBADHEADER; + /*Fill in a complete binary tree pointing to the internal tokens.*/ + for(i=1;inbits=0; + binode->depth=1; + binode->token=token+i; + } + } + return nused; +} + +/*Finds the depth of shortest branch of the given sub-tree. + The tree must be binary. + _binode: The root of the given sub-tree. + _binode->nbits must be 0 or 1. 
+ Return: The smallest depth of a leaf node in this sub-tree. + 0 indicates this sub-tree is a leaf node.*/ +static int oc_huff_tree_mindepth(oc_huff_node *_binode){ + int depth0; + int depth1; + if(_binode->nbits==0)return 0; + depth0=oc_huff_tree_mindepth(_binode->nodes[0]); + depth1=oc_huff_tree_mindepth(_binode->nodes[1]); + return OC_MINI(depth0,depth1)+1; +} + +/*Finds the number of internal nodes at a given depth, plus the number of + leaves at that depth or shallower. + The tree must be binary. + _binode: The root of the given sub-tree. + _binode->nbits must be 0 or 1. + Return: The number of entries that would be contained in a jump table of the + given depth.*/ +static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){ + if(_binode->nbits==0||_depth<=0)return 1; + else{ + return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+ + oc_huff_tree_occupancy(_binode->nodes[1],_depth-1); + } +} + +/*Makes a copy of the given Huffman tree. + _node: The Huffman tree to copy. 
+ Return: The copy of the Huffman tree.*/ +static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node, + char **_storage){ + oc_huff_node *ret; + ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits); + ret->depth=_node->depth; + if(_node->nbits){ + int nchildren; + int i; + int inext; + nchildren=1<<_node->nbits; + for(i=0;inodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage); + inext=i+(1<<_node->nbits-ret->nodes[i]->depth); + while(++inodes[i]=ret->nodes[i-1]; + } + } + else ret->token=_node->token; + return ret; +} + +static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){ + size_t size; + int mindepth; + int depth; + int loccupancy; + int occupancy; + if(_binode->nbits!=0&&_depth>0){ + return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+ + oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1); + } + depth=mindepth=oc_huff_tree_mindepth(_binode); + occupancy=1<loccupancy&&occupancy>=1<0){ + size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1); + size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1); + } + return size; +} + +static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, + char **_storage); + +/*Fills the given nodes table with all the children in the sub-tree at the + given depth. + The nodes in the sub-tree with a depth less than that stored in the table + are freed. + The sub-tree must be binary and complete up until the given depth. + _nodes: The nodes table to fill. + _binode: The root of the sub-tree to fill it with. + _binode->nbits must be 0 or 1. + _level: The current level in the table. + 0 indicates that the current node should be stored, regardless of + whether it is a leaf node or an internal node. 
+ _depth: The depth of the nodes to fill the table with, relative to their + parent.*/ +static void oc_huff_node_fill(oc_huff_node **_nodes, + oc_huff_node *_binode,int _level,int _depth,char **_storage){ + if(_level<=0||_binode->nbits==0){ + int i; + _binode->depth=(unsigned char)(_depth-_level); + _nodes[0]=oc_huff_tree_collapse(_binode,_storage); + for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0]; + } + else{ + _level--; + oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage); + _nodes+=1<<_level; + oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage); + } +} + +/*Finds the largest complete sub-tree rooted at the current node and collapses + it into a single node. + This procedure is then applied recursively to all the children of that node. + _binode: The root of the sub-tree to collapse. + _binode->nbits must be 0 or 1. + Return: The new root of the collapsed sub-tree.*/ +static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, + char **_storage){ + oc_huff_node *root; + size_t size; + int mindepth; + int depth; + int loccupancy; + int occupancy; + depth=mindepth=oc_huff_tree_mindepth(_binode); + occupancy=1<loccupancy&&occupancy>=1<depth=_binode->depth; + oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage); + return root; +} + +/*Unpacks a set of Huffman trees, and reduces them to a collapsed + representation. + _opb: The buffer to unpack the trees from. + _nodes: The table to fill with the Huffman trees. + Return: 0 on success, or a negative value on error.*/ +int oc_huff_trees_unpack(oc_pack_buf *_opb, + oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ + int i; + for(i=0;i0)_ogg_free(_dst[i]); + return TH_EFAULT; + } + _dst[i]=oc_huff_tree_copy(_src[i],&storage); + } + return 0; +} + +/*Frees the memory used by a set of Huffman trees. 
+ _nodes: The array of trees to free.*/ +void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ + int i; + for(i=0;inbits!=0){ + bits=oc_pack_look(_opb,_node->nbits); + _node=_node->nodes[bits]; + oc_pack_adv(_opb,_node->depth); + } + return _node->token; +} diff --git a/engine/code/libtheora-1.1.1/lib/huffdec.h b/engine/code/libtheora-1.1.1/lib/huffdec.h new file mode 100644 index 00000000..d7ffa0e9 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/huffdec.h @@ -0,0 +1,92 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: huffdec.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_huffdec_H) +# define _huffdec_H (1) +# include "huffman.h" +# include "bitpack.h" + + + +typedef struct oc_huff_node oc_huff_node; + +/*A node in the Huffman tree. + Instead of storing every branching in the tree, subtrees can be collapsed + into one node, with a table of size 1< +#include "internal.h" +#include "dct.h" + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. 
+ The first 8 entries are used (e.g., from a row of an 8x8 block).*/ +static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + /*0-1 butterfly.*/ + t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; + t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; + /*2-3 rotation by 6pi/16.*/ + t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); + t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); + /*4-7 rotation by 7pi/16.*/ + t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); + /*5-6 rotation by 3pi/16.*/ + t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); + t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); + t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); + /*Stage 2:*/ + /*4-5 butterfly.*/ + r=t[4]+t[5]; + t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; + t[4]=r; + /*7-6 butterfly.*/ + r=t[7]+t[6]; + t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; + t[7]=r; + /*Stage 3:*/ + /*0-3 butterfly.*/ + r=t[0]+t[3]; + t[3]=t[0]-t[3]; + t[0]=r; + /*1-2 butterfly.*/ + r=t[1]+t[2]; + t[2]=t[1]-t[2]; + t[1]=r; + /*6-5 butterfly.*/ + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + /*0-7 butterfly.*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + /*1-6 butterfly.*/ + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); + /*2-5 butterfly.*/ + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); + /*3-4 butterfly.*/ + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first 4 entries are used. 
+ The other 4 are assumed to be 0.*/ +static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + t[0]=OC_C4S4*_x[0]>>16; + t[2]=OC_C6S2*_x[2]>>16; + t[3]=OC_C2S6*_x[2]>>16; + t[4]=OC_C7S1*_x[1]>>16; + t[5]=-(OC_C5S3*_x[3]>>16); + t[6]=OC_C3S5*_x[3]>>16; + t[7]=OC_C1S7*_x[1]>>16; + /*Stage 2:*/ + r=t[4]+t[5]; + t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; + t[4]=r; + r=t[7]+t[6]; + t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; + t[7]=r; + /*Stage 3:*/ + t[1]=t[0]+t[2]; + t[2]=t[0]-t[2]; + r=t[0]+t[3]; + t[3]=t[0]-t[3]; + t[0]=r; + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first 3 entries are used. 
+ The other 5 are assumed to be 0.*/ +static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + t[0]=OC_C4S4*_x[0]>>16; + t[2]=OC_C6S2*_x[2]>>16; + t[3]=OC_C2S6*_x[2]>>16; + t[4]=OC_C7S1*_x[1]>>16; + t[7]=OC_C1S7*_x[1]>>16; + /*Stage 2:*/ + t[5]=OC_C4S4*t[4]>>16; + t[6]=OC_C4S4*t[7]>>16; + /*Stage 3:*/ + t[1]=t[0]+t[2]; + t[2]=t[0]-t[2]; + r=t[0]+t[3]; + t[3]=t[0]-t[3]; + t[0]=r; + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first 2 entries are used. + The other 6 are assumed to be 0.*/ +static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + t[0]=OC_C4S4*_x[0]>>16; + t[4]=OC_C7S1*_x[1]>>16; + t[7]=OC_C1S7*_x[1]>>16; + /*Stage 2:*/ + t[5]=OC_C4S4*t[4]>>16; + t[6]=OC_C4S4*t[7]>>16; + /*Stage 3:*/ + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); + _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); + _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. 
+ Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first entry is used. + The other 7 are assumed to be 0.*/ +static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ + _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= + _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform. + All coefficients but the first 3 in zig-zag scan order are assumed to be 0: + x x 0 0 0 0 0 0 + x 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input coefficients.*/ +static void oc_idct8x8_3(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + /*Transform rows of x into columns of w.*/ + idct8_2(w,_x); + idct8_1(w+1,_x+8); + /*Transform rows of w into columns of y.*/ + for(in=w,out=_y,end=out+8;out>4); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform. + All coefficients but the first 10 in zig-zag scan order are assumed to be 0: + x x x x 0 0 0 0 + x x x 0 0 0 0 0 + x x 0 0 0 0 0 0 + x 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + _y: The buffer to store the result in. + This may be the same as _x. 
+ _x: The input coefficients.*/ +static void oc_idct8x8_10(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + /*Transform rows of x into columns of w.*/ + idct8_4(w,_x); + idct8_3(w+1,_x+8); + idct8_2(w+2,_x+16); + idct8_1(w+3,_x+24); + /*Transform rows of w into columns of y.*/ + for(in=w,out=_y,end=out+8;out>4); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform. + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input coefficients.*/ +static void oc_idct8x8_slow(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + /*Transform rows of x into columns of w.*/ + for(in=_x,out=w,end=out+8;out>4); +} + +void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64], + int _last_zzi){ + (*_state->opt_vtable.idct8x8)(_y,_last_zzi); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. + It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. + However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. 
+ However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Then perform the iDCT.*/ + if(_last_zzi<3)oc_idct8x8_3(_y,_y); + else if(_last_zzi<10)oc_idct8x8_10(_y,_y); + else oc_idct8x8_slow(_y,_y); +} diff --git a/engine/code/libtheora-1.1.1/lib/info.c b/engine/code/libtheora-1.1.1/lib/info.c new file mode 100644 index 00000000..6b976297 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/info.c @@ -0,0 +1,131 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: info.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "internal.h" + + + +/*This is more or less the same as strncasecmp, but that doesn't exist + everywhere, and this is a fairly trivial function, so we include it. + Note: We take advantage of the fact that we know _n is less than or equal to + the length of at least one of the strings.*/ +static int oc_tagcompare(const char *_s1,const char *_s2,int _n){ + int c; + for(c=0;c<_n;c++){ + if(toupper(_s1[c])!=toupper(_s2[c]))return !0; + } + return _s1[c]!='='; +} + + + +void th_info_init(th_info *_info){ + memset(_info,0,sizeof(*_info)); + _info->version_major=TH_VERSION_MAJOR; + _info->version_minor=TH_VERSION_MINOR; + _info->version_subminor=TH_VERSION_SUB; + _info->keyframe_granule_shift=6; +} + +void th_info_clear(th_info *_info){ + memset(_info,0,sizeof(*_info)); +} + + + +void th_comment_init(th_comment *_tc){ + memset(_tc,0,sizeof(*_tc)); +} + +void th_comment_add(th_comment *_tc,char *_comment){ + char **user_comments; + int *comment_lengths; + int comment_len; + user_comments=_ogg_realloc(_tc->user_comments, + (_tc->comments+2)*sizeof(*_tc->user_comments)); + if(user_comments==NULL)return; + _tc->user_comments=user_comments; + comment_lengths=_ogg_realloc(_tc->comment_lengths, + (_tc->comments+2)*sizeof(*_tc->comment_lengths)); + if(comment_lengths==NULL)return; + _tc->comment_lengths=comment_lengths; + comment_len=strlen(_comment); + comment_lengths[_tc->comments]=comment_len; + user_comments[_tc->comments]=_ogg_malloc(comment_len+1); + if(user_comments[_tc->comments]==NULL)return; + memcpy(_tc->user_comments[_tc->comments],_comment,comment_len+1); + _tc->comments++; 
+ _tc->user_comments[_tc->comments]=NULL; +} + +void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){ + char *comment; + int tag_len; + int val_len; + tag_len=strlen(_tag); + val_len=strlen(_val); + /*+2 for '=' and '\0'.*/ + comment=_ogg_malloc(tag_len+val_len+2); + if(comment==NULL)return; + memcpy(comment,_tag,tag_len); + comment[tag_len]='='; + memcpy(comment+tag_len+1,_val,val_len+1); + th_comment_add(_tc,comment); + _ogg_free(comment); +} + +char *th_comment_query(th_comment *_tc,char *_tag,int _count){ + long i; + int found; + int tag_len; + tag_len=strlen(_tag); + found=0; + for(i=0;i<_tc->comments;i++){ + if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len)){ + /*We return a pointer to the data, not a copy.*/ + if(_count==found++)return _tc->user_comments[i]+tag_len+1; + } + } + /*Didn't find anything.*/ + return NULL; +} + +int th_comment_query_count(th_comment *_tc,char *_tag){ + long i; + int tag_len; + int count; + tag_len=strlen(_tag); + count=0; + for(i=0;i<_tc->comments;i++){ + if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len))count++; + } + return count; +} + +void th_comment_clear(th_comment *_tc){ + if(_tc!=NULL){ + long i; + for(i=0;i<_tc->comments;i++)_ogg_free(_tc->user_comments[i]); + _ogg_free(_tc->user_comments); + _ogg_free(_tc->comment_lengths); + _ogg_free(_tc->vendor); + memset(_tc,0,sizeof(*_tc)); + } +} diff --git a/engine/code/libtheora-1.1.1/lib/internal.c b/engine/code/libtheora-1.1.1/lib/internal.c new file mode 100644 index 00000000..0fe4f63e --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/internal.c @@ -0,0 +1,262 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: internal.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "internal.h" + + + +/*A map from the index in the zig zag scan to the coefficient number in a + block. + All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs + past the end of a block in bogus streams get mapped to a known location.*/ +const unsigned char OC_FZIG_ZAG[128]={ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64 +}; + +/*A map from the coefficient number in a block to its index in the zig zag + scan.*/ +const unsigned char OC_IZIG_ZAG[64]={ + 0, 1, 5, 6,14,15,27,28, + 2, 4, 7,13,16,26,29,42, + 3, 8,12,17,25,30,41,43, + 9,11,18,24,31,40,44,53, + 10,19,23,32,39,45,52,54, + 20,22,33,38,46,51,55,60, + 21,34,37,47,50,56,59,61, + 35,36,48,49,57,58,62,63 +}; + +/*A map from physical macro block ordering to bitstream macro block + ordering within a super block.*/ +const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}}; + +/*A list of the indices in the oc_mb.map array that can be valid for each of + the various chroma decimation types.*/ +const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={ + {0,1,2,3,4,8}, + {0,1,2,3,4,5,8,9}, + {0,1,2,3,4,6,8,10}, + {0,1,2,3,4,5,6,7,8,9,10,11} +}; + +/*The number of indices in the oc_mb.map array that can be valid for each of + the 
various chroma decimation types.*/ +const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12}; + +/*The number of extra bits that are coded with each of the DCT tokens. + Each DCT token has some fixed number of additional bits (possibly 0) stored + after the token itself, containing, for example, coefficient magnitude, + sign bits, etc.*/ +const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={ + 0,0,0,2,3,4,12,3,6, + 0,0,0,0, + 1,1,1,1,2,3,4,5,6,10, + 1,1,1,1,1,3,4, + 2,3 +}; + + + +int oc_ilog(unsigned _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + + + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with chroma decimated in the X and Y directions + (4:2:0). + _cbmvs: The chroma block-level motion vectors to fill in. + _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + int dx; + int dy; + dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0]; + dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1]; + _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2); + _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2); +} + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with chroma decimated in the Y direction. + _cbmvs: The chroma block-level motion vectors to fill in. 
+ _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + int dx; + int dy; + dx=_lbmvs[0][0]+_lbmvs[2][0]; + dy=_lbmvs[0][1]+_lbmvs[2][1]; + _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); + dx=_lbmvs[1][0]+_lbmvs[3][0]; + dy=_lbmvs[1][1]+_lbmvs[3][1]; + _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); +} + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with chroma decimated in the X direction (4:2:2). + _cbmvs: The chroma block-level motion vectors to fill in. + _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + int dx; + int dy; + dx=_lbmvs[0][0]+_lbmvs[1][0]; + dy=_lbmvs[0][1]+_lbmvs[1][1]; + _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); + dx=_lbmvs[2][0]+_lbmvs[3][0]; + dy=_lbmvs[2][1]+_lbmvs[3][1]; + _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); +} + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with no chroma decimation (4:4:4). + _cbmvs: The chroma block-level motion vectors to fill in. + _lmbmv: The luma macro-block level motion vector to fill in for use in + prediction. 
+ _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0])); +} + +/*A table of functions used to fill in the chroma plane motion vectors for a + macro block when 4 different motion vectors are specified in the luma + plane.*/ +const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={ + (oc_set_chroma_mvs_func)oc_set_chroma_mvs00, + (oc_set_chroma_mvs_func)oc_set_chroma_mvs01, + (oc_set_chroma_mvs_func)oc_set_chroma_mvs10, + (oc_set_chroma_mvs_func)oc_set_chroma_mvs11 +}; + + + +void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){ + size_t rowsz; + size_t colsz; + size_t datsz; + char *ret; + colsz=_height*sizeof(void *); + rowsz=_sz*_width; + datsz=rowsz*_height; + /*Alloc array and row pointers.*/ + ret=(char *)_ogg_malloc(datsz+colsz); + if(ret==NULL)return NULL; + /*Initialize the array.*/ + if(ret!=NULL){ + size_t i; + void **p; + char *datptr; + p=(void **)ret; + i=_height; + for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr; + } + return (void **)ret; +} + +void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){ + size_t colsz; + size_t rowsz; + size_t datsz; + char *ret; + colsz=_height*sizeof(void *); + rowsz=_sz*_width; + datsz=rowsz*_height; + /*Alloc array and row pointers.*/ + ret=(char *)_ogg_calloc(datsz+colsz,1); + if(ret==NULL)return NULL; + /*Initialize the array.*/ + if(ret!=NULL){ + size_t i; + void **p; + char *datptr; + p=(void **)ret; + i=_height; + for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr; + } + return (void **)ret; +} + +void oc_free_2d(void *_ptr){ + _ogg_free(_ptr); +} + +/*Fills in a Y'CbCr buffer with a pointer to the image data in the first + buffer, but with the opposite vertical orientation. + _dst: The destination buffer. + This can be the same as _src. 
+ _src: The source buffer.*/ +void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, + const th_ycbcr_buffer _src){ + int pli; + for(pli=0;pli<3;pli++){ + _dst[pli].width=_src[pli].width; + _dst[pli].height=_src[pli].height; + _dst[pli].stride=-_src[pli].stride; + _dst[pli].data=_src[pli].data + +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride; + } +} + +const char *th_version_string(void){ + return OC_VENDOR_STRING; +} + +ogg_uint32_t th_version_number(void){ + return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB; +} + +/*Determines the packet type. + Note that this correctly interprets a 0-byte packet as a video data packet. + Return: 1 for a header packet, 0 for a data packet.*/ +int th_packet_isheader(ogg_packet *_op){ + return _op->bytes>0?_op->packet[0]>>7:0; +} + +/*Determines the frame type of a video data packet. + Note that this correctly interprets a 0-byte packet as a delta frame. + Return: 1 for a key frame, 0 for a delta frame, and -1 for a header + packet.*/ +int th_packet_iskeyframe(ogg_packet *_op){ + return _op->bytes<=0?0:_op->packet[0]&0x80?-1:!(_op->packet[0]&0x40); +} diff --git a/engine/code/libtheora-1.1.1/lib/internal.h b/engine/code/libtheora-1.1.1/lib/internal.h new file mode 100644 index 00000000..d81263e1 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/internal.h @@ -0,0 +1,509 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: internal.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#if !defined(_internal_H) +# define _internal_H (1) +# include +# include +# if defined(HAVE_CONFIG_H) +# include +# endif +# include "theora/codec.h" +# include "theora/theora.h" + +# if defined(_MSC_VER) +/*Disable missing EMMS warnings.*/ +# pragma warning(disable:4799) +/*Thank you Microsoft, I know the order of operations.*/ +# pragma warning(disable:4554) +# endif +/*You, too, gcc.*/ +# if defined(__GNUC_PREREQ) +# if __GNUC_PREREQ(4,2) +# pragma GCC diagnostic ignored "-Wparentheses" +# endif +# endif + +# include "ocintrin.h" +# include "huffman.h" +# include "quant.h" + +/*Some assembly constructs require aligned operands.*/ +# if defined(OC_X86_ASM) +# if defined(__GNUC__) +# define OC_ALIGN8(expr) expr __attribute__((aligned(8))) +# define OC_ALIGN16(expr) expr __attribute__((aligned(16))) +# elif defined(_MSC_VER) +# define OC_ALIGN8(expr) __declspec (align(8)) expr +# define OC_ALIGN16(expr) __declspec (align(16)) expr +# endif +# endif +# if !defined(OC_ALIGN8) +# define OC_ALIGN8(expr) expr +# endif +# if !defined(OC_ALIGN16) +# define OC_ALIGN16(expr) expr +# endif + + + +typedef struct oc_sb_flags oc_sb_flags; +typedef struct oc_border_info oc_border_info; +typedef struct oc_fragment oc_fragment; +typedef struct oc_fragment_plane oc_fragment_plane; +typedef struct oc_base_opt_vtable oc_base_opt_vtable; +typedef struct oc_base_opt_data oc_base_opt_data; +typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable; +typedef struct oc_theora_state oc_theora_state; + + + +/*This library's version.*/ +# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)" + +/*Theora 
bitstream version.*/ +# define TH_VERSION_MAJOR (3) +# define TH_VERSION_MINOR (2) +# define TH_VERSION_SUB (1) +# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \ + ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \ + ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \ + (_info)->version_subminor>=(_sub))) + +/*A keyframe.*/ +#define OC_INTRA_FRAME (0) +/*A predicted frame.*/ +#define OC_INTER_FRAME (1) +/*A frame of unknown type (frame type decision has not yet been made).*/ +#define OC_UNKWN_FRAME (-1) + +/*The amount of padding to add to the reconstructed frame buffers on all + sides. + This is used to allow unrestricted motion vectors without special casing. + This must be a multiple of 2.*/ +#define OC_UMV_PADDING (16) + +/*Frame classification indices.*/ +/*The previous golden frame.*/ +#define OC_FRAME_GOLD (0) +/*The previous frame.*/ +#define OC_FRAME_PREV (1) +/*The current frame.*/ +#define OC_FRAME_SELF (2) + +/*The input or output buffer.*/ +#define OC_FRAME_IO (3) + +/*Macroblock modes.*/ +/*Macro block is invalid: It is never coded.*/ +#define OC_MODE_INVALID (-1) +/*Encoded difference from the same macro block in the previous frame.*/ +#define OC_MODE_INTER_NOMV (0) +/*Encoded with no motion compensated prediction.*/ +#define OC_MODE_INTRA (1) +/*Encoded difference from the previous frame offset by the given motion + vector.*/ +#define OC_MODE_INTER_MV (2) +/*Encoded difference from the previous frame offset by the last coded motion + vector.*/ +#define OC_MODE_INTER_MV_LAST (3) +/*Encoded difference from the previous frame offset by the second to last + coded motion vector.*/ +#define OC_MODE_INTER_MV_LAST2 (4) +/*Encoded difference from the same macro block in the previous golden + frame.*/ +#define OC_MODE_GOLDEN_NOMV (5) +/*Encoded difference from the previous golden frame offset by the given motion + vector.*/ +#define OC_MODE_GOLDEN_MV (6) +/*Encoded difference from the previous frame offset by the individual 
motion + vectors given for each block.*/ +#define OC_MODE_INTER_MV_FOUR (7) +/*The number of (coded) modes.*/ +#define OC_NMODES (8) + +/*Determines the reference frame used for a given MB mode.*/ +#define OC_FRAME_FOR_MODE(_x) \ + OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \ + OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x)) + +/*Constants for the packet state machine common between encoder and decoder.*/ + +/*Next packet to emit/read: Codec info header.*/ +#define OC_PACKET_INFO_HDR (-3) +/*Next packet to emit/read: Comment header.*/ +#define OC_PACKET_COMMENT_HDR (-2) +/*Next packet to emit/read: Codec setup header.*/ +#define OC_PACKET_SETUP_HDR (-1) +/*No more packets to emit/read.*/ +#define OC_PACKET_DONE (INT_MAX) + + + +/*Super blocks are 32x32 segments of pixels in a single color plane indexed + in image order. + Internally, super blocks are broken up into four quadrants, each of which + contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels. + Quadrants, and the blocks within them, are indexed in a special order called + a "Hilbert curve" within the super block. + + In order to differentiate between the Hilbert-curve indexing strategy and + the regular image order indexing strategy, blocks indexed in image order + are called "fragments". + Fragments are indexed in image order, left to right, then bottom to top, + from Y' plane to Cb plane to Cr plane. + + The co-located fragments in all image planes corresponding to the location + of a single quadrant of a luma plane super block form a macro block. + Thus there is only a single set of macro blocks for all planes, each of which + contains between 6 and 12 fragments, depending on the pixel format. + Therefore macro block information is kept in a separate set of arrays from + super blocks to avoid unused space in the other planes. + The lists are indexed in super block order. 
+ That is, the macro block corresponding to the macro block mbi in (luma plane) + super block sbi is at index (sbi<<2|mbi). + Thus the number of macro blocks in each dimension is always twice the number + of super blocks, even when only an odd number fall inside the coded frame. + These "extra" macro blocks are just an artifact of our internal data layout, + and not part of the coded stream; they are flagged with a negative MB mode.*/ + + + +/*A single quadrant of the map from a super block to fragment numbers.*/ +typedef ptrdiff_t oc_sb_map_quad[4]; +/*A map from a super block to fragment numbers.*/ +typedef oc_sb_map_quad oc_sb_map[4]; +/*A single plane of the map from a macro block to fragment numbers.*/ +typedef ptrdiff_t oc_mb_map_plane[4]; +/*A map from a macro block to fragment numbers.*/ +typedef oc_mb_map_plane oc_mb_map[3]; +/*A motion vector.*/ +typedef signed char oc_mv[2]; + + + +/*Super block information.*/ +struct oc_sb_flags{ + unsigned char coded_fully:1; + unsigned char coded_partially:1; + unsigned char quad_valid:4; +}; + + + +/*Information about a fragment which intersects the border of the displayable + region. + This marks which pixels belong to the displayable region.*/ +struct oc_border_info{ + /*A bit mask marking which pixels are in the displayable region. + Pixel (x,y) corresponds to bit (y<<3|x).*/ + ogg_int64_t mask; + /*The number of pixels in the displayable region. + This is always positive, and always less than 64.*/ + int npixels; +}; + + + +/*Fragment information.*/ +struct oc_fragment{ + /*A flag indicating whether or not this fragment is coded.*/ + unsigned coded:1; + /*A flag indicating that this entire fragment lies outside the displayable + region of the frame. + Note the contrast with an invalid macro block, which is outside the coded + frame, not just the displayable one. 
+ There are no fragments outside the coded frame by construction.*/ + unsigned invalid:1; + /*The index of the quality index used for this fragment's AC coefficients.*/ + unsigned qii:6; + /*The mode of the macroblock this fragment belongs to.*/ + unsigned mb_mode:3; + /*The index of the associated border information for fragments which lie + partially outside the displayable region. + For fragments completely inside or outside this region, this is -1. + Note that the C standard requires an explicit signed keyword for bitfield + types, since some compilers may treat them as unsigned without it.*/ + signed int borderi:5; + /*The prediction-corrected DC component. + Note that the C standard requires an explicit signed keyword for bitfield + types, since some compilers may treat them as unsigned without it.*/ + signed int dc:16; +}; + + + +/*A description of each fragment plane.*/ +struct oc_fragment_plane{ + /*The number of fragments in the horizontal direction.*/ + int nhfrags; + /*The number of fragments in the vertical direction.*/ + int nvfrags; + /*The offset of the first fragment in the plane.*/ + ptrdiff_t froffset; + /*The total number of fragments in the plane.*/ + ptrdiff_t nfrags; + /*The number of super blocks in the horizontal direction.*/ + unsigned nhsbs; + /*The number of super blocks in the vertical direction.*/ + unsigned nvsbs; + /*The offset of the first super block in the plane.*/ + unsigned sboffset; + /*The total number of super blocks in the plane.*/ + unsigned nsbs; +}; + + + +/*The shared (encoder and decoder) functions that have accelerated variants.*/ +struct oc_base_opt_vtable{ + void (*frag_copy)(unsigned char *_dst, + const unsigned char *_src,int _ystride); + void (*frag_recon_intra)(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]); + void (*frag_recon_inter)(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); + void (*frag_recon_inter2)(unsigned char *_dst,const unsigned 
char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]); + void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi); + void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); + void (*state_frag_copy_list)(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); + void (*state_loop_filter_frag_rows)(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); + void (*restore_fpu)(void); +}; + +/*The shared (encoder and decoder) tables that vary according to which variants + of the above functions are used.*/ +struct oc_base_opt_data{ + const unsigned char *dct_fzig_zag; +}; + + +/*State information common to both the encoder and decoder.*/ +struct oc_theora_state{ + /*The stream information.*/ + th_info info; + /*Table for shared accelerated functions.*/ + oc_base_opt_vtable opt_vtable; + /*Table for shared data used by accelerated functions.*/ + oc_base_opt_data opt_data; + /*CPU flags to detect the presence of extended instruction sets.*/ + ogg_uint32_t cpu_flags; + /*The fragment plane descriptions.*/ + oc_fragment_plane fplanes[3]; + /*The list of fragments, indexed in image order.*/ + oc_fragment *frags; + /*The offset into the reference frame buffer to the upper-left pixel of + each fragment.*/ + ptrdiff_t *frag_buf_offs; + /*The motion vector for each fragment.*/ + oc_mv *frag_mvs; + /*The total number of fragments in a single frame.*/ + ptrdiff_t nfrags; + /*The list of super block maps, indexed in image order.*/ + oc_sb_map *sb_maps; + /*The list of super block flags, indexed in image order.*/ + oc_sb_flags *sb_flags; + /*The total number of super blocks in a single frame.*/ + unsigned nsbs; + /*The fragments from each color plane that belong to each macro block. + Fragments are stored in image order (left to right then top to bottom).
+ When chroma components are decimated, the extra fragments have an index of + -1.*/ + oc_mb_map *mb_maps; + /*The list of macro block modes. + A negative number indicates the macro block lies entirely outside the + coded frame.*/ + signed char *mb_modes; + /*The number of macro blocks in the X direction.*/ + unsigned nhmbs; + /*The number of macro blocks in the Y direction.*/ + unsigned nvmbs; + /*The total number of macro blocks.*/ + size_t nmbs; + /*The list of coded fragments, in coded order. + Uncoded fragments are stored in reverse order from the end of the list.*/ + ptrdiff_t *coded_fragis; + /*The number of coded fragments in each plane.*/ + ptrdiff_t ncoded_fragis[3]; + /*The total number of coded fragments.*/ + ptrdiff_t ntotal_coded_fragis; + /*The index of the buffers being used for each OC_FRAME_* reference frame.*/ + int ref_frame_idx[4]; + /*The actual buffers used for the previously decoded frames.*/ + th_ycbcr_buffer ref_frame_bufs[4]; + /*The storage for the reference frame buffers.*/ + unsigned char *ref_frame_data[4]; + /*The strides for each plane in the reference frames.*/ + int ref_ystride[3]; + /*The number of unique border patterns.*/ + int nborders; + /*The unique border patterns for all border fragments. 
+ The borderi field of fragments which straddle the border indexes this + list.*/ + oc_border_info borders[16]; + /*The frame number of the last keyframe.*/ + ogg_int64_t keyframe_num; + /*The frame number of the current frame.*/ + ogg_int64_t curframe_num; + /*The granpos of the current frame.*/ + ogg_int64_t granpos; + /*The type of the current frame.*/ + unsigned char frame_type; + /*The bias to add to the frame count when computing granule positions.*/ + unsigned char granpos_bias; + /*The number of quality indices used in the current frame.*/ + unsigned char nqis; + /*The quality indices of the current frame.*/ + unsigned char qis[3]; + /*The dequantization tables, stored in zig-zag order, and indexed by + qi, pli, qti, and zzi.*/ + ogg_uint16_t *dequant_tables[64][3][2]; + OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]); + /*Loop filter strength parameters.*/ + unsigned char loop_filter_limits[64]; +}; + + + +/*The function type used to fill in the chroma plane motion vectors for a + macro block when 4 different motion vectors are specified in the luma + plane. + _cbmvs: The chroma block-level motion vectors to fill in. + _lmbmv: The luma macro-block level motion vector to fill in for use in + prediction (NOTE(review): the typedef takes only _cbmvs and _lbmvs, so this entry looks stale; confirm).
+ _lbmvs: The luma block-level motion vectors.*/ +typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]); + + + +/*A map from the index in the zig zag scan to the coefficient number in a + block.*/ +extern const unsigned char OC_FZIG_ZAG[128]; +/*A map from the coefficient number in a block to its index in the zig zag + scan.*/ +extern const unsigned char OC_IZIG_ZAG[64]; +/*A map from physical macro block ordering to bitstream macro block + ordering within a super block.*/ +extern const unsigned char OC_MB_MAP[2][2]; +/*A list of the indices in the oc_mb_map array that can be valid for each of + the various chroma decimation types.*/ +extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]; +/*The number of indices in the oc_mb_map array that can be valid for each of + the various chroma decimation types.*/ +extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]; +/*A table of functions used to fill in the Cb,Cr plane motion vectors for a + macro block when 4 different motion vectors are specified in the luma + plane.*/ +extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]; + + + +int oc_ilog(unsigned _v); +void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz); +void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz); +void oc_free_2d(void *_ptr); + +void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, + const th_ycbcr_buffer _src); + +int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs); +void oc_state_clear(oc_theora_state *_state); +void oc_state_vtable_init_c(oc_theora_state *_state); +void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, + int _y0,int _yend); +void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli); +void oc_state_borders_fill(oc_theora_state *_state,int _refi); +void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx, + th_ycbcr_buffer _img); +int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int 
_mby); +int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], + int _pli,int _dx,int _dy); + +int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv); +void oc_state_loop_filter(oc_theora_state *_state,int _frame); +#if defined(OC_DUMP_IMAGES) +int oc_state_dump_frame(const oc_theora_state *_state,int _frame, + const char *_suf); +#endif + +/*Shared accelerated functions.*/ +void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride); +void oc_frag_recon_intra(const oc_theora_state *_state, + unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter2(const oc_theora_state *_state, + unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2, + int _ystride,const ogg_int16_t _residue[64]); +void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); +void oc_restore_fpu(const oc_theora_state *_state); + +/*Default pure-C implementations.*/ +void oc_frag_copy_c(unsigned char *_dst, + const unsigned char *_src,int _src_ystride); +void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride, + const ogg_int16_t _residue[64]); +void oc_frag_recon_inter_c(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, + const unsigned 
char *_src2,int _ystride,const ogg_int16_t _residue[64]); +void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list_c(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); +void oc_restore_fpu_c(void); + +/*We need a way to call a few encoder functions without introducing a link-time + dependency into the decoder, while still allowing the old alpha API which + does not distinguish between encoder and decoder objects to be used. + We do this by placing a function table at the start of the encoder object + which can dispatch into the encoder library. + We do a similar thing for the decoder in case we ever decide to split off a + common base library.*/ +typedef void (*oc_state_clear_func)(theora_state *_th); +typedef int (*oc_state_control_func)(theora_state *th,int _req, + void *_buf,size_t _buf_sz); +typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th, + ogg_int64_t _granulepos); +typedef double (*oc_state_granule_time_func)(theora_state *_th, + ogg_int64_t _granulepos); + + +struct oc_state_dispatch_vtable{ + oc_state_clear_func clear; + oc_state_control_func control; + oc_state_granule_frame_func granule_frame; + oc_state_granule_time_func granule_time; +}; + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/ocintrin.h b/engine/code/libtheora-1.1.1/lib/ocintrin.h new file mode 100644 index 00000000..d49ebb21 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/ocintrin.h @@ -0,0 +1,128 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: ocintrin.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*Some common macros for potential platform-specific optimization.*/ +#include +#if !defined(_ocintrin_H) +# define _ocintrin_H (1) + +/*Some specific platforms may have optimized intrinsic or inline assembly + versions of these functions which can substantially improve performance. + We define macros for them to allow easy incorporation of these non-ANSI + features.*/ + +/*Note that we do not provide a macro for abs(), because it is provided as a + library function, which we assume is translated into an intrinsic to avoid + the function call overhead and then implemented in the smartest way for the + target platform. + With modern gcc (4.x), this is true: it uses cmov instructions if the + architecture supports it and branchless bit-twiddling if it does not (the + speed difference between the two approaches is not measurable). + Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150) + by Sun Microsystems, despite prior art dating back to at least 1996: + http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT + On gcc 3.x, however, our assumption is not true, as abs() is translated to a + conditional jump, which is horrible on deeply pipelined architectures (e.g., + all consumer architectures for the past decade or more). + Also be warned that -C*abs(x) where C is a constant is mis-optimized as + abs(C*x) on every gcc release before 4.2.3.
+ See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */ + +/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if + given an appropriate architecture, but the branchless bit-twiddling versions + are just as fast, and do not require any special target architecture. + Earlier gcc versions (3.x) compiled both versions to the same assembly + instructions, because of the way they represented ((_b)>(_a)) internally.*/ +#define OC_MAXI(_a,_b) ((_a)-((_a)-(_b)&-((_b)>(_a)))) +#define OC_MINI(_a,_b) ((_a)+((_b)-(_a)&-((_b)<(_a)))) +/*Clamps an integer into the given range. + If _a>_c, then the lower bound _a is respected over the upper bound _c (this + behavior is required to meet our documented API behavior). + _a: The lower bound. + _b: The value to clamp. + _c: The upper bound.*/ +#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) +#define OC_CLAMP255(_x) ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255)))) +/*This has a chance of compiling branchless, and is just as fast as the + bit-twiddling method, which is slightly less portable, since it relies on a + sign-extended rightshift, which is not guaranteed by ANSI (but present on + every relevant platform).*/ +#define OC_SIGNI(_a) (((_a)>0)-((_a)<0)) +/*Slightly more portable than relying on a sign-extended right-shift (which is + not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both) + compile it into the right-shift anyway.*/ +#define OC_SIGNMASK(_a) (-((_a)<0)) +/*Divides an integer by a power of two, truncating towards 0. + _dividend: The integer to divide. + _shift: The non-negative power of two to divide by.
+ _rmask: (1<<_shift)-1*/ +#define OC_DIV_POW2(_dividend,_shift,_rmask)\ + ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift)) +/*Divides _x by 65536, truncating towards 0.*/ +#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF) +/*Divides _x by 2, truncating towards 0.*/ +#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1) +/*Divides _x by 8, truncating towards 0.*/ +#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7) +/*Divides _x by 16, truncating towards 0.*/ +#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF) +/*Right shifts _dividend by _shift, adding _rval, and subtracting one for + negative dividends first. + When _rval is (1<<_shift-1), this is equivalent to division with rounding + ties away from zero.*/ +#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\ + ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift)) +/*Divides _x by 2, rounding towards even numbers.*/ +#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1) +/*Divides _x by (1<<(_shift)), rounding towards even numbers.*/ +#define OC_DIV_POW2_RE(_x,_shift) \ + ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift)) +/*Swaps two integers _a and _b if _a>_b.*/ +#define OC_SORT2I(_a,_b) \ + do{ \ + int t__; \ + t__=((_a)^(_b))&-((_b)<(_a)); \ + (_a)^=t__; \ + (_b)^=t__; \ + } \ + while(0) + +/*Accesses one of four (signed) bytes given an index. + This can be used to avoid small lookup tables.*/ +#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \ + ((signed char) \ + (((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8)) +/*Accesses one of eight (unsigned) nibbles given an index.
+ This can be used to avoid small lookup tables.*/ +#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \ + ((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \ + ((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF) + + + +/*All of these macros should expect floats as arguments.*/ +#define OC_MAXF(_a,_b) ((_a)<(_b)?(_b):(_a)) +#define OC_MINF(_a,_b) ((_a)>(_b)?(_b):(_a)) +#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c))) +#define OC_FABSF(_f) ((float)fabs(_f)) +#define OC_SQRTF(_f) ((float)sqrt(_f)) +#define OC_POWF(_b,_e) ((float)pow(_b,_e)) +#define OC_LOGF(_f) ((float)log(_f)) +#define OC_IFLOORF(_f) ((int)floor(_f)) +#define OC_ICEILF(_f) ((int)ceil(_f)) + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/quant.c b/engine/code/libtheora-1.1.1/lib/quant.c new file mode 100644 index 00000000..8359f5ab --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/quant.c @@ -0,0 +1,119 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: quant.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "quant.h" +#include "decint.h" + +static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2}; +static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; + +/*Initializes the dequantization tables from a set of quantizer info. 
+ Currently the dequantizer (and elsewhere enquantizer) tables are expected to + be initialized as pointing to the storage reserved for them in the + oc_theora_state (resp. oc_enc_ctx) structure. + If some tables are duplicates of others, the pointers will be adjusted to + point to a single copy of the tables, but the storage for them will not be + freed. + If you're concerned about the memory footprint, the obvious thing to do is + to move the storage out of its fixed place in the structures and allocate + it on demand. + However, a much, much better option is to only store the quantization + matrices being used for the current frame, and to recalculate these as the + qi values change between frames (this is what VP3 did).*/ +void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], + int _pp_dc_scale[64],const th_quant_info *_qinfo){ + /*Coding mode: intra or inter.*/ + int qti; + /*Y', C_b, C_r*/ + int pli; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + /*Quality index.*/ + int qi; + /*Range iterator.*/ + int qri; + for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){ + th_quant_base base; + ogg_uint32_t q; + int qi_start; + int qi_end; + memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri], + sizeof(base)); + qi_start=qi; + if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1; + else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri]; + /*Iterate over quality indices in this range.*/ + for(;;){ + ogg_uint32_t qfac; + int zzi; + int ci; + /*In the original VP3.2 code, the rounding offset and the size of the + dead zone around 0 were controlled by a "sharpness" parameter. + The size of our dead zone is now controlled by the per-coefficient + quality thresholds returned by our HVS module. + We round down from a more accurate value when the quality of the + reconstruction does not fall below our threshold and it saves bits.
+ Hence, all of that VP3.2 code is gone from here, and the remaining + floating point code has been implemented as equivalent integer code + with exact precision.*/ + qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]; + /*For postprocessing, not dequantization.*/ + if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160); + /*Scale the DC coefficient from the proper table.*/ + q=(qfac/100)<<2; + q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX); + _dequant[qi][pli][qti][0]=(ogg_uint16_t)q; + /*Now scale AC coefficients from the proper table.*/ + for(zzi=1;zzi<64;zzi++){ + q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2; + q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX); + _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q; + } + /*If this is a duplicate of a previous matrix, use that instead. + This simple check helps us improve cache coherency later.*/ + { + int dupe; + int qtj; + int plj; + dupe=0; + for(qtj=0;qtj<=qti;qtj++){ + for(plj=0;plj<(qtj=qi_end)break; + /*Interpolate the next base matrix.*/ + for(ci=0;ci<64;ci++){ + base[ci]=(unsigned char)( + (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ + (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) + +_qinfo->qi_ranges[qti][pli].sizes[qri])/ + (2*_qinfo->qi_ranges[qti][pli].sizes[qri])); + } + } + } + } +} diff --git a/engine/code/libtheora-1.1.1/lib/quant.h b/engine/code/libtheora-1.1.1/lib/quant.h new file mode 100644 index 00000000..49ce13a6 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/quant.h @@ -0,0 +1,33 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: quant.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_quant_H) +# define _quant_H (1) +# include "theora/codec.h" +# include "ocintrin.h" + +typedef ogg_uint16_t oc_quant_table[64]; + + +/*Maximum scaled quantizer value.*/ +#define OC_QUANT_MAX (1024<<2) + + +void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], + int _pp_dc_scale[64],const th_quant_info *_qinfo); + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/state.c b/engine/code/libtheora-1.1.1/lib/state.c new file mode 100644 index 00000000..42ed33a9 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/state.c @@ -0,0 +1,1227 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: state.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include "internal.h" +#if defined(OC_X86_ASM) +#if defined(_MSC_VER) +# include "x86_vc/x86int.h" +#else +# include "x86/x86int.h" +#endif +#endif +#if defined(OC_DUMP_IMAGES) +# include +# include "png.h" +#endif + +/*Returns the fragment index of the top-left block in a macro block. 
+ This can be used to test whether or not the whole macro block is valid. + _sb_map: The super block map. + _quadi: The quadrant number. + Return: The index of the fragment of the upper left block in the macro + block, or -1 if the block lies outside the coded frame.*/ +static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ + /*It so happens that under the Hilbert curve ordering described below, the + upper-left block in each macro block is at index 0, except in macro block + 3, where it is at index 2.*/ + return _sb_map[_quadi][_quadi&_quadi<<1]; +} + +/*Fills in the mapping from block positions to fragment numbers for a single + color plane. + This function also fills in the "valid" flag of each quadrant in the super + block flags. + _sb_maps: The array of super block maps for the color plane. + _sb_flags: The array of super block flags for the color plane. + _frag0: The index of the first fragment in the plane. + _hfrags: The number of horizontal fragments in a coded frame. + _vfrags: The number of vertical fragments in a coded frame.*/ +static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], + oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ + /*Contains the (macro_block,block) indices for a 4x4 grid of + fragments. + The pattern is a 4x4 Hilbert space-filling curve. + A Hilbert curve has the nice property that as the curve grows larger, its + fractal dimension approaches 2. 
+ The intuition is that nearby blocks in the curve are also close spatially, + with the previous element always an immediate neighbor, so that runs of + blocks should be well correlated.*/ + static const int SB_MAP[4][4][2]={ + {{0,0},{0,1},{3,2},{3,3}}, + {{0,3},{0,2},{3,1},{3,0}}, + {{1,0},{1,3},{2,0},{2,3}}, + {{1,1},{1,2},{2,1},{2,2}} + }; + ptrdiff_t yfrag; + unsigned sbi; + int y; + sbi=0; + yfrag=_frag0; + for(y=0;;y+=4){ + int imax; + int x; + /*Figure out how many rows of blocks in this super block lie within the + image.*/ + imax=_vfrags-y; + if(imax>4)imax=4; + else if(imax<=0)break; + for(x=0;;x+=4,sbi++){ + ptrdiff_t xfrag; + int jmax; + int quadi; + int i; + /*Figure out how many columns of blocks in this super block lie within the + image.*/ + jmax=_hfrags-x; + if(jmax>4)jmax=4; + else if(jmax<=0)break; + /*By default, set all fragment indices to -1.*/ + memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi])); + /*Fill in the fragment map for this super block.*/ + xfrag=yfrag+x; + for(i=0;i=0)<nhfrags+_xfrag0+j; + } +} + +/*Fills in the chroma plane fragment maps for a macro block. + This version is for use with chroma decimated in the X and Y directions + (4:2:0). + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes. + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + _xfrag0>>=1; + _yfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; + _mb_map[1][0]=fragi+_fplanes[1].froffset; + _mb_map[2][0]=fragi+_fplanes[2].froffset; +} + +/*Fills in the chroma plane fragment maps for a macro block. + This version is for use with chroma decimated in the Y direction. + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes.
+ _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + int j; + _yfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; + for(j=0;j<2;j++){ + _mb_map[1][j]=fragi+_fplanes[1].froffset; + _mb_map[2][j]=fragi+_fplanes[2].froffset; + fragi++; + } +} + +/*Fills in the chroma plane fragment maps for a macro block. + This version is for use with chroma decimated in the X direction (4:2:2). + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes. + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + int i; + _xfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; + for(i=0;i<2;i++){ + _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; + _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; + fragi+=_fplanes[1].nhfrags; + } +} + +/*Fills in the chroma plane fragment maps for a macro block. + This version is for use with no chroma decimation (4:4:4). + This uses the already filled-in luma plane values. + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes.*/ +static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3]){ + int k; + for(k=0;k<4;k++){ + _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; + _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; + } +} + +/*The function type used to fill in the chroma plane fragment maps for a + macro block. + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes. 
+ _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0); + +/*A table of functions used to fill in the chroma plane fragment maps for a + macro block for each type of chrominance decimation.*/ +static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={ + oc_mb_fill_cmapping00, + oc_mb_fill_cmapping01, + oc_mb_fill_cmapping10, + (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11 +}; + +/*Fills in the mapping from macro blocks to their corresponding fragment + numbers in each plane. + _mb_maps: The list of macro block maps. + _mb_modes: The list of macro block modes; macro blocks completely outside + the coded region are marked invalid. + _fplanes: The descriptions of the fragment planes. + _pixel_fmt: The chroma decimation type.*/ +static void oc_mb_create_mapping(oc_mb_map _mb_maps[], + signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ + oc_mb_fill_cmapping_func mb_fill_cmapping; + unsigned sbi; + int y; + mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; + /*Loop through the luma plane super blocks.*/ + for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ + int x; + for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ + int ymb; + /*Loop through the macro blocks in each super block in display order.*/ + for(ymb=0;ymb<2;ymb++){ + int xmb; + for(xmb=0;xmb<2;xmb++){ + unsigned mbi; + int mbx; + int mby; + mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; + mbx=x|xmb<<1; + mby=y|ymb<<1; + /*Initialize fragment indices to -1.*/ + memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); + /*Make sure this macro block is within the encoded region.*/ + if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ + _mb_modes[mbi]=OC_MODE_INVALID; + continue; + } + /*Fill in the fragment indices for the luma plane.*/ + 
oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); + /*Fill in the fragment indices for the chroma planes.*/ + (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); + } + } + } + } +} + +/*Marks the fragments which fall all or partially outside the displayable + region of the frame. + _state: The Theora state containing the fragments to be marked.*/ +static void oc_state_border_init(oc_theora_state *_state){ + oc_fragment *frag; + oc_fragment *yfrag_end; + oc_fragment *xfrag_end; + oc_fragment_plane *fplane; + int crop_x0; + int crop_y0; + int crop_xf; + int crop_yf; + int pli; + int y; + int x; + /*The method we use here is slow, but the code is dead simple and handles + all the special cases easily. + We only ever need to do it once.*/ + /*Loop through the fragments, marking those completely outside the + displayable region and constructing a border mask for those that straddle + the border.*/ + _state->nborders=0; + yfrag_end=frag=_state->frags; + for(pli=0;pli<3;pli++){ + fplane=_state->fplanes+pli; + /*Set up the cropping rectangle for this plane.*/ + crop_x0=_state->info.pic_x; + crop_xf=_state->info.pic_x+_state->info.pic_width; + crop_y0=_state->info.pic_y; + crop_yf=_state->info.pic_y+_state->info.pic_height; + if(pli>0){ + if(!(_state->info.pixel_fmt&1)){ + crop_x0=crop_x0>>1; + crop_xf=crop_xf+1>>1; + } + if(!(_state->info.pixel_fmt&2)){ + crop_y0=crop_y0>>1; + crop_yf=crop_yf+1>>1; + } + } + y=0; + for(yfrag_end+=fplane->nfrags;fragnhfrags;frag=crop_xf||crop_y0>=crop_yf){ + frag->invalid=1; + } + /*Otherwise, check to see if it straddles the border.*/ + else if(x=crop_x0&&x+j=crop_y0&&y+i=_state->nborders){ + _state->nborders++; + _state->borders[i].mask=mask; + _state->borders[i].npixels=npixels; + } + else if(_state->borders[i].mask!=mask)continue; + frag->borderi=i; + break; + } + } + else frag->borderi=-1; + } + } + } +} + +static int oc_state_frarray_init(oc_theora_state *_state){ + int yhfrags; + int yvfrags; + int chfrags; + int cvfrags; + 
ptrdiff_t yfrags; + ptrdiff_t cfrags; + ptrdiff_t nfrags; + unsigned yhsbs; + unsigned yvsbs; + unsigned chsbs; + unsigned cvsbs; + unsigned ysbs; + unsigned csbs; + unsigned nsbs; + size_t nmbs; + int hdec; + int vdec; + int pli; + /*Figure out the number of fragments in each plane.*/ + /*These parameters have already been validated to be multiples of 16.*/ + yhfrags=_state->info.frame_width>>3; + yvfrags=_state->info.frame_height>>3; + hdec=!(_state->info.pixel_fmt&1); + vdec=!(_state->info.pixel_fmt&2); + chfrags=yhfrags+hdec>>hdec; + cvfrags=yvfrags+vdec>>vdec; + yfrags=yhfrags*(ptrdiff_t)yvfrags; + cfrags=chfrags*(ptrdiff_t)cvfrags; + nfrags=yfrags+2*cfrags; + /*Figure out the number of super blocks in each plane.*/ + yhsbs=yhfrags+3>>2; + yvsbs=yvfrags+3>>2; + chsbs=chfrags+3>>2; + cvsbs=cvfrags+3>>2; + ysbs=yhsbs*yvsbs; + csbs=chsbs*cvsbs; + nsbs=ysbs+2*csbs; + nmbs=(size_t)ysbs<<2; + /*Check for overflow. + We support the ridiculous upper limits of the specification (1048560 by + 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, + but for those with 32-bit pointers (or smaller!) we have to check. 
+ If the caller wants to prevent denial-of-service by imposing a more + reasonable upper limit on the size of attempted allocations, they must do + so themselves; we have no platform independent way to determine how much + system memory there is nor an application-independent way to decide what a + "reasonable" allocation is.*/ + if(yfrags/yhfrags!=yvfrags||2*cfrags>2!=ysbs){ + return TH_EIMPL; + } + /*Initialize the fragment array.*/ + _state->fplanes[0].nhfrags=yhfrags; + _state->fplanes[0].nvfrags=yvfrags; + _state->fplanes[0].froffset=0; + _state->fplanes[0].nfrags=yfrags; + _state->fplanes[0].nhsbs=yhsbs; + _state->fplanes[0].nvsbs=yvsbs; + _state->fplanes[0].sboffset=0; + _state->fplanes[0].nsbs=ysbs; + _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags; + _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags; + _state->fplanes[1].froffset=yfrags; + _state->fplanes[2].froffset=yfrags+cfrags; + _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags; + _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs; + _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs; + _state->fplanes[1].sboffset=ysbs; + _state->fplanes[2].sboffset=ysbs+csbs; + _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; + _state->nfrags=nfrags; + _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); + _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); + _state->nsbs=nsbs; + _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); + _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); + _state->nhmbs=yhsbs<<1; + _state->nvmbs=yvsbs<<1; + _state->nmbs=nmbs; + _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); + _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); + _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); + if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| + _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| + _state->coded_fragis==NULL){ + 
return TH_EFAULT; + } + /*Create the mapping from super blocks to fragments.*/ + for(pli=0;pli<3;pli++){ + oc_fragment_plane *fplane; + fplane=_state->fplanes+pli; + oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, + _state->sb_flags+fplane->sboffset,fplane->froffset, + fplane->nhfrags,fplane->nvfrags); + } + /*Create the mapping from macro blocks to fragments.*/ + oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, + _state->fplanes,_state->info.pixel_fmt); + /*Initialize the invalid and borderi fields of each fragment.*/ + oc_state_border_init(_state); + return 0; +} + +static void oc_state_frarray_clear(oc_theora_state *_state){ + _ogg_free(_state->coded_fragis); + _ogg_free(_state->mb_modes); + _ogg_free(_state->mb_maps); + _ogg_free(_state->sb_flags); + _ogg_free(_state->sb_maps); + _ogg_free(_state->frag_mvs); + _ogg_free(_state->frags); +} + + +/*Initializes the buffers used for reconstructed frames. + These buffers are padded with 16 extra pixels on each side, to allow + unrestricted motion vectors without special casing the boundary. + If chroma is decimated in either direction, the padding is reduced by a + factor of 2 on the appropriate sides. 
+ _nrefs: The number of reference buffers to init; must be 3 or 4.*/ +static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ + th_info *info; + unsigned char *ref_frame_data; + size_t ref_frame_data_sz; + size_t ref_frame_sz; + size_t yplane_sz; + size_t cplane_sz; + int yhstride; + int yheight; + int chstride; + int cheight; + ptrdiff_t yoffset; + ptrdiff_t coffset; + ptrdiff_t *frag_buf_offs; + ptrdiff_t fragi; + int hdec; + int vdec; + int rfi; + int pli; + if(_nrefs<3||_nrefs>4)return TH_EINVAL; + info=&_state->info; + /*Compute the image buffer parameters for each plane.*/ + hdec=!(info->pixel_fmt&1); + vdec=!(info->pixel_fmt&2); + yhstride=info->frame_width+2*OC_UMV_PADDING; + yheight=info->frame_height+2*OC_UMV_PADDING; + chstride=yhstride>>hdec; + cheight=yheight>>vdec; + yplane_sz=yhstride*(size_t)yheight; + cplane_sz=chstride*(size_t)cheight; + yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; + coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; + ref_frame_sz=yplane_sz+2*cplane_sz; + ref_frame_data_sz=_nrefs*ref_frame_sz; + /*Check for overflow. 
+ The same caveats apply as for oc_state_frarray_init().*/ + if(yplane_sz/yhstride!=yheight||2*cplane_szfrag_buf_offs= + _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); + if(ref_frame_data==NULL||frag_buf_offs==NULL){ + _ogg_free(frag_buf_offs); + _ogg_free(ref_frame_data); + return TH_EFAULT; + } + /*Set up the width, height and stride for the image buffers.*/ + _state->ref_frame_bufs[0][0].width=info->frame_width; + _state->ref_frame_bufs[0][0].height=info->frame_height; + _state->ref_frame_bufs[0][0].stride=yhstride; + _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= + info->frame_width>>hdec; + _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= + info->frame_height>>vdec; + _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= + chstride; + for(rfi=1;rfi<_nrefs;rfi++){ + memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], + sizeof(_state->ref_frame_bufs[0])); + } + /*Set up the data pointers for the image buffers.*/ + for(rfi=0;rfi<_nrefs;rfi++){ + _state->ref_frame_data[rfi]=ref_frame_data; + _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; + ref_frame_data+=yplane_sz; + _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; + ref_frame_data+=cplane_sz; + _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; + ref_frame_data+=cplane_sz; + /*Flip the buffer upside down. 
+ This allows us to decode Theora's bottom-up frames in their natural + order, yet return a top-down buffer with a positive stride to the user.*/ + oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], + _state->ref_frame_bufs[rfi]); + } + _state->ref_ystride[0]=-yhstride; + _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; + /*Initialize the fragment buffer offsets.*/ + ref_frame_data=_state->ref_frame_data[0]; + fragi=0; + for(pli=0;pli<3;pli++){ + th_img_plane *iplane; + oc_fragment_plane *fplane; + unsigned char *vpix; + ptrdiff_t stride; + ptrdiff_t vfragi_end; + int nhfrags; + iplane=_state->ref_frame_bufs[0]+pli; + fplane=_state->fplanes+pli; + vpix=iplane->data; + vfragi_end=fplane->froffset+fplane->nfrags; + nhfrags=fplane->nhfrags; + stride=iplane->stride; + while(fragiref_frame_idx[OC_FRAME_GOLD]= + _state->ref_frame_idx[OC_FRAME_PREV]= + _state->ref_frame_idx[OC_FRAME_SELF]=-1; + _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1; + return 0; +} + +static void oc_state_ref_bufs_clear(oc_theora_state *_state){ + _ogg_free(_state->frag_buf_offs); + _ogg_free(_state->ref_frame_data[0]); +} + + +void oc_state_vtable_init_c(oc_theora_state *_state){ + _state->opt_vtable.frag_copy=oc_frag_copy_c; + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c; + _state->opt_vtable.idct8x8=oc_idct8x8_c; + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c; + _state->opt_vtable.state_loop_filter_frag_rows= + oc_state_loop_filter_frag_rows_c; + _state->opt_vtable.restore_fpu=oc_restore_fpu_c; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG; +} + +/*Initialize the accelerated function pointers.*/ +void oc_state_vtable_init(oc_theora_state *_state){ +#if defined(OC_X86_ASM) + oc_state_vtable_init_x86(_state); +#else + oc_state_vtable_init_c(_state); +#endif +} + + +int 
oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ + int ret; + /*First validate the parameters.*/ + if(_info==NULL)return TH_EFAULT; + /*The width and height of the encoded frame must be multiples of 16. + They must also, when divided by 16, fit into a 16-bit unsigned integer. + The displayable frame offset coordinates must fit into an 8-bit unsigned + integer. + Note that the offset Y in the API is specified on the opposite side from + how it is specified in the bitstream, because the Y axis is flipped in + the bitstream. + The displayable frame must fit inside the encoded frame. + The color space must be one known by the encoder.*/ + if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| + _info->frame_width<=0||_info->frame_width>=0x100000|| + _info->frame_height<=0||_info->frame_height>=0x100000|| + _info->pic_x+_info->pic_width>_info->frame_width|| + _info->pic_y+_info->pic_height>_info->frame_height|| + _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| + /*Note: the following <0 comparisons may generate spurious warnings on + platforms where enums are unsigned. + We could cast them to unsigned and just use the following >= comparison, + but there are a number of compilers which will mis-optimize this. 
+ It's better to live with the spurious warnings.*/ + _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| + _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ + return TH_EINVAL; + } + memset(_state,0,sizeof(*_state)); + memcpy(&_state->info,_info,sizeof(*_info)); + /*Invert the sense of pic_y to match Theora's right-handed coordinate + system.*/ + _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; + _state->frame_type=OC_UNKWN_FRAME; + oc_state_vtable_init(_state); + ret=oc_state_frarray_init(_state); + if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); + if(ret<0){ + oc_state_frarray_clear(_state); + return ret; + } + /*If the keyframe_granule_shift is out of range, use the maximum allowable + value.*/ + if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ + _state->info.keyframe_granule_shift=31; + } + _state->keyframe_num=0; + _state->curframe_num=-1; + /*3.2.0 streams mark the frame index instead of the frame count. + This was changed with stream version 3.2.1 to conform to other Ogg + codecs. + We add an extra bias when computing granule positions for new streams.*/ + _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); + return 0; +} + +void oc_state_clear(oc_theora_state *_state){ + oc_state_ref_bufs_clear(_state); + oc_state_frarray_clear(_state); +} + + +/*Duplicates the pixels on the border of the image plane out into the + surrounding padding for use by unrestricted motion vectors. + This function only adds the left and right borders, and only for the fragment + rows specified. + _refi: The index of the reference buffer to pad. + _pli: The color plane. + _y0: The Y coordinate of the first row to pad. 
+ _yend: The Y coordinate of the row to stop padding at.*/ +void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, + int _y0,int _yend){ + th_img_plane *iplane; + unsigned char *apix; + unsigned char *bpix; + unsigned char *epix; + int stride; + int hpadding; + hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); + iplane=_state->ref_frame_bufs[_refi]+_pli; + stride=iplane->stride; + apix=iplane->data+_y0*(ptrdiff_t)stride; + bpix=apix+iplane->width-1; + epix=iplane->data+_yend*(ptrdiff_t)stride; + /*Note the use of != instead of <, which allows the stride to be negative.*/ + while(apix!=epix){ + memset(apix-hpadding,apix[0],hpadding); + memset(bpix+1,bpix[0],hpadding); + apix+=stride; + bpix+=stride; + } +} + +/*Duplicates the pixels on the border of the image plane out into the + surrounding padding for use by unrestricted motion vectors. + This function only adds the top and bottom borders, and must be called after + the left and right borders are added. + _refi: The index of the reference buffer to pad. + _pli: The color plane.*/ +void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ + th_img_plane *iplane; + unsigned char *apix; + unsigned char *bpix; + unsigned char *epix; + int stride; + int hpadding; + int vpadding; + int fullw; + hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); + vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); + iplane=_state->ref_frame_bufs[_refi]+_pli; + stride=iplane->stride; + fullw=iplane->width+(hpadding<<1); + apix=iplane->data-hpadding; + bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; + epix=apix-stride*(ptrdiff_t)vpadding; + while(apix!=epix){ + memcpy(apix-stride,apix,fullw); + memcpy(bpix+stride,bpix,fullw); + apix-=stride; + bpix+=stride; + } +} + +/*Duplicates the pixels on the border of the given reference image out into + the surrounding padding for use by unrestricted motion vectors. 
+ _state: The context containing the reference buffers. + _refi: The index of the reference buffer to pad.*/ +void oc_state_borders_fill(oc_theora_state *_state,int _refi){ + int pli; + for(pli=0;pli<3;pli++){ + oc_state_borders_fill_rows(_state,_refi,pli,0, + _state->ref_frame_bufs[_refi][pli].height); + oc_state_borders_fill_caps(_state,_refi,pli); + } +} + +/*Determines the offsets in an image buffer to use for motion compensation. + _state: The Theora state the offsets are to be computed with. + _offsets: Returns the offset for the buffer(s). + _offsets[0] is always set. + _offsets[1] is set if the motion vector has non-zero fractional + components. + _pli: The color plane index. + _dx: The X component of the motion vector. + _dy: The Y component of the motion vector. + Return: The number of offsets returned: 1 or 2.*/ +int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], + int _pli,int _dx,int _dy){ + /*Here is a brief description of how Theora handles motion vectors: + Motion vector components are specified to half-pixel accuracy in + undecimated directions of each plane, and quarter-pixel accuracy in + decimated directions. + Integer parts are extracted by dividing (not shifting) by the + appropriate amount, with truncation towards zero. + These integer values are used to calculate the first offset. + + If either of the fractional parts are non-zero, then a second offset is + computed. + No third or fourth offsets are computed, even if both components have + non-zero fractional parts. 
+ The second offset is computed by dividing (not shifting) by the + appropriate amount, always truncating _away_ from zero.*/ +#if 0 + /*This version of the code doesn't use any tables, but is slower.*/ + int ystride; + int xprec; + int yprec; + int xfrac; + int yfrac; + int offs; + ystride=_state->ref_ystride[_pli]; + /*These two variables decide whether we are in half- or quarter-pixel + precision in each component.*/ + xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1)); + yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2)); + /*These two variables are either 0 if all the fractional bits are zero or -1 + if any of them are non-zero.*/ + xfrac=OC_SIGNMASK(-(_dx&(xprec|1))); + yfrac=OC_SIGNMASK(-(_dy&(yprec|1))); + offs=(_dx>>xprec)+(_dy>>yprec)*ystride; + if(xfrac||yfrac){ + int xmask; + int ymask; + xmask=OC_SIGNMASK(_dx); + ymask=OC_SIGNMASK(_dy); + yfrac&=ystride; + _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask); + _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask); + return 2; + } + else{ + _offsets[0]=offs; + return 1; + } +#else + /*Using tables simplifies the code, and there's enough arithmetic to hide the + latencies of the memory references.*/ + static const signed char OC_MVMAP[2][64]={ + { + -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8, + -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 + }, + { + -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4, + -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7 + } + }; + static const signed char OC_MVMAP2[2][64]={ + { + -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, + 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 + }, + { + -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, + 
0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 + } + }; + int ystride; + int qpx; + int qpy; + int mx; + int my; + int mx2; + int my2; + int offs; + ystride=_state->ref_ystride[_pli]; + qpy=_pli!=0&&!(_state->info.pixel_fmt&2); + my=OC_MVMAP[qpy][_dy+31]; + my2=OC_MVMAP2[qpy][_dy+31]; + qpx=_pli!=0&&!(_state->info.pixel_fmt&1); + mx=OC_MVMAP[qpx][_dx+31]; + mx2=OC_MVMAP2[qpx][_dx+31]; + offs=my*ystride+mx; + if(mx2||my2){ + _offsets[1]=offs+my2*ystride+mx2; + _offsets[0]=offs; + return 2; + } + _offsets[0]=offs; + return 1; +#endif +} + +void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs, + _last_zzi,_dc_quant); +} + +void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component.*/ + if(_last_zzi<2){ + ogg_int16_t p; + int ci; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*LOOP VECTORIZES.*/ + for(ci=0;ci<64;ci++)_dct_coeffs[ci]=p; + } + else{ + /*First, dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8(_state,_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; + if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs); + else{ + const unsigned char 
*ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2(_state, + dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs); + } + else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame, + _src_frame,_pli); +} + +void oc_state_frag_copy_list_c(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; + oc_frag_copy(_state,dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); + } +} + +static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){ + int y; + _pix-=2; + for(y=0;y<8;y++){ + int f; + f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]); + /*The _bv array is used to compute the function + 
f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); + where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ + f=*(_bv+(f+4>>3)); + _pix[1]=OC_CLAMP255(_pix[1]+f); + _pix[2]=OC_CLAMP255(_pix[2]-f); + _pix+=_ystride; + } +} + +static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){ + int x; + _pix-=_ystride*2; + for(x=0;x<8;x++){ + int f; + f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]); + /*The _bv array is used to compute the function + f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); + where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ + f=*(_bv+(f+4>>3)); + _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f); + _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f); + } +} + +/*Initialize the bounding values array used by the loop filter. + _bv: Storage for the array. + Return: 0 on success, or a non-zero value if no filtering need be applied.*/ +int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){ + int flimit; + int i; + flimit=_state->loop_filter_limits[_state->qis[0]]; + if(flimit==0)return 1; + memset(_bv,0,sizeof(_bv[0])*256); + for(i=0;i=0)_bv[127-i-flimit]=i-flimit; + _bv[127-i]=-i; + _bv[127+i]=i; + if(127+i+flimit<256)_bv[127+i+flimit]=flimit-i; + } + return 0; +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array. + _refi: The index of the frame buffer to filter. + _pli: The color plane to filter. + _fragy0: The Y coordinate of the first fragment row to filter. 
+ _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256], + int _refi,int _pli,int _fragy0,int _fragy_end){ + _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli, + _fragy0,_fragy_end); +} + +void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv, + int _refi,int _pli,int _fragy0,int _fragy_end){ + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + _bv+=127; + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it. 
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0fragi0)loop_filter_h(ref,ystride,_bv); + if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); + if(fragi+1info.frame_width; + height=_state->info.frame_height; + iframe=_state->granpos>>_state->info.keyframe_granule_shift; + pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift); + sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf); + fp=fopen(fname,"wb"); + if(fp==NULL)return TH_EFAULT; + image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image)); + if(image==NULL){ + fclose(fp); + return TH_EFAULT; + } + png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); + if(png==NULL){ + oc_free_2d(image); + fclose(fp); + return TH_EFAULT; + } + info=png_create_info_struct(png); + if(info==NULL){ + png_destroy_write_struct(&png,NULL); + oc_free_2d(image); + fclose(fp); + return TH_EFAULT; + } + if(setjmp(png_jmpbuf(png))){ + png_destroy_write_struct(&png,&info); + oc_free_2d(image); + fclose(fp); + return TH_EFAULT; + } + framei=_state->ref_frame_idx[_frame]; + y_row=_state->ref_frame_bufs[framei][0].data; + u_row=_state->ref_frame_bufs[framei][1].data; + v_row=_state->ref_frame_bufs[framei][2].data; + y_stride=_state->ref_frame_bufs[framei][0].stride; + u_stride=_state->ref_frame_bufs[framei][1].stride; + v_stride=_state->ref_frame_bufs[framei][2].stride; + /*Chroma up-sampling is just done with a box filter. + This is very likely what will actually be used in practice on a real + display, and also removes one more layer to search in for the source of + artifacts. 
+ As an added bonus, it's dead simple.*/ + for(imgi=height;imgi-->0;){ + int dc; + y=y_row; + u=u_row; + v=v_row; + for(imgj=0;imgj<6*width;){ + float yval; + float uval; + float vval; + unsigned rval; + unsigned gval; + unsigned bval; + /*This is intentionally slow and very accurate.*/ + yval=(*y-16)*(1.0F/219); + uval=(*u-128)*(2*(1-0.114F)/224); + vval=(*v-128)*(2*(1-0.299F)/224); + rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535); + gval=OC_CLAMPI(0,(int)(65535*( + yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535); + bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535); + image[imgi][imgj++]=(unsigned char)(rval>>8); + image[imgi][imgj++]=(unsigned char)(rval&0xFF); + image[imgi][imgj++]=(unsigned char)(gval>>8); + image[imgi][imgj++]=(unsigned char)(gval&0xFF); + image[imgi][imgj++]=(unsigned char)(bval>>8); + image[imgi][imgj++]=(unsigned char)(bval&0xFF); + dc=(y-y_row&1)|(_state->info.pixel_fmt&1); + y++; + u+=dc; + v+=dc; + } + dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1); + y_row+=y_stride; + u_row+=dc&u_stride; + v_row+=dc&v_stride; + } + png_init_io(png,fp); + png_set_compression_level(png,Z_BEST_COMPRESSION); + png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT); + switch(_state->info.colorspace){ + case TH_CS_ITU_REC_470M:{ + png_set_gAMA(png,info,2.2); + png_set_cHRM_fixed(png,info,31006,31616, + 67000,32000,21000,71000,14000,8000); + }break; + case TH_CS_ITU_REC_470BG:{ + png_set_gAMA(png,info,2.67); + png_set_cHRM_fixed(png,info,31271,32902, + 64000,33000,29000,60000,15000,6000); + }break; + default:break; + } + png_set_pHYs(png,info,_state->info.aspect_numerator, + _state->info.aspect_denominator,0); + png_set_rows(png,info,image); + png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL); + png_write_end(png,info); + png_destroy_write_struct(&png,&info); + oc_free_2d(image); + fclose(fp); + return 0; +} +#endif + + + +ogg_int64_t 
th_granule_frame(void *_encdec,ogg_int64_t _granpos){ + oc_theora_state *state; + state=(oc_theora_state *)_encdec; + if(_granpos>=0){ + ogg_int64_t iframe; + ogg_int64_t pframe; + iframe=_granpos>>state->info.keyframe_granule_shift; + pframe=_granpos-(iframe<info.keyframe_granule_shift); + /*3.2.0 streams store the frame index in the granule position. + 3.2.1 and later store the frame count. + We return the index, so adjust the value if we have a 3.2.1 or later + stream.*/ + return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1); + } + return -1; +} + +double th_granule_time(void *_encdec,ogg_int64_t _granpos){ + oc_theora_state *state; + state=(oc_theora_state *)_encdec; + if(_granpos>=0){ + return (th_granule_frame(_encdec, _granpos)+1)*( + (double)state->info.fps_denominator/state->info.fps_numerator); + } + return -1; +} diff --git a/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.c b/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.c new file mode 100644 index 00000000..2c732939 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.c @@ -0,0 +1,293 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxfrag.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of fragment reconstruction for motion compensation. + Originally written by Rudolf Marek. + Additional optimization by Nils Pipenbrinck. + Note: Loops are unrolled for best performance. 
/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
   between rows.
  This is a thin function wrapper around the OC_FRAG_COPY_MMX macro.*/
void oc_frag_copy_mmx(unsigned char *_dst,
 const unsigned char *_src,int _ystride){
  OC_FRAG_COPY_MMX(_dst,_src,_ystride);
}

/*Reconstructs an 8x8 intra block.
  Each 16-bit residue value is biased by 0x0080 (the constant built in mm0),
   packed to a byte with unsigned saturation, and stored to _dst, one 8-byte
   row every _ystride bytes.
  _dst:     The destination block.
  _ystride: The distance in bytes between destination rows.
  _residue: 64 16-bit residue values, read as 16 consecutive quadwords.*/
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
 const ogg_int16_t *_residue){
  __asm__ __volatile__(
    /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/
    "pcmpeqw %%mm0,%%mm0\n\t"
    /*#0 Load low residue.*/
    "movq 0*8(%[residue]),%%mm1\n\t"
    /*#0 Load high residue.*/
    "movq 1*8(%[residue]),%%mm2\n\t"
    /*Set mm0 to 0x8000800080008000.*/
    "psllw $15,%%mm0\n\t"
    /*#1 Load low residue.*/
    "movq 2*8(%[residue]),%%mm3\n\t"
    /*#1 Load high residue.*/
    "movq 3*8(%[residue]),%%mm4\n\t"
    /*Set mm0 to 0x0080008000800080.*/
    "psrlw $8,%%mm0\n\t"
    /*#2 Load low residue.*/
    "movq 4*8(%[residue]),%%mm5\n\t"
    /*#2 Load high residue.*/
    "movq 5*8(%[residue]),%%mm6\n\t"
    /*#0 Bias low  residue.*/
    "paddsw %%mm0,%%mm1\n\t"
    /*#0 Bias high residue.*/
    "paddsw %%mm0,%%mm2\n\t"
    /*#0 Pack to byte.*/
    "packuswb %%mm2,%%mm1\n\t"
    /*#1 Bias low  residue.*/
    "paddsw %%mm0,%%mm3\n\t"
    /*#1 Bias high residue.*/
    "paddsw %%mm0,%%mm4\n\t"
    /*#1 Pack to byte.*/
    "packuswb %%mm4,%%mm3\n\t"
    /*#2 Bias low  residue.*/
    "paddsw %%mm0,%%mm5\n\t"
    /*#2 Bias high residue.*/
    "paddsw %%mm0,%%mm6\n\t"
    /*#2 Pack to byte.*/
    "packuswb %%mm6,%%mm5\n\t"
    /*#0 Write row.*/
    "movq %%mm1,(%[dst])\n\t"
    /*#1 Write row.*/
    "movq %%mm3,(%[dst],%[ystride])\n\t"
    /*#2 Write row.*/
    "movq %%mm5,(%[dst],%[ystride],2)\n\t"
    /*#3 Load low residue.*/
    "movq 6*8(%[residue]),%%mm1\n\t"
    /*#3 Load high residue.*/
    "movq 7*8(%[residue]),%%mm2\n\t"
    /*#4 Load low residue.*/
    "movq 8*8(%[residue]),%%mm3\n\t"
    /*#4 Load high residue.*/
    "movq 9*8(%[residue]),%%mm4\n\t"
    /*#5 Load low residue.*/
    "movq 10*8(%[residue]),%%mm5\n\t"
    /*#5 Load high residue.*/
    "movq 11*8(%[residue]),%%mm6\n\t"
    /*#3 Bias low  residue.*/
    "paddsw %%mm0,%%mm1\n\t"
    /*#3 Bias high residue.*/
    "paddsw %%mm0,%%mm2\n\t"
    /*#3 Pack to byte.*/
    "packuswb %%mm2,%%mm1\n\t"
    /*#4 Bias low  residue.*/
    "paddsw %%mm0,%%mm3\n\t"
    /*#4 Bias high residue.*/
    "paddsw %%mm0,%%mm4\n\t"
    /*#4 Pack to byte.*/
    "packuswb %%mm4,%%mm3\n\t"
    /*#5 Bias low  residue.*/
    "paddsw %%mm0,%%mm5\n\t"
    /*#5 Bias high residue.*/
    "paddsw %%mm0,%%mm6\n\t"
    /*#5 Pack to byte.*/
    "packuswb %%mm6,%%mm5\n\t"
    /*#3 Write row.*/
    "movq %%mm1,(%[dst],%[ystride3])\n\t"
    /*#4 Write row.*/
    "movq %%mm3,(%[dst4])\n\t"
    /*#5 Write row.*/
    "movq %%mm5,(%[dst4],%[ystride])\n\t"
    /*#6 Load low residue.*/
    "movq 12*8(%[residue]),%%mm1\n\t"
    /*#6 Load high residue.*/
    "movq 13*8(%[residue]),%%mm2\n\t"
    /*#7 Load low residue.*/
    "movq 14*8(%[residue]),%%mm3\n\t"
    /*#7 Load high residue.*/
    "movq 15*8(%[residue]),%%mm4\n\t"
    /*#6 Bias low  residue.*/
    "paddsw %%mm0,%%mm1\n\t"
    /*#6 Bias high residue.*/
    "paddsw %%mm0,%%mm2\n\t"
    /*#6 Pack to byte.*/
    "packuswb %%mm2,%%mm1\n\t"
    /*#7 Bias low  residue.*/
    "paddsw %%mm0,%%mm3\n\t"
    /*#7 Bias high residue.*/
    "paddsw %%mm0,%%mm4\n\t"
    /*#7 Pack to byte.*/
    "packuswb %%mm4,%%mm3\n\t"
    /*#6 Write row.*/
    "movq %%mm1,(%[dst4],%[ystride],2)\n\t"
    /*#7 Write row.*/
    "movq %%mm3,(%[dst4],%[ystride3])\n\t"
    :
    :[residue]"r"(_residue),
     [dst]"r"(_dst),
     [dst4]"r"(_dst+(_ystride<<2)),
     [ystride]"r"((ptrdiff_t)_ystride),
     [ystride3]"r"((ptrdiff_t)_ystride*3)
    :"memory"
  );
}

/*Reconstructs an 8x8 inter block from a single predictor.
  Each source pixel is widened to 16 bits, the matching residue value is added
   with signed saturation, and the sum is packed back to a byte with unsigned
   saturation.
  Two rows are processed per loop iteration, four iterations in total.
  _dst:     The destination block.
  _src:     The predictor block; it uses the same _ystride as _dst.
  _ystride: The distance in bytes between rows.
  _residue: 64 16-bit residue values (32 bytes are consumed per iteration).*/
void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src,
 int _ystride,const ogg_int16_t *_residue){
  int i;
  /*Zero mm0.*/
  /*NOTE(review): the loop body below reads mm0 as the zero register, so it
     relies on mm0 being preserved between separate asm statements.*/
  __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::);
  for(i=4;i-->0;){
    __asm__ __volatile__(
      /*#0 Load source.*/
      "movq (%[src]),%%mm3\n\t"
      /*#1 Load source.*/
      "movq (%[src],%[ystride]),%%mm7\n\t"
      /*#0 Get copy of src.*/
      "movq %%mm3,%%mm4\n\t"
      /*#0 Expand high source.*/
      "punpckhbw %%mm0,%%mm4\n\t"
      /*#0 Expand low  source.*/
      "punpcklbw %%mm0,%%mm3\n\t"
      /*#0 Add residue high.*/
      "paddsw 8(%[residue]),%%mm4\n\t"
      /*#1 Get copy of src.*/
      "movq %%mm7,%%mm2\n\t"
      /*#0 Add residue low.*/
      "paddsw (%[residue]), %%mm3\n\t"
      /*#1 Expand high source.*/
      "punpckhbw %%mm0,%%mm2\n\t"
      /*#0 Pack final row pixels.*/
      "packuswb %%mm4,%%mm3\n\t"
      /*#1 Expand low  source.*/
      "punpcklbw %%mm0,%%mm7\n\t"
      /*#1 Add residue low.*/
      "paddsw 16(%[residue]),%%mm7\n\t"
      /*#1 Add residue high.*/
      "paddsw 24(%[residue]),%%mm2\n\t"
      /*Advance residue.*/
      "lea 32(%[residue]),%[residue]\n\t"
      /*#1 Pack final row pixels.*/
      "packuswb %%mm2,%%mm7\n\t"
      /*Advance src.*/
      "lea (%[src],%[ystride],2),%[src]\n\t"
      /*#0 Write row.*/
      "movq %%mm3,(%[dst])\n\t"
      /*#1 Write row.*/
      "movq %%mm7,(%[dst],%[ystride])\n\t"
      /*Advance dst.*/
      "lea (%[dst],%[ystride],2),%[dst]\n\t"
      :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
      :[ystride]"r"((ptrdiff_t)_ystride)
      :"memory"
    );
  }
}

/*Reconstructs an 8x8 inter block from the average of two predictors.
  Corresponding pixels of _src1 and _src2 are widened to 16 bits, summed and
   shifted right by one, the residue is added with signed saturation, and the
   result is packed to a byte with unsigned saturation.
  Two rows are processed per loop iteration, four iterations in total.
  _dst:     The destination block.
  _src1:    The first predictor block.
  _src2:    The second predictor block.
  _ystride: The distance in bytes between rows (shared by all three blocks).
  _residue: 64 16-bit residue values (32 bytes are consumed per iteration).*/
void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
 const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){
  int i;
  /*Zero mm7.*/
  /*NOTE(review): the loop body below reads mm7 as the zero register, so it
     relies on mm7 being preserved between separate asm statements.*/
  __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::);
  for(i=4;i-->0;){
    __asm__ __volatile__(
      /*#0 Load src1.*/
      "movq (%[src1]),%%mm0\n\t"
      /*#0 Load src2.*/
      "movq (%[src2]),%%mm2\n\t"
      /*#0 Copy src1.*/
      "movq %%mm0,%%mm1\n\t"
      /*#0 Copy src2.*/
      "movq %%mm2,%%mm3\n\t"
      /*#1 Load src1.*/
      "movq (%[src1],%[ystride]),%%mm4\n\t"
      /*#0 Unpack lower src1.*/
      "punpcklbw %%mm7,%%mm0\n\t"
      /*#1 Load src2.*/
      "movq (%[src2],%[ystride]),%%mm5\n\t"
      /*#0 Unpack higher src1.*/
      "punpckhbw %%mm7,%%mm1\n\t"
      /*#0 Unpack lower src2.*/
      "punpcklbw %%mm7,%%mm2\n\t"
      /*#0 Unpack higher src2.*/
      "punpckhbw %%mm7,%%mm3\n\t"
      /*Advance src1 ptr.*/
      "lea (%[src1],%[ystride],2),%[src1]\n\t"
      /*Advance src2 ptr.*/
      "lea (%[src2],%[ystride],2),%[src2]\n\t"
      /*#0 Lower src1+src2.*/
      "paddsw %%mm2,%%mm0\n\t"
      /*#0 Higher src1+src2.*/
      "paddsw %%mm3,%%mm1\n\t"
      /*#1 Copy src1.*/
      "movq %%mm4,%%mm2\n\t"
      /*#0 Build lo average.*/
      "psraw $1,%%mm0\n\t"
      /*#1 Copy src2.*/
      "movq %%mm5,%%mm3\n\t"
      /*#1 Unpack lower src1.*/
      "punpcklbw %%mm7,%%mm4\n\t"
      /*#0 Build hi average.*/
      "psraw $1,%%mm1\n\t"
      /*#1 Unpack higher src1.*/
      "punpckhbw %%mm7,%%mm2\n\t"
      /*#0 low+=residue.*/
      "paddsw (%[residue]),%%mm0\n\t"
      /*#1 Unpack lower src2.*/
      "punpcklbw %%mm7,%%mm5\n\t"
      /*#0 high+=residue.*/
      "paddsw 8(%[residue]),%%mm1\n\t"
      /*#1 Unpack higher src2.*/
      "punpckhbw %%mm7,%%mm3\n\t"
      /*#1 Lower src1+src2.*/
      "paddsw %%mm4,%%mm5\n\t"
      /*#0 Pack and saturate.*/
      "packuswb %%mm1,%%mm0\n\t"
      /*#1 Higher src1+src2.*/
      "paddsw %%mm2,%%mm3\n\t"
      /*#0 Write row.*/
      "movq %%mm0,(%[dst])\n\t"
      /*#1 Build lo average.*/
      "psraw $1,%%mm5\n\t"
      /*#1 Build hi average.*/
      "psraw $1,%%mm3\n\t"
      /*#1 low+=residue.*/
      "paddsw 16(%[residue]),%%mm5\n\t"
      /*#1 high+=residue.*/
      "paddsw 24(%[residue]),%%mm3\n\t"
      /*#1 Pack and saturate.*/
      "packuswb  %%mm3,%%mm5\n\t"
      /*#1 Write row ptr.*/
      "movq %%mm5,(%[dst],%[ystride])\n\t"
      /*Advance residue ptr.*/
      "add $32,%[residue]\n\t"
      /*Advance dest ptr.*/
      "lea (%[dst],%[ystride],2),%[dst]\n\t"
     :[dst]"+r"(_dst),[residue]"+r"(_residue),
      [src1]"+%r"(_src1),[src2]"+r"(_src2)
     :[ystride]"r"((ptrdiff_t)_ystride)
     :"memory"
    );
  }
}

/*Executes the emms instruction, leaving MMX state so that x87 floating point
   code can run again after the MMX routines above.*/
void oc_restore_fpu_mmx(void){
  __asm__ __volatile__("emms\n\t");
}
/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
   between rows.
  The copy is done as two groups of four 8-byte rows; both pointers are
   advanced by 4*_ystride between the groups.
  The arguments are first copied into the locals src and dst, so the caller's
   expressions are evaluated once and are not modified.
  mm0 through mm3 are overwritten.*/
#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \
  do{ \
    const unsigned char *src; \
    unsigned char       *dst; \
    ptrdiff_t            ystride3; \
    src=(_src); \
    dst=(_dst); \
    __asm__ __volatile__( \
      /*src+0*ystride*/ \
      "movq (%[src]),%%mm0\n\t" \
      /*src+1*ystride*/ \
      "movq (%[src],%[ystride]),%%mm1\n\t" \
      /*ystride3=ystride*3*/ \
      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
      /*src+2*ystride*/ \
      "movq (%[src],%[ystride],2),%%mm2\n\t" \
      /*src+3*ystride*/ \
      "movq (%[src],%[ystride3]),%%mm3\n\t" \
      /*dst+0*ystride*/ \
      "movq %%mm0,(%[dst])\n\t" \
      /*dst+1*ystride*/ \
      "movq %%mm1,(%[dst],%[ystride])\n\t" \
      /*Pointer to next 4.*/ \
      "lea (%[src],%[ystride],4),%[src]\n\t" \
      /*dst+2*ystride*/ \
      "movq %%mm2,(%[dst],%[ystride],2)\n\t" \
      /*dst+3*ystride*/ \
      "movq %%mm3,(%[dst],%[ystride3])\n\t" \
      /*Pointer to next 4.*/ \
      "lea (%[dst],%[ystride],4),%[dst]\n\t" \
      /*src+0*ystride*/ \
      "movq (%[src]),%%mm0\n\t" \
      /*src+1*ystride*/ \
      "movq (%[src],%[ystride]),%%mm1\n\t" \
      /*src+2*ystride*/ \
      "movq (%[src],%[ystride],2),%%mm2\n\t" \
      /*src+3*ystride*/ \
      "movq (%[src],%[ystride3]),%%mm3\n\t" \
      /*dst+0*ystride*/ \
      "movq %%mm0,(%[dst])\n\t" \
      /*dst+1*ystride*/ \
      "movq %%mm1,(%[dst],%[ystride])\n\t" \
      /*dst+2*ystride*/ \
      "movq %%mm2,(%[dst],%[ystride],2)\n\t" \
      /*dst+3*ystride*/ \
      "movq %%mm3,(%[dst],%[ystride3])\n\t" \
      :[dst]"+r"(dst),[src]"+r"(src),[ystride3]"=&r"(ystride3) \
      :[ystride]"r"((ptrdiff_t)(_ystride)) \
      :"memory" \
    ); \
  } \
  while(0)
* + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of Theora's iDCT. + Originally written by Rudolf Marek, based on code from On2's VP3.*/ +#include "x86int.h" +#include "../dct.h" + +#if defined(OC_X86_ASM) + +/*These are offsets into the table of constants below.*/ +/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/ +#define OC_COSINE_OFFSET (0) +/*A row of 8's.*/ +#define OC_EIGHT_OFFSET (56) + + + +/*A table of constants used by the MMX routines.*/ +static const ogg_uint16_t __attribute__((aligned(8),used)) + OC_IDCT_CONSTS[(7+1)*4]={ + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + 8, 8, 8, 8 +}; + +/*Converts the expression in the argument to a string.*/ +#define OC_M2STR(_s) #_s + +/*38 cycles*/ +#define OC_IDCT_BEGIN \ + "#OC_IDCT_BEGIN\n\t" \ + "movq "OC_I(3)",%%mm2\n\t" \ + "movq "OC_C(3)",%%mm6\n\t" \ + "movq %%mm2,%%mm4\n\t" \ 
+ "movq "OC_J(5)",%%mm7\n\t" \ + "pmulhw %%mm6,%%mm4\n\t" \ + "movq "OC_C(5)",%%mm1\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "pmulhw %%mm2,%%mm1\n\t" \ + "movq "OC_I(1)",%%mm3\n\t" \ + "pmulhw %%mm7,%%mm5\n\t" \ + "movq "OC_C(1)",%%mm0\n\t" \ + "paddw %%mm2,%%mm4\n\t" \ + "paddw %%mm7,%%mm6\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "movq "OC_J(7)",%%mm1\n\t" \ + "paddw %%mm5,%%mm7\n\t" \ + "movq %%mm0,%%mm5\n\t" \ + "pmulhw %%mm3,%%mm0\n\t" \ + "paddw %%mm7,%%mm4\n\t" \ + "pmulhw %%mm1,%%mm5\n\t" \ + "movq "OC_C(7)",%%mm7\n\t" \ + "psubw %%mm2,%%mm6\n\t" \ + "paddw %%mm3,%%mm0\n\t" \ + "pmulhw %%mm7,%%mm3\n\t" \ + "movq "OC_I(2)",%%mm2\n\t" \ + "pmulhw %%mm1,%%mm7\n\t" \ + "paddw %%mm1,%%mm5\n\t" \ + "movq %%mm2,%%mm1\n\t" \ + "pmulhw "OC_C(2)",%%mm2\n\t" \ + "psubw %%mm5,%%mm3\n\t" \ + "movq "OC_J(6)",%%mm5\n\t" \ + "paddw %%mm7,%%mm0\n\t" \ + "movq %%mm5,%%mm7\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + "pmulhw "OC_C(2)",%%mm5\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "pmulhw "OC_C(6)",%%mm1\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "psubw %%mm6,%%mm3\n\t" \ + "paddw %%mm7,%%mm5\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + "pmulhw "OC_C(6)",%%mm7\n\t" \ + "paddw %%mm3,%%mm6\n\t" \ + "movq %%mm4,"OC_I(1)"\n\t" \ + "psubw %%mm5,%%mm1\n\t" \ + "movq "OC_C(4)",%%mm4\n\t" \ + "movq %%mm3,%%mm5\n\t" \ + "pmulhw %%mm4,%%mm3\n\t" \ + "paddw %%mm2,%%mm7\n\t" \ + "movq %%mm6,"OC_I(2)"\n\t" \ + "movq %%mm0,%%mm2\n\t" \ + "movq "OC_I(0)",%%mm6\n\t" \ + "pmulhw %%mm4,%%mm0\n\t" \ + "paddw %%mm3,%%mm5\n\t" \ + "movq "OC_J(4)",%%mm3\n\t" \ + "psubw %%mm1,%%mm5\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psubw %%mm3,%%mm6\n\t" \ + "movq %%mm6,%%mm0\n\t" \ + "pmulhw %%mm4,%%mm6\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "paddw %%mm0,%%mm3\n\t" \ + "paddw %%mm5,%%mm1\n\t" \ + "pmulhw %%mm3,%%mm4\n\t" \ + "paddw %%mm0,%%mm6\n\t" \ + "psubw %%mm2,%%mm6\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "movq "OC_I(1)",%%mm0\n\t" \ + "paddw %%mm6,%%mm2\n\t" \ 
+ "paddw %%mm3,%%mm4\n\t" \ + "psubw %%mm1,%%mm2\n\t" \ + "#end OC_IDCT_BEGIN\n\t" \ + +/*38+8=46 cycles.*/ +#define OC_ROW_IDCT \ + "#OC_ROW_IDCT\n" \ + OC_IDCT_BEGIN \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r5=R5=F'+B''*/ \ + "paddw %%mm6,%%mm5\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw %%mm0,%%mm0\n\t" \ + /*Save R1.*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r0=R0=G.+C.*/ \ + "paddw %%mm7,%%mm0\n\t" \ + "#end OC_ROW_IDCT\n\t" \ + +/*The following macro does two 4x4 transposes in place. + At entry, we assume: + r0 = a3 a2 a1 a0 + I(1) = b3 b2 b1 b0 + r2 = c3 c2 c1 c0 + r3 = d3 d2 d1 d0 + + r4 = e3 e2 e1 e0 + r5 = f3 f2 f1 f0 + r6 = g3 g2 g1 g0 + r7 = h3 h2 h1 h0 + + At exit, we have: + I(0) = d0 c0 b0 a0 + I(1) = d1 c1 b1 a1 + I(2) = d2 c2 b2 a2 + I(3) = d3 c3 b3 a3 + + J(4) = h0 g0 f0 e0 + J(5) = h1 g1 f1 e1 + J(6) = h2 g2 f2 e2 + J(7) = h3 g3 f3 e3 + + I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. + J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. 
+ + Since r1 is free at entry, we calculate the Js first.*/ +/*19 cycles.*/ +#define OC_TRANSPOSE \ + "#OC_TRANSPOSE\n\t" \ + "movq %%mm4,%%mm1\n\t" \ + "punpcklwd %%mm5,%%mm4\n\t" \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "punpckhwd %%mm5,%%mm1\n\t" \ + "movq %%mm6,%%mm0\n\t" \ + "punpcklwd %%mm7,%%mm6\n\t" \ + "movq %%mm4,%%mm5\n\t" \ + "punpckldq %%mm6,%%mm4\n\t" \ + "punpckhdq %%mm6,%%mm5\n\t" \ + "movq %%mm1,%%mm6\n\t" \ + "movq %%mm4,"OC_J(4)"\n\t" \ + "punpckhwd %%mm7,%%mm0\n\t" \ + "movq %%mm5,"OC_J(5)"\n\t" \ + "punpckhdq %%mm0,%%mm6\n\t" \ + "movq "OC_I(0)",%%mm4\n\t" \ + "punpckldq %%mm0,%%mm1\n\t" \ + "movq "OC_I(1)",%%mm5\n\t" \ + "movq %%mm4,%%mm0\n\t" \ + "movq %%mm6,"OC_J(7)"\n\t" \ + "punpcklwd %%mm5,%%mm0\n\t" \ + "movq %%mm1,"OC_J(6)"\n\t" \ + "punpckhwd %%mm5,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm3,%%mm2\n\t" \ + "movq %%mm0,%%mm1\n\t" \ + "punpckldq %%mm2,%%mm0\n\t" \ + "punpckhdq %%mm2,%%mm1\n\t" \ + "movq %%mm4,%%mm2\n\t" \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "punpckhwd %%mm3,%%mm5\n\t" \ + "movq %%mm1,"OC_I(1)"\n\t" \ + "punpckhdq %%mm5,%%mm4\n\t" \ + "punpckldq %%mm5,%%mm2\n\t" \ + "movq %%mm4,"OC_I(3)"\n\t" \ + "movq %%mm2,"OC_I(2)"\n\t" \ + "#end OC_TRANSPOSE\n\t" \ + +/*38+19=57 cycles.*/ +#define OC_COLUMN_IDCT \ + "#OC_COLUMN_IDCT\n" \ + OC_IDCT_BEGIN \ + "paddw "OC_8",%%mm2\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r2=NR2*/ \ + "psraw $4,%%mm2\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=NR1*/ \ + "psraw $4,%%mm1\n\t" \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*Store NR2 at I(2).*/ \ + "movq %%mm2,"OC_I(2)"\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*Store NR1 at I(1).*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw "OC_8",%%mm4\n\t" \ + /*r3=D'+D'*/ \ + "paddw %%mm3,%%mm3\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r4=NR4*/ \ + 
"psraw $4,%%mm4\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + /*r3=NR3*/ \ + "psraw $4,%%mm3\n\t" \ + "paddw "OC_8",%%mm6\n\t" \ + /*r5=B''+B''*/ \ + "paddw %%mm5,%%mm5\n\t" \ + /*r5=R5=F'+B''*/ \ + "paddw %%mm6,%%mm5\n\t" \ + /*r6=NR6*/ \ + "psraw $4,%%mm6\n\t" \ + /*Store NR4 at J(4).*/ \ + "movq %%mm4,"OC_J(4)"\n\t" \ + /*r5=NR5*/ \ + "psraw $4,%%mm5\n\t" \ + /*Store NR3 at I(3).*/ \ + "movq %%mm3,"OC_I(3)"\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw "OC_8",%%mm7\n\t" \ + /*r0=C'+C'*/ \ + "paddw %%mm0,%%mm0\n\t" \ + /*r0=R0=G'+C'*/ \ + "paddw %%mm7,%%mm0\n\t" \ + /*r7=NR7*/ \ + "psraw $4,%%mm7\n\t" \ + /*Store NR6 at J(6).*/ \ + "movq %%mm6,"OC_J(6)"\n\t" \ + /*r0=NR0*/ \ + "psraw $4,%%mm0\n\t" \ + /*Store NR5 at J(5).*/ \ + "movq %%mm5,"OC_J(5)"\n\t" \ + /*Store NR7 at J(7).*/ \ + "movq %%mm7,"OC_J(7)"\n\t" \ + /*Store NR0 at I(0).*/ \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "#end OC_COLUMN_IDCT\n\t" \ + +#define OC_MID(_m,_i) OC_M2STR(_m+(_i)*8)"(%[c])" +#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) +#define OC_8 OC_MID(OC_EIGHT_OFFSET,0) + +static void oc_idct8x8_slow(ogg_int16_t _y[64]){ + /*This routine accepts an 8x8 matrix, but in partially transposed form. 
+ Every 4x4 block is transposed.*/ + __asm__ __volatile__( +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16)+64)"(%[y])" +#define OC_J(_k) OC_M2STR(((_k-4)*16)+72)"(%[y])" + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J + : + :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS) + ); +} + +/*25 cycles.*/ +#define OC_IDCT_BEGIN_10 \ + "#OC_IDCT_BEGIN_10\n\t" \ + "movq "OC_I(3)",%%mm2\n\t" \ + "nop\n\t" \ + "movq "OC_C(3)",%%mm6\n\t" \ + "movq %%mm2,%%mm4\n\t" \ + "movq "OC_C(5)",%%mm1\n\t" \ + "pmulhw %%mm6,%%mm4\n\t" \ + "movq "OC_I(1)",%%mm3\n\t" \ + "pmulhw %%mm2,%%mm1\n\t" \ + "movq "OC_C(1)",%%mm0\n\t" \ + "paddw %%mm2,%%mm4\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "movq "OC_I(2)",%%mm5\n\t" \ + "pmulhw %%mm3,%%mm0\n\t" \ + "movq %%mm5,%%mm1\n\t" \ + "paddw %%mm3,%%mm0\n\t" \ + "pmulhw "OC_C(7)",%%mm3\n\t" \ + "psubw %%mm2,%%mm6\n\t" \ + "pmulhw "OC_C(2)",%%mm5\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + "movq "OC_I(2)",%%mm7\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + "paddw %%mm5,%%mm7\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "pmulhw "OC_C(6)",%%mm1\n\t" \ + "psubw %%mm6,%%mm3\n\t" \ + "movq %%mm4,"OC_I(1)"\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + "movq "OC_C(4)",%%mm4\n\t" \ + "paddw %%mm3,%%mm6\n\t" \ + "movq %%mm3,%%mm5\n\t" \ + "pmulhw %%mm4,%%mm3\n\t" \ + "movq %%mm6,"OC_I(2)"\n\t" \ + "movq %%mm0,%%mm2\n\t" \ + "movq "OC_I(0)",%%mm6\n\t" \ + "pmulhw %%mm4,%%mm0\n\t" \ + "paddw %%mm3,%%mm5\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psubw %%mm1,%%mm5\n\t" \ + "pmulhw %%mm4,%%mm6\n\t" \ + "paddw "OC_I(0)",%%mm6\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "movq %%mm6,%%mm4\n\t" \ + "paddw %%mm5,%%mm1\n\t" \ + "psubw 
%%mm2,%%mm6\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "movq "OC_I(1)",%%mm0\n\t" \ + "paddw %%mm6,%%mm2\n\t" \ + "psubw %%mm1,%%mm2\n\t" \ + "nop\n\t" \ + "#end OC_IDCT_BEGIN_10\n\t" \ + +/*25+8=33 cycles.*/ +#define OC_ROW_IDCT_10 \ + "#OC_ROW_IDCT_10\n\t" \ + OC_IDCT_BEGIN_10 \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r5=R5=F'+B''*/ \ + "paddw %%mm6,%%mm5\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw %%mm0,%%mm0\n\t" \ + /*Save R1.*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r0=R0=G'+C'*/ \ + "paddw %%mm7,%%mm0\n\t" \ + "#end OC_ROW_IDCT_10\n\t" \ + +/*25+19=44 cycles'*/ +#define OC_COLUMN_IDCT_10 \ + "#OC_COLUMN_IDCT_10\n\t" \ + OC_IDCT_BEGIN_10 \ + "paddw "OC_8",%%mm2\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r2=NR2*/ \ + "psraw $4,%%mm2\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=NR1*/ \ + "psraw $4,%%mm1\n\t" \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*Store NR2 at I(2).*/ \ + "movq %%mm2,"OC_I(2)"\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*Store NR1 at I(1).*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw "OC_8",%%mm4\n\t" \ + /*r3=D'+D'*/ \ + "paddw %%mm3,%%mm3\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r4=NR4*/ \ + "psraw $4,%%mm4\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + /*r3=NR3*/ \ + "psraw $4,%%mm3\n\t" \ + "paddw "OC_8",%%mm6\n\t" \ + /*r5=B''+B''*/ \ + "paddw %%mm5,%%mm5\n\t" \ + /*r5=R5=F'+B''*/ \ 
+ "paddw %%mm6,%%mm5\n\t" \ + /*r6=NR6*/ \ + "psraw $4,%%mm6\n\t" \ + /*Store NR4 at J(4).*/ \ + "movq %%mm4,"OC_J(4)"\n\t" \ + /*r5=NR5*/ \ + "psraw $4,%%mm5\n\t" \ + /*Store NR3 at I(3).*/ \ + "movq %%mm3,"OC_I(3)"\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw "OC_8",%%mm7\n\t" \ + /*r0=C'+C'*/ \ + "paddw %%mm0,%%mm0\n\t" \ + /*r0=R0=G'+C'*/ \ + "paddw %%mm7,%%mm0\n\t" \ + /*r7=NR7*/ \ + "psraw $4,%%mm7\n\t" \ + /*Store NR6 at J(6).*/ \ + "movq %%mm6,"OC_J(6)"\n\t" \ + /*r0=NR0*/ \ + "psraw $4,%%mm0\n\t" \ + /*Store NR5 at J(5).*/ \ + "movq %%mm5,"OC_J(5)"\n\t" \ + /*Store NR7 at J(7).*/ \ + "movq %%mm7,"OC_J(7)"\n\t" \ + /*Store NR0 at I(0).*/ \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "#end OC_COLUMN_IDCT_10\n\t" \ + +static void oc_idct8x8_10(ogg_int16_t _y[64]){ + __asm__ __volatile__( +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" + /*Done with dequant, descramble, and partial transpose. + Now do the iDCT itself.*/ + OC_ROW_IDCT_10 + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J + : + :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS) + ); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. + It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. 
+ However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. + However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Then perform the iDCT.*/ + if(_last_zzi<10)oc_idct8x8_10(_y); + else oc_idct8x8_slow(_y); +} + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/x86/mmxloop.h b/engine/code/libtheora-1.1.1/lib/x86/mmxloop.h new file mode 100644 index 00000000..2e870c79 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/x86/mmxloop.h @@ -0,0 +1,215 @@ +#if !defined(_x86_mmxloop_H) +# define _x86_mmxloop_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}. 
/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}.
  On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and
   mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.
  The limit values are loaded from the quadword at (%[ll]), so the enclosing
   asm statement must supply an [ll] operand.
  mm4 through mm7 are used as scratch.*/
#define OC_LOOP_FILTER8_MMX \
  "#OC_LOOP_FILTER8_MMX\n\t" \
  /*mm7=0*/ \
  "pxor %%mm7,%%mm7\n\t" \
  /*mm6:mm0={a0,...,a7}*/ \
  "movq %%mm0,%%mm6\n\t" \
  "punpcklbw %%mm7,%%mm0\n\t" \
  "punpckhbw %%mm7,%%mm6\n\t" \
  /*mm3:mm5={d0,...,d7}*/ \
  "movq %%mm3,%%mm5\n\t" \
  "punpcklbw %%mm7,%%mm3\n\t" \
  "punpckhbw %%mm7,%%mm5\n\t" \
  /*mm6:mm0={a0-d0,...,a7-d7}*/ \
  "psubw %%mm3,%%mm0\n\t" \
  "psubw %%mm5,%%mm6\n\t" \
  /*mm3:mm1={b0,...,b7}*/ \
  "movq %%mm1,%%mm3\n\t" \
  "punpcklbw %%mm7,%%mm1\n\t" \
  "movq %%mm2,%%mm4\n\t" \
  "punpckhbw %%mm7,%%mm3\n\t" \
  /*mm5:mm4={c0,...,c7}*/ \
  "movq %%mm2,%%mm5\n\t" \
  "punpcklbw %%mm7,%%mm4\n\t" \
  "punpckhbw %%mm7,%%mm5\n\t" \
  /*mm7={3}x4 \
    mm5:mm4={c0-b0,...,c7-b7}*/ \
  "pcmpeqw %%mm7,%%mm7\n\t" \
  "psubw %%mm1,%%mm4\n\t" \
  "psrlw $14,%%mm7\n\t" \
  "psubw %%mm3,%%mm5\n\t" \
  /*Scale by 3.*/ \
  "pmullw %%mm7,%%mm4\n\t" \
  "pmullw %%mm7,%%mm5\n\t" \
  /*mm7={4}x4 \
    mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \
  "psrlw $1,%%mm7\n\t" \
  "paddw %%mm0,%%mm4\n\t" \
  "psllw $2,%%mm7\n\t" \
  "movq (%[ll]),%%mm0\n\t" \
  "paddw %%mm6,%%mm5\n\t" \
  /*R_i has the range [-127,128], so we compute -R_i instead. \
    mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \
  "psubw %%mm7,%%mm4\n\t" \
  "psubw %%mm7,%%mm5\n\t" \
  "psraw $3,%%mm4\n\t" \
  "psraw $3,%%mm5\n\t" \
  "pcmpeqb %%mm7,%%mm7\n\t" \
  "packsswb %%mm5,%%mm4\n\t" \
  "pxor %%mm6,%%mm6\n\t" \
  "pxor %%mm7,%%mm4\n\t" \
  "packuswb %%mm3,%%mm1\n\t" \
  /*Now compute lflim of -mm4 cf. Section 7.10 of the spec.*/ \
  /*There's no unsigned byte+signed byte with unsigned saturation op code, so \
     we have to split things by sign (the other option is to work in 16 bits, \
     but working in 8 bits gives much better parallelism). \
    We compute abs(R_i), but save a mask of which terms were negative in mm6. \
    Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \
    Finally, we split mm4 into positive and negative pieces using the mask in \
     mm6, and add and subtract them as appropriate.*/ \
  /*mm4=abs(-R_i)*/ \
  /*mm7=255-2*L*/ \
  "pcmpgtb %%mm4,%%mm6\n\t" \
  "psubb %%mm0,%%mm7\n\t" \
  "pxor %%mm6,%%mm4\n\t" \
  "psubb %%mm0,%%mm7\n\t" \
  "psubb %%mm6,%%mm4\n\t" \
  /*mm7=255-max(2*L-abs(R_i),0)*/ \
  "paddusb %%mm4,%%mm7\n\t" \
  /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \
  "paddusb %%mm7,%%mm4\n\t" \
  "psubusb %%mm7,%%mm4\n\t" \
  /*Now split mm4 by the original sign of -R_i.*/ \
  "movq %%mm4,%%mm5\n\t" \
  "pand %%mm6,%%mm4\n\t" \
  "pandn %%mm5,%%mm6\n\t" \
  /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \
  /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \
  "paddusb %%mm4,%%mm1\n\t" \
  "psubusb %%mm4,%%mm2\n\t" \
  "psubusb %%mm6,%%mm1\n\t" \
  "paddusb %%mm6,%%mm2\n\t" \

/*Applies OC_LOOP_FILTER8_MMX vertically across 8 columns.
  Four 8-byte rows are loaded starting at _pix-2*_ystride; only the middle two
   rows are written back.
  _pix:     Pointer used as the base of the filtered rows (see above).
  _ystride: The distance in bytes between rows.
  _ll:      Pointer to the quadword of limit values read by the filter.*/
#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \
  do{ \
    ptrdiff_t ystride3__; \
    __asm__ __volatile__( \
      /*mm0={a0,...,a7}*/ \
      "movq (%[pix]),%%mm0\n\t" \
      /*ystride3=_ystride*3*/ \
      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
      /*mm3={d0,...,d7}*/ \
      "movq (%[pix],%[ystride3]),%%mm3\n\t" \
      /*mm1={b0,...,b7}*/ \
      "movq (%[pix],%[ystride]),%%mm1\n\t" \
      /*mm2={c0,...,c7}*/ \
      "movq (%[pix],%[ystride],2),%%mm2\n\t" \
      OC_LOOP_FILTER8_MMX \
      /*Write it back out.*/ \
      "movq %%mm1,(%[pix],%[ystride])\n\t" \
      "movq %%mm2,(%[pix],%[ystride],2)\n\t" \
      :[ystride3]"=&r"(ystride3__) \
      :[pix]"r"(_pix-_ystride*2),[ystride]"r"((ptrdiff_t)(_ystride)), \
       [ll]"r"(_ll) \
      :"memory" \
    ); \
  } \
  while(0)

/*Applies OC_LOOP_FILTER8_MMX horizontally across 8 rows.
  Four bytes are loaded from each of 8 rows starting at _pix-2, transposed so
   that each register holds one column, filtered, and then the two middle
   bytes of every row are stored back at byte offset 1 from the row start.
  _pix:     Pointer used as the base of the filtered columns (see above).
  _ystride: The distance in bytes between rows.
  _ll:      Pointer to the quadword of limit values read by the filter.*/
#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \
  do{ \
    unsigned char *pix__; \
    ptrdiff_t      ystride3__; \
    ptrdiff_t      d__; \
    pix__=(_pix)-2; \
    __asm__ __volatile__( \
      /*x x x x d0 c0 b0 a0*/ \
      "movd (%[pix]),%%mm0\n\t" \
      /*x x x x d1 c1 b1 a1*/ \
      "movd (%[pix],%[ystride]),%%mm1\n\t" \
      /*ystride3=_ystride*3*/ \
      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
      /*x x x x d2 c2 b2 a2*/ \
      "movd (%[pix],%[ystride],2),%%mm2\n\t" \
      /*x x x x d3 c3 b3 a3*/ \
      "lea (%[pix],%[ystride],4),%[d]\n\t" \
      "movd (%[pix],%[ystride3]),%%mm3\n\t" \
      /*x x x x d4 c4 b4 a4*/ \
      "movd (%[d]),%%mm4\n\t" \
      /*x x x x d5 c5 b5 a5*/ \
      "movd (%[d],%[ystride]),%%mm5\n\t" \
      /*x x x x d6 c6 b6 a6*/ \
      "movd (%[d],%[ystride],2),%%mm6\n\t" \
      /*x x x x d7 c7 b7 a7*/ \
      "movd (%[d],%[ystride3]),%%mm7\n\t" \
      /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \
      "punpcklbw %%mm1,%%mm0\n\t" \
      /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \
      "punpcklbw %%mm3,%%mm2\n\t" \
      /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \
      "movq %%mm0,%%mm3\n\t" \
      /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \
      "punpcklwd %%mm2,%%mm0\n\t" \
      /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \
      "punpckhwd %%mm2,%%mm3\n\t" \
      /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \
      "movq %%mm0,%%mm1\n\t" \
      /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \
      "punpcklbw %%mm5,%%mm4\n\t" \
      /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \
      "punpcklbw %%mm7,%%mm6\n\t" \
      /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \
      "movq %%mm4,%%mm5\n\t" \
      /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \
      "punpcklwd %%mm6,%%mm4\n\t" \
      /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \
      "punpckhwd %%mm6,%%mm5\n\t" \
      /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \
      "movq %%mm3,%%mm2\n\t" \
      /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \
      "punpckldq %%mm4,%%mm0\n\t" \
      /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \
      "punpckhdq %%mm4,%%mm1\n\t" \
      /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \
      "punpckldq %%mm5,%%mm2\n\t" \
      /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \
      "punpckhdq %%mm5,%%mm3\n\t" \
      OC_LOOP_FILTER8_MMX \
      /*mm2={b0+R_0'',...,b7+R_7''}*/ \
      "movq %%mm1,%%mm0\n\t" \
      /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \
      "punpcklbw %%mm2,%%mm1\n\t" \
      /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \
      "punpckhbw %%mm2,%%mm0\n\t" \
      /*[d]=c1 b1 c0 b0*/ \
      "movd %%mm1,%[d]\n\t" \
      "movw %w[d],1(%[pix])\n\t" \
      "psrlq $32,%%mm1\n\t" \
      "shr $16,%[d]\n\t" \
      "movw %w[d],1(%[pix],%[ystride])\n\t" \
      /*[d]=c3 b3 c2 b2*/ \
      "movd %%mm1,%[d]\n\t" \
      "movw %w[d],1(%[pix],%[ystride],2)\n\t" \
      "shr $16,%[d]\n\t" \
      "movw %w[d],1(%[pix],%[ystride3])\n\t" \
      "lea (%[pix],%[ystride],4),%[pix]\n\t" \
      /*[d]=c5 b5 c4 b4*/ \
      "movd %%mm0,%[d]\n\t" \
      "movw %w[d],1(%[pix])\n\t" \
      "psrlq $32,%%mm0\n\t" \
      "shr $16,%[d]\n\t" \
      "movw %w[d],1(%[pix],%[ystride])\n\t" \
      /*[d]=c7 b7 c6 b6*/ \
      "movd %%mm0,%[d]\n\t" \
      "movw %w[d],1(%[pix],%[ystride],2)\n\t" \
      "shr $16,%[d]\n\t" \
      "movw %w[d],1(%[pix],%[ystride3])\n\t" \
      :[pix]"+r"(pix__),[ystride3]"=&r"(ystride3__),[d]"=&r"(d__) \
      :[ystride]"r"((ptrdiff_t)(_ystride)),[ll]"r"(_ll) \
      :"memory" \
    ); \
  } \
  while(0)
+ Originally written by Rudolf Marek.*/ +#include +#include "x86int.h" +#include "mmxfrag.h" +#include "mmxloop.h" + +#if defined(OC_X86_ASM) + +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component.*/ + if(_last_zzi<2){ + /*Note that this value must be unsigned, to keep the __asm__ block from + sign-extending it when it puts it in a register.*/ + ogg_uint16_t p; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*Fill _dct_coeffs with p.*/ + __asm__ __volatile__( + /*mm0=0000 0000 0000 AAAA*/ + "movd %[p],%%mm0\n\t" + /*mm0=0000 0000 AAAA AAAA*/ + "punpcklwd %%mm0,%%mm0\n\t" + /*mm0=AAAA AAAA AAAA AAAA*/ + "punpckldq %%mm0,%%mm0\n\t" + "movq %%mm0,(%[y])\n\t" + "movq %%mm0,8(%[y])\n\t" + "movq %%mm0,16(%[y])\n\t" + "movq %%mm0,24(%[y])\n\t" + "movq %%mm0,32(%[y])\n\t" + "movq %%mm0,40(%[y])\n\t" + "movq %%mm0,48(%[y])\n\t" + "movq %%mm0,56(%[y])\n\t" + "movq %%mm0,64(%[y])\n\t" + "movq %%mm0,72(%[y])\n\t" + "movq %%mm0,80(%[y])\n\t" + "movq %%mm0,88(%[y])\n\t" + "movq %%mm0,96(%[y])\n\t" + "movq %%mm0,104(%[y])\n\t" + "movq %%mm0,112(%[y])\n\t" + "movq %%mm0,120(%[y])\n\t" + : + :[y]"r"(_dct_coeffs),[p]"r"((unsigned)p) + :"memory" + ); + } + else{ + /*Dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8_mmx(_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; + 
if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); + else{ + const unsigned char *ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, + _dct_coeffs); + } + else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*We copy these entire function to inline the actual MMX routines so that we + use only a single indirect call.*/ + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; + OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); + } +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array. 
+ _refi: The index of the frame buffer to filter. + _pli: The color plane to filter. + _fragy0: The Y coordinate of the first fragment row to filter. + _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ + OC_ALIGN8(unsigned char ll[8]); + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it. 
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); + if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); + if(fragi+1cpu_flags=oc_cpu_flags_get(); + if(_state->cpu_flags&OC_CPU_X86_MMX){ + _state->opt_vtable.frag_copy=oc_frag_copy_mmx; + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; + _state->opt_vtable.idct8x8=oc_idct8x8_mmx; + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; + _state->opt_vtable.state_loop_filter_frag_rows= + oc_state_loop_filter_frag_rows_mmx; + _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; + } + else oc_state_vtable_init_c(_state); +} +#endif diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.c b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.c new file mode 100644 index 00000000..4eb2084d --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.c @@ -0,0 +1,337 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxfrag.c 16578 2009-09-25 19:50:48Z cristianadam $ + + ********************************************************************/ + +/*MMX acceleration of fragment reconstruction for motion compensation. + Originally written by Rudolf Marek. + Additional optimization by Nils Pipenbrinck. + Note: Loops are unrolled for best performance. + The iteration each instruction belongs to is marked in the comments as #i.*/ +#include +#include "x86int.h" +#include "mmxfrag.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride){ +#define SRC edx +#define DST eax +#define YSTRIDE ecx +#define YSTRIDE3 esi + OC_FRAG_COPY_MMX(_dst,_src,_ystride); +#undef SRC +#undef DST +#undef YSTRIDE +#undef YSTRIDE3 +} + +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue){ + __asm{ +#define DST edx +#define DST4 esi +#define YSTRIDE eax +#define YSTRIDE3 edi +#define RESIDUE ecx + mov DST,_dst + mov YSTRIDE,_ystride + mov RESIDUE,_residue + lea DST4,[DST+YSTRIDE*4] + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ + pcmpeqw mm0,mm0 + /*#0 Load low residue.*/ + movq mm1,[0*8+RESIDUE] + /*#0 Load high residue.*/ + movq mm2,[1*8+RESIDUE] + /*Set mm0 to 0x8000800080008000.*/ + psllw mm0,15 + /*#1 Load low residue.*/ + movq mm3,[2*8+RESIDUE] + /*#1 Load high residue.*/ + movq mm4,[3*8+RESIDUE] + /*Set mm0 to 0x0080008000800080.*/ + psrlw mm0,8 + /*#2 Load low residue.*/ + movq mm5,[4*8+RESIDUE] + /*#2 Load high residue.*/ + movq mm6,[5*8+RESIDUE] + /*#0 Bias low residue.*/ + paddsw mm1,mm0 + /*#0 Bias high residue.*/ + paddsw mm2,mm0 + /*#0 Pack 
to byte.*/ + packuswb mm1,mm2 + /*#1 Bias low residue.*/ + paddsw mm3,mm0 + /*#1 Bias high residue.*/ + paddsw mm4,mm0 + /*#1 Pack to byte.*/ + packuswb mm3,mm4 + /*#2 Bias low residue.*/ + paddsw mm5,mm0 + /*#2 Bias high residue.*/ + paddsw mm6,mm0 + /*#2 Pack to byte.*/ + packuswb mm5,mm6 + /*#0 Write row.*/ + movq [DST],mm1 + /*#1 Write row.*/ + movq [DST+YSTRIDE],mm3 + /*#2 Write row.*/ + movq [DST+YSTRIDE*2],mm5 + /*#3 Load low residue.*/ + movq mm1,[6*8+RESIDUE] + /*#3 Load high residue.*/ + movq mm2,[7*8+RESIDUE] + /*#4 Load high residue.*/ + movq mm3,[8*8+RESIDUE] + /*#4 Load high residue.*/ + movq mm4,[9*8+RESIDUE] + /*#5 Load high residue.*/ + movq mm5,[10*8+RESIDUE] + /*#5 Load high residue.*/ + movq mm6,[11*8+RESIDUE] + /*#3 Bias low residue.*/ + paddsw mm1,mm0 + /*#3 Bias high residue.*/ + paddsw mm2,mm0 + /*#3 Pack to byte.*/ + packuswb mm1,mm2 + /*#4 Bias low residue.*/ + paddsw mm3,mm0 + /*#4 Bias high residue.*/ + paddsw mm4,mm0 + /*#4 Pack to byte.*/ + packuswb mm3,mm4 + /*#5 Bias low residue.*/ + paddsw mm5,mm0 + /*#5 Bias high residue.*/ + paddsw mm6,mm0 + /*#5 Pack to byte.*/ + packuswb mm5,mm6 + /*#3 Write row.*/ + movq [DST+YSTRIDE3],mm1 + /*#4 Write row.*/ + movq [DST4],mm3 + /*#5 Write row.*/ + movq [DST4+YSTRIDE],mm5 + /*#6 Load low residue.*/ + movq mm1,[12*8+RESIDUE] + /*#6 Load high residue.*/ + movq mm2,[13*8+RESIDUE] + /*#7 Load low residue.*/ + movq mm3,[14*8+RESIDUE] + /*#7 Load high residue.*/ + movq mm4,[15*8+RESIDUE] + /*#6 Bias low residue.*/ + paddsw mm1,mm0 + /*#6 Bias high residue.*/ + paddsw mm2,mm0 + /*#6 Pack to byte.*/ + packuswb mm1,mm2 + /*#7 Bias low residue.*/ + paddsw mm3,mm0 + /*#7 Bias high residue.*/ + paddsw mm4,mm0 + /*#7 Pack to byte.*/ + packuswb mm3,mm4 + /*#6 Write row.*/ + movq [DST4+YSTRIDE*2],mm1 + /*#7 Write row.*/ + movq [DST4+YSTRIDE3],mm3 +#undef DST +#undef DST4 +#undef YSTRIDE +#undef YSTRIDE3 +#undef RESIDUE + } +} + +void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, + int 
_ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm0.*/ + __asm pxor mm0,mm0; + for(i=4;i-->0;){ + __asm{ +#define DST edx +#define SRC ecx +#define YSTRIDE edi +#define RESIDUE eax + mov DST,_dst + mov SRC,_src + mov YSTRIDE,_ystride + mov RESIDUE,_residue + /*#0 Load source.*/ + movq mm3,[SRC] + /*#1 Load source.*/ + movq mm7,[SRC+YSTRIDE] + /*#0 Get copy of src.*/ + movq mm4,mm3 + /*#0 Expand high source.*/ + punpckhbw mm4,mm0 + /*#0 Expand low source.*/ + punpcklbw mm3,mm0 + /*#0 Add residue high.*/ + paddsw mm4,[8+RESIDUE] + /*#1 Get copy of src.*/ + movq mm2,mm7 + /*#0 Add residue low.*/ + paddsw mm3,[RESIDUE] + /*#1 Expand high source.*/ + punpckhbw mm2,mm0 + /*#0 Pack final row pixels.*/ + packuswb mm3,mm4 + /*#1 Expand low source.*/ + punpcklbw mm7,mm0 + /*#1 Add residue low.*/ + paddsw mm7,[16+RESIDUE] + /*#1 Add residue high.*/ + paddsw mm2,[24+RESIDUE] + /*Advance residue.*/ + lea RESIDUE,[32+RESIDUE] + /*#1 Pack final row pixels.*/ + packuswb mm7,mm2 + /*Advance src.*/ + lea SRC,[SRC+YSTRIDE*2] + /*#0 Write row.*/ + movq [DST],mm3 + /*#1 Write row.*/ + movq [DST+YSTRIDE],mm7 + /*Advance dst.*/ + lea DST,[DST+YSTRIDE*2] + mov _residue,RESIDUE + mov _dst,DST + mov _src,SRC +#undef DST +#undef SRC +#undef YSTRIDE +#undef RESIDUE + } + } +} + +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm7.*/ + __asm pxor mm7,mm7; + for(i=4;i-->0;){ + __asm{ +#define SRC1 ecx +#define SRC2 edi +#define YSTRIDE esi +#define RESIDUE edx +#define DST eax + mov YSTRIDE,_ystride + mov DST,_dst + mov RESIDUE,_residue + mov SRC1,_src1 + mov SRC2,_src2 + /*#0 Load src1.*/ + movq mm0,[SRC1] + /*#0 Load src2.*/ + movq mm2,[SRC2] + /*#0 Copy src1.*/ + movq mm1,mm0 + /*#0 Copy src2.*/ + movq mm3,mm2 + /*#1 Load src1.*/ + movq mm4,[SRC1+YSTRIDE] + /*#0 Unpack lower src1.*/ + punpcklbw mm0,mm7 + /*#1 Load src2.*/ + movq mm5,[SRC2+YSTRIDE] + /*#0 Unpack 
higher src1.*/ + punpckhbw mm1,mm7 + /*#0 Unpack lower src2.*/ + punpcklbw mm2,mm7 + /*#0 Unpack higher src2.*/ + punpckhbw mm3,mm7 + /*Advance src1 ptr.*/ + lea SRC1,[SRC1+YSTRIDE*2] + /*Advance src2 ptr.*/ + lea SRC2,[SRC2+YSTRIDE*2] + /*#0 Lower src1+src2.*/ + paddsw mm0,mm2 + /*#0 Higher src1+src2.*/ + paddsw mm1,mm3 + /*#1 Copy src1.*/ + movq mm2,mm4 + /*#0 Build lo average.*/ + psraw mm0,1 + /*#1 Copy src2.*/ + movq mm3,mm5 + /*#1 Unpack lower src1.*/ + punpcklbw mm4,mm7 + /*#0 Build hi average.*/ + psraw mm1,1 + /*#1 Unpack higher src1.*/ + punpckhbw mm2,mm7 + /*#0 low+=residue.*/ + paddsw mm0,[RESIDUE] + /*#1 Unpack lower src2.*/ + punpcklbw mm5,mm7 + /*#0 high+=residue.*/ + paddsw mm1,[8+RESIDUE] + /*#1 Unpack higher src2.*/ + punpckhbw mm3,mm7 + /*#1 Lower src1+src2.*/ + paddsw mm5,mm4 + /*#0 Pack and saturate.*/ + packuswb mm0,mm1 + /*#1 Higher src1+src2.*/ + paddsw mm3,mm2 + /*#0 Write row.*/ + movq [DST],mm0 + /*#1 Build lo average.*/ + psraw mm5,1 + /*#1 Build hi average.*/ + psraw mm3,1 + /*#1 low+=residue.*/ + paddsw mm5,[16+RESIDUE] + /*#1 high+=residue.*/ + paddsw mm3,[24+RESIDUE] + /*#1 Pack and saturate.*/ + packuswb mm5,mm3 + /*#1 Write row ptr.*/ + movq [DST+YSTRIDE],mm5 + /*Advance residue ptr.*/ + add RESIDUE,32 + /*Advance dest ptr.*/ + lea DST,[DST+YSTRIDE*2] + mov _dst,DST + mov _residue,RESIDUE + mov _src1,SRC1 + mov _src2,SRC2 +#undef SRC1 +#undef SRC2 +#undef YSTRIDE +#undef RESIDUE +#undef DST + } + } +} + +void oc_restore_fpu_mmx(void){ + __asm emms; +} + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.h b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.h new file mode 100644 index 00000000..45ee93e7 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.h @@ -0,0 +1,61 @@ +#if !defined(_x86_vc_mmxfrag_H) +# define _x86_vc_mmxfrag_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +#define 
OC_FRAG_COPY_MMX(_dst,_src,_ystride) \ + do{ \ + const unsigned char *src; \ + unsigned char *dst; \ + src=(_src); \ + dst=(_dst); \ + __asm mov SRC,src \ + __asm mov DST,dst \ + __asm mov YSTRIDE,_ystride \ + /*src+0*ystride*/ \ + __asm movq mm0,[SRC] \ + /*src+1*ystride*/ \ + __asm movq mm1,[SRC+YSTRIDE] \ + /*ystride3=ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*src+2*ystride*/ \ + __asm movq mm2,[SRC+YSTRIDE*2] \ + /*src+3*ystride*/ \ + __asm movq mm3,[SRC+YSTRIDE3] \ + /*dst+0*ystride*/ \ + __asm movq [DST],mm0 \ + /*dst+1*ystride*/ \ + __asm movq [DST+YSTRIDE],mm1 \ + /*Pointer to next 4.*/ \ + __asm lea SRC,[SRC+YSTRIDE*4] \ + /*dst+2*ystride*/ \ + __asm movq [DST+YSTRIDE*2],mm2 \ + /*dst+3*ystride*/ \ + __asm movq [DST+YSTRIDE3],mm3 \ + /*Pointer to next 4.*/ \ + __asm lea DST,[DST+YSTRIDE*4] \ + /*src+0*ystride*/ \ + __asm movq mm0,[SRC] \ + /*src+1*ystride*/ \ + __asm movq mm1,[SRC+YSTRIDE] \ + /*src+2*ystride*/ \ + __asm movq mm2,[SRC+YSTRIDE*2] \ + /*src+3*ystride*/ \ + __asm movq mm3,[SRC+YSTRIDE3] \ + /*dst+0*ystride*/ \ + __asm movq [DST],mm0 \ + /*dst+1*ystride*/ \ + __asm movq [DST+YSTRIDE],mm1 \ + /*dst+2*ystride*/ \ + __asm movq [DST+YSTRIDE*2],mm2 \ + /*dst+3*ystride*/ \ + __asm movq [DST+YSTRIDE3],mm3 \ + } \ + while(0) + +# endif +#endif diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxidct.c b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxidct.c new file mode 100644 index 00000000..8f5ff680 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxidct.c @@ -0,0 +1,562 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of Theora's iDCT. + Originally written by Rudolf Marek, based on code from On2's VP3.*/ +#include "x86int.h" +#include "../dct.h" + +#if defined(OC_X86_ASM) + +/*These are offsets into the table of constants below.*/ +/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/ +#define OC_COSINE_OFFSET (0) +/*A row of 8's.*/ +#define OC_EIGHT_OFFSET (56) + + + +/*A table of constants used by the MMX routines.*/ +static const __declspec(align(16))ogg_uint16_t + OC_IDCT_CONSTS[(7+1)*4]={ + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + 8, 8, 8, 8 +}; + +/*38 cycles*/ +#define OC_IDCT_BEGIN __asm{ \ + __asm movq mm2,OC_I(3) \ + __asm movq mm6,OC_C(3) \ + __asm movq mm4,mm2 \ + __asm movq mm7,OC_J(5) \ + __asm pmulhw mm4,mm6 \ + __asm movq mm1,OC_C(5) \ + __asm pmulhw mm6,mm7 \ + __asm movq mm5,mm1 \ + __asm pmulhw mm1,mm2 \ + __asm movq mm3,OC_I(1) \ + __asm pmulhw mm5,mm7 \ + __asm movq mm0,OC_C(1) \ + __asm paddw mm4,mm2 \ + __asm paddw mm6,mm7 \ + __asm paddw mm2,mm1 \ + __asm movq mm1,OC_J(7) \ + 
__asm paddw mm7,mm5 \ + __asm movq mm5,mm0 \ + __asm pmulhw mm0,mm3 \ + __asm paddw mm4,mm7 \ + __asm pmulhw mm5,mm1 \ + __asm movq mm7,OC_C(7) \ + __asm psubw mm6,mm2 \ + __asm paddw mm0,mm3 \ + __asm pmulhw mm3,mm7 \ + __asm movq mm2,OC_I(2) \ + __asm pmulhw mm7,mm1 \ + __asm paddw mm5,mm1 \ + __asm movq mm1,mm2 \ + __asm pmulhw mm2,OC_C(2) \ + __asm psubw mm3,mm5 \ + __asm movq mm5,OC_J(6) \ + __asm paddw mm0,mm7 \ + __asm movq mm7,mm5 \ + __asm psubw mm0,mm4 \ + __asm pmulhw mm5,OC_C(2) \ + __asm paddw mm2,mm1 \ + __asm pmulhw mm1,OC_C(6) \ + __asm paddw mm4,mm4 \ + __asm paddw mm4,mm0 \ + __asm psubw mm3,mm6 \ + __asm paddw mm5,mm7 \ + __asm paddw mm6,mm6 \ + __asm pmulhw mm7,OC_C(6) \ + __asm paddw mm6,mm3 \ + __asm movq OC_I(1),mm4 \ + __asm psubw mm1,mm5 \ + __asm movq mm4,OC_C(4) \ + __asm movq mm5,mm3 \ + __asm pmulhw mm3,mm4 \ + __asm paddw mm7,mm2 \ + __asm movq OC_I(2),mm6 \ + __asm movq mm2,mm0 \ + __asm movq mm6,OC_I(0) \ + __asm pmulhw mm0,mm4 \ + __asm paddw mm5,mm3 \ + __asm movq mm3,OC_J(4) \ + __asm psubw mm5,mm1 \ + __asm paddw mm2,mm0 \ + __asm psubw mm6,mm3 \ + __asm movq mm0,mm6 \ + __asm pmulhw mm6,mm4 \ + __asm paddw mm3,mm3 \ + __asm paddw mm1,mm1 \ + __asm paddw mm3,mm0 \ + __asm paddw mm1,mm5 \ + __asm pmulhw mm4,mm3 \ + __asm paddw mm6,mm0 \ + __asm psubw mm6,mm2 \ + __asm paddw mm2,mm2 \ + __asm movq mm0,OC_I(1) \ + __asm paddw mm2,mm6 \ + __asm paddw mm4,mm3 \ + __asm psubw mm2,mm1 \ +} + +/*38+8=46 cycles.*/ +#define OC_ROW_IDCT __asm{ \ + OC_IDCT_BEGIN \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm3,mm3 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + __asm paddw mm5,mm5 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + 
/*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm0,mm0 \ + /*Save R1.*/ \ + __asm movq OC_I(1),mm1 \ + /*r0=R0=G.+C.*/ \ + __asm paddw mm0,mm7 \ +} + +/*The following macro does two 4x4 transposes in place. + At entry, we assume: + r0 = a3 a2 a1 a0 + I(1) = b3 b2 b1 b0 + r2 = c3 c2 c1 c0 + r3 = d3 d2 d1 d0 + + r4 = e3 e2 e1 e0 + r5 = f3 f2 f1 f0 + r6 = g3 g2 g1 g0 + r7 = h3 h2 h1 h0 + + At exit, we have: + I(0) = d0 c0 b0 a0 + I(1) = d1 c1 b1 a1 + I(2) = d2 c2 b2 a2 + I(3) = d3 c3 b3 a3 + + J(4) = h0 g0 f0 e0 + J(5) = h1 g1 f1 e1 + J(6) = h2 g2 f2 e2 + J(7) = h3 g3 f3 e3 + + I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. + J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. + + Since r1 is free at entry, we calculate the Js first.*/ +/*19 cycles.*/ +#define OC_TRANSPOSE __asm{ \ + __asm movq mm1,mm4 \ + __asm punpcklwd mm4,mm5 \ + __asm movq OC_I(0),mm0 \ + __asm punpckhwd mm1,mm5 \ + __asm movq mm0,mm6 \ + __asm punpcklwd mm6,mm7 \ + __asm movq mm5,mm4 \ + __asm punpckldq mm4,mm6 \ + __asm punpckhdq mm5,mm6 \ + __asm movq mm6,mm1 \ + __asm movq OC_J(4),mm4 \ + __asm punpckhwd mm0,mm7 \ + __asm movq OC_J(5),mm5 \ + __asm punpckhdq mm6,mm0 \ + __asm movq mm4,OC_I(0) \ + __asm punpckldq mm1,mm0 \ + __asm movq mm5,OC_I(1) \ + __asm movq mm0,mm4 \ + __asm movq OC_J(7),mm6 \ + __asm punpcklwd mm0,mm5 \ + __asm movq OC_J(6),mm1 \ + __asm punpckhwd mm4,mm5 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm2,mm3 \ + __asm movq mm1,mm0 \ + __asm punpckldq mm0,mm2 \ + __asm punpckhdq mm1,mm2 \ + __asm movq mm2,mm4 \ + __asm movq OC_I(0),mm0 \ + __asm punpckhwd mm5,mm3 \ + __asm movq OC_I(1),mm1 \ + __asm punpckhdq mm4,mm5 \ + __asm punpckldq mm2,mm5 \ + __asm movq OC_I(3),mm4 \ + __asm movq OC_I(2),mm2 \ +} + +/*38+19=57 cycles.*/ +#define OC_COLUMN_IDCT __asm{ \ + OC_IDCT_BEGIN \ + __asm paddw mm2,OC_8 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r2=NR2*/ \ + __asm psraw mm2,4 \ + /*r4=E'=E-G*/ \ + __asm psubw 
mm4,mm7 \ + /*r1=NR1*/ \ + __asm psraw mm1,4 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*Store NR2 at I(2).*/ \ + __asm movq OC_I(2),mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*Store NR1 at I(1).*/ \ + __asm movq OC_I(1),mm1 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm4,OC_8 \ + /*r3=D'+D'*/ \ + __asm paddw mm3,mm3 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r4=NR4*/ \ + __asm psraw mm4,4 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + /*r3=NR3*/ \ + __asm psraw mm3,4 \ + __asm paddw mm6,OC_8 \ + /*r5=B''+B''*/ \ + __asm paddw mm5,mm5 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r6=NR6*/ \ + __asm psraw mm6,4 \ + /*Store NR4 at J(4).*/ \ + __asm movq OC_J(4),mm4 \ + /*r5=NR5*/ \ + __asm psraw mm5,4 \ + /*Store NR3 at I(3).*/ \ + __asm movq OC_I(3),mm3 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm7,OC_8 \ + /*r0=C'+C'*/ \ + __asm paddw mm0,mm0 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ + /*r7=NR7*/ \ + __asm psraw mm7,4 \ + /*Store NR6 at J(6).*/ \ + __asm movq OC_J(6),mm6 \ + /*r0=NR0*/ \ + __asm psraw mm0,4 \ + /*Store NR5 at J(5).*/ \ + __asm movq OC_J(5),mm5 \ + /*Store NR7 at J(7).*/ \ + __asm movq OC_J(7),mm7 \ + /*Store NR0 at I(0).*/ \ + __asm movq OC_I(0),mm0 \ +} + +#define OC_MID(_m,_i) [CONSTS+_m+(_i)*8] +#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) +#define OC_8 OC_MID(OC_EIGHT_OFFSET,0) + +static void oc_idct8x8_slow(ogg_int16_t _y[64]){ + /*This routine accepts an 8x8 matrix, but in partially transposed form. 
+ Every 4x4 block is transposed.*/ + __asm{ +#define CONSTS eax +#define Y edx + mov CONSTS,offset OC_IDCT_CONSTS + mov Y,_y +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) [Y+(_k-4)*16+8] + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+(_k*16)+64] +#define OC_J(_k) [Y+(_k-4)*16+72] + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16+8] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#undef CONSTS +#undef Y + } +} + +/*25 cycles.*/ +#define OC_IDCT_BEGIN_10 __asm{ \ + __asm movq mm2,OC_I(3) \ + __asm nop \ + __asm movq mm6,OC_C(3) \ + __asm movq mm4,mm2 \ + __asm movq mm1,OC_C(5) \ + __asm pmulhw mm4,mm6 \ + __asm movq mm3,OC_I(1) \ + __asm pmulhw mm1,mm2 \ + __asm movq mm0,OC_C(1) \ + __asm paddw mm4,mm2 \ + __asm pxor mm6,mm6 \ + __asm paddw mm2,mm1 \ + __asm movq mm5,OC_I(2) \ + __asm pmulhw mm0,mm3 \ + __asm movq mm1,mm5 \ + __asm paddw mm0,mm3 \ + __asm pmulhw mm3,OC_C(7) \ + __asm psubw mm6,mm2 \ + __asm pmulhw mm5,OC_C(2) \ + __asm psubw mm0,mm4 \ + __asm movq mm7,OC_I(2) \ + __asm paddw mm4,mm4 \ + __asm paddw mm7,mm5 \ + __asm paddw mm4,mm0 \ + __asm pmulhw mm1,OC_C(6) \ + __asm psubw mm3,mm6 \ + __asm movq OC_I(1),mm4 \ + __asm paddw mm6,mm6 \ + __asm movq mm4,OC_C(4) \ + __asm paddw mm6,mm3 \ + __asm movq mm5,mm3 \ + __asm pmulhw mm3,mm4 \ + __asm movq OC_I(2),mm6 \ + __asm movq mm2,mm0 \ + __asm movq mm6,OC_I(0) \ + __asm pmulhw mm0,mm4 \ + __asm paddw mm5,mm3 \ + __asm paddw mm2,mm0 \ + __asm psubw mm5,mm1 \ + __asm pmulhw mm6,mm4 \ + __asm paddw mm6,OC_I(0) \ + __asm paddw mm1,mm1 \ + __asm movq mm4,mm6 \ + __asm paddw mm1,mm5 \ + __asm psubw mm6,mm2 \ + __asm paddw mm2,mm2 \ + __asm movq mm0,OC_I(1) \ + __asm paddw mm2,mm6 \ + __asm psubw mm2,mm1 \ + __asm nop \ +} + +/*25+8=33 cycles.*/ +#define OC_ROW_IDCT_10 __asm{ \ + OC_IDCT_BEGIN_10 \ + /*r3=D'*/ \ + __asm movq 
mm3,OC_I(2) \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm3,mm3 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + __asm paddw mm5,mm5 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm0,mm0 \ + /*Save R1.*/ \ + __asm movq OC_I(1),mm1 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ +} + +/*25+19=44 cycles'*/ +#define OC_COLUMN_IDCT_10 __asm{ \ + OC_IDCT_BEGIN_10 \ + __asm paddw mm2,OC_8 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r2=NR2*/ \ + __asm psraw mm2,4 \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=NR1*/ \ + __asm psraw mm1,4 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*Store NR2 at I(2).*/ \ + __asm movq OC_I(2),mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*Store NR1 at I(1).*/ \ + __asm movq OC_I(1),mm1 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm4,OC_8 \ + /*r3=D'+D'*/ \ + __asm paddw mm3,mm3 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r4=NR4*/ \ + __asm psraw mm4,4 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + /*r3=NR3*/ \ + __asm psraw mm3,4 \ + __asm paddw mm6,OC_8 \ + /*r5=B''+B''*/ \ + __asm paddw mm5,mm5 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r6=NR6*/ \ + __asm psraw mm6,4 \ + /*Store NR4 at J(4).*/ \ + __asm movq OC_J(4),mm4 \ + /*r5=NR5*/ \ + __asm psraw mm5,4 \ + /*Store NR3 at I(3).*/ \ + __asm movq OC_I(3),mm3 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm7,OC_8 \ + /*r0=C'+C'*/ \ + __asm paddw mm0,mm0 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ + /*r7=NR7*/ \ + __asm psraw mm7,4 \ + /*Store NR6 at J(6).*/ \ + __asm movq OC_J(6),mm6 \ + /*r0=NR0*/ \ + __asm psraw mm0,4 \ + 
/*Store NR5 at J(5).*/ \ + __asm movq OC_J(5),mm5 \ + /*Store NR7 at J(7).*/ \ + __asm movq OC_J(7),mm7 \ + /*Store NR0 at I(0).*/ \ + __asm movq OC_I(0),mm0 \ +} + +static void oc_idct8x8_10(ogg_int16_t _y[64]){ + __asm{ +#define CONSTS eax +#define Y edx + mov CONSTS,offset OC_IDCT_CONSTS + mov Y,_y +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) [Y+(_k-4)*16+8] + /*Done with dequant, descramble, and partial transpose. + Now do the iDCT itself.*/ + OC_ROW_IDCT_10 + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16+8] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#undef CONSTS +#undef Y + } +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. + It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. + However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. + However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. 
+ Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Perform the iDCT.*/ + if(_last_zzi<10)oc_idct8x8_10(_y); + else oc_idct8x8_slow(_y); +} + +#endif diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxloop.h b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxloop.h new file mode 100644 index 00000000..2561fca2 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxloop.h @@ -0,0 +1,219 @@ +#if !defined(_x86_vc_mmxloop_H) +# define _x86_vc_mmxloop_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}. 
+ On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and + mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/ +#define OC_LOOP_FILTER8_MMX __asm{ \ + /*mm7=0*/ \ + __asm pxor mm7,mm7 \ + /*mm6:mm0={a0,...,a7}*/ \ + __asm movq mm6,mm0 \ + __asm punpcklbw mm0,mm7 \ + __asm punpckhbw mm6,mm7 \ + /*mm3:mm5={d0,...,d7}*/ \ + __asm movq mm5,mm3 \ + __asm punpcklbw mm3,mm7 \ + __asm punpckhbw mm5,mm7 \ + /*mm6:mm0={a0-d0,...,a7-d7}*/ \ + __asm psubw mm0,mm3 \ + __asm psubw mm6,mm5 \ + /*mm3:mm1={b0,...,b7}*/ \ + __asm movq mm3,mm1 \ + __asm punpcklbw mm1,mm7 \ + __asm movq mm4,mm2 \ + __asm punpckhbw mm3,mm7 \ + /*mm5:mm4={c0,...,c7}*/ \ + __asm movq mm5,mm2 \ + __asm punpcklbw mm4,mm7 \ + __asm punpckhbw mm5,mm7 \ + /*mm7={3}x4 \ + mm5:mm4={c0-b0,...,c7-b7}*/ \ + __asm pcmpeqw mm7,mm7 \ + __asm psubw mm4,mm1 \ + __asm psrlw mm7,14 \ + __asm psubw mm5,mm3 \ + /*Scale by 3.*/ \ + __asm pmullw mm4,mm7 \ + __asm pmullw mm5,mm7 \ + /*mm7={4}x4 \ + mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \ + __asm psrlw mm7,1 \ + __asm paddw mm4,mm0 \ + __asm psllw mm7,2 \ + __asm movq mm0,[LL] \ + __asm paddw mm5,mm6 \ + /*R_i has the range [-127,128], so we compute -R_i instead. \ + mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \ + __asm psubw mm4,mm7 \ + __asm psubw mm5,mm7 \ + __asm psraw mm4,3 \ + __asm psraw mm5,3 \ + __asm pcmpeqb mm7,mm7 \ + __asm packsswb mm4,mm5 \ + __asm pxor mm6,mm6 \ + __asm pxor mm4,mm7 \ + __asm packuswb mm1,mm3 \ + /*Now compute lflim of -mm4 cf. Section 7.10 of the spec.*/ \ + /*There's no unsigned byte+signed byte with unsigned saturation op code, so \ + we have to split things by sign (the other option is to work in 16 bits, \ + but working in 8 bits gives much better parallelism). \ + We compute abs(R_i), but save a mask of which terms were negative in mm6. \ + Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)).
\ + Finally, we split mm4 into positive and negative pieces using the mask in \ + mm6, and add and subtract them as appropriate.*/ \ + /*mm4=abs(-R_i)*/ \ + /*mm7=255-2*L*/ \ + __asm pcmpgtb mm6,mm4 \ + __asm psubb mm7,mm0 \ + __asm pxor mm4,mm6 \ + __asm psubb mm7,mm0 \ + __asm psubb mm4,mm6 \ + /*mm7=255-max(2*L-abs(R_i),0)*/ \ + __asm paddusb mm7,mm4 \ + /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \ + __asm paddusb mm4,mm7 \ + __asm psubusb mm4,mm7 \ + /*Now split mm4 by the original sign of -R_i.*/ \ + __asm movq mm5,mm4 \ + __asm pand mm4,mm6 \ + __asm pandn mm6,mm5 \ + /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \ + /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \ + __asm paddusb mm1,mm4 \ + __asm psubusb mm2,mm4 \ + __asm psubusb mm1,mm6 \ + __asm paddusb mm2,mm6 \ +} + +#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \ + do{ \ + /*Used local variable pix__ in order to fix compilation errors like: \ + "error C2425: 'SHL' : non-constant expression in 'second operand'".*/ \ + unsigned char *pix__; \ + unsigned char *ll__; \ + ll__=(_ll); \ + pix__=(_pix); \ + __asm mov YSTRIDE,_ystride \ + __asm mov LL,ll__ \ + __asm mov PIX,pix__ \ + __asm sub PIX,YSTRIDE \ + __asm sub PIX,YSTRIDE \ + /*mm0={a0,...,a7}*/ \ + __asm movq mm0,[PIX] \ + /*ystride3=_ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*mm3={d0,...,d7}*/ \ + __asm movq mm3,[PIX+YSTRIDE3] \ + /*mm1={b0,...,b7}*/ \ + __asm movq mm1,[PIX+YSTRIDE] \ + /*mm2={c0,...,c7}*/ \ + __asm movq mm2,[PIX+YSTRIDE*2] \ + OC_LOOP_FILTER8_MMX \ + /*Write it back out.*/ \ + __asm movq [PIX+YSTRIDE],mm1 \ + __asm movq [PIX+YSTRIDE*2],mm2 \ + } \ + while(0) + +#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \ + do{ \ + /*Used local variable ll__ in order to fix compilation errors like: \ + "error C2443: operand size conflict".*/ \ + unsigned char *ll__; \ + unsigned char *pix__; \ + ll__=(_ll); \ + pix__=(_pix)-2; \ + __asm mov PIX,pix__ \ + __asm mov YSTRIDE,_ystride \ + __asm mov LL,ll__ \ + /*x x x x d0 c0 b0 
a0*/ \ + __asm movd mm0,[PIX] \ + /*x x x x d1 c1 b1 a1*/ \ + __asm movd mm1,[PIX+YSTRIDE] \ + /*ystride3=_ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*x x x x d2 c2 b2 a2*/ \ + __asm movd mm2,[PIX+YSTRIDE*2] \ + /*x x x x d3 c3 b3 a3*/ \ + __asm lea D,[PIX+YSTRIDE*4] \ + __asm movd mm3,[PIX+YSTRIDE3] \ + /*x x x x d4 c4 b4 a4*/ \ + __asm movd mm4,[D] \ + /*x x x x d5 c5 b5 a5*/ \ + __asm movd mm5,[D+YSTRIDE] \ + /*x x x x d6 c6 b6 a6*/ \ + __asm movd mm6,[D+YSTRIDE*2] \ + /*x x x x d7 c7 b7 a7*/ \ + __asm movd mm7,[D+YSTRIDE3] \ + /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + __asm punpcklbw mm0,mm1 \ + /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \ + __asm punpcklbw mm2,mm3 \ + /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + __asm movq mm3,mm0 \ + /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + __asm punpcklwd mm0,mm2 \ + /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + __asm punpckhwd mm3,mm2 \ + /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + __asm movq mm1,mm0 \ + /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + __asm punpcklbw mm4,mm5 \ + /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \ + __asm punpcklbw mm6,mm7 \ + /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + __asm movq mm5,mm4 \ + /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \ + __asm punpcklwd mm4,mm6 \ + /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \ + __asm punpckhwd mm5,mm6 \ + /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + __asm movq mm2,mm3 \ + /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \ + __asm punpckldq mm0,mm4 \ + /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \ + __asm punpckhdq mm1,mm4 \ + /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \ + __asm punpckldq mm2,mm5 \ + /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \ + __asm punpckhdq mm3,mm5 \ + OC_LOOP_FILTER8_MMX \ + /*mm2={b0+R_0'',...,b7+R_7''}*/ \ + __asm movq mm0,mm1 \ + /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \ + __asm punpcklbw mm1,mm2 \ + /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \ + __asm punpckhbw mm0,mm2 \ + /*[d]=c1 b1 c0 b0*/ \ + __asm movd D,mm1 \ + __asm mov [PIX+1],D_WORD \ + __asm psrlq mm1,32 \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE+1],D_WORD \ + /*[d]=c3 b3 c2 b2*/ \ + __asm movd D,mm1 
\ + __asm mov [PIX+YSTRIDE*2+1],D_WORD \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE3+1],D_WORD \ + __asm lea PIX,[PIX+YSTRIDE*4] \ + /*[d]=c5 b5 c4 b4*/ \ + __asm movd D,mm0 \ + __asm mov [PIX+1],D_WORD \ + __asm psrlq mm0,32 \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE+1],D_WORD \ + /*[d]=c7 b7 c6 b6*/ \ + __asm movd D,mm0 \ + __asm mov [PIX+YSTRIDE*2+1],D_WORD \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE3+1],D_WORD \ + } \ + while(0) + +# endif +#endif diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxstate.c b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxstate.c new file mode 100644 index 00000000..73bd1981 --- /dev/null +++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxstate.c @@ -0,0 +1,211 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxstate.c 16584 2009-09-26 19:35:55Z tterribe $ + + ********************************************************************/ + +/*MMX acceleration of complete fragment reconstruction algorithm. 
+ Originally written by Rudolf Marek.*/ +#include <string.h> +#include "x86int.h" +#include "mmxfrag.h" +#include "mmxloop.h" + +#if defined(OC_X86_ASM) + +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component.*/ + if(_last_zzi<2){ + /*Note that this value must be unsigned, to keep the __asm__ block from + sign-extending it when it puts it in a register.*/ + ogg_uint16_t p; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*Fill _dct_coeffs with p.*/ + __asm{ +#define Y eax +#define P ecx + mov Y,_dct_coeffs + movzx P,p + /*mm0=0000 0000 0000 AAAA*/ + movd mm0,P + /*mm0=0000 0000 AAAA AAAA*/ + punpcklwd mm0,mm0 + /*mm0=AAAA AAAA AAAA AAAA*/ + punpckldq mm0,mm0 + movq [Y],mm0 + movq [8+Y],mm0 + movq [16+Y],mm0 + movq [24+Y],mm0 + movq [32+Y],mm0 + movq [40+Y],mm0 + movq [48+Y],mm0 + movq [56+Y],mm0 + movq [64+Y],mm0 + movq [72+Y],mm0 + movq [80+Y],mm0 + movq [88+Y],mm0 + movq [96+Y],mm0 + movq [104+Y],mm0 + movq [112+Y],mm0 + movq [120+Y],mm0 +#undef Y +#undef P + } + } + else{ + /*Dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8_mmx(_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; + if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); + else{ + const unsigned char *ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; +
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, + _dct_coeffs); + } + else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*We copy this entire function to inline the actual MMX routines so that we + use only a single indirect call.*/ + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; +#define SRC edx +#define DST eax +#define YSTRIDE ecx +#define YSTRIDE3 edi + OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); +#undef SRC +#undef DST +#undef YSTRIDE +#undef YSTRIDE3 + } +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array. + _refi: The index of the frame buffer to filter. + _pli: The color plane to filter.
+ _fragy0: The Y coordinate of the first fragment row to filter. + _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ + OC_ALIGN8(unsigned char ll[8]); + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it. 
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); + if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); + if(fragi+1cpu_flags=oc_cpu_flags_get(); + if(_state->cpu_flags&OC_CPU_X86_MMX){ + _state->opt_vtable.frag_copy=oc_frag_copy_mmx; + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; + _state->opt_vtable.idct8x8=oc_idct8x8_mmx; + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; + _state->opt_vtable.state_loop_filter_frag_rows= + oc_state_loop_filter_frag_rows_mmx; + _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; + } + else oc_state_vtable_init_c(_state); +} +#endif diff --git a/engine/code/qcommon/files.c b/engine/code/qcommon/files.c index 2c36c1eb..c5bf3b74 100644 --- a/engine/code/qcommon/files.c +++ b/engine/code/qcommon/files.c @@ -2438,6 +2438,12 @@ int FS_GetFileList( const char *path, const char *extension, char *listbuf, int return FS_GetModList(listbuf, bufsize); } +const char *extensions[] = { "RoQ", "roq" +#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA)) + , "ogm", "ogv" +#endif + }; + pFiles = FS_ListFiles(path, extension, &nFiles); for (i =0; i < nFiles; i++) { diff --git a/engine/code/qcommon/q_shared.h b/engine/code/qcommon/q_shared.h index 5fe8899e..81862195 100644 --- a/engine/code/qcommon/q_shared.h +++ b/engine/code/qcommon/q_shared.h @@ -67,7 +67,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define BASETA "missionpack" #ifndef PRODUCT_VERSION -#define PRODUCT_VERSION "v0.4_r515" +#define PRODUCT_VERSION "v0.4_r518" #endif diff --git a/q3rallycode.ppr b/q3rallycode.ppr 
index ff558679..c796c51e 100644 --- a/q3rallycode.ppr +++ b/q3rallycode.ppr @@ -194,7 +194,8 @@ q3rallycode engine\code\client\snd_openal.c engine\code\client\snd_public.h engine\code\client\snd_wavelet.c - -game + engine\code\client\cl_cin_ogm.c + +game engine\code\game\ai_chat.c engine\code\game\ai_chat.h engine\code\game\ai_cmd.c @@ -456,7 +457,7 @@ q3rallycode engine\code\null\null_main.c engine\code\null\null_net.c engine\code\null\null_snddma.c - +q3_ui + -q3_ui engine\code\q3_ui\ui.def engine\code\q3_ui\ui_addbots.c engine\code\q3_ui\ui_atoms.c @@ -745,7 +746,7 @@ q3rallycode engine\code\tools\lcc\LOG engine\code\tools\lcc\README engine\code\tools\lcc\README.id - +ui + -ui engine\code\ui\ui_atoms.c engine\code\ui\ui_gameinfo.c engine\code\ui\ui_local.h @@ -888,41 +889,29 @@ q3rallycode engine\cross-make-mingw64.sh [Open project files] 0=engine\code\qcommon\q_shared.h -1=engine\code\cgame\cg_draw.c -2=engine\code\cgame\cg_local.h -3=engine\code\cgame\cg_event.c -4=engine\code\ui\ui_local.h -5=engine\code\cgame\cg_scoreboard.c -6=engine\code\q3_ui\ui_rally_credits.c -7=engine\code\q3_ui\ui_menu.c -8=engine\code\q3_ui\ui_video.c +1=engine\Makefile +2=engine\code\client\cl_cin.c +3=engine\code\client\client.h +4=engine\code\qcommon\files.c +5=engine\code\game\g_mover.c [Selected Project Files] Main= Selected=engine\code\qcommon\q_shared.h [engine\code\qcommon\q_shared.h] -TopLine=51 +TopLine=56 Caret=35,70 -[engine\code\cgame\cg_draw.c] -TopLine=828 -Caret=28,848 -[engine\code\cgame\cg_local.h] -TopLine=57 -Caret=27,76 -[engine\code\cgame\cg_event.c] -TopLine=275 -Caret=58,297 -[engine\code\ui\ui_local.h] -TopLine=129 -Caret=1,139 -[engine\code\cgame\cg_scoreboard.c] -TopLine=54 -Caret=41,78 -[engine\code\q3_ui\ui_rally_credits.c] -TopLine=84 -Caret=45,107 -[engine\code\q3_ui\ui_menu.c] -TopLine=572 -Caret=66,592 -[engine\code\q3_ui\ui_video.c] -TopLine=1042 -Caret=1,1085 +[engine\Makefile] +TopLine=342 +Caret=6,347 +[engine\code\client\cl_cin.c] +TopLine=382 
+Caret=5,397 +[engine\code\client\client.h] +TopLine=584 +Caret=31,615 +[engine\code\qcommon\files.c] +TopLine=2426 +Caret=6,2445 +[engine\code\game\g_mover.c] +TopLine=1857 +Caret=3,1864