diff --git a/engine/Makefile b/engine/Makefile
index fdb3f31b..53a7fec3 100644
--- a/engine/Makefile
+++ b/engine/Makefile
@@ -190,6 +190,10 @@ ifndef USE_CODEC_OPUS
 USE_CODEC_OPUS=1
 endif
 
+ifndef USE_CODEC_THEORA
+USE_CODEC_THEORA=1
+endif
+
 ifndef USE_MUMBLE
 USE_MUMBLE=1
 endif
@@ -218,6 +222,10 @@ ifndef USE_INTERNAL_OPUS
 USE_INTERNAL_OPUS=$(USE_INTERNAL_LIBS)
 endif
 
+ifndef USE_INTERNAL_THEORA
+USE_INTERNAL_THEORA=$(USE_INTERNAL_LIBS)
+endif
+
 ifndef USE_INTERNAL_ZLIB
 USE_INTERNAL_ZLIB=$(USE_INTERNAL_LIBS)
 endif
@@ -270,6 +278,7 @@ OGGDIR=$(MOUNT_DIR)/libogg-1.3.3
 VORBISDIR=$(MOUNT_DIR)/libvorbis-1.3.6
 OPUSDIR=$(MOUNT_DIR)/opus-1.2.1
 OPUSFILEDIR=$(MOUNT_DIR)/opusfile-0.9
+THEORADIR=$(MOUNT_DIR)/libtheora-1.1.1
 ZDIR=$(MOUNT_DIR)/zlib
 TOOLSDIR=$(MOUNT_DIR)/tools
 Q3ASMDIR=$(MOUNT_DIR)/tools/asm
@@ -326,21 +335,15 @@ ifeq ($(SDL_CFLAGS),)
   endif
 endif
 
-# Add svn version info
-USE_SVN=
-ifeq ($(wildcard ../.svn),../.svn)
-  SVN_REV=$(shell LANG=C svnversion .)
-  ifneq ($(SVN_REV),)
-    VERSION:=$(VERSION)_r$(SVN_REV)
-    USE_SVN=1
+
+# Add git version info
+USE_GIT=
+ifeq ($(wildcard .git),.git)
+  GIT_REV=$(shell git show -s --pretty=format:%ad+%h --date=short | tr -d '-')
+  ifneq ($(GIT_REV),)
+    VERSION:=$(VERSION)+$(GIT_REV)
+    USE_GIT=1
   endif
-else
-ifeq ($(wildcard ../.git/svn/.metadata),../.git/svn/.metadata)
-  SVN_REV=$(shell LANG=C git svn info | awk '$$1 == "Revision:" {print $$2; exit 0}')
-  ifneq ($(SVN_REV),)
-    VERSION:=$(VERSION)_r$(SVN_REV)
-  endif
-endif
 endif
 ifdef IOQ3_REVISION
   VERSION:=$(VERSION)_IOQ3r$(IOQ3_REVISION)
@@ -931,7 +934,7 @@ ifeq ($(PLATFORM),sunos)
   CC=gcc
   INSTALL=ginstall
   MKDIR=gmkdir -p
-  COPYDIR="/usr/local/share/games/quake3"
+  COPYDIR="/usr/local/share/games/q3rally"
 
   ifneq ($(ARCH),x86)
     ifneq ($(ARCH),sparc)
@@ -1099,6 +1102,19 @@ ifeq ($(USE_CURL),1)
   endif
 endif
 
+
+ifeq ($(USE_CODEC_THEORA),1)
+  CLIENT_CFLAGS += -DUSE_CIN_THEORA
+  ifeq ($(USE_INTERNAL_THEORA),1)
+    THEORA_CFLAGS=-I$(THEORADIR)/include
+  else
+    THEORA_CFLAGS ?= $(shell $(PKG_CONFIG) --silence-errors --cflags theoradec || true)
+    THEORA_LIBS ?= $(shell $(PKG_CONFIG) --silence-errors --libs theoradec || echo -ltheoradec)
+  endif
+  CLIENT_CFLAGS += $(THEORA_CFLAGS)
+  CLIENT_LIBS += $(THEORA_LIBS)
+  NEED_OGG=1
+endif
 ifeq ($(USE_VOIP),1)
   CLIENT_CFLAGS += -DUSE_VOIP
   SERVER_CFLAGS += -DUSE_VOIP
@@ -1475,6 +1491,7 @@ endif
 makedirs:
 	@$(MKDIR) $(B)/autoupdater
 	@$(MKDIR) $(B)/client/opus
+	@$(MKDIR) $(B)/client/theora
 	@$(MKDIR) $(B)/client/vorbis
 	@$(MKDIR) $(B)/renderergl1
 	@$(MKDIR) $(B)/renderergl2
@@ -1721,6 +1738,7 @@ $(B)/$(AUTOUPDATER_BIN): $(Q3AUTOUPDATEROBJ)
 Q3OBJ = \
   $(B)/client/cl_cgame.o \
   $(B)/client/cl_cin.o \
+  $(B)/client/cl_cin_ogm.o \
   $(B)/client/cl_console.o \
   $(B)/client/cl_input.o \
   $(B)/client/cl_keys.o \
@@ -2013,6 +2031,38 @@ ifeq ($(ARCH),x86_64)
     $(B)/client/ftola.o
 endif
 
+
+ifeq ($(USE_CODEC_THEORA),1)
+ifeq ($(USE_INTERNAL_THEORA),1)
+Q3OBJ += \
+  $(B)/client/theora/apiwrapper.o \
+  $(B)/client/theora/bitpack.o \
+  $(B)/client/theora/decapiwrapper.o \
+  $(B)/client/theora/decinfo.o \
+  $(B)/client/theora/decode.o \
+  $(B)/client/theora/dequant.o \
+  $(B)/client/theora/fragment.o \
+  $(B)/client/theora/huffdec.o \
+  $(B)/client/theora/idct.o \
+  $(B)/client/theora/info.o \
+  $(B)/client/theora/internal.o \
+  $(B)/client/theora/quant.o \
+  $(B)/client/theora/state.o
+
+THEORA_OBJ_X86 = \
+  $(B)/client/theora/mmxidct.o \
+  $(B)/client/theora/mmxfrag.o \
+  $(B)/client/theora/mmxstate.o \
+  $(B)/client/theora/x86state.o
+
+ifeq ($(ARCH),x86)
+  Q3OBJ += $(THEORA_OBJ_X86)
+endif
+ifeq ($(ARCH),x86_64)
+  Q3OBJ += $(THEORA_OBJ_X86)
+endif
+endif
+endif
 ifeq ($(NEED_OPUS),1)
 ifeq ($(USE_INTERNAL_OPUS),1)
 Q3OBJ += \
@@ -2765,6 +2815,12 @@ $(B)/client/%.o: $(OGGDIR)/src/%.c
 $(B)/client/vorbis/%.o: $(VORBISDIR)/lib/%.c
 	$(DO_CC)
 
+$(B)/client/theora/%.o: $(THEORADIR)/lib/%.c
+	$(DO_CC)
+
+$(B)/client/theora/%.o: $(THEORADIR)/lib/x86/%.c
+	$(DO_CC)
+
 $(B)/client/opus/%.o: $(OPUSDIR)/src/%.c
 	$(DO_CC)
 
diff --git a/engine/code/cgame/cg_local.h b/engine/code/cgame/cg_local.h
index c7af11b0..8ae07e5b 100644
--- a/engine/code/cgame/cg_local.h
+++ b/engine/code/cgame/cg_local.h
@@ -1520,12 +1520,8 @@ screenPlacement_e CG_GetScreenVerticalPlacement(void);
 void CG_AdjustFrom640( float *x, float *y, float *w, float *h );
 void CG_FillRect( float x, float y, float width, float height, const float *color );
 void CG_DrawPic( float x, float y, float width, float height, qhandle_t hShader );
-void CG_DrawString( float x, float y, const char *string, 
-				   float charWidth, float charHeight, const float *modulate );
-
-
-void CG_DrawStringExt( int x, int y, const char *string, const float *setColor, 
-		qboolean forceColor, qboolean shadow, int charWidth, int charHeight, int maxChars );
+void CG_DrawString( float x, float y, const char *string, float charWidth, float charHeight, const float *modulate );
+void CG_DrawStringExt( int x, int y, const char *string, const float *setColor, qboolean forceColor, qboolean shadow, int charWidth, int charHeight, int maxChars );
 void CG_DrawBigString( int x, int y, const char *s, float alpha );
 void CG_DrawBigStringColor( int x, int y, const char *s, vec4_t color );
 void CG_DrawSmallString( int x, int y, const char *s, float alpha );
@@ -1555,15 +1551,10 @@ extern  char systemChat[256];
 extern  char teamChat1[256];
 extern  char teamChat2[256];
 
-// Q3Rally Code Start
 float CG_DrawScores( float x, float y );
-// Q3Rally Code END
 void CG_AddLagometerFrameInfo( void );
 void CG_AddLagometerSnapshotInfo( snapshot_t *snap );
 void CG_CenterPrint( const char *str, int y, int charWidth );
-// Q3Rally Code (removed function)
-// void CG_DrawHead( float x, float y, float w, float h, int clientNum, vec3_t headAngles );
-// Q3Rally Code END
 void CG_DrawActive( stereoFrame_t stereoView );
 void CG_DrawFlagModel( float x, float y, float w, float h, int team, qboolean force2D );
 void CG_DrawTeamBackground( int x, int y, int w, int h, float alpha, int team );
@@ -1876,7 +1867,7 @@ float CG_DrawUpperRightHUD( float y );
 float CG_DrawLowerRightHUD( float y );
 float CG_DrawLowerLeftHUD( float y );
 void CG_DrawMMap( float x, float y, float w, float h );
-// void CG_DrawHUD_DerbyList(float x, float y);
+void CG_DrawHUD_DerbyList(float x, float y);
 
 
 //
diff --git a/engine/code/cgame/cg_rally_hud.c b/engine/code/cgame/cg_rally_hud.c
index f24f8f08..ac506262 100644
--- a/engine/code/cgame/cg_rally_hud.c
+++ b/engine/code/cgame/cg_rally_hud.c
@@ -352,9 +352,8 @@ static float CG_DrawArrowToCheckpoint( float y ) {
 		CG_Draw3DLine( cent->currentState.origin, cg.snap->ps.origin );
         
 */
-
-	CG_DrawStringExt( x, SCREEN_HEIGHT * .30, "WRONG WAY!", color, qfalse, qtrue,
-		BIGCHAR_WIDTH, (int)(BIGCHAR_WIDTH * 1.5), 0 );
+//    CG_SetScreenPlacement(PLACE_CENTER, PLACE_CENTER);
+	CG_DrawStringExt( x, SCREEN_HEIGHT * .30, "WRONG WAY!", color, qfalse, qtrue, BIGCHAR_WIDTH, (int)(BIGCHAR_WIDTH * 1.5), 0 );
 
 	return y;
 }
diff --git a/engine/code/cgame/cg_rally_hud2.c b/engine/code/cgame/cg_rally_hud2.c
index b949fb7b..58bedc7f 100644
--- a/engine/code/cgame/cg_rally_hud2.c
+++ b/engine/code/cgame/cg_rally_hud2.c
@@ -467,7 +467,7 @@ qboolean CG_DrawHUD( void ) {
 		break;
 
 	case GT_DERBY:
-		CG_DrawHUD_DerbyList(44, 130);
+//		CG_DrawHUD_DerbyList(44, 130);
 
 		break;
 	}
diff --git a/engine/code/client/cl_cin.c b/engine/code/client/cl_cin.c
index cdc3ab3e..4e0dbbdf 100644
--- a/engine/code/client/cl_cin.c
+++ b/engine/code/client/cl_cin.c
@@ -73,6 +73,10 @@ static	unsigned short		vq2[256*16*4];
 static	unsigned short		vq4[256*64*4];
 static	unsigned short		vq8[256*256*4];
 
+typedef enum {
+	FT_ROQ = 0,				// normal roq (vq3 stuff)
+	FT_OGM					// ogm(ogg wrapper, vorbis audio, xvid/theora video) for WoP
+} filetype_t;
 
 typedef struct {
 	byte				linbuf[DEFAULT_CIN_WIDTH*DEFAULT_CIN_HEIGHT*4*2];
@@ -125,6 +129,7 @@ typedef struct {
 	int					playonwalls;
 	byte*				buf;
 	long				drawX, drawY;
+    filetype_t			fileType;
 } cin_cache;
 
 static cinematics_t		cin;
@@ -387,8 +392,9 @@ static void blit8_32( byte *src, byte *dst, int spl  )
 ******************************************************************************/
 static void blit4_32( byte *src, byte *dst, int spl  )
 {
-	int i;
 
+    int i;
+    
 	for(i = 0; i < 4; ++i)
 	{
 		memmove(dst, src, 16);
@@ -505,7 +511,7 @@ int		spl;
 *
 ******************************************************************************/
 
-static void ROQ_GenYUVTables( void )
+void ROQ_GenYUVTables( void )
 {
 	float t_ub,t_vr,t_ug,t_vg;
 	long i;
@@ -612,6 +618,51 @@ static unsigned int yuv_to_rgb24( long y, long u, long v )
 	return LittleLong ((unsigned long)((r)|(g<<8)|(b<<16))|(255UL<<24));
 }
 
+/******************************************************************************
+*
+* Function:	Frame_yuv_to_rgb24
+*
+* Description: Used by the Theora(ogm) code
+*		moved the convertion into one function, to reduce the number of function-calls
+*
+******************************************************************************/
+void Frame_yuv_to_rgb24(const unsigned char *y, const unsigned char *u, const unsigned char *v,
+		int width, int height, int y_stride, int uv_stride,
+		int yWShift, int uvWShift, int yHShift, int uvHShift, unsigned int *output)
+{
+	int		i, j, uvI;
+	long	r, g, b, YY;
+
+	for(j = 0; j < height; ++j)
+	{
+		for(i = 0; i < width; ++i)
+		{
+			YY = (long)(ROQ_YY_tab[(y[(i >> yWShift) + (j >> yHShift) * y_stride])]);
+			uvI = (i >> uvWShift) + (j >> uvHShift) * uv_stride;
+
+			r = (YY + ROQ_VR_tab[v[uvI]]) >> 6;
+			g = (YY + ROQ_UG_tab[u[uvI]] + ROQ_VG_tab[v[uvI]]) >> 6;
+			b = (YY + ROQ_UB_tab[u[uvI]]) >> 6;
+
+			if(r < 0)
+				r = 0;
+			if(g < 0)
+				g = 0;
+			if(b < 0)
+				b = 0;
+			if(r > 255)
+				r = 255;
+			if(g > 255)
+				g = 255;
+			if(b > 255)
+				b = 255;
+
+			*output = LittleLong((r) | (g << 8) | (b << 16) | (255 << 24));
+			++output;
+		}
+	}
+}
+
 /******************************************************************************
 *
 * Function:		
@@ -1262,10 +1313,12 @@ static void RoQ_init( void )
 *
 ******************************************************************************/
 
+//FIXME: this isn't realy a "roq-shutdown" (it's more a CIN-shutdown, beside the file-closing)
 static void RoQShutdown( void ) {
 	const char *s;
 
 	if (!cinTable[currentHandle].buf) {
+    //FIXME: there could be something that should be "shutdowned" even if we don't have a output frame (at least in the ogm code)
 		return;
 	}
 
@@ -1294,6 +1347,11 @@ static void RoQShutdown( void ) {
 		CL_handle = -1;
 	}
 	cinTable[currentHandle].fileName[0] = 0;
+    	if (cinTable[currentHandle].fileType == FT_OGM)
+	{
+		Cin_OGM_Shutdown();
+		cinTable[currentHandle].buf = NULL;
+	}
 	currentHandle = -1;
 }
 
@@ -1364,6 +1422,78 @@ e_status CIN_RunCinematic (int handle)
 		return cinTable[currentHandle].status;
 	}
 
+if (cinTable[currentHandle].fileType == FT_OGM)
+	{
+		if (Cin_OGM_Run(cinTable[currentHandle].startTime == 0 ? 0 : CL_ScaledMilliseconds() - cinTable[currentHandle].startTime))
+			cinTable[currentHandle].status = FMV_EOF;
+		else
+		{
+			int			newW, newH;
+			qboolean	resolutionChange = qfalse;
+
+			cinTable[currentHandle].buf = Cin_OGM_GetOutput(&newW, &newH);
+
+			if (newW != cinTable[currentHandle].CIN_WIDTH)
+			{
+				cinTable[currentHandle].CIN_WIDTH = newW;
+				resolutionChange = qtrue;
+			}
+			if (newH != cinTable[currentHandle].CIN_HEIGHT)
+			{
+				cinTable[currentHandle].CIN_HEIGHT = newH;
+				resolutionChange = qtrue;
+			}
+
+			if (resolutionChange)
+			{
+				cinTable[currentHandle].drawX = cinTable[currentHandle].CIN_WIDTH;
+				cinTable[currentHandle].drawY = cinTable[currentHandle].CIN_HEIGHT;
+
+				// some old drivers can't do it at all
+				if (cls.glconfig.maxTextureSize <= 256) {
+					if (cinTable[currentHandle].drawX>256) {
+						cinTable[currentHandle].drawX = 256;
+					}
+					if (cinTable[currentHandle].drawY>256) {
+						cinTable[currentHandle].drawY = 256;
+					}
+					if (cinTable[currentHandle].CIN_WIDTH != 256 || cinTable[currentHandle].CIN_HEIGHT != 256) {
+						Com_Printf("HACK: approxmimating cinematic to 256x256 from %dx%d\n", cinTable[currentHandle].CIN_WIDTH, cinTable[currentHandle].CIN_HEIGHT);
+					}
+				}
+			}
+
+			cinTable[currentHandle].status = FMV_PLAY;
+			cinTable[currentHandle].dirty = qtrue;
+		}
+
+		if (!cinTable[currentHandle].startTime)
+			cinTable[currentHandle].startTime = CL_ScaledMilliseconds();
+
+		if (cinTable[currentHandle].status == FMV_EOF)
+		{
+			if (cinTable[currentHandle].holdAtEnd)
+			{
+				cinTable[currentHandle].status = FMV_IDLE;
+			}
+			else if (cinTable[currentHandle].looping)
+			{
+				Cin_OGM_Shutdown();
+				Cin_OGM_Init(cinTable[currentHandle].fileName);
+				cinTable[currentHandle].buf = NULL;
+				cinTable[currentHandle].startTime = 0;
+				cinTable[currentHandle].status = FMV_PLAY;
+			}
+			else
+			{
+				RoQShutdown();
+//              Cin_OGM_Shutdown();
+			}
+		}
+
+		return cinTable[currentHandle].status;
+	}
+
 	thisTime = CL_ScaledMilliseconds();
 	if (cinTable[currentHandle].shader && (abs(thisTime - cinTable[currentHandle].lastTime))>100) {
 		cinTable[currentHandle].startTime += thisTime - cinTable[currentHandle].lastTime;
@@ -1399,6 +1529,97 @@ e_status CIN_RunCinematic (int handle)
 	return cinTable[currentHandle].status;
 }
 
+// Also see S_TheCheckExtension
+qboolean CIN_TheCheckExtension(char *filename)
+{
+	enum
+	{
+		CIN_RoQ,
+		CIN_roq,
+#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA))
+		CIN_ogm,
+		CIN_ogv,
+#endif
+		CIN_MAX
+	};
+	const char cin_ext[CIN_MAX][4] = { "RoQ\0", "roq\0"
+#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA))
+		, "ogm\0", "ogv\0"
+#endif
+		};
+	qboolean skipCin[CIN_MAX] = { qfalse, qfalse
+#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA))
+		, qfalse, qfalse
+#endif
+		};
+	fileHandle_t hnd;
+	char fn[MAX_QPATH];
+	int stringlen = strlen(filename);
+	char *extptr;
+	int i;
+
+	strncpy(fn, filename, stringlen+1);
+	extptr = strrchr(fn, '.');
+
+	if(!extptr)
+	{
+		extptr = &fn[stringlen];
+
+		extptr[0] = '.';
+		extptr[1] = 'R';
+		extptr[2] = 'o';
+		extptr[3] = 'Q';
+		extptr[4] = '\0';
+
+		stringlen += 4;
+
+		skipCin[CIN_RoQ] = qtrue;
+	}
+
+	FS_FOpenFileRead(fn, &hnd, qtrue);
+
+	if (!hnd)
+	{
+		extptr++;
+
+		for (i = 0; i < CIN_MAX; i++)
+		{
+			if (!strcmp(extptr, cin_ext[i]))
+			{
+				skipCin[i] = qtrue;
+				break;
+			}
+		}
+
+		for (i = 0; i < CIN_MAX; i++)
+		{
+			if (skipCin[i]) {
+				continue;
+			}
+
+			extptr[0] = cin_ext[i][0];
+			extptr[1] = cin_ext[i][1];
+			extptr[2] = cin_ext[i][2];
+			extptr[3] = '\0';
+
+			FS_FOpenFileRead(fn, &hnd, qtrue);
+
+			if (hnd) {
+				break;
+			}
+		}
+
+		if(!hnd) {
+			return qfalse;
+		}
+	}
+
+	FS_FCloseFile(hnd);
+	strcpy(filename, fn);
+
+	return qtrue;
+}
+
 /*
 ==================
 CIN_PlayCinematic
@@ -1407,6 +1628,9 @@ CIN_PlayCinematic
 int CIN_PlayCinematic( const char *arg, int x, int y, int w, int h, int systemBits ) {
 	unsigned short RoQID;
 	char	name[MAX_OSPATH];
+#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA))
+	const char	*ext;
+#endif
 	int		i;
 
 	if (strstr(arg, "/") == NULL && strstr(arg, "\\") == NULL) {
@@ -1415,6 +1639,12 @@ int CIN_PlayCinematic( const char *arg, int x, int y, int w, int h, int systemBi
 		Com_sprintf (name, sizeof(name), "%s", arg);
 	}
 
+	if (!CIN_TheCheckExtension(name))
+	{
+		// Can't find video
+		return -1;
+	}
+
 	if (!(systemBits & CIN_system)) {
 		for ( i = 0 ; i < MAX_VIDEO_HANDLES ; i++ ) {
 			if (!strcmp(cinTable[i].fileName, name) ) {
@@ -1428,10 +1658,56 @@ int CIN_PlayCinematic( const char *arg, int x, int y, int w, int h, int systemBi
 	Com_Memset(&cin, 0, sizeof(cinematics_t) );
 	currentHandle = CIN_HandleForVideo();
 
+    Com_Memset(&cinTable[currentHandle], 0, sizeof(cin_cache));
+
 	cin.currentHandle = currentHandle;
 
 	strcpy(cinTable[currentHandle].fileName, name);
 
+#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA))
+	ext = COM_GetExtension(name);
+	if (!Q_stricmp(ext, "ogm") || !Q_stricmp(ext, "ogv"))
+	{
+		if (Cin_OGM_Init(name))
+		{
+			Com_DPrintf("starting ogm-playback failed(%s)\n", arg);
+			cinTable[currentHandle].fileName[0] = 0;
+			Cin_OGM_Shutdown();
+			return -1;
+		}
+
+		cinTable[currentHandle].fileType = FT_OGM;
+
+		CIN_SetExtents(currentHandle, x, y, w, h);
+		CIN_SetLooping(currentHandle, (systemBits & CIN_loop) != 0);
+
+		cinTable[currentHandle].holdAtEnd = (systemBits & CIN_hold) != 0;
+		cinTable[currentHandle].alterGameState = (systemBits & CIN_system) != 0;
+		cinTable[currentHandle].playonwalls = 1;
+		cinTable[currentHandle].silent = (systemBits & CIN_silent) != 0;
+		cinTable[currentHandle].shader = (systemBits & CIN_shader) != 0;
+
+/* we will set this info after the first xvid-frame
+		cinTable[currentHandle].CIN_HEIGHT = DEFAULT_CIN_HEIGHT;
+		cinTable[currentHandle].CIN_WIDTH  =  DEFAULT_CIN_WIDTH;
+*/
+
+		if (cinTable[currentHandle].alterGameState) {
+			CL_ShowMainMenu();
+		} else {
+			cinTable[currentHandle].playonwalls = cl_inGameVideo->integer;
+		}
+
+		if (cinTable[currentHandle].alterGameState) {
+			clc.state = CA_CINEMATIC;
+		}
+
+		cinTable[currentHandle].status = FMV_PLAY;
+
+		return currentHandle;
+	}
+#endif
+
 	cinTable[currentHandle].ROQSize = 0;
 	cinTable[currentHandle].ROQSize = FS_FOpenFileRead (cinTable[currentHandle].fileName, &cinTable[currentHandle].iFile, qtrue);
 
diff --git a/engine/code/client/cl_cin_ogm.c b/engine/code/client/cl_cin_ogm.c
new file mode 100644
index 00000000..c40b42d3
--- /dev/null
+++ b/engine/code/client/cl_cin_ogm.c
@@ -0,0 +1,999 @@
+/*
+===========================================================================
+Copyright (C) 2008 Stefan Langer <raute@users.sourceforge.net>
+
+This file is part of Spearmint Source Code.
+
+Spearmint Source Code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+Spearmint Source Code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Spearmint Source Code.  If not, see <http://www.gnu.org/licenses/>.
+
+In addition, Spearmint Source Code is also subject to certain additional terms.
+You should have received a copy of these additional terms immediately following
+the terms and conditions of the GNU General Public License.  If not, please
+request a copy in writing from id Software at the address below.
+
+If you have questions concerning this license or the applicable additional
+terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc.,
+Suite 120, Rockville, Maryland 20850 USA.
+===========================================================================
+*/
+
+/*
+
+  This is a "ogm"-decoder to use a "better"(smaller files,higher resolutions) Cinematic-Format than roq
+
+  In this code "ogm" is only: ogg wrapper, vorbis audio, xvid video (or theora video)
+    (ogm(Ogg Media) in general is ogg wrapper with all kind of audio/video/subtitle/...)
+
+... infos used for this src:
+xvid:
+ * examples/xvid_decraw.c
+ * xvid.h
+ogg/vobis:
+ * decoder_example.c (libvorbis src)
+ * libogg Documentation ( http://www.xiph.org/ogg/doc/libogg/ )
+ * VLC ogg demux ( http://trac.videolan.org/vlc/browser/trunk/modules/demux/ogg.c )
+theora:
+ * theora doxygen docs (1.0beta1)
+*/
+
+#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA))
+
+#include <ogg/ogg.h>
+#include <vorbis/codec.h>
+
+#ifdef USE_CIN_XVID
+#include <xvid.h>
+#endif
+#ifdef USE_CIN_THEORA
+#include <theora/theora.h>
+#endif
+
+#include "client.h"
+#include "snd_local.h"
+
+#define OGG_BUFFER_SIZE	8*1024	//4096
+
+typedef struct
+{
+	fileHandle_t    ogmFile;
+
+	ogg_sync_state  oy;			/* sync and verify incoming physical bitstream */
+	//ogg_stream_state os; /* take physical pages, weld into a logical stream of packets */
+	ogg_stream_state os_audio;
+	ogg_stream_state os_video;
+
+	vorbis_dsp_state vd;		/* central working state for the packet->PCM decoder */
+	vorbis_info     vi;			/* struct that stores all the static vorbis bitstream settings */
+	vorbis_comment  vc;			/* struct that stores all the bitstream user comments */
+
+	qboolean        videoStreamIsXvid;	//FIXME: atm there isn't realy a check for this (all "video" streams are handelt as xvid, because xvid support more than one "subtype")
+#ifdef USE_CIN_XVID
+	xvid_dec_stats_t xvid_dec_stats;
+	void           *xvid_dec_handle;
+#endif
+	qboolean        videoStreamIsTheora;
+#ifdef USE_CIN_THEORA
+	theora_info     th_info;	// dump_video.c(example decoder): ti
+	theora_comment  th_comment;	// dump_video.c(example decoder): tc
+	theora_state    th_state;	// dump_video.c(example decoder): td
+
+	yuv_buffer      th_yuvbuffer;
+#endif
+
+	unsigned char  *outputBuffer;
+	int             outputWidht;
+	int             outputHeight;
+	int             outputBufferSize;	// in Pixel (so "real Bytesize" = outputBufferSize*4)
+	int             VFrameCount;	// output video-stream
+	ogg_int64_t     Vtime_unit;
+	int             currentTime;	// input from Run-function
+} cin_ogm_t;
+
+static cin_ogm_t g_ogm;
+
+int             nextNeededVFrame(void);
+
+
+/* ####################### #######################
+
+  XVID
+
+*/
+#ifdef USE_CIN_XVID
+
+#define BPP 4
+
+static int init_xvid(void)
+{
+	int             ret;
+
+	xvid_gbl_init_t xvid_gbl_init;
+	xvid_dec_create_t xvid_dec_create;
+
+	/* Reset the structure with zeros */
+	memset(&xvid_gbl_init, 0, sizeof(xvid_gbl_init_t));
+	memset(&xvid_dec_create, 0, sizeof(xvid_dec_create_t));
+
+	/* Version */
+	xvid_gbl_init.version = XVID_VERSION;
+
+	xvid_gbl_init.cpu_flags = 0;
+	xvid_gbl_init.debug = 0;
+
+	xvid_global(NULL, 0, &xvid_gbl_init, NULL);
+
+	/* Version */
+	xvid_dec_create.version = XVID_VERSION;
+
+	/*
+	 * Image dimensions -- set to 0, xvidcore will resize when ever it is
+	 * needed
+	 */
+	xvid_dec_create.width = 0;
+	xvid_dec_create.height = 0;
+
+	ret = xvid_decore(NULL, XVID_DEC_CREATE, &xvid_dec_create, NULL);
+
+	g_ogm.xvid_dec_handle = xvid_dec_create.handle;
+
+	return (ret);
+}
+
+static int dec_xvid(unsigned char *input, int input_size)
+{
+	int             ret;
+
+	xvid_dec_frame_t xvid_dec_frame;
+
+	/* Reset all structures */
+	memset(&xvid_dec_frame, 0, sizeof(xvid_dec_frame_t));
+	memset(&g_ogm.xvid_dec_stats, 0, sizeof(xvid_dec_stats_t));
+
+	/* Set version */
+	xvid_dec_frame.version = XVID_VERSION;
+	g_ogm.xvid_dec_stats.version = XVID_VERSION;
+
+	/* No general flags to set */
+	xvid_dec_frame.general = XVID_LOWDELAY;	//0;
+
+	/* Input stream */
+	xvid_dec_frame.bitstream = input;
+	xvid_dec_frame.length = input_size;
+
+	/* Output frame structure */
+	xvid_dec_frame.output.plane[0] = g_ogm.outputBuffer;
+	xvid_dec_frame.output.stride[0] = g_ogm.outputWidht * BPP;
+	if(g_ogm.outputBuffer == NULL)
+		xvid_dec_frame.output.csp = XVID_CSP_NULL;
+	else
+		xvid_dec_frame.output.csp = XVID_CSP_RGBA;	// example was with XVID_CSP_I420
+
+	ret = xvid_decore(g_ogm.xvid_dec_handle, XVID_DEC_DECODE, &xvid_dec_frame, &g_ogm.xvid_dec_stats);
+
+	return (ret);
+}
+
+static int shutdown_xvid(void)
+{
+	int             ret = 0;
+
+	if(g_ogm.xvid_dec_handle)
+		ret = xvid_decore(g_ogm.xvid_dec_handle, XVID_DEC_DESTROY, NULL, NULL);
+
+	return (ret);
+}
+#endif
+
+/* ####################### #######################
+
+  OGG/OGM
+  ... also calls to vorbis/theora-libs
+
+*/
+
+/*
+  loadBlockToSync
+
+  return:
+  !0 -> no data transferred
+*/
+static int loadBlockToSync(void)
+{
+	int             r = -1;
+	char           *buffer;
+	int             bytes;
+
+	if(g_ogm.ogmFile)
+	{
+		buffer = ogg_sync_buffer(&g_ogm.oy, OGG_BUFFER_SIZE);
+		bytes = FS_Read(buffer, OGG_BUFFER_SIZE, g_ogm.ogmFile);
+		ogg_sync_wrote(&g_ogm.oy, bytes);
+
+		r = (bytes == 0);
+	}
+
+	return r;
+}
+
+/*
+  loadPagesToStreams
+
+  return:
+  !0 -> no data transferred (or not for all Streams)
+*/
+static int loadPagesToStreams(void)
+{
+	int             r = -1;
+	int             AudioPages = 0;
+	int             VideoPages = 0;
+	ogg_stream_state *osptr = NULL;
+	ogg_page        og;
+
+	while(!AudioPages || !VideoPages)
+	{
+		if(ogg_sync_pageout(&g_ogm.oy, &og) != 1)
+			break;
+
+		if(g_ogm.os_audio.serialno == ogg_page_serialno(&og))
+		{
+			osptr = &g_ogm.os_audio;
+			++AudioPages;
+		}
+		if(g_ogm.os_video.serialno == ogg_page_serialno(&og))
+		{
+			osptr = &g_ogm.os_video;
+			++VideoPages;
+		}
+
+		if(osptr != NULL)
+		{
+			ogg_stream_pagein(osptr, &og);
+		}
+	}
+
+	if(AudioPages && VideoPages)
+		r = 0;
+
+	return r;
+}
+
+#define SIZEOF_RAWBUFF 4*1024
+static byte     rawBuffer[SIZEOF_RAWBUFF];
+
+#define MIN_AUDIO_PRELOAD 400	// in ms
+#define MAX_AUDIO_PRELOAD 500	// in ms
+
+
+/*
+
+  return: audio wants more packets
+*/
+static qboolean loadAudio(void)
+{
+	qboolean        anyDataTransferred = qtrue;
+	float         **pcm;
+	float          *right, *left;
+	int             samples, samplesNeeded;
+	int             i;
+	short          *ptr;
+	ogg_packet      op;
+	vorbis_block    vb;
+
+	memset(&op, 0, sizeof(op));
+	memset(&vb, 0, sizeof(vb));
+	vorbis_block_init(&g_ogm.vd, &vb);
+
+	while(anyDataTransferred && g_ogm.currentTime + MAX_AUDIO_PRELOAD > (int)(g_ogm.vd.granulepos * 1000 / g_ogm.vi.rate))
+	{
+		anyDataTransferred = qfalse;
+
+		if((samples = vorbis_synthesis_pcmout(&g_ogm.vd, &pcm)) > 0)
+		{
+			// vorbis -> raw
+			ptr = (short *)rawBuffer;
+			samplesNeeded = (SIZEOF_RAWBUFF) / (2 * 2);	// (width*channel)
+			if(samples < samplesNeeded)
+				samplesNeeded = samples;
+
+			left = pcm[0];
+			right = (g_ogm.vi.channels > 1) ? pcm[1] : pcm[0];
+			for(i = 0; i < samplesNeeded; ++i)
+			{
+				ptr[0] = (left[i] >= -1.0f &&
+						  left[i] <= 1.0f) ? left[i] * 32767.f : 32767 * ((left[i] > 0.0f) - (left[i] < 0.0f));
+				ptr[1] = (right[i] >= -1.0f &&
+						  right[i] <= 1.0f) ? right[i] * 32767.f : 32767 * ((right[i] > 0.0f) - (right[i] < 0.0f));
+				ptr += 2;		//numChans;
+			}
+
+			if(i > 0)
+			{
+				// tell libvorbis how many samples we actually consumed
+				vorbis_synthesis_read(&g_ogm.vd, i);
+
+//              S_RawSamples(ssize, 22050, 2, 2, (byte *)sbuf, 1.0f, -1);
+				S_RawSamples(0, i, g_ogm.vi.rate, 2, 2, rawBuffer, 1.0f, -1);
+
+				anyDataTransferred = qtrue;
+			}
+		}
+
+		if(!anyDataTransferred)
+		{
+			// op -> vorbis
+			if(ogg_stream_packetout(&g_ogm.os_audio, &op))
+			{
+				if(vorbis_synthesis(&vb, &op) == 0)
+					vorbis_synthesis_blockin(&g_ogm.vd, &vb);
+				anyDataTransferred = qtrue;
+			}
+		}
+	}
+
+	vorbis_block_clear(&vb);
+
+	if(g_ogm.currentTime + MIN_AUDIO_PRELOAD > (int)(g_ogm.vd.granulepos * 1000 / g_ogm.vi.rate))
+		return qtrue;
+	else
+		return qfalse;
+}
+
+/*
+
+  return:	1	-> loaded a new Frame ( g_ogm.outputBuffer points to the actual frame )
+			0	-> no new Frame
+			<0	-> error
+*/
+#ifdef USE_CIN_XVID
+static int loadVideoFrameXvid(void)
+{
+	int             r = 0;
+	ogg_packet      op;
+	int             used_bytes = 0;
+
+	memset(&op, 0, sizeof(op));
+
+	while(!r && (ogg_stream_packetout(&g_ogm.os_video, &op)))
+	{
+		used_bytes = dec_xvid(op.packet, op.bytes);
+		if(g_ogm.xvid_dec_stats.type == XVID_TYPE_VOL)
+		{
+			if(g_ogm.outputWidht != g_ogm.xvid_dec_stats.data.vol.width ||
+			   g_ogm.outputHeight != g_ogm.xvid_dec_stats.data.vol.height)
+			{
+				g_ogm.outputWidht = g_ogm.xvid_dec_stats.data.vol.width;
+				g_ogm.outputHeight = g_ogm.xvid_dec_stats.data.vol.height;
+				Com_DPrintf("[XVID]new resolution %dx%d\n", g_ogm.outputWidht, g_ogm.outputHeight);
+			}
+
+			if(g_ogm.outputBufferSize < g_ogm.xvid_dec_stats.data.vol.width * g_ogm.xvid_dec_stats.data.vol.height)
+			{
+
+				g_ogm.outputBufferSize = g_ogm.xvid_dec_stats.data.vol.width * g_ogm.xvid_dec_stats.data.vol.height;
+
+				/* Free old output buffer */
+				if(g_ogm.outputBuffer)
+					free(g_ogm.outputBuffer);
+
+				/* Allocate the new buffer */
+				g_ogm.outputBuffer = (unsigned char *)malloc(g_ogm.outputBufferSize * 4);	//FIXME? should the 4 stay for BPP?
+				if(g_ogm.outputBuffer == NULL)
+				{
+					g_ogm.outputBufferSize = 0;
+					r = -2;
+					break;
+				}
+			}
+
+			// use the rest of this packet
+			used_bytes += dec_xvid(op.packet + used_bytes, op.bytes - used_bytes);
+		}
+
+		// we got a real output frame ...
+		if(g_ogm.xvid_dec_stats.type > 0)
+		{
+			r = 1;
+
+			++g_ogm.VFrameCount;
+//          Com_Printf("frame infos: %d %d %d\n", xvid_dec_stats.data.vop.general, xvid_dec_stats.data.vop.time_base, xvid_dec_stats.data.vop.time_increment);
+//          Com_Printf("frame info time: %d (Frame# %d, %d)\n", xvid_dec_stats.data.vop.time_base, VFrameCount, (int)(VFrameCount*Vtime_unit/10000000));
+		}
+
+//      if((op.bytes-used_bytes)>0)
+//          Com_Printf("unused: %d(firstChar: %X)\n",(op.bytes-used_bytes),(int)(op.packet[used_bytes]));
+	}
+
+	return r;
+}
+#endif
+
+/*
+
+  return:	1	-> loaded a new Frame ( g_ogm.outputBuffer points to the actual frame )
+			0	-> no new Frame
+			<0	-> error
+*/
+#ifdef USE_CIN_THEORA
+/*
+how many >> are needed to make y==x (shifting y>>i)
+return: -1	-> no match
+		>=0	-> number of shifts
+*/
+static int findSizeShift(int x, int y)
+{
+	int             i;
+
+	for(i = 0; (y >> i); ++i)
+		if(x == (y >> i))
+			return i;
+
+	return -1;
+}
+
+static int loadVideoFrameTheora(void)
+{
+	int             r = 0;
+	ogg_packet      op;
+
+	memset(&op, 0, sizeof(op));
+
+	while(!r && (ogg_stream_packetout(&g_ogm.os_video, &op)))
+	{
+		ogg_int64_t     th_frame;
+
+		theora_decode_packetin(&g_ogm.th_state, &op);
+
+		th_frame = theora_granule_frame(&g_ogm.th_state, g_ogm.th_state.granulepos);
+
+		if((g_ogm.VFrameCount < th_frame && th_frame >= nextNeededVFrame()) || !g_ogm.outputBuffer)
+		{
+//          int i,j;
+			int             yWShift, uvWShift;
+			int             yHShift, uvHShift;
+
+			if(theora_decode_YUVout(&g_ogm.th_state, &g_ogm.th_yuvbuffer))
+				continue;
+
+			if(g_ogm.outputWidht != g_ogm.th_info.width || g_ogm.outputHeight != g_ogm.th_info.height)
+			{
+				g_ogm.outputWidht = g_ogm.th_info.width;
+				g_ogm.outputHeight = g_ogm.th_info.height;
+				Com_DPrintf("[Theora(ogg)]new resolution %dx%d\n", g_ogm.outputWidht, g_ogm.outputHeight);
+			}
+
+			if(g_ogm.outputBufferSize < g_ogm.th_info.width * g_ogm.th_info.height)
+			{
+
+				g_ogm.outputBufferSize = g_ogm.th_info.width * g_ogm.th_info.height;
+
+				/* Free old output buffer */
+				if(g_ogm.outputBuffer)
+					free(g_ogm.outputBuffer);
+
+				/* Allocate the new buffer */
+				g_ogm.outputBuffer = (unsigned char *)malloc(g_ogm.outputBufferSize * 4);
+				if(g_ogm.outputBuffer == NULL)
+				{
+					g_ogm.outputBufferSize = 0;
+					r = -2;
+					break;
+				}
+			}
+
+			yWShift = findSizeShift(g_ogm.th_yuvbuffer.y_width, g_ogm.th_info.width);
+			uvWShift = findSizeShift(g_ogm.th_yuvbuffer.uv_width, g_ogm.th_info.width);
+			yHShift = findSizeShift(g_ogm.th_yuvbuffer.y_height, g_ogm.th_info.height);
+			uvHShift = findSizeShift(g_ogm.th_yuvbuffer.uv_height, g_ogm.th_info.height);
+
+			if(yWShift < 0 || uvWShift < 0 || yHShift < 0 || uvHShift < 0)
+			{
+				Com_Printf("[Theora] unexpected resolution in a yuv-Frame\n");
+				r = -1;
+			}
+			else
+			{
+
+				Frame_yuv_to_rgb24(g_ogm.th_yuvbuffer.y, g_ogm.th_yuvbuffer.u, g_ogm.th_yuvbuffer.v,
+								   g_ogm.th_info.width, g_ogm.th_info.height, g_ogm.th_yuvbuffer.y_stride,
+								   g_ogm.th_yuvbuffer.uv_stride, yWShift, uvWShift, yHShift, uvHShift,
+								   (unsigned int *)g_ogm.outputBuffer);
+
+/*				unsigned char*	pixelPtr = g_ogm.outputBuffer;
+				unsigned int*	pixPtr;
+				pixPtr = (unsigned int*)g_ogm.outputBuffer;
+
+				//TODO: use one yuv->rgb funktion for the hole frame (the big amout of stack movement(yuv->rgb calls) couldn't be good ;) )
+				for(j=0;j<g_ogm.th_info.height;++j) {
+					for(i=0;i<g_ogm.th_info.width;++i) {
+#if 1
+						// simple grayscale-output ^^
+						pixelPtr[0] =
+							pixelPtr[1] =
+							pixelPtr[2] = g_ogm.th_yuvbuffer.y[i+j*g_ogm.th_yuvbuffer.y_stride];
+						pixelPtr+=4;
+
+#else
+						// using RoQ yuv->rgb code
+						*pixPtr++ = yuv_to_rgb24( g_ogm.th_yuvbuffer.y[(i>>yWShift)+(j>>yHShift)*g_ogm.th_yuvbuffer.y_stride],
+												g_ogm.th_yuvbuffer.u[(i>>uvWShift)+(j>>uvHShift)*g_ogm.th_yuvbuffer.uv_stride],
+												g_ogm.th_yuvbuffer.v[(i>>uvWShift)+(j>>uvHShift)*g_ogm.th_yuvbuffer.uv_stride]);
+#endif
+					}
+				}
+*/
+
+				r = 1;
+				g_ogm.VFrameCount = th_frame;
+			}
+		}
+
+
+	}
+
+	return r;
+}
+#endif
+
+
+/*
+
+  return:	1	-> loaded a new Frame ( g_ogm.outputBuffer points to the actual frame )
+			0	-> no new Frame
+			<0	-> error
+*/
+static int loadVideoFrame(void)
+{
+#ifdef USE_CIN_XVID
+	if(g_ogm.videoStreamIsXvid)
+		return loadVideoFrameXvid();
+#endif
+#ifdef USE_CIN_THEORA
+	if(g_ogm.videoStreamIsTheora)
+		return loadVideoFrameTheora();
+#endif
+
+	// if we come to this point, there will be no codec that use the stream content ...
+	if(g_ogm.os_video.serialno)
+	{
+		ogg_packet      op;
+
+		while(ogg_stream_packetout(&g_ogm.os_video, &op));
+	}
+
+	return 1;
+}
+
+/*
+
+  return: qtrue => noDataTransferred
+*/
+static qboolean loadFrame(void)
+{
+	qboolean        anyDataTransferred = qtrue;
+	qboolean        needVOutputData = qtrue;
+
+//  qboolean audioSDone = qfalse;
+//  qboolean videoSDone = qfalse;
+	qboolean        audioWantsMoreData = qfalse;
+	int             status;
+
+	while(anyDataTransferred && (needVOutputData || audioWantsMoreData))
+	{
+		anyDataTransferred = qfalse;
+
+//      xvid -> "gl" ? videoDone : needPacket
+//      vorbis -> raw sound ? audioDone : needPacket
+//      anyDataTransferred = videoDone && audioDone;
+//      needVOutputData = videoDone && audioDone;
+//      if needPacket
+		{
+//          videoStream -> xvid ? videoStreamDone : needPage
+//          audioSteam -> vorbis ? audioStreamDone : needPage
+//          anyDataTransferred = audioStreamDone && audioStreamDone;
+
+			if(needVOutputData && (status = loadVideoFrame()))
+			{
+				needVOutputData = qfalse;
+				if(status > 0)
+					anyDataTransferred = qtrue;
+				else
+					anyDataTransferred = qfalse;	// error (we don't need any videodata and we had no transferred)
+			}
+
+//          if needPage
+			if(needVOutputData || audioWantsMoreData)
+			{
+				// try to transfer Pages to the audio- and video-Stream
+				if(loadPagesToStreams())
+				{
+					// try to load a datablock from file
+					anyDataTransferred |= !loadBlockToSync();
+				}
+				else
+					anyDataTransferred = qtrue;	// successful loadPagesToStreams()
+			}
+
+			// load all Audio after loading new pages ...
+			if(g_ogm.VFrameCount > 1)	// wait some videoframes (it's better to have some delay, than a lagy sound)
+				audioWantsMoreData = loadAudio();
+		}
+	}
+
+//  ogg_packet_clear(&op);
+
+	return !anyDataTransferred;
+}
+
+//from VLC ogg.c ( http://trac.videolan.org/vlc/browser/trunk/modules/demux/ogg.c )
+typedef struct
+{
+	char            streamtype[8];
+	char            subtype[4];
+
+	ogg_int32_t     size;		/* size of the structure */
+
+	ogg_int64_t     time_unit;	/* in reference time */// in 10^-7 seconds (dT between frames)
+	ogg_int64_t     samples_per_unit;
+	ogg_int32_t     default_len;	/* in media time */
+
+	ogg_int32_t     buffersize;
+	ogg_int16_t     bits_per_sample;
+
+	union
+	{
+		struct
+		{
+			ogg_int32_t     width;
+			ogg_int32_t     height;
+		} stream_header_video;
+
+		struct
+		{
+			ogg_int16_t     channels;
+			ogg_int16_t     blockalign;
+			ogg_int32_t     avgbytespersec;
+		} stream_header_audio;
+	} sh;
+} stream_header_t;
+
+qboolean isPowerOf2(int x)
+{
+	int             bitsSet = 0;
+	int             i;
+
+	for(i = 0; i < sizeof(int) * 8; ++i)
+		if(x & (1 << i))
+			++bitsSet;
+
+	return (bitsSet <= 1);
+}
+
+/*
+
+  return: 0 -> no problem
+*/
+//TODO: vorbis/theora-header&init in sub-functions
+//TODO: "clean" error-returns ...
+int Cin_OGM_Init(const char *filename)
+{
+	int             status;
+	ogg_page        og;
+	ogg_packet      op;
+	int             i;
+
+	if(g_ogm.ogmFile)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: it seams there was already a ogm running, it will be killed to start %s\n", filename);
+		Cin_OGM_Shutdown();
+	}
+
+	memset(&g_ogm, 0, sizeof(cin_ogm_t));
+
+	FS_FOpenFileRead(filename, &g_ogm.ogmFile, qtrue);
+	if(!g_ogm.ogmFile)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Can't open ogm-file for reading (%s)\n", filename);
+		return -1;
+	}
+
+	ogg_sync_init(&g_ogm.oy);	/* Now we can read pages */
+
+	//FIXME? can serialno be 0 in ogg? (better way to check inited?)
+	//TODO: support for more than one audio stream? / detect files with one stream(or without correct ones)
+	while(!g_ogm.os_audio.serialno || !g_ogm.os_video.serialno)
+	{
+		if(ogg_sync_pageout(&g_ogm.oy, &og) == 1)
+		{
+			if(strstr((char *)(og.body + 1), "vorbis"))
+			{
+				//FIXME? better way to find audio stream
+				if(g_ogm.os_audio.serialno)
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: more than one audio stream, in ogm-file(%s) ... we will stay at the first one\n", filename);
+				}
+				else
+				{
+					ogg_stream_init(&g_ogm.os_audio, ogg_page_serialno(&og));
+					ogg_stream_pagein(&g_ogm.os_audio, &og);
+				}
+			}
+#ifdef USE_CIN_THEORA
+			if(strstr((char *)(og.body + 1), "theora"))
+			{
+				if(g_ogm.os_video.serialno)
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: more than one video stream, in ogm-file(%s) ... we will stay at the first one\n", filename);
+				}
+				else
+				{
+					g_ogm.videoStreamIsTheora = qtrue;
+					ogg_stream_init(&g_ogm.os_video, ogg_page_serialno(&og));
+					ogg_stream_pagein(&g_ogm.os_video, &og);
+				}
+			}
+#endif
+#ifdef USE_CIN_XVID
+			if(strstr((char *)(og.body + 1), "video"))
+			{					//FIXME? better way to find video stream
+				if(g_ogm.os_video.serialno)
+				{
+					Com_Printf("more than one video stream, in ogm-file(%s) ... we will stay at the first one\n", filename);
+				}
+				else
+				{
+					stream_header_t *sh;
+
+					g_ogm.videoStreamIsXvid = qtrue;
+
+					sh = (stream_header_t *) (og.body + 1);
+					//TODO: one solution for checking xvid and theora
+					if(!isPowerOf2(sh->sh.stream_header_video.width))
+					{
+						Com_Printf("VideoWidth of the ogm-file isn't a power of 2 value (%s)\n", filename);
+
+						return -5;
+					}
+					if(!isPowerOf2(sh->sh.stream_header_video.height))
+					{
+						Com_Printf("VideoHeight of the ogm-file isn't a power of 2 value (%s)\n", filename);
+
+						return -6;
+					}
+
+					g_ogm.Vtime_unit = sh->time_unit;
+
+					ogg_stream_init(&g_ogm.os_video, ogg_page_serialno(&og));
+					ogg_stream_pagein(&g_ogm.os_video, &og);
+				}
+			}
+#endif
+		}
+		else if(loadBlockToSync())
+			break;
+	}
+
+	if(g_ogm.videoStreamIsXvid && g_ogm.videoStreamIsTheora)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Found \"video\"- and \"theora\"-stream ,ogm-file (%s)\n", filename);
+		return -2;
+	}
+
+#if 1
+	if(!g_ogm.os_audio.serialno)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Haven't found a audio(vorbis) stream in ogm-file (%s)\n", filename);
+		return -2;
+	}
+#endif
+	if(!g_ogm.os_video.serialno)
+	{
+		Com_Printf(S_COLOR_YELLOW "WARNING: Haven't found a video stream in ogm-file (%s)\n", filename);
+		return -3;
+	}
+
+	//load vorbis header
+	vorbis_info_init(&g_ogm.vi);
+	vorbis_comment_init(&g_ogm.vc);
+	i = 0;
+	while(i < 3)
+	{
+		status = ogg_stream_packetout(&g_ogm.os_audio, &op);
+		if(status < 0)
+		{
+			Com_Printf(S_COLOR_YELLOW "WARNING: Corrupt ogg packet while loading vorbis-headers, ogm-file(%s)\n", filename);
+			return -8;
+		}
+		if(status > 0)
+		{
+			status = vorbis_synthesis_headerin(&g_ogm.vi, &g_ogm.vc, &op);
+			if(i == 0 && status < 0)
+			{
+				Com_Printf(S_COLOR_YELLOW "WARNING: This Ogg bitstream does not contain Vorbis audio data, ogm-file(%s)\n", filename);
+				return -9;
+			}
+			++i;
+		}
+		else if(loadPagesToStreams())
+		{
+			if(loadBlockToSync())
+			{
+				Com_Printf(S_COLOR_YELLOW "WARNING: Couldn't find all vorbis headers before end of ogm-file (%s)\n", filename);
+				return -10;
+			}
+		}
+	}
+
+	vorbis_synthesis_init(&g_ogm.vd, &g_ogm.vi);
+
+#ifdef USE_CIN_XVID
+	status = init_xvid();
+	if(status)
+	{
+		Com_Printf("[Xvid]Decore INIT problem, return value %d(ogm-file: %s)\n", status, filename);
+
+		return -4;
+	}
+#endif
+
+#ifdef USE_CIN_THEORA
+	if(g_ogm.videoStreamIsTheora)
+	{
+		ROQ_GenYUVTables();
+
+		theora_info_init(&g_ogm.th_info);
+		theora_comment_init(&g_ogm.th_comment);
+
+		i = 0;
+		while(i < 3)
+		{
+			status = ogg_stream_packetout(&g_ogm.os_video, &op);
+			if(status < 0)
+			{
+				Com_Printf(S_COLOR_YELLOW "WARNING: Corrupt ogg packet while loading theora-headers, ogm-file(%s)\n", filename);
+				return -8;
+			}
+			if(status > 0)
+			{
+				status = theora_decode_header(&g_ogm.th_info, &g_ogm.th_comment, &op);
+				if(i == 0 && status != 0)
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: This Ogg bitstream does not contain theora data, ogm-file(%s)\n", filename);
+					return -9;
+				}
+				++i;
+			}
+			else if(loadPagesToStreams())
+			{
+				if(loadBlockToSync())
+				{
+					Com_Printf(S_COLOR_YELLOW "WARNING: Couldn't find all theora headers before end of ogm-file (%s)\n", filename);
+					return -10;
+				}
+			}
+		}
+
+		theora_decode_init(&g_ogm.th_state, &g_ogm.th_info);
+
+		if(!isPowerOf2(g_ogm.th_info.width))
+		{
+			Com_Printf(S_COLOR_YELLOW "WARNING: VideoWidth of the ogm-file isn't a power of 2 value (%s)\n", filename);
+			return -5;
+		}
+		if(!isPowerOf2(g_ogm.th_info.height))
+		{
+			Com_Printf(S_COLOR_YELLOW "WARNING: VideoHeight of the ogm-file isn't a power of 2 value (%s)\n", filename);
+			return -6;
+		}
+
+		g_ogm.Vtime_unit = ((ogg_int64_t) g_ogm.th_info.fps_denominator * 1000 * 10000 / g_ogm.th_info.fps_numerator);
+	}
+#endif
+
+	Com_DPrintf("OGM-Init done (%s)\n", filename);
+
+	return 0;
+}
+
+int nextNeededVFrame(void)
+{
+	return (int)(g_ogm.currentTime * (ogg_int64_t) 10000 / g_ogm.Vtime_unit);
+}
+
+/*
+
+  time ~> time in ms to which the movie should run
+  return:	0 => nothing special
+			1 => eof
+*/
+int Cin_OGM_Run(int time)
+{
+
+	g_ogm.currentTime = time;
+
+	while(!g_ogm.VFrameCount || time + 20 >= (int)(g_ogm.VFrameCount * g_ogm.Vtime_unit / 10000))
+	{
+		if(loadFrame())
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+  Gives a Pointer to the current Output-Buffer
+  and the Resolution
+*/
+unsigned char  *Cin_OGM_GetOutput(int *outWidth, int *outHeight)
+{
+	if(outWidth != NULL)
+		*outWidth = g_ogm.outputWidht;
+	if(outHeight != NULL)
+		*outHeight = g_ogm.outputHeight;
+
+	return g_ogm.outputBuffer;
+}
+
+void Cin_OGM_Shutdown()
+{
+#ifdef USE_CIN_XVID
+	int             status;
+
+	status = shutdown_xvid();
+	if(status)
+		Com_Printf("[Xvid]Decore RELEASE problem, return value %d\n", status);
+#endif
+
+#ifdef USE_CIN_THEORA
+	theora_clear(&g_ogm.th_state);
+	theora_comment_clear(&g_ogm.th_comment);
+	theora_info_clear(&g_ogm.th_info);
+#endif
+
+	if(g_ogm.outputBuffer)
+		free(g_ogm.outputBuffer);
+	g_ogm.outputBuffer = NULL;
+
+	vorbis_dsp_clear(&g_ogm.vd);
+	vorbis_comment_clear(&g_ogm.vc);
+	vorbis_info_clear(&g_ogm.vi);	/* must be called last (comment from vorbis example code) */
+
+	ogg_stream_clear(&g_ogm.os_audio);
+	ogg_stream_clear(&g_ogm.os_video);
+
+	ogg_sync_clear(&g_ogm.oy);
+
+	FS_FCloseFile(g_ogm.ogmFile);
+	g_ogm.ogmFile = 0;
+}
+
+#else
+int Cin_OGM_Init(const char *filename)
+{
+	return 1;
+}
+int Cin_OGM_Run(int time)
+{
+	return 1;
+}
+unsigned char *Cin_OGM_GetOutput(int *outWidth, int *outHeight)
+{
+	return 0;
+}
+
+void Cin_OGM_Shutdown(void)
+{
+	return;
+}
+#endif
+
diff --git a/engine/code/client/client.h b/engine/code/client/client.h
index 9bc03068..70420f72 100644
--- a/engine/code/client/client.h
+++ b/engine/code/client/client.h
@@ -599,6 +599,21 @@ void CIN_SetLooping (int handle, qboolean loop);
 void CIN_UploadCinematic(int handle);
 void CIN_CloseAllVideos(void);
 
+// yuv->rgb will be used for Theora(ogm)
+void ROQ_GenYUVTables(void);
+void Frame_yuv_to_rgb24(const unsigned char *y, const unsigned char *u, const unsigned char *v,
+		int width, int height, int y_stride, int uv_stride,
+		int yWShift, int uvWShift, int yHShift, int uvHShift, unsigned int *output);
+
+//
+// cl_cin_ogm.c
+//
+
+int				Cin_OGM_Init(const char *filename);
+int				Cin_OGM_Run(int time);
+unsigned char	*Cin_OGM_GetOutput(int *outWidth, int *outHeight);
+void			Cin_OGM_Shutdown(void);
+
 //
 // cl_cgame.c
 //
diff --git a/engine/code/libtheora-1.1.1/include/theora/codec.h b/engine/code/libtheora-1.1.1/include/theora/codec.h
new file mode 100644
index 00000000..5c266963
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/include/theora/codec.h
@@ -0,0 +1,591 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\mainpage
+ * 
+ * \section intro Introduction
+ *
+ * This is the documentation for <tt>libtheora</tt> C API.
+ * The current reference
+ * implementation for <a href="http://www.theora.org/">Theora</a>, a free,
+ * patent-unencumbered video codec.
+ * Theora is derived from On2's VP3 codec with additional features and
+ *  integration with Ogg multimedia formats by
+ *  <a href="http://www.xiph.org/">the Xiph.Org Foundation</a>.
+ * Complete documentation of the format itself is available in
+ * <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>.
+ *
+ * \subsection Organization
+ *
+ * The functions documented here are actually subdivided into three 
+ * separate libraries:
+ * - <tt>libtheoraenc</tt> contains the encoder interface,
+ *   described in \ref encfuncs.
+ * - <tt>libtheoradec</tt> contains the decoder interface and
+ *   routines shared with the encoder.
+ *   You must also link to this if you link to <tt>libtheoraenc</tt>.
+ *   The routines in this library are described in \ref decfuncs and 
+ *   \ref basefuncs.
+ * - <tt>libtheora</tt> contains the \ref oldfuncs.
+ *
+ * New code should link to <tt>libtheoradec</tt> and, if using encoder
+ * features, <tt>libtheoraenc</tt>. Together these two export both
+ * the standard and the legacy API, so this is all that is needed by
+ * any code. The older <tt>libtheora</tt> library is provided just for
+ * compatibility with older build configurations.
+ *
+ * In general the recommended 1.x API symbols can be distinguished
+ * by their <tt>th_</tt> or <tt>TH_</tt> namespace prefix.
+ * The older, legacy API uses <tt>theora_</tt> or <tt>OC_</tt>
+ * prefixes instead.
+ */
+
+/**\file
+ * The shared <tt>libtheoradec</tt> and <tt>libtheoraenc</tt> C API.
+ * You don't need to include this directly.*/
+
+#if !defined(_O_THEORA_CODEC_H_)
+# define _O_THEORA_CODEC_H_ (1)
+# include <ogg/ogg.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name Return codes*/
+/*@{*/
+/**An invalid pointer was provided.*/
+#define TH_EFAULT     (-1)
+/**An invalid argument was provided.*/
+#define TH_EINVAL     (-10)
+/**The contents of the header were incomplete, invalid, or unexpected.*/
+#define TH_EBADHEADER (-20)
+/**The header does not belong to a Theora stream.*/
+#define TH_ENOTFORMAT (-21)
+/**The bitstream version is too high.*/
+#define TH_EVERSION   (-22)
+/**The specified function is not implemented.*/
+#define TH_EIMPL      (-23)
+/**There were errors in the video data packet.*/
+#define TH_EBADPACKET (-24)
+/**The decoded packet represented a dropped frame.
+   The player can continue to display the current frame, as the contents of the
+    decoded frame buffer have not changed.*/
+#define TH_DUPFRAME   (1)
+/*@}*/
+
+/**The currently defined color space tags.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Chapter 4, for exact details on the meaning
+ *  of each of these color spaces.*/
+typedef enum{
+  /**The color space was not specified at the encoder.
+      It may be conveyed by an external means.*/
+  TH_CS_UNSPECIFIED,
+  /**A color space designed for NTSC content.*/
+  TH_CS_ITU_REC_470M,
+  /**A color space designed for PAL/SECAM content.*/
+  TH_CS_ITU_REC_470BG,
+  /**The total number of currently defined color spaces.*/
+  TH_CS_NSPACES
+}th_colorspace;
+
+/**The currently defined pixel format tags.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Section 4.4, for details on the precise sample
+ *  locations.*/
+typedef enum{
+  /**Chroma decimation by 2 in both the X and Y directions (4:2:0).
+     The Cb and Cr chroma planes are half the width and half the
+      height of the luma plane.*/
+  TH_PF_420,
+  /**Currently reserved.*/
+  TH_PF_RSVD,
+  /**Chroma decimation by 2 in the X direction (4:2:2).
+     The Cb and Cr chroma planes are half the width of the luma plane, but full
+      height.*/
+  TH_PF_422,
+  /**No chroma decimation (4:4:4).
+     The Cb and Cr chroma planes are full width and full height.*/
+  TH_PF_444,
+  /**The total number of currently defined pixel formats.*/
+  TH_PF_NFORMATS
+}th_pixel_fmt;
+
+
+
+/**A buffer for a single color plane in an uncompressed image.
+ * This contains the image data in a left-to-right, top-down format.
+ * Each row of pixels is stored contiguously in memory, but successive
+ *  rows need not be.
+ * Use \a stride to compute the offset of the next row.
+ * The encoder accepts both positive \a stride values (top-down in memory)
+ *  and negative (bottom-up in memory).
+ * The decoder currently always generates images with positive strides.*/
+typedef struct{
+  /**The width of this plane.*/
+  int            width;
+  /**The height of this plane.*/
+  int            height;
+  /**The offset in bytes between successive rows.*/
+  int            stride;
+  /**A pointer to the beginning of the first row.*/
+  unsigned char *data;
+}th_img_plane;
+
+/**A complete image buffer for an uncompressed frame.
+ * The chroma planes may be decimated by a factor of two in either
+ *  direction, as indicated by th_info#pixel_fmt.
+ * The width and height of the Y' plane must be multiples of 16.
+ * They may need to be cropped for display, using the rectangle
+ *  specified by th_info#pic_x, th_info#pic_y, th_info#pic_width,
+ *  and th_info#pic_height.
+ * All samples are 8 bits.
+ * \note The term YUV often used to describe a colorspace is ambiguous.
+ * The exact parameters of the RGB to YUV conversion process aside, in
+ *  many contexts the U and V channels actually have opposite meanings.
+ * To avoid this confusion, we are explicit: the name of the color
+ *  channels are Y'CbCr, and they appear in that order, always.
+ * The prime symbol denotes that the Y channel is non-linear.
+ * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
+typedef th_img_plane th_ycbcr_buffer[3];
+
+/**Theora bitstream information.
+ * This contains the basic playback parameters for a stream, and corresponds to 
+ *  the initial 'info' header packet.
+ * To initialize an encoder, the application fills in this structure and
+ *  passes it to th_encode_alloc().
+ * A default encoding mode is chosen based on the values of the #quality and
+ *  #target_bitrate fields.
+ * On decode, it is filled in by th_decode_headerin(), and then passed to
+ *  th_decode_alloc().
+ *
+ * Encoded Theora frames must be a multiple of 16 in size;
+ *  this is what the #frame_width and #frame_height members represent.
+ * To handle arbitrary picture sizes, a crop rectangle is specified in the
+ *  #pic_x, #pic_y, #pic_width and #pic_height members.
+ *
+ * All frame buffers contain pointers to the full, padded frame.
+ * However, the current encoder <em>will not</em> reference pixels outside of
+ *  the cropped picture region, and the application does not need to fill them
+ *  in.
+ * The decoder <em>will</em> allocate storage for a full frame, but the
+ *  application <em>should not</em> rely on the padding containing sensible
+ *  data.
+ *
+ * It is also generally recommended that the offsets and sizes should still be
+ *  multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Section 4.4, for more details.
+ *
+ * Frame rate, in frames per second, is stored as a rational fraction, as is
+ *  the pixel aspect ratio.
+ * Note that this refers to the aspect ratio of the individual pixels, not of
+ *  the overall frame itself.
+ * The frame aspect ratio can be computed from pixel aspect ratio using the
+ *  image dimensions.*/
+typedef struct{
+  /**\name Theora version
+   * Bitstream version information.*/
+  /*@{*/
+  unsigned char version_major;
+  unsigned char version_minor;
+  unsigned char version_subminor;
+  /*@}*/
+  /**The encoded frame width.
+   * This must be a multiple of 16, and less than 1048576.*/
+  ogg_uint32_t  frame_width;
+  /**The encoded frame height.
+   * This must be a multiple of 16, and less than 1048576.*/
+  ogg_uint32_t  frame_height;
+  /**The displayed picture width.
+   * This must be no larger than width.*/
+  ogg_uint32_t  pic_width;
+  /**The displayed picture height.
+   * This must be no larger than height.*/
+  ogg_uint32_t  pic_height;
+  /**The X offset of the displayed picture.
+   * This must be no larger than #frame_width-#pic_width or 255, whichever is
+   *  smaller.*/
+  ogg_uint32_t  pic_x;
+  /**The Y offset of the displayed picture.
+   * This must be no larger than #frame_height-#pic_height, and
+   *  #frame_height-#pic_height-#pic_y must be no larger than 255.
+   * This slightly funny restriction is due to the fact that the offset is
+   *  specified from the top of the image for consistency with the standard
+   *  graphics left-handed coordinate system used throughout this API, while
+   *  it is stored in the encoded stream as an offset from the bottom.*/
+  ogg_uint32_t  pic_y;
+  /**\name Frame rate
+   * The frame rate, as a fraction.
+   * If either is 0, the frame rate is undefined.*/
+  /*@{*/
+  ogg_uint32_t  fps_numerator;
+  ogg_uint32_t  fps_denominator;
+  /*@}*/
+  /**\name Aspect ratio
+   * The aspect ratio of the pixels.
+   * If either value is zero, the aspect ratio is undefined.
+   * If not specified by any external means, 1:1 should be assumed.
+   * The aspect ratio of the full picture can be computed as
+   * \code
+   *  aspect_numerator*pic_width/(aspect_denominator*pic_height).
+   * \endcode */
+  /*@{*/
+  ogg_uint32_t  aspect_numerator;
+  ogg_uint32_t  aspect_denominator;
+  /*@}*/
+  /**The color space.*/
+  th_colorspace colorspace;
+  /**The pixel format.*/
+  th_pixel_fmt  pixel_fmt;
+  /**The target bit-rate in bits per second.
+     If initializing an encoder with this struct, set this field to a non-zero
+      value to activate CBR encoding by default.*/
+  int           target_bitrate;
+  /**The target quality level.
+     Valid values range from 0 to 63, inclusive, with higher values giving
+      higher quality.
+     If initializing an encoder with this struct, and #target_bitrate is set
+      to zero, VBR encoding at this quality will be activated by default.*/
+  /*Currently this is set so that a qi of 0 corresponds to distortions of 24
+     times the JND, and each increase by 16 halves that value.
+    This gives us fine discrimination at low qualities, yet effective rate
+     control at high qualities.
+    The qi value 63 is special, however.
+    For this, the highest quality, we use one half of a JND for our threshold.
+    Due to the lower bounds placed on allowable quantizers in Theora, we will
+     not actually be able to achieve quality this good, but this should
+     provide as close to visually lossless quality as Theora is capable of.
+    We could lift the quantizer restrictions without breaking VP3.1
+     compatibility, but this would result in quantized coefficients that are
+     too large for the current bitstream to be able to store.
+    We'd have to redesign the token syntax to store these large coefficients,
+     which would make transcoding complex.*/
+  int           quality;
+  /**The amount to shift to extract the last keyframe number from the granule
+   *  position.
+   * This can be at most 31.
+   * th_info_init() will set this to a default value (currently <tt>6</tt>,
+   *  which is good for streaming applications), but you can set it to 0 to
+   *  make every frame a keyframe.
+   * The maximum distance between key frames is
+   *  <tt>1<<#keyframe_granule_shift</tt>.
+   * The keyframe frequency can be more finely controlled with
+   *  #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted
+   *  during encoding (for example, to force the next frame to be a keyframe),
+   *  but it cannot be set larger than the amount permitted by this field after
+   *  the headers have been output.*/
+  int           keyframe_granule_shift;
+}th_info;
+
+/**The comment information.
+ *
+ * This structure holds the in-stream metadata corresponding to
+ *  the 'comment' header packet.
+ * The comment header is meant to be used much like someone jotting a quick
+ *  note on the label of a video.
+ * It should be a short, to the point text note that can be more than a couple
+ *  words, but not more than a short paragraph.
+ *
+ * The metadata is stored as a series of (tag, value) pairs, in
+ *  length-encoded string vectors.
+ * The first occurrence of the '=' character delimits the tag and value.
+ * A particular tag may occur more than once, and order is significant.
+ * The character set encoding for the strings is always UTF-8, but the tag
+ *  names are limited to ASCII, and treated as case-insensitive.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Section 6.3.3 for details.
+ *
+ * In filling in this structure, th_decode_headerin() will null-terminate
+ *  the user_comment strings for safety.
+ * However, the bitstream format itself treats them as 8-bit clean vectors,
+ *  possibly containing null characters, and so the length array should be
+ *  treated as their authoritative length.
+ */
+typedef struct th_comment{
+  /**The array of comment string vectors.*/
+  char **user_comments;
+  /**An array of the corresponding length of each vector, in bytes.*/
+  int   *comment_lengths;
+  /**The total number of comment strings.*/
+  int    comments;
+  /**The null-terminated vendor string.
+     This identifies the software used to encode the stream.*/
+  char  *vendor;
+}th_comment;
+
+
+
+/**A single base matrix.*/
+typedef unsigned char th_quant_base[64];
+
+/**A set of \a qi ranges.*/
+typedef struct{
+  /**The number of ranges in the set.*/
+  int                  nranges;
+  /**The size of each of the #nranges ranges.
+     These must sum to 63.*/
+  const int           *sizes;
+  /**#nranges <tt>+1</tt> base matrices.
+     Matrices \a i and <tt>i+1</tt> form the endpoints of range \a i.*/
+  const th_quant_base *base_matrices;
+}th_quant_ranges;
+
+/**A complete set of quantization parameters.
+   The quantizer for each coefficient is calculated as:
+   \code
+    Q=MAX(MIN(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100),
+     1024).
+   \endcode
+
+   \a qti is the quantization type index: 0 for intra, 1 for inter.
+   <tt>ci!=0</tt> is 0 for the DC coefficient and 1 for AC coefficients.
+   \a qi is the quality index, ranging between 0 (low quality) and 63 (high
+    quality).
+   \a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr.
+   \a ci is the DCT coefficient index.
+   Coefficient indices correspond to the normal 2D DCT block
+    ordering--row-major with low frequencies first--\em not zig-zag order.
+
+   Minimum quantizers are constant, and are given by:
+   \code
+   qmin[2][2]={{4,2},{8,4}}.
+   \endcode
+
+   Parameters that can be stored in the bitstream are as follows:
+    - The two scale matrices ac_scale and dc_scale.
+      \code
+      scale[2][64]={dc_scale,ac_scale}.
+      \endcode
+    - The base matrices for each \a qi, \a qti and \a pli (up to 384 in all).
+      In order to avoid storing a full 384 base matrices, only a sparse set of
+       matrices are stored, and the rest are linearly interpolated.
+      This is done as follows.
+      For each \a qti and \a pli, a series of \a n \a qi ranges is defined.
+      The size of each \a qi range can vary arbitrarily, but they must sum to
+       63.
+      Then, <tt>n+1</tt> matrices are specified, one for each endpoint of the
+       ranges.
+      For interpolation purposes, each range's endpoints are the first \a qi
+       value it contains and one past the last \a qi value it contains.
+      Fractional values are rounded to the nearest integer, with ties rounded
+       away from zero.
+
+      Base matrices are stored by reference, so if the same matrices are used
+       multiple times, they will only appear once in the bitstream.
+      The bitstream is also capable of omitting an entire set of ranges and
+       its associated matrices if they are the same as either the previous
+       set (indexed in row-major order) or if the inter set is the same as the
+       intra set.
+
+    - Loop filter limit values.
+      The same limits are used for the loop filter in all color planes, despite
+       potentially differing levels of quantization in each.
+
+   For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater
+    than <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must
+    be no greater than <tt>base[qti][pli][qi-1][ci]</tt>.
+   These two conditions ensure that the actual quantizer for a given \a qti,
+    \a pli, and \a ci does not increase as \a qi increases.
+   This is not required by the decoder.*/
+typedef struct{
+  /**The DC scaling factors.*/
+  ogg_uint16_t    dc_scale[64];
+  /**The AC scaling factors.*/
+  ogg_uint16_t    ac_scale[64];
+  /**The loop filter limit values.*/
+  unsigned char   loop_filter_limits[64];
+  /**The \a qi ranges for each \a ci and \a pli.*/
+  th_quant_ranges qi_ranges[2][3];
+}th_quant_info;
+
+
+
+/**The number of Huffman tables used by Theora.*/
+#define TH_NHUFFMAN_TABLES (80)
+/**The number of DCT token values in each table.*/
+#define TH_NDCT_TOKENS     (32)
+
+/**A Huffman code for a Theora DCT token.
+ * Each set of Huffman codes in a given table must form a complete, prefix-free
+ *  code.
+ * There is no requirement that all the tokens in a table have a valid code,
+ *  but the current encoder is not optimized to take advantage of this.
+ * If each of the five grouops of 16 tables does not contain at least one table
+ *  with a code for every token, then the encoder may fail to encode certain
+ *  frames.
+ * The complete table in the first group of 16 does not have to be in the same
+ *  place as the complete table in the other groups, but the complete tables in
+ *  the remaining four groups must all be in the same place.*/
+typedef struct{
+  /**The bit pattern for the code, with the LSbit of the pattern aligned in
+   *   the LSbit of the word.*/
+  ogg_uint32_t pattern;
+  /**The number of bits in the code.
+   * This must be between 0 and 32, inclusive.*/
+  int          nbits;
+}th_huff_code;
+
+
+
+/**\defgroup basefuncs Functions Shared by Encode and Decode*/
+/*@{*/
+/**\name Basic shared functions*/
+/*@{*/
+/**Retrieves a human-readable string to identify the library vendor and
+ *  version.
+ * \return the version string.*/
+extern const char *th_version_string(void);
+/**Retrieves the library version number.
+ * This is the highest bitstream version that the encoder library will produce,
+ *  or that the decoder library can decode.
+ * This number is composed of a 16-bit major version, 8-bit minor version
+ * and 8 bit sub-version, composed as follows:
+ * \code
+ * (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR)
+ * \endcode
+ * \return the version number.*/
+extern ogg_uint32_t th_version_number(void);
+/**Converts a granule position to an absolute frame index, starting at
+ *  <tt>0</tt>.
+ * The granule position is interpreted in the context of a given
+ *  #th_enc_ctx or #th_dec_ctx handle (either will suffice).
+ * \param _encdec  A previously allocated #th_enc_ctx or #th_dec_ctx
+ *                  handle.
+ * \param _granpos The granule position to convert.
+ * \returns The absolute frame index corresponding to \a _granpos.
+ * \retval -1 The given granule position was invalid (i.e. negative).*/
+extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos);
+/**Converts a granule position to an absolute time in seconds.
+ * The granule position is interpreted in the context of a given
+ *  #th_enc_ctx or #th_dec_ctx handle (either will suffice).
+ * \param _encdec  A previously allocated #th_enc_ctx or #th_dec_ctx
+ *                  handle.
+ * \param _granpos The granule position to convert.
+ * \return The absolute time in seconds corresponding to \a _granpos.
+ *         This is the "end time" for the frame, or the latest time it should
+ *          be displayed.
+ *         It is not the presentation time.
+ * \retval -1 The given granule position was invalid (i.e. negative).*/
+extern double th_granule_time(void *_encdec,ogg_int64_t _granpos);
+/**Determines whether a Theora packet is a header or not.
+ * This function does no verification beyond checking the packet type bit, so
+ *  it should not be used for bitstream identification; use
+ *  th_decode_headerin() for that.
+ * As per the Theora specification, an empty (0-byte) packet is treated as a
+ *  data packet (a delta frame with no coded blocks).
+ * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
+ * \retval 1 The packet is a header packet
+ * \retval 0 The packet is a video data packet.*/
+extern int th_packet_isheader(ogg_packet *_op);
+/**Determines whether a theora packet is a key frame or not.
+ * This function does no verification beyond checking the packet type and
+ *  key frame bits, so it should not be used for bitstream identification; use
+ *  th_decode_headerin() for that.
+ * As per the Theora specification, an empty (0-byte) packet is treated as a
+ *  delta frame (with no coded blocks).
+ * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
+ * \retval 1  The packet contains a key frame.
+ * \retval 0  The packet contains a delta frame.
+ * \retval -1 The packet is not a video data packet.*/
+extern int th_packet_iskeyframe(ogg_packet *_op);
+/*@}*/
+
+
+/**\name Functions for manipulating header data*/
+/*@{*/
+/**Initializes a th_info structure.
+ * This should be called on a freshly allocated #th_info structure before
+ *  attempting to use it.
+ * \param _info The #th_info struct to initialize.*/
+extern void th_info_init(th_info *_info);
+/**Clears a #th_info structure.
+ * This should be called on a #th_info structure after it is no longer
+ *  needed.
+ * \param _info The #th_info struct to clear.*/
+extern void th_info_clear(th_info *_info);
+
+/**Initialize a #th_comment structure.
+ * This should be called on a freshly allocated #th_comment structure
+ *  before attempting to use it.
+ * \param _tc The #th_comment struct to initialize.*/
+extern void th_comment_init(th_comment *_tc);
+/**Add a comment to an initialized #th_comment structure.
+ * \note Neither th_comment_add() nor th_comment_add_tag() support
+ *  comments containing null values, although the bitstream format does
+ *  support them.
+ * To add such comments you will need to manipulate the #th_comment
+ *  structure directly.
+ * \param _tc      The #th_comment struct to add the comment to.
+ * \param _comment Must be a null-terminated UTF-8 string containing the
+ *                  comment in "TAG=the value" form.*/
+extern void th_comment_add(th_comment *_tc, char *_comment);
+/**Add a comment to an initialized #th_comment structure.
+ * \note Neither th_comment_add() nor th_comment_add_tag() support
+ *  comments containing null values, although the bitstream format does
+ *  support them.
+ * To add such comments you will need to manipulate the #th_comment
+ *  structure directly.
+ * \param _tc  The #th_comment struct to add the comment to.
+ * \param _tag A null-terminated string containing the tag  associated with
+ *              the comment.
+ * \param _val The corresponding value as a null-terminated string.*/
+extern void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val);
+/**Look up a comment value by its tag.
+ * \param _tc    An initialized #th_comment structure.
+ * \param _tag   The tag to look up.
+ * \param _count The instance of the tag.
+ *               The same tag can appear multiple times, each with a distinct
+ *                value, so an index is required to retrieve them all.
+ *               The order in which these values appear is significant and
+ *                should be preserved.
+ *               Use th_comment_query_count() to get the legal range for
+ *                the \a _count parameter.
+ * \return A pointer to the queried tag's value.
+ *         This points directly to data in the #th_comment structure.
+ *         It should not be modified or freed by the application, and
+ *          modifications to the structure may invalidate the pointer.
+ * \retval NULL If no matching tag is found.*/
+extern char *th_comment_query(th_comment *_tc,char *_tag,int _count);
+/**Look up the number of instances of a tag.
+ * Call this first when querying for a specific tag and then iterate over the
+ *  number of instances with separate calls to th_comment_query() to
+ *  retrieve all the values for that tag in order.
+ * \param _tc    An initialized #th_comment structure.
+ * \param _tag   The tag to look up.
+ * \return The number on instances of this particular tag.*/
+extern int th_comment_query_count(th_comment *_tc,char *_tag);
+/**Clears a #th_comment structure.
+ * This should be called on a #th_comment structure after it is no longer
+ *  needed.
+ * It will free all memory used by the structure members.
+ * \param _tc The #th_comment struct to clear.*/
+extern void th_comment_clear(th_comment *_tc);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/include/theora/theora.h b/engine/code/libtheora-1.1.1/include/theora/theora.h
new file mode 100644
index 00000000..af6eb6f3
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/include/theora/theora.h
@@ -0,0 +1,784 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.17 2003/12/06 18:06:19 arc Exp $
+
+ ********************************************************************/
+
+#ifndef _O_THEORA_H_
+#define _O_THEORA_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+#include <stddef.h>	/* for size_t */
+
+#include <ogg/ogg.h>
+
+/** \file
+ * The libtheora pre-1.0 legacy C API.
+ *
+ * \ingroup oldfuncs
+ *
+ * \section intro Introduction
+ *
+ * This is the documentation for the libtheora legacy C API, declared in 
+ * the theora.h header, which describes the old interface used before
+ * the 1.0 release. This API was widely deployed for several years and
+ * remains supported, but for new code we recommend the cleaner API 
+ * declared in theoradec.h and theoraenc.h.
+ *
+ * libtheora is the reference implementation for
+ * <a href="http://www.theora.org/">Theora</a>, a free video codec.
+ * Theora is derived from On2's VP3 codec with improved integration with
+ * Ogg multimedia formats by <a href="http://www.xiph.org/">Xiph.Org</a>.
+ * 
+ * \section overview Overview
+ *
+ * This library will both decode and encode theora packets to/from raw YUV 
+ * frames.  In either case, the packets will most likely either come from or
+ * need to be embedded in an Ogg stream.  Use 
+ * <a href="http://xiph.org/ogg/">libogg</a> or 
+ * <a href="http://www.annodex.net/software/liboggz/index.html">liboggz</a>
+ * to extract/package these packets.
+ *
+ * \section decoding Decoding Process
+ *
+ * Decoding can be separated into the following steps:
+ * -# initialise theora_info and theora_comment structures using 
+ *    theora_info_init() and theora_comment_init():
+ \verbatim
+ theora_info     info;
+ theora_comment  comment;
+   
+ theora_info_init(&info);
+ theora_comment_init(&comment);
+ \endverbatim
+ * -# retrieve header packets from Ogg stream (there should be 3) and decode 
+ *    into theora_info and theora_comment structures using 
+ *    theora_decode_header().  See \ref identification for more information on 
+ *    identifying which packets are theora packets.
+ \verbatim
+ int i;
+ for (i = 0; i < 3; i++)
+ {
+   (get a theora packet "op" from the Ogg stream)
+   theora_decode_header(&info, &comment, op);
+ }
+ \endverbatim
+ * -# initialise the decoder based on the information retrieved into the
+ *    theora_info struct by theora_decode_header().  You will need a 
+ *    theora_state struct.
+ \verbatim
+ theora_state state;
+ 
+ theora_decode_init(&state, &info);
+ \endverbatim
+ * -# pass in packets and retrieve decoded frames!  See the yuv_buffer 
+ *    documentation for information on how to retrieve raw YUV data.
+ \verbatim
+ yuf_buffer buffer;
+ while (last packet was not e_o_s) {
+   (get a theora packet "op" from the Ogg stream)
+   theora_decode_packetin(&state, op);
+   theora_decode_YUVout(&state, &buffer);
+ }
+ \endverbatim
+ *  
+ *
+ * \subsection identification Identifying Theora Packets
+ *
+ * All streams inside an Ogg file have a unique serial_no attached to the 
+ * stream.  Typically, you will want to 
+ *  - retrieve the serial_no for each b_o_s (beginning of stream) page 
+ *    encountered within the Ogg file; 
+ *  - test the first (only) packet on that page to determine if it is a theora 
+ *    packet;
+ *  - once you have found a theora b_o_s page then use the retrieved serial_no 
+ *    to identify future packets belonging to the same theora stream.
+ * 
+ * Note that you \e cannot use theora_packet_isheader() to determine if a 
+ * packet is a theora packet or not, as this function does not perform any
+ * checking beyond whether a header bit is present.  Instead, use the
+ * theora_decode_header() function and check the return value; or examine the
+ * header bytes at the beginning of the Ogg page.
+ */
+
+
+/** \defgroup oldfuncs Legacy pre-1.0 C API */
+/*  @{ */
+
+/**
+ * A YUV buffer for passing uncompressed frames to and from the codec.
+ * This holds a Y'CbCr frame in planar format. The CbCr planes can be
+ * subsampled and have their own separate dimensions and row stride
+ * offsets. Note that the strides may be negative in some 
+ * configurations. For theora the width and height of the largest plane
+ * must be a multiple of 16. The actual meaningful picture size and 
+ * offset are stored in the theora_info structure; frames returned by
+ * the decoder may need to be cropped for display.
+ *
+ * All samples are 8 bits. Within each plane samples are ordered by
+ * row from the top of the frame to the bottom. Within each row samples
+ * are ordered from left to right.
+ *
+ * During decode, the yuv_buffer struct is allocated by the user, but all
+ * fields (including luma and chroma pointers) are filled by the library.  
+ * These pointers address library-internal memory and their contents should 
+ * not be modified.
+ *
+ * Conversely, during encode the user allocates the struct and fills out all
+ * fields.  The user also manages the data addressed by the luma and chroma
+ * pointers.  See the encoder_example.c and dump_video.c example files in
+ * theora/examples/ for more information.
+ */
+typedef struct {
+    int   y_width;      /**< Width of the Y' luminance plane */
+    int   y_height;     /**< Height of the luminance plane */
+    int   y_stride;     /**< Offset in bytes between successive rows */
+
+    int   uv_width;     /**< Width of the Cb and Cr chroma planes */
+    int   uv_height;    /**< Height of the chroma planes */
+    int   uv_stride;    /**< Offset between successive chroma rows */
+    unsigned char *y;   /**< Pointer to start of luminance data */
+    unsigned char *u;   /**< Pointer to start of Cb data */
+    unsigned char *v;   /**< Pointer to start of Cr data */
+
+} yuv_buffer;
+
+/**
+ * A Colorspace.
+ */
+typedef enum {
+  OC_CS_UNSPECIFIED,    /**< The colorspace is unknown or unspecified */
+  OC_CS_ITU_REC_470M,   /**< This is the best option for 'NTSC' content */
+  OC_CS_ITU_REC_470BG,  /**< This is the best option for 'PAL' content */
+  OC_CS_NSPACES         /**< This marks the end of the defined colorspaces */
+} theora_colorspace;
+
+/**
+ * A Chroma subsampling
+ *
+ * These enumerate the available chroma subsampling options supported
+ * by the theora format. See Section 4.4 of the specification for
+ * exact definitions.
+ */
+typedef enum {
+  OC_PF_420,    /**< Chroma subsampling by 2 in each direction (4:2:0) */
+  OC_PF_RSVD,   /**< Reserved value */
+  OC_PF_422,    /**< Horizonatal chroma subsampling by 2 (4:2:2) */
+  OC_PF_444,    /**< No chroma subsampling at all (4:4:4) */
+} theora_pixelformat;
+
+/**
+ * Theora bitstream info.
+ * Contains the basic playback parameters for a stream,
+ * corresponding to the initial 'info' header packet.
+ * 
+ * Encoded theora frames must be a multiple of 16 in width and height.
+ * To handle other frame sizes, a crop rectangle is specified in
+ * frame_height and frame_width, offset_x and * offset_y. The offset
+ * and size should still be a multiple of 2 to avoid chroma sampling
+ * shifts. Offset values in this structure are measured from the
+ * upper left of the image.
+ *
+ * Frame rate, in frames per second, is stored as a rational
+ * fraction. Aspect ratio is also stored as a rational fraction, and
+ * refers to the aspect ratio of the frame pixels, not of the
+ * overall frame itself.
+ * 
+ * See <a href="http://svn.xiph.org/trunk/theora/examples/encoder_example.c">
+ * examples/encoder_example.c</a> for usage examples of the
+ * other paramters and good default settings for the encoder parameters.
+ */
+typedef struct {
+  ogg_uint32_t  width;		/**< encoded frame width  */
+  ogg_uint32_t  height;		/**< encoded frame height */
+  ogg_uint32_t  frame_width;	/**< display frame width  */
+  ogg_uint32_t  frame_height;	/**< display frame height */
+  ogg_uint32_t  offset_x;	/**< horizontal offset of the displayed frame */
+  ogg_uint32_t  offset_y;	/**< vertical offset of the displayed frame */
+  ogg_uint32_t  fps_numerator;	    /**< frame rate numerator **/
+  ogg_uint32_t  fps_denominator;    /**< frame rate denominator **/
+  ogg_uint32_t  aspect_numerator;   /**< pixel aspect ratio numerator */
+  ogg_uint32_t  aspect_denominator; /**< pixel aspect ratio denominator */
+  theora_colorspace colorspace;	    /**< colorspace */
+  int           target_bitrate;	    /**< nominal bitrate in bits per second */
+  int           quality;  /**< Nominal quality setting, 0-63 */
+  int           quick_p;  /**< Quick encode/decode */
+
+  /* decode only */
+  unsigned char version_major;
+  unsigned char version_minor;
+  unsigned char version_subminor;
+
+  void *codec_setup;
+
+  /* encode only */
+  int           dropframes_p;
+  int           keyframe_auto_p;
+  ogg_uint32_t  keyframe_frequency;
+  ogg_uint32_t  keyframe_frequency_force;  /* also used for decode init to
+                                              get granpos shift correct */
+  ogg_uint32_t  keyframe_data_target_bitrate;
+  ogg_int32_t   keyframe_auto_threshold;
+  ogg_uint32_t  keyframe_mindistance;
+  ogg_int32_t   noise_sensitivity;
+  ogg_int32_t   sharpness;
+
+  theora_pixelformat pixelformat;	/**< chroma subsampling mode to expect */
+
+} theora_info;
+
+/** Codec internal state and context.
+ */
+typedef struct{
+  theora_info *i;
+  ogg_int64_t granulepos;
+
+  void *internal_encode;
+  void *internal_decode;
+
+} theora_state;
+
+/** 
+ * Comment header metadata.
+ *
+ * This structure holds the in-stream metadata corresponding to
+ * the 'comment' header packet.
+ *
+ * Meta data is stored as a series of (tag, value) pairs, in
+ * length-encoded string vectors. The first occurence of the 
+ * '=' character delimits the tag and value. A particular tag
+ * may occur more than once. The character set encoding for
+ * the strings is always UTF-8, but the tag names are limited
+ * to case-insensitive ASCII. See the spec for details.
+ *
+ * In filling in this structure, theora_decode_header() will
+ * null-terminate the user_comment strings for safety. However,
+ * the bitstream format itself treats them as 8-bit clean,
+ * and so the length array should be treated as authoritative
+ * for their length.
+ */
+typedef struct theora_comment{
+  char **user_comments;         /**< An array of comment string vectors */
+  int   *comment_lengths;       /**< An array of corresponding string vector lengths in bytes */
+  int    comments;              /**< The total number of comment string vectors */
+  char  *vendor;                /**< The vendor string identifying the encoder, null terminated */
+
+} theora_comment;
+
+
+/**\name theora_control() codes */
+/* \anchor decctlcodes_old
+ * These are the available request codes for theora_control()
+ * when called with a decoder instance.
+ * By convention decoder control codes are odd, to distinguish 
+ * them from \ref encctlcodes_old "encoder control codes" which
+ * are even.
+ *
+ * Note that since the 1.0 release, both the legacy and the final
+ * implementation accept all the same control codes, but only the
+ * final API declares the newer codes.
+ *
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+
+/*@{*/
+
+/**Get the maximum post-processing level.
+ * The decoder supports a post-processing filter that can improve
+ * the appearance of the decoded images. This returns the highest
+ * level setting for this post-processor, corresponding to maximum
+ * improvement and computational expense.
+ */
+#define TH_DECCTL_GET_PPLEVEL_MAX (1)
+
+/**Set the post-processing level.
+ * Sets the level of post-processing to use when decoding the 
+ * compressed stream. This must be a value between zero (off)
+ * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
+ */
+#define TH_DECCTL_SET_PPLEVEL (3)
+
+/**Sets the maximum distance between key frames.
+ * This can be changed during an encode, but will be bounded by
+ *  <tt>1<<th_info#keyframe_granule_shift</tt>.
+ * If it is set before encoding begins, th_info#keyframe_granule_shift will
+ *  be enlarged appropriately.
+ *
+ * \param[in]  buf <tt>ogg_uint32_t</tt>: The maximum distance between key
+ *                   frames.
+ * \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
+
+/**Set the granule position.
+ * Call this after a seek, to update the internal granulepos
+ * in the decoder, to insure that subsequent frames are marked
+ * properly. If you track timestamps yourself and do not use
+ * the granule postion returned by the decoder, then you do
+ * not need to use this control.
+ */
+#define TH_DECCTL_SET_GRANPOS (5)
+
+/**\anchor encctlcodes_old */
+
+/**Sets the quantization parameters to use.
+ * The parameters are copied, not stored by reference, so they can be freed
+ *  after this call.
+ * <tt>NULL</tt> may be specified to revert to the default parameters.
+ *
+ * \param[in] buf #th_quant_info
+ * \retval OC_FAULT  \a theora_state is <tt>NULL</tt>.
+ * \retval OC_EINVAL Encoding has already begun, the quantization parameters
+ *                    are not acceptable to this version of the encoder, 
+ *                    \a buf is <tt>NULL</tt> and \a buf_sz is not zero, 
+ *                    or \a buf is non-<tt>NULL</tt> and \a buf_sz is 
+ *                    not <tt>sizeof(#th_quant_info)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUANT_PARAMS (2)
+
+/**Disables any encoder features that would prevent lossless transcoding back
+ *  to VP3.
+ * This primarily means disabling block-level QI values and not using 4MV mode
+ *  when any of the luma blocks in a macro block are not coded.
+ * It also includes using the VP3 quantization tables and Huffman codes; if you
+ *  set them explicitly after calling this function, the resulting stream will
+ *  not be VP3-compatible.
+ * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
+ *  material, or when using a picture region smaller than the full frame (e.g.
+ *  a non-multiple-of-16 width or height), then non-VP3 bitstream features will
+ *  still be disabled, but the stream will still not be VP3-compatible, as VP3
+ *  was not capable of encoding such formats.
+ * If you call this after encoding has already begun, then the quantization
+ *  tables and codebooks cannot be changed, but the frame-level features will
+ *  be enabled or disabled as requested.
+ *
+ * \param[in]  buf <tt>int</tt>: a non-zero value to enable VP3 compatibility,
+ *                   or 0 to disable it (the default).
+ * \param[out] buf <tt>int</tt>: 1 if all bitstream features required for
+ *                   VP3-compatibility could be set, and 0 otherwise.
+ *                  The latter will be returned if the pixel format is not
+ *                   4:2:0, the picture region is smaller than the full frame,
+ *                   or if encoding has begun, preventing the quantization
+ *                   tables and codebooks from being set.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
+
+/**Gets the maximum speed level.
+ * Higher speed levels favor quicker encoding over better quality per bit.
+ * Depending on the encoding mode, and the internal algorithms used, quality
+ *  may actually improve, but in this case bitrate will also likely increase.
+ * In any case, overall rate/distortion performance will probably decrease.
+ * The maximum value, and the meaning of each value, may change depending on
+ *  the current encoding mode (VBR vs. CQI, etc.).
+ *
+ * \param[out] buf int: The maximum encoding speed level.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
+
+/**Sets the speed level.
+ * By default a speed value of 1 is used.
+ *
+ * \param[in] buf int: The new encoding speed level.
+ *                      0 is slowest, larger values use less CPU.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    encoding speed level is out of bounds.
+ *                   The maximum encoding speed level may be
+ *                    implementation- and encoding mode-specific, and can be
+ *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ * \retval OC_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_SPLEVEL (14)
+
+/*@}*/
+
+#define OC_FAULT       -1       /**< General failure */
+#define OC_EINVAL      -10      /**< Library encountered invalid internal data */
+#define OC_DISABLED    -11      /**< Requested action is disabled */
+#define OC_BADHEADER   -20      /**< Header packet was corrupt/invalid */
+#define OC_NOTFORMAT   -21      /**< Packet is not a theora packet */
+#define OC_VERSION     -22      /**< Bitstream version is not handled */
+#define OC_IMPL        -23      /**< Feature or action not implemented */
+#define OC_BADPACKET   -24      /**< Packet is corrupt */
+#define OC_NEWPACKET   -25      /**< Packet is an (ignorable) unhandled extension */
+#define OC_DUPFRAME    1        /**< Packet is a dropped frame */
+
+/** 
+ * Retrieve a human-readable string to identify the encoder vendor and version.
+ * \returns A version string.
+ */
+extern const char *theora_version_string(void);
+
+/**
+ * Retrieve a 32-bit version number.
+ * This number is composed of a 16-bit major version, 8-bit minor version
+ * and 8 bit sub-version, composed as follows:
+<pre>
+   (VERSION_MAJOR<<16) + (VERSION_MINOR<<8) + (VERSION_SUB)
+</pre>
+* \returns The version number.
+*/
+extern ogg_uint32_t theora_version_number(void);
+
+/**
+ * Initialize the theora encoder.
+ * \param th The theora_state handle to initialize for encoding.
+ * \param ti A theora_info struct filled with the desired encoding parameters.
+ * \retval 0 Success
+ */
+extern int theora_encode_init(theora_state *th, theora_info *ti);
+
+/**
+ * Submit a YUV buffer to the theora encoder.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param yuv A buffer of YUV data to encode.  Note that both the yuv_buffer
+ *            struct and the luma/chroma buffers within should be allocated by
+ *            the user.
+ * \retval OC_EINVAL Encoder is not ready, or is finished.
+ * \retval -1 The size of the given frame differs from those previously input
+ * \retval 0 Success
+ */
+extern int theora_encode_YUVin(theora_state *t, yuv_buffer *yuv);
+
+/**
+ * Request the next packet of encoded video. 
+ * The encoded data is placed in a user-provided ogg_packet structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param last_p whether this is the last packet the encoder should produce.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to encoded
+ *           data. The memory for the encoded data is owned by libtheora.
+ * \retval 0 No internal storage exists OR no packet is ready
+ * \retval -1 The encoding process has completed
+ * \retval 1 Success
+ */
+extern int theora_encode_packetout( theora_state *t, int last_p,
+                                    ogg_packet *op);
+
+/**
+ * Request a packet containing the initial header.
+ * A pointer to the header data is placed in a user-provided ogg_packet
+ * structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the header
+ *           data. The memory for the header data is owned by libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_header(theora_state *t, ogg_packet *op);
+
+/**
+ * Request a comment header packet from provided metadata.
+ * A pointer to the comment data is placed in a user-provided ogg_packet
+ * structure.
+ * \param tc A theora_comment structure filled with the desired metadata
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the encoded
+ *           comment data. The memory for the comment data is owned by
+ *           libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_comment(theora_comment *tc, ogg_packet *op);
+
+/**
+ * Request a packet containing the codebook tables for the stream.
+ * A pointer to the codebook data is placed in a user-provided ogg_packet
+ * structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the codebook
+ *           data. The memory for the header data is owned by libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_tables(theora_state *t, ogg_packet *op);
+
+/**
+ * Decode an Ogg packet, with the expectation that the packet contains
+ * an initial header, comment data or codebook tables.
+ *
+ * \param ci A theora_info structure to fill. This must have been previously
+ *           initialized with theora_info_init(). If \a op contains an initial
+ *           header, theora_decode_header() will fill \a ci with the
+ *           parsed header values. If \a op contains codebook tables,
+ *           theora_decode_header() will parse these and attach an internal
+ *           representation to \a ci->codec_setup.
+ * \param cc A theora_comment structure to fill. If \a op contains comment
+ *           data, theora_decode_header() will fill \a cc with the parsed
+ *           comments.
+ * \param op An ogg_packet structure which you expect contains an initial
+ *           header, comment data or codebook tables.
+ *
+ * \retval OC_BADHEADER \a op is NULL; OR the first byte of \a op->packet
+ *                      has the signature of an initial packet, but op is
+ *                      not a b_o_s packet; OR this packet has the signature
+ *                      of an initial header packet, but an initial header
+ *                      packet has already been seen; OR this packet has the
+ *                      signature of a comment packet, but the initial header
+ *                      has not yet been seen; OR this packet has the signature
+ *                      of a comment packet, but contains invalid data; OR
+ *                      this packet has the signature of codebook tables,
+ *                      but the initial header or comments have not yet
+ *                      been seen; OR this packet has the signature of codebook
+ *                      tables, but contains invalid data;
+ *                      OR the stream being decoded has a compatible version
+ *                      but this packet does not have the signature of a
+ *                      theora initial header, comments, or codebook packet
+ * \retval OC_VERSION   The packet data of \a op is an initial header with
+ *                      a version which is incompatible with this version of
+ *                      libtheora.
+ * \retval OC_NEWPACKET the stream being decoded has an incompatible (future)
+ *                      version and contains an unknown signature.
+ * \retval 0            Success
+ *
+ * \note The normal usage is that theora_decode_header() be called on the
+ *       first three packets of a theora logical bitstream in succession.
+ */
+extern int theora_decode_header(theora_info *ci, theora_comment *cc,
+                                ogg_packet *op);
+
+/**
+ * Initialize a theora_state handle for decoding.
+ * \param th The theora_state handle to initialize.
+ * \param c  A theora_info struct filled with the desired decoding parameters.
+ *           This is of course usually obtained from a previous call to
+ *           theora_decode_header().
+ * \retval 0 Success
+ */
+extern int theora_decode_init(theora_state *th, theora_info *c);
+
+/**
+ * Input a packet containing encoded data into the theora decoder.
+ * \param th A theora_state handle previously initialized for decoding.
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 0 Success
+ * \retval OC_BADPACKET \a op does not contain encoded video data
+ */
+extern int theora_decode_packetin(theora_state *th,ogg_packet *op);
+
+/**
+ * Output the next available frame of decoded YUV data.
+ * \param th A theora_state handle previously initialized for decoding.
+ * \param yuv A yuv_buffer in which libtheora should place the decoded data.
+ *            Note that the buffer struct itself is allocated by the user, but
+ *            that the luma and chroma pointers will be filled in by the 
+ *            library.  Also note that these luma and chroma regions should be 
+ *            considered read-only by the user.
+ * \retval 0 Success
+ */
+extern int theora_decode_YUVout(theora_state *th,yuv_buffer *yuv);
+
+/**
+ * Report whether a theora packet is a header or not
+ * This function does no verification beyond checking the header
+ * flag bit so it should not be used for bitstream identification;
+ * use theora_decode_header() for that.
+ *
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 1 The packet is a header packet
+ * \retval 0 The packet is not a header packet (and so contains frame data)
+ *
+ * Thus function was added in the 1.0alpha4 release.
+ */
+extern int theora_packet_isheader(ogg_packet *op);
+
+/**
+ * Report whether a theora packet is a keyframe or not
+ *
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 1 The packet contains a keyframe image
+ * \retval 0 The packet is contains an interframe delta
+ * \retval -1 The packet is not an image data packet at all
+ *
+ * Thus function was added in the 1.0alpha4 release.
+ */
+extern int theora_packet_iskeyframe(ogg_packet *op);
+
+/**
+ * Report the granulepos shift radix
+ *
+ * When embedded in Ogg, Theora uses a two-part granulepos, 
+ * splitting the 64-bit field into two pieces. The more-significant
+ * section represents the frame count at the last keyframe,
+ * and the less-significant section represents the count of
+ * frames since the last keyframe. In this way the overall
+ * field is still non-decreasing with time, but usefully encodes
+ * a pointer to the last keyframe, which is necessary for
+ * correctly restarting decode after a seek. 
+ *
+ * This function reports the number of bits used to represent
+ * the distance to the last keyframe, and thus how the granulepos
+ * field must be shifted or masked to obtain the two parts.
+ * 
+ * Since libtheora returns compressed data in an ogg_packet
+ * structure, this may be generally useful even if the Theora
+ * packets are not being used in an Ogg container. 
+ *
+ * \param ti A previously initialized theora_info struct
+ * \returns The bit shift dividing the two granulepos fields
+ *
+ * This function was added in the 1.0alpha5 release.
+ */
+int theora_granule_shift(theora_info *ti);
+
+/**
+ * Convert a granulepos to an absolute frame index, starting at 0.
+ * The granulepos is interpreted in the context of a given theora_state handle.
+ * 
+ * Note that while the granulepos encodes the frame count (i.e. starting
+ * from 1) this call returns the frame index, starting from zero. Thus
+ * One can calculate the presentation time by multiplying the index by
+ * the rate.
+ *
+ * \param th A previously initialized theora_state handle (encode or decode)
+ * \param granulepos The granulepos to convert.
+ * \returns The frame index corresponding to \a granulepos.
+ * \retval -1 The given granulepos is undefined (i.e. negative)
+ *
+ * Thus function was added in the 1.0alpha4 release.
+ */
+extern ogg_int64_t theora_granule_frame(theora_state *th,ogg_int64_t granulepos);
+
+/**
+ * Convert a granulepos to absolute time in seconds. The granulepos is
+ * interpreted in the context of a given theora_state handle, and gives
+ * the end time of a frame's presentation as used in Ogg mux ordering.
+ *
+ * \param th A previously initialized theora_state handle (encode or decode)
+ * \param granulepos The granulepos to convert.
+ * \returns The absolute time in seconds corresponding to \a granulepos.
+ *          This is the "end time" for the frame, or the latest time it should
+ *           be displayed.
+ *          It is not the presentation time.
+ * \retval -1. The given granulepos is undefined (i.e. negative), or
+ * \retval -1. The function has been disabled because floating 
+ *              point support is not available.
+ */
+extern double theora_granule_time(theora_state *th,ogg_int64_t granulepos);
+
+/**
+ * Initialize a theora_info structure. All values within the given theora_info
+ * structure are initialized, and space is allocated within libtheora for
+ * internal codec setup data.
+ * \param c A theora_info struct to initialize.
+ */
+extern void theora_info_init(theora_info *c);
+
+/**
+ * Clear a theora_info structure. All values within the given theora_info
+ * structure are cleared, and associated internal codec setup data is freed.
+ * \param c A theora_info struct to initialize.
+ */
+extern void theora_info_clear(theora_info *c);
+
+/**
+ * Free all internal data associated with a theora_state handle.
+ * \param t A theora_state handle.
+ */
+extern void theora_clear(theora_state *t);
+
+/**
+ * Initialize an allocated theora_comment structure
+ * \param tc An allocated theora_comment structure 
+ **/
+extern void theora_comment_init(theora_comment *tc);
+
+/**
+ * Add a comment to an initialized theora_comment structure
+ * \param tc A previously initialized theora comment structure
+ * \param comment A null-terminated string encoding the comment in the form
+ *                "TAG=the value"
+ *
+ * Neither theora_comment_add() nor theora_comment_add_tag() support
+ * comments containing null values, although the bitstream format
+ * supports this. To add such comments you will need to manipulate
+ * the theora_comment structure directly.
+ **/
+
+extern void theora_comment_add(theora_comment *tc, char *comment);
+
+/**
+ * Add a comment to an initialized theora_comment structure.
+ * \param tc A previously initialized theora comment structure
+ * \param tag A null-terminated string containing the tag 
+ *            associated with the comment.
+ * \param value The corresponding value as a null-terminated string
+ *
+ * Neither theora_comment_add() nor theora_comment_add_tag() support
+ * comments containing null values, although the bitstream format
+ * supports this. To add such comments you will need to manipulate
+ * the theora_comment structure directly.
+ **/
+extern void theora_comment_add_tag(theora_comment *tc,
+                                       char *tag, char *value);
+
+/**
+ * Look up a comment value by tag.
+ * \param tc Tn initialized theora_comment structure
+ * \param tag The tag to look up
+ * \param count The instance of the tag. The same tag can appear multiple
+ *              times, each with a distinct and ordered value, so an index
+ *              is required to retrieve them all.
+ * \returns A pointer to the queried tag's value
+ * \retval NULL No matching tag is found
+ *
+ * \note Use theora_comment_query_count() to get the legal range for the
+ * count parameter.
+ **/
+
+extern char *theora_comment_query(theora_comment *tc, char *tag, int count);
+
+/** Look up the number of instances of a tag.
+ *  \param tc An initialized theora_comment structure
+ *  \param tag The tag to look up
+ *  \returns The number on instances of a particular tag.
+ * 
+ *  Call this first when querying for a specific tag and then interate
+ *  over the number of instances with separate calls to 
+ *  theora_comment_query() to retrieve all instances in order.
+ **/
+extern int   theora_comment_query_count(theora_comment *tc, char *tag);
+
+/**
+ * Clear an allocated theora_comment struct so that it can be freed.
+ * \param tc An allocated theora_comment structure.
+ **/
+extern void  theora_comment_clear(theora_comment *tc);
+
+/**Encoder control function.
+ * This is used to provide advanced control the encoding process.
+ * \param th     A #theora_state handle.
+ * \param req    The control code to process.
+ *                See \ref encctlcodes_old "the list of available 
+ *			control codes" for details.
+ * \param buf    The parameters for this control code.
+ * \param buf_sz The size of the parameter buffer.*/
+extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz);
+
+/* @} */ /* end oldfuncs doxygen group */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _O_THEORA_H_ */
diff --git a/engine/code/libtheora-1.1.1/include/theora/theoradec.h b/engine/code/libtheora-1.1.1/include/theora/theoradec.h
new file mode 100644
index 00000000..b20f0e3a
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/include/theora/theoradec.h
@@ -0,0 +1,325 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\file
+ * The <tt>libtheoradec</tt> C decoding API.*/
+
+#if !defined(_O_THEORA_THEORADEC_H_)
+# define _O_THEORA_THEORADEC_H_ (1)
+# include <stddef.h>
+# include <ogg/ogg.h>
+# include "codec.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name th_decode_ctl() codes
+ * \anchor decctlcodes
+ * These are the available request codes for th_decode_ctl().
+ * By convention, these are odd, to distinguish them from the
+ *  \ref encctlcodes "encoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+/*@{*/
+/**Gets the maximum post-processing level.
+ * The decoder supports a post-processing filter that can improve
+ * the appearance of the decoded images. This returns the highest
+ * level setting for this post-processor, corresponding to maximum
+ * improvement and computational expense.
+ *
+ * \param[out] _buf int: The maximum post-processing level.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_DECCTL_GET_PPLEVEL_MAX (1)
+/**Sets the post-processing level.
+ * By default, post-processing is disabled.
+ *
+ * Sets the level of post-processing to use when decoding the
+ * compressed stream. This must be a value between zero (off)
+ * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
+ *
+ * \param[in] _buf int: The new post-processing level.
+ *                      0 to disable; larger values use more CPU.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                     post-processing level is out of bounds.
+ *                    The maximum post-processing level may be
+ *                     implementation-specific, and can be obtained via
+ *                     #TH_DECCTL_GET_PPLEVEL_MAX.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_DECCTL_SET_PPLEVEL (3)
+/**Sets the granule position.
+ * Call this after a seek, before decoding the first frame, to ensure that the
+ *  proper granule position is returned for all subsequent frames.
+ * If you track timestamps yourself and do not use the granule position
+ *  returned by the decoder, then you need not call this function.
+ *
+ * \param[in] _buf <tt>ogg_int64_t</tt>: The granule position of the next
+ *                  frame.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(ogg_int64_t)</tt>, or the
+ *                     granule position is negative.*/
+#define TH_DECCTL_SET_GRANPOS (5)
+/**Sets the striped decode callback function.
+ * If set, this function will be called as each piece of a frame is fully
+ *  decoded in th_decode_packetin().
+ * You can pass in a #th_stripe_callback with
+ *  th_stripe_callback#stripe_decoded set to <tt>NULL</tt> to disable the
+ *  callbacks at any point.
+ * Enabling striped decode does not prevent you from calling
+ *  th_decode_ycbcr_out() after the frame is fully decoded.
+ *
+ * \param[in]  _buf #th_stripe_callback: The callback parameters.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not
+ *                     <tt>sizeof(th_stripe_callback)</tt>.*/
+#define TH_DECCTL_SET_STRIPE_CB (7)
+
+/**Enables telemetry and sets the macroblock display mode */
+#define TH_DECCTL_SET_TELEMETRY_MBMODE (9)
+/**Enables telemetry and sets the motion vector display mode */
+#define TH_DECCTL_SET_TELEMETRY_MV (11)
+/**Enables telemetry and sets the adaptive quantization display mode */
+#define TH_DECCTL_SET_TELEMETRY_QI (13)
+/**Enables telemetry and sets the bitstream breakdown visualization mode */
+#define TH_DECCTL_SET_TELEMETRY_BITS (15)
+/*@}*/
+
+
+
+/**A callback function for striped decode.
+ * This is a function pointer to an application-provided function that will be
+ *  called each time a section of the image is fully decoded in
+ *  th_decode_packetin().
+ * This allows the application to process the section immediately, while it is
+ *  still in cache.
+ * Note that the frame is decoded bottom to top, so \a _yfrag0 will steadily
+ *  decrease with each call until it reaches 0, at which point the full frame
+ *  is decoded.
+ * The number of fragment rows made available in each call depends on the pixel
+ *  format and the number of post-processing filters enabled, and may not even
+ *  be constant for the entire frame.
+ * If a non-<tt>NULL</tt> \a _granpos pointer is passed to
+ *  th_decode_packetin(), the granule position for the frame will be stored
+ *  in it before the first callback is made.
+ * If an entire frame is dropped (a 0-byte packet), then no callbacks will be
+ *  made at all for that frame.
+ * \param _ctx       An application-provided context pointer.
+ * \param _buf       The image buffer for the decoded frame.
+ * \param _yfrag0    The Y coordinate of the first row of 8x8 fragments
+ *                    decoded.
+ *                   Multiply this by 8 to obtain the pixel row number in the
+ *                    luma plane.
+ *                   If the chroma planes are subsampled in the Y direction,
+ *                    this will always be divisible by two.
+ * \param _yfrag_end The Y coordinate of the first row of 8x8 fragments past
+ *                    the newly decoded section.
+ *                   If the chroma planes are subsampled in the Y direction,
+ *                    this will always be divisible by two.
+ *                   I.e., this section contains fragment rows
+ *                    <tt>\a _yfrag0 ...\a _yfrag_end -1</tt>.*/
+typedef void (*th_stripe_decoded_func)(void *_ctx,th_ycbcr_buffer _buf,
+ int _yfrag0,int _yfrag_end);
+
+/**The striped decode callback data to pass to #TH_DECCTL_SET_STRIPE_CB.*/
+typedef struct{
+  /**An application-provided context pointer.
+   * This will be passed back verbatim to the application.*/
+  void                   *ctx;
+  /**The callback function pointer.*/
+  th_stripe_decoded_func  stripe_decoded;
+}th_stripe_callback;
+
+
+
+/**\name Decoder state
+   The following data structures are opaque, and their contents are not
+    publicly defined by this API.
+   Referring to their internals directly is unsupported, and may break without
+    warning.*/
+/*@{*/
+/**The decoder context.*/
+typedef struct th_dec_ctx    th_dec_ctx;
+/**Setup information.
+   This contains auxiliary information (Huffman tables and quantization
+    parameters) decoded from the setup header by th_decode_headerin() to be
+    passed to th_decode_alloc().
+   It can be re-used to initialize any number of decoders, and can be freed
+    via th_setup_free() at any time.*/
+typedef struct th_setup_info th_setup_info;
+/*@}*/
+
+
+
+/**\defgroup decfuncs Functions for Decoding*/
+/*@{*/
+/**\name Functions for decoding
+ * You must link to <tt>libtheoradec</tt> if you use any of the 
+ * functions in this section.
+ *
+ * The functions are listed in the order they are used in a typical decode.
+ * The basic steps are:
+ * - Parse the header packets by repeatedly calling th_decode_headerin().
+ * - Allocate a #th_dec_ctx handle with th_decode_alloc().
+ * - Call th_setup_free() to free any memory used for codec setup
+ *    information.
+ * - Perform any additional decoder configuration with th_decode_ctl().
+ * - For each video data packet:
+ *   - Submit the packet to the decoder via th_decode_packetin().
+ *   - Retrieve the uncompressed video data via th_decode_ycbcr_out().
+ * - Call th_decode_free() to release all decoder memory.*/
+/*@{*/
+/**Decodes the header packets of a Theora stream.
+ * This should be called on the initial packets of the stream, in succession,
+ *  until it returns <tt>0</tt>, indicating that all headers have been
+ *  processed, or an error is encountered.
+ * At least three header packets are required, and additional optional header
+ *  packets may follow.
+ * This can be used on the first packet of any logical stream to determine if
+ *  that stream is a Theora stream.
+ * \param _info  A #th_info structure to fill in.
+ *               This must have been previously initialized with
+ *                th_info_init().
+ *               The application may immediately begin using the contents of
+ *                this structure after the first header is decoded, though it
+ *                must continue to be passed in on all subsequent calls.
+ * \param _tc    A #th_comment structure to fill in.
+ *               The application may immediately begin using the contents of
+ *                this structure after the second header is decoded, though it
+ *                must continue to be passed in on all subsequent calls.
+ * \param _setup Returns a pointer to additional, private setup information
+ *                needed by the decoder.
+ *               The contents of this pointer must be initialized to
+ *                <tt>NULL</tt> on the first call, and the returned value must
+ *                continue to be passed in on all subsequent calls.
+ * \param _op    An <tt>ogg_packet</tt> structure which contains one of the
+ *                initial packets of an Ogg logical stream.
+ * \return A positive value indicates that a Theora header was successfully
+ *          processed.
+ * \retval 0             The first video data packet was encountered after all
+ *                        required header packets were parsed.
+ *                       The packet just passed in on this call should be saved
+ *                        and fed to th_decode_packetin() to begin decoding
+ *                        video data.
+ * \retval TH_EFAULT     One of \a _info, \a _tc, or \a _setup was
+ *                        <tt>NULL</tt>.
+ * \retval TH_EBADHEADER \a _op was <tt>NULL</tt>, the packet was not the next
+ *                        header packet in the expected sequence, or the format
+ *                        of the header data was invalid.
+ * \retval TH_EVERSION   The packet data was a Theora info header, but for a
+ *                        bitstream version not decodable with this version of
+ *                        <tt>libtheoradec</tt>.
+ * \retval TH_ENOTFORMAT The packet was not a Theora header.
+ */
+extern int th_decode_headerin(th_info *_info,th_comment *_tc,
+ th_setup_info **_setup,ogg_packet *_op);
+/**Allocates a decoder instance.
+ *
+ * <b>Security Warning:</b> The Theora format supports very large frame sizes,
+ *  potentially even larger than the address space of a 32-bit machine, and
+ *  creating a decoder context allocates the space for several frames of data.
+ * If the allocation fails here, your program will crash, possibly at some
+ *  future point because the OS kernel returned a valid memory range and will
+ *  only fail when it tries to map the pages in it the first time they are
+ *  used.
+ * Even if it succeeds, you may experience a denial of service if the frame
+ *  size is large enough to cause excessive paging.
+ * If you are integrating libtheora in a larger application where such things
+ *  are undesirable, it is highly recommended that you check the frame size in
+ *  \a _info before calling this function and refuse to decode streams where it
+ *  is larger than some reasonable maximum.
+ * libtheora will not check this for you, because there may be machines that
+ *  can handle such streams and applications that wish to.
+ * \param _info  A #th_info struct filled via th_decode_headerin().
+ * \param _setup A #th_setup_info handle returned via
+ *                th_decode_headerin().
+ * \return The initialized #th_dec_ctx handle.
+ * \retval NULL If the decoding parameters were invalid.*/
+extern th_dec_ctx *th_decode_alloc(const th_info *_info,
+ const th_setup_info *_setup);
+/**Releases all storage used for the decoder setup information.
+ * This should be called after you no longer want to create any decoders for
+ *  a stream whose headers you have parsed with th_decode_headerin().
+ * \param _setup The setup information to free.
+ *               This can safely be <tt>NULL</tt>.*/
+extern void th_setup_free(th_setup_info *_setup);
+/**Decoder control function.
+ * This is used to provide advanced control of the decoding process.
+ * \param _dec    A #th_dec_ctx handle.
+ * \param _req    The control code to process.
+ *                See \ref decctlcodes "the list of available control codes"
+ *                 for details.
+ * \param _buf    The parameters for this control code.
+ * \param _buf_sz The size of the parameter buffer.*/
+extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+ size_t _buf_sz);
+/**Submits a packet containing encoded video data to the decoder.
+ * \param _dec     A #th_dec_ctx handle.
+ * \param _op      An <tt>ogg_packet</tt> containing encoded video data.
+ * \param _granpos Returns the granule position of the decoded packet.
+ *                 If non-<tt>NULL</tt>, the granule position for this specific
+ *                  packet is stored in this location.
+ *                 This is computed incrementally from previously decoded
+ *                  packets.
+ *                 After a seek, the correct granule position must be set via
+ *                  #TH_DECCTL_SET_GRANPOS for this to work properly.
+ * \retval 0             Success.
+ *                       A new decoded frame can be retrieved by calling
+ *                        th_decode_ycbcr_out().
+ * \retval TH_DUPFRAME   The packet represented a dropped (0-byte) frame.
+ *                       The player can skip the call to th_decode_ycbcr_out(),
+ *                        as the contents of the decoded frame buffer have not
+ *                        changed.
+ * \retval TH_EFAULT     \a _dec or \a _op was <tt>NULL</tt>.
+ * \retval TH_EBADPACKET \a _op does not contain encoded video data.
+ * \retval TH_EIMPL      The video data uses bitstream features which this
+ *                        library does not support.*/
+extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos);
+/**Outputs the next available frame of decoded Y'CbCr data.
+ * If a striped decode callback has been set with #TH_DECCTL_SET_STRIPE_CB,
+ *  then the application does not need to call this function.
+ * \param _dec   A #th_dec_ctx handle.
+ * \param _ycbcr A video buffer structure to fill in.
+ *               <tt>libtheoradec</tt> will fill in all the members of this
+ *                structure, including the pointers to the uncompressed video
+ *                data.
+ *               The memory for this video data is owned by
+ *                <tt>libtheoradec</tt>.
+ *               It may be freed or overwritten without notification when
+ *                subsequent frames are decoded.
+ * \retval 0 Success
+ * \retval TH_EFAULT     \a _dec or \a _ycbcr was <tt>NULL</tt>.
+ */
+extern int th_decode_ycbcr_out(th_dec_ctx *_dec,
+ th_ycbcr_buffer _ycbcr);
+/**Frees an allocated decoder instance.
+ * \param _dec A #th_dec_ctx handle.*/
+extern void th_decode_free(th_dec_ctx *_dec);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/include/theora/theoraenc.h b/engine/code/libtheora-1.1.1/include/theora/theoraenc.h
new file mode 100644
index 00000000..fdf2ab21
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/include/theora/theoraenc.h
@@ -0,0 +1,486 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\file
+ * The <tt>libtheoraenc</tt> C encoding API.*/
+
+#if !defined(_O_THEORA_THEORAENC_H_)
+# define _O_THEORA_THEORAENC_H_ (1)
+# include <stddef.h>
+# include <ogg/ogg.h>
+# include "codec.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name th_encode_ctl() codes
+ * \anchor encctlcodes
+ * These are the available request codes for th_encode_ctl().
+ * By convention, these are even, to distinguish them from the
+ *  \ref decctlcodes "decoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+/*@{*/
+/**Sets the Huffman tables to use.
+ * The tables are copied, not stored by reference, so they can be freed after
+ *  this call.
+ * <tt>NULL</tt> may be specified to revert to the default tables.
+ *
+ * \param[in] _buf <tt>#th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS]</tt>
+ * \retval TH_EFAULT \a _enc_ctx is <tt>NULL</tt>.
+ * \retval TH_EINVAL Encoding has already begun or one or more of the given
+ *                     tables is not full or prefix-free, \a _buf is
+ *                     <tt>NULL</tt> and \a _buf_sz is not zero, or \a _buf is
+ *                     non-<tt>NULL</tt> and \a _buf_sz is not
+ *                     <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_HUFFMAN_CODES (0)
+/**Sets the quantization parameters to use.
+ * The parameters are copied, not stored by reference, so they can be freed
+ *  after this call.
+ * <tt>NULL</tt> may be specified to revert to the default parameters.
+ *
+ * \param[in] _buf #th_quant_info
+ * \retval TH_EFAULT \a _enc_ctx is <tt>NULL</tt>.
+ * \retval TH_EINVAL Encoding has already begun, \a _buf is 
+ *                    <tt>NULL</tt> and \a _buf_sz is not zero,
+ *                    or \a _buf is non-<tt>NULL</tt> and
+ *                    \a _buf_sz is not <tt>sizeof(#th_quant_info)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUANT_PARAMS (2)
+/**Sets the maximum distance between key frames.
+ * This can be changed during an encode, but will be bounded by
+ *  <tt>1<<th_info#keyframe_granule_shift</tt>.
+ * If it is set before encoding begins, th_info#keyframe_granule_shift will
+ *  be enlarged appropriately.
+ *
+ * \param[in]  _buf <tt>ogg_uint32_t</tt>: The maximum distance between key
+ *                   frames.
+ * \param[out] _buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
+/**Disables any encoder features that would prevent lossless transcoding back
+ *  to VP3.
+ * This primarily means disabling block-adaptive quantization and always coding
+ *  all four luma blocks in a macro block when 4MV is used.
+ * It also includes using the VP3 quantization tables and Huffman codes; if you
+ *  set them explicitly after calling this function, the resulting stream will
+ *  not be VP3-compatible.
+ * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
+ *  material, or when using a picture region smaller than the full frame (e.g.
+ *  a non-multiple-of-16 width or height), then non-VP3 bitstream features will
+ *  still be disabled, but the stream will still not be VP3-compatible, as VP3
+ *  was not capable of encoding such formats.
+ * If you call this after encoding has already begun, then the quantization
+ *  tables and codebooks cannot be changed, but the frame-level features will
+ *  be enabled or disabled as requested.
+ *
+ * \param[in]  _buf <tt>int</tt>: a non-zero value to enable VP3 compatibility,
+ *                   or 0 to disable it (the default).
+ * \param[out] _buf <tt>int</tt>: 1 if all bitstream features required for
+ *                   VP3-compatibility could be set, and 0 otherwise.
+ *                  The latter will be returned if the pixel format is not
+ *                   4:2:0, the picture region is smaller than the full frame,
+ *                   or if encoding has begun, preventing the quantization
+ *                   tables and codebooks from being set.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
+/**Gets the maximum speed level.
+ * Higher speed levels favor quicker encoding over better quality per bit.
+ * Depending on the encoding mode, and the internal algorithms used, quality
+ *  may actually improve, but in this case bitrate will also likely increase.
+ * In any case, overall rate/distortion performance will probably decrease.
+ * The maximum value, and the meaning of each value, may change depending on
+ *  the current encoding mode (VBR vs. constant quality, etc.).
+ *
+ * \param[out] _buf <tt>int</tt>: The maximum encoding speed level.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
+/**Sets the speed level.
+ * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL.
+ *
+ * \param[in] _buf <tt>int</tt>: The new encoding speed level.
+ *                 0 is slowest, larger values use less CPU.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    encoding speed level is out of bounds.
+ *                   The maximum encoding speed level may be
+ *                    implementation- and encoding mode-specific, and can be
+ *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_SPLEVEL (14)
+/**Gets the current speed level.
+ * The default speed level may vary according to encoder implementation, but if
+ *  this control code is not supported (it returns #TH_EIMPL), the default may
+ *  be assumed to be the slowest available speed (0).
+ * The maximum encoding speed level may be implementation- and encoding
+ *  mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ *
+ * \param[out] _buf <tt>int</tt>: The current encoding speed level.
+ *                  0 is slowest, larger values use less CPU.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL (16)
+/**Sets the number of duplicates of the next frame to produce.
+ * Although libtheora can encode duplicate frames very cheaply, it costs some
+ *  amount of CPU to detect them, and a run of duplicates cannot span a
+ *  keyframe boundary.
+ * This control code tells the encoder to produce the specified number of extra
+ *  duplicates of the next frame.
+ * This allows the encoder to make smarter keyframe placement decisions and
+ *  rate control decisions, and reduces CPU usage as well, when compared to
+ *  just submitting the same frame for encoding multiple times.
+ * This setting only applies to the next frame submitted for encoding.
+ * You MUST call th_encode_packetout() repeatedly until it returns 0, or the
+ *  extra duplicate frames will be lost.
+ *
+ * \param[in] _buf <tt>int</tt>: The number of duplicates to produce.
+ *                 If this is negative or zero, no duplicates will be produced.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    number of duplicates is greater than or equal to the
+ *                    maximum keyframe interval.
+ *                   In the latter case, NO duplicate frames will be produced.
+ *                   You must ensure that the maximum keyframe interval is set
+ *                    larger than the maximum number of duplicates you will
+ *                    ever wish to insert prior to encoding.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_DUP_COUNT (18)
+/**Modifies the default bitrate management behavior.
+ * Use to allow or disallow frame dropping, and to enable or disable capping
+ *  bit reservoir overflows and underflows.
+ * See \ref encctlcodes "the list of available flags".
+ * The flags are set by default to
+ *  <tt>#TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW</tt>.
+ *
+ * \param[in] _buf <tt>int</tt>: Any combination of
+ *                  \ref ratectlflags "the available flags":
+ *                 - #TH_RATECTL_DROP_FRAMES: Enable frame dropping.
+ *                 - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later
+ *                    use.
+ *                 - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls
+ *                    later.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt> or rate control
+ *                    is not enabled.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_RATE_FLAGS (20)
+/**Sets the size of the bitrate management bit reservoir as a function
+ *  of number of frames.
+ * The reservoir size affects how quickly bitrate management reacts to
+ *  instantaneous changes in the video complexity.
+ * Larger reservoirs react more slowly, and provide better overall quality, but
+ *  require more buffering by a client, adding more latency to live streams.
+ * By default, libtheora sets the reservoir to the maximum distance between
+ *  keyframes, subject to a minimum and maximum limit.
+ * This call may be used to increase or decrease the reservoir, increasing or
+ *  decreasing the allowed temporary variance in bitrate.
+ * An implementation may impose some limits on the size of a reservoir it can
+ *  handle, in which case the actual reservoir size may not be exactly what was
+ *  requested.
+ * The actual value set will be returned.
+ *
+ * \param[in]  _buf <tt>int</tt>: Requested size of the reservoir measured in
+ *                   frames.
+ * \param[out] _buf <tt>int</tt>: The actual size of the reservoir set.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or rate control
+ *                    is not enabled.  The buffer has an implementation
+ *                    defined minimum and maximum size and the value in _buf
+ *                    will be adjusted to match the actual value set.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_RATE_BUFFER (22)
+/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics.
+ * Pass 1 mode must be enabled before the first frame is encoded, and a target
+ *  bitrate must have already been specified to the encoder.
+ * Although this does not have to be the exact rate that will be used in the
+ *  second pass, closer values may produce better results.
+ * The first call returns the size of the two-pass header data, along with some
+ *  placeholder content, and sets the encoder into pass 1 mode implicitly.
+ * This call sets the encoder to pass 1 mode implicitly.
+ * Then, a subsequent call must be made after each call to
+ *  th_encode_ycbcr_in() to retrieve the metrics for that frame.
+ * An additional, final call must be made to retrieve the summary data,
+ *  containing such information as the total number of frames, etc.
+ * This must be stored in place of the placeholder data that was returned
+ *  in the first call, before the frame metrics data.
+ * All of this data must be presented back to the encoder during pass 2 using
+ *  #TH_ENCCTL_2PASS_IN.
+ *
+ * \param[out] <tt>char *</tt>_buf: Returns a pointer to internal storage
+ *              containing the two pass metrics data.
+ *             This storage is only valid until the next call, or until the
+ *              encoder context is freed, and must be copied by the
+ *              application.
+ * \retval >=0       The number of bytes of metric data available in the
+ *                    returned buffer.
+ * \retval TH_EFAULT \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(char *)</tt>, no target
+ *                    bitrate has been set, or the first call was made after
+ *                    the first frame was submitted for encoding.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_OUT (24)
+/**Submits two-pass encoding metric data collected the first encoding pass to
+ *  the second pass.
+ * The first call must be made before the first frame is encoded, and a target
+ *  bitrate must have already been specified to the encoder.
+ * It sets the encoder to pass 2 mode implicitly; this cannot be disabled.
+ * The encoder may require reading data from some or all of the frames in
+ *  advance, depending on, e.g., the reservoir size used in the second pass.
+ * You must call this function repeatedly before each frame to provide data
+ *  until either a) it fails to consume all of the data presented or b) all of
+ *  the pass 1 data has been consumed.
+ * In the first case, you must save the remaining data to be presented after
+ *  the next frame.
+ * You can call this function with a NULL argument to get an upper bound on
+ *  the number of bytes that will be required before the next frame.
+ *
+ * When pass 2 is first enabled, the default bit reservoir is set to the entire
+ *  file; this gives maximum flexibility but can lead to very high peak rates.
+ * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER
+ *  (e.g., to set it to the keyframe interval for non-live streaming), however,
+ *  you may then need to provide more data before the next frame.
+ *
+ * \param[in] _buf <tt>char[]</tt>: A buffer containing the data returned by
+ *                  #TH_ENCCTL_2PASS_OUT in pass 1.
+ *                 You may pass <tt>NULL</tt> for \a _buf to return an upper
+ *                  bound on the number of additional bytes needed before the
+ *                  next frame.
+ *                 The summary data returned at the end of pass 1 must be at
+ *                  the head of the buffer on the first call with a
+ *                  non-<tt>NULL</tt> \a _buf, and the placeholder data
+ *                  returned at the start of pass 1 should be omitted.
+ *                 After each call you should advance this buffer by the number
+ *                  of bytes consumed.
+ * \retval >0            The number of bytes of metric data required/consumed.
+ * \retval 0             No more data is required before the next frame.
+ * \retval TH_EFAULT     \a _enc_ctx is <tt>NULL</tt>.
+ * \retval TH_EINVAL     No target bitrate has been set, or the first call was
+ *                        made after the first frame was submitted for
+ *                        encoding.
+ * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible
+ *                        implementation of this library.
+ * \retval TH_EBADHEADER The data was invalid; this may be returned when
+ *                        attempting to read an aborted pass 1 file that still
+ *                        has the placeholder data in place of the summary
+ *                        data.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_IN (26)
+/**Sets the current encoding quality.
+ * This is only valid so long as no bitrate has been specified, either through
+ *  the #th_info struct used to initialize the encoder or through
+ *  #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future
+ *  version).
+ * If it is set before the headers are emitted, the target quality encoded in
+ *  them will be updated.
+ *
+ * \param[in] _buf <tt>int</tt>: The new target quality, in the range 0...63,
+ *                  inclusive.
+ * \retval 0             Success.
+ * \retval TH_EFAULT     \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL     A target bitrate has already been specified, or the
+ *                        quality index was not in the range 0...63.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUALITY (28)
+/**Sets the current encoding bitrate.
+ * Once a bitrate is set, the encoder must use a rate-controlled mode for all
+ *  future frames (this restriction may be relaxed in a future version).
+ * If it is set before the headers are emitted, the target bitrate encoded in
+ *  them will be updated.
+ * Due to the buffer delay, the exact bitrate of each section of the encode is
+ *  not guaranteed.
+ * The encoder may have already used more bits than allowed for the frames it
+ *  has encoded, expecting to make them up in future frames, or it may have
+ *  used fewer, holding the excess in reserve.
+ * The exact transition between the two bitrates is not well-defined by this
+ *  API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS.
+ * After a number of frames equal to the buffer delay, one may expect further
+ *  output to average at the target bitrate.
+ *
+ * \param[in] _buf <tt>long</tt>: The new target bitrate, in bits per second.
+ * \retval 0             Success.
+ * \retval TH_EFAULT     \a _enc_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL     The target bitrate was not positive.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_SET_BITRATE (30)
+
+/*@}*/
+
+
+/**\name TH_ENCCTL_SET_RATE_FLAGS flags
+ * \anchor ratectlflags
+ * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/
+/*@{*/
+/**Drop frames to keep within bitrate buffer constraints.
+ * This can have a severe impact on quality, but is the only way to ensure that
+ *  bitrate targets are met at low rates during sudden bursts of activity.*/
+#define TH_RATECTL_DROP_FRAMES   (0x1)
+/**Ignore bitrate buffer overflows.
+ * If the encoder uses so few bits that the reservoir of available bits
+ *  overflows, ignore the excess.
+ * The encoder will not try to use these extra bits in future frames.
+ * At high rates this may cause the result to be undersized, but allows a
+ *  client to play the stream using a finite buffer; it should normally be
+ *  enabled.*/
+#define TH_RATECTL_CAP_OVERFLOW  (0x2)
+/**Ignore bitrate buffer underflows.
+ * If the encoder uses so many bits that the reservoir of available bits
+ *  underflows, ignore the deficit.
+ * The encoder will not try to make up these extra bits in future frames.
+ * At low rates this may cause the result to be oversized; it should normally
+ *  be disabled.*/
+#define TH_RATECTL_CAP_UNDERFLOW (0x4)
+/*@}*/
+
+
+
+/**The quantization parameters used by VP3.*/
+extern const th_quant_info TH_VP31_QUANT_INFO;
+
+/**The Huffman tables used by VP3.*/
+extern const th_huff_code
+ TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+
+
+
+/**\name Encoder state
+   The following data structure is opaque, and its contents are not publicly
+    defined by this API.
+   Referring to its internals directly is unsupported, and may break without
+    warning.*/
+/*@{*/
+/**The encoder context.*/
+typedef struct th_enc_ctx    th_enc_ctx;
+/*@}*/
+
+
+
+/**\defgroup encfuncs Functions for Encoding*/
+/*@{*/
+/**\name Functions for encoding
+ * You must link to <tt>libtheoraenc</tt> and <tt>libtheoradec</tt>
+ *  if you use any of the functions in this section.
+ *
+ * The functions are listed in the order they are used in a typical encode.
+ * The basic steps are:
+ * - Fill in a #th_info structure with details on the format of the video you
+ *    wish to encode.
+ * - Allocate a #th_enc_ctx handle with th_encode_alloc().
+ * - Perform any additional encoder configuration required with
+ *    th_encode_ctl().
+ * - Repeatedly call th_encode_flushheader() to retrieve all the header
+ *    packets.
+ * - For each uncompressed frame:
+ *   - Submit the uncompressed frame via th_encode_ycbcr_in()
+ *   - Repeatedly call th_encode_packetout() to retrieve any video data packets
+ *      that are ready.
+ * - Call th_encode_free() to release all encoder memory.*/
+/*@{*/
+/**Allocates an encoder instance.
+ * \param _info A #th_info struct filled with the desired encoding parameters.
+ * \return The initialized #th_enc_ctx handle.
+ * \retval NULL If the encoding parameters were invalid.*/
+extern th_enc_ctx *th_encode_alloc(const th_info *_info);
+/**Encoder control function.
+ * This is used to provide advanced control the encoding process.
+ * \param _enc    A #th_enc_ctx handle.
+ * \param _req    The control code to process.
+ *                See \ref encctlcodes "the list of available control codes"
+ *                 for details.
+ * \param _buf    The parameters for this control code.
+ * \param _buf_sz The size of the parameter buffer.*/
+extern int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz);
+/**Outputs the next header packet.
+ * This should be called repeatedly after encoder initialization until it
+ *  returns 0 in order to get all of the header packets, in order, before
+ *  encoding actual video data.
+ * \param _enc      A #th_enc_ctx handle.
+ * \param _comments The metadata to place in the comment header, when it is
+ *                   encoded.
+ * \param _op       An <tt>ogg_packet</tt> structure to fill.
+ *                  All of the elements of this structure will be set,
+ *                   including a pointer to the header data.
+ *                  The memory for the header data is owned by
+ *                   <tt>libtheoraenc</tt>, and may be invalidated when the
+ *                   next encoder function is called.
+ * \return A positive value indicates that a header packet was successfully
+ *          produced.
+ * \retval 0         No packet was produced, and no more header packets remain.
+ * \retval TH_EFAULT \a _enc, \a _comments, or \a _op was <tt>NULL</tt>.*/
+extern int th_encode_flushheader(th_enc_ctx *_enc,
+ th_comment *_comments,ogg_packet *_op);
+/**Submits an uncompressed frame to the encoder.
+ * \param _enc   A #th_enc_ctx handle.
+ * \param _ycbcr A buffer of Y'CbCr data to encode.
+ * \retval 0         Success.
+ * \retval TH_EFAULT \a _enc or \a _ycbcr is <tt>NULL</tt>.
+ * \retval TH_EINVAL The buffer size does not match the frame size the encoder
+ *                    was initialized with, or encoding has already
+ *                    completed.*/
+extern int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr);
+/**Retrieves encoded video data packets.
+ * This should be called repeatedly after each frame is submitted to flush any
+ *  encoded packets, until it returns 0.
+ * The encoder will not buffer these packets as subsequent frames are
+ *  compressed, so a failure to do so will result in lost video data.
+ * \note Currently the encoder operates in a one-frame-in, one-packet-out
+ *        manner.
+ *       However, this may be changed in the future.
+ * \param _enc  A #th_enc_ctx handle.
+ * \param _last Set this flag to a non-zero value if no more uncompressed
+ *               frames will be submitted.
+ *              This ensures that a proper EOS flag is set on the last packet.
+ * \param _op   An <tt>ogg_packet</tt> structure to fill.
+ *              All of the elements of this structure will be set, including a
+ *               pointer to the video data.
+ *              The memory for the video data is owned by
+ *               <tt>libtheoraenc</tt>, and may be invalidated when the next
+ *               encoder function is called.
+ * \return A positive value indicates that a video data packet was successfully
+ *          produced.
+ * \retval 0         No packet was produced, and no more encoded video data
+ *                    remains.
+ * \retval TH_EFAULT \a _enc or \a _op was <tt>NULL</tt>.*/
+extern int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op);
+/**Frees an allocated encoder instance.
+ * \param _enc A #th_enc_ctx handle.*/
+extern void th_encode_free(th_enc_ctx *_enc);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/apiwrapper.c b/engine/code/libtheora-1.1.1/lib/apiwrapper.c
new file mode 100644
index 00000000..dc959b8d
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/apiwrapper.c
@@ -0,0 +1,166 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: apiwrapper.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+
+
+
+const char *theora_version_string(void){
+  return th_version_string();
+}
+
+ogg_uint32_t theora_version_number(void){
+  return th_version_number();
+}
+
+void theora_info_init(theora_info *_ci){
+  memset(_ci,0,sizeof(*_ci));
+}
+
+void theora_info_clear(theora_info *_ci){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_ci->codec_setup;
+  memset(_ci,0,sizeof(*_ci));
+  if(api!=NULL){
+    if(api->clear!=NULL)(*api->clear)(api);
+    _ogg_free(api);
+  }
+}
+
+void theora_clear(theora_state *_th){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th);
+  }
+  if(_th->internal_encode!=NULL){
+    (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th);
+  }
+  if(_th->i!=NULL)theora_info_clear(_th->i);
+  memset(_th,0,sizeof(*_th));
+}
+
+int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th,
+     _req,_buf,_buf_sz);
+  }
+  else if(_th->internal_encode!=NULL){
+    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th,
+     _req,_buf,_buf_sz);
+  }
+  else return TH_EINVAL;
+}
+
+ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_frame)(
+     _th,_gp);
+  }
+  else if(_th->internal_encode!=NULL){
+    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)(
+     _th,_gp);
+  }
+  else return -1;
+}
+
+double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)(
+     _th,_gp);
+  }
+  else if(_th->internal_encode!=NULL){
+    return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)(
+     _th,_gp);
+  }
+  else return -1;
+}
+
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){
+  _info->version_major=_ci->version_major;
+  _info->version_minor=_ci->version_minor;
+  _info->version_subminor=_ci->version_subminor;
+  _info->frame_width=_ci->width;
+  _info->frame_height=_ci->height;
+  _info->pic_width=_ci->frame_width;
+  _info->pic_height=_ci->frame_height;
+  _info->pic_x=_ci->offset_x;
+  _info->pic_y=_ci->offset_y;
+  _info->fps_numerator=_ci->fps_numerator;
+  _info->fps_denominator=_ci->fps_denominator;
+  _info->aspect_numerator=_ci->aspect_numerator;
+  _info->aspect_denominator=_ci->aspect_denominator;
+  switch(_ci->colorspace){
+    case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break;
+    case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break;
+    default:_info->colorspace=TH_CS_UNSPECIFIED;break;
+  }
+  switch(_ci->pixelformat){
+    case OC_PF_420:_info->pixel_fmt=TH_PF_420;break;
+    case OC_PF_422:_info->pixel_fmt=TH_PF_422;break;
+    case OC_PF_444:_info->pixel_fmt=TH_PF_444;break;
+    default:_info->pixel_fmt=TH_PF_RSVD;
+  }
+  _info->target_bitrate=_ci->target_bitrate;
+  _info->quality=_ci->quality;
+  _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0?
+   OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0;
+}
+
+int theora_packet_isheader(ogg_packet *_op){
+  return th_packet_isheader(_op);
+}
+
+int theora_packet_iskeyframe(ogg_packet *_op){
+  return th_packet_iskeyframe(_op);
+}
+
+int theora_granule_shift(theora_info *_ci){
+  /*This breaks when keyframe_frequency_force is not positive or is larger than
+     2**31 (if your int is more than 32 bits), but that's what the original
+     function does.*/
+  return oc_ilog(_ci->keyframe_frequency_force-1);
+}
+
+void theora_comment_init(theora_comment *_tc){
+  th_comment_init((th_comment *)_tc);
+}
+
+char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){
+  return th_comment_query((th_comment *)_tc,_tag,_count);
+}
+
+int theora_comment_query_count(theora_comment *_tc,char *_tag){
+  return th_comment_query_count((th_comment *)_tc,_tag);
+}
+
+void theora_comment_clear(theora_comment *_tc){
+  th_comment_clear((th_comment *)_tc);
+}
+
+void theora_comment_add(theora_comment *_tc,char *_comment){
+  th_comment_add((th_comment *)_tc,_comment);
+}
+
+void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){
+  th_comment_add_tag((th_comment *)_tc,_tag,_value);
+}
diff --git a/engine/code/libtheora-1.1.1/lib/apiwrapper.h b/engine/code/libtheora-1.1.1/lib/apiwrapper.h
new file mode 100644
index 00000000..93454d7b
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/apiwrapper.h
@@ -0,0 +1,54 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_apiwrapper_H)
+# define _apiwrapper_H (1)
+# include <ogg/ogg.h>
+# include <theora/theora.h>
+# include "theora/theoradec.h"
+# include "theora/theoraenc.h"
+# include "internal.h"
+
+typedef struct th_api_wrapper th_api_wrapper;
+typedef struct th_api_info    th_api_info;
+
+/*Provide an entry point for the codec setup to clear itself in case we ever
+   want to break pieces off into a common base library shared by encoder and
+   decoder.
+  In addition, this makes several other pieces of the API wrapper cleaner.*/
+typedef void (*oc_setup_clear_func)(void *_ts);
+
+/*Generally only one of these pointers will be non-NULL in any given instance.
+  Technically we do not even really need this struct, since we should be able
+   to figure out which one from "context", but doing it this way makes sure we
+   don't flub it up.*/
+struct th_api_wrapper{
+  oc_setup_clear_func  clear;
+  th_setup_info       *setup;
+  th_dec_ctx          *decode;
+  th_enc_ctx          *encode;
+};
+
+struct th_api_info{
+  th_api_wrapper api;
+  theora_info    info;
+};
+
+
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci);
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/bitpack.c b/engine/code/libtheora-1.1.1/lib/bitpack.c
new file mode 100644
index 00000000..8195003b
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/bitpack.c
@@ -0,0 +1,111 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <string.h>
+#include <stdlib.h>
+#include "bitpack.h"
+
+/*We're 'MSb' endian; if we write a word but read individual bits,
+   then we'll read the MSb first.*/
+
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){
+  memset(_b,0,sizeof(*_b));
+  _b->ptr=_buf;
+  _b->stop=_buf+_bytes;
+}
+
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  oc_pb_window         window;
+  int                  available;
+  window=_b->window;
+  available=_b->bits;
+  ptr=_b->ptr;
+  stop=_b->stop;
+  while(available<=OC_PB_WINDOW_SIZE-8&&ptr<stop){
+    available+=8;
+    window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+  }
+  _b->ptr=ptr;
+  if(_bits>available){
+    if(ptr>=stop){
+      _b->eof=1;
+      available=OC_LOTS_OF_BITS;
+    }
+    else window|=*ptr>>(available&7);
+  }
+  _b->bits=available;
+  return window;
+}
+
+int oc_pack_look1(oc_pack_buf *_b){
+  oc_pb_window window;
+  int          available;
+  window=_b->window;
+  available=_b->bits;
+  if(available<1)_b->window=window=oc_pack_refill(_b,1);
+  return window>>OC_PB_WINDOW_SIZE-1;
+}
+
+void oc_pack_adv1(oc_pack_buf *_b){
+  _b->window<<=1;
+  _b->bits--;
+}
+
+/*Here we assume that 0<=_bits&&_bits<=32.*/
+long oc_pack_read(oc_pack_buf *_b,int _bits){
+  oc_pb_window window;
+  int          available;
+  long         result;
+  window=_b->window;
+  available=_b->bits;
+  if(_bits==0)return 0;
+  if(available<_bits){
+    window=oc_pack_refill(_b,_bits);
+    available=_b->bits;
+  }
+  result=window>>OC_PB_WINDOW_SIZE-_bits;
+  available-=_bits;
+  window<<=1;
+  window<<=_bits-1;
+  _b->bits=available;
+  _b->window=window;
+  return result;
+}
+
+int oc_pack_read1(oc_pack_buf *_b){
+  oc_pb_window window;
+  int          available;
+  int          result;
+  window=_b->window;
+  available=_b->bits;
+  if(available<1){
+    window=oc_pack_refill(_b,1);
+    available=_b->bits;
+  }
+  result=window>>OC_PB_WINDOW_SIZE-1;
+  available--;
+  window<<=1;
+  _b->bits=available;
+  _b->window=window;
+  return result;
+}
+
+long oc_pack_bytes_left(oc_pack_buf *_b){
+  if(_b->eof)return -1;
+  return _b->stop-_b->ptr+(_b->bits>>3);
+}
diff --git a/engine/code/libtheora-1.1.1/lib/bitpack.h b/engine/code/libtheora-1.1.1/lib/bitpack.h
new file mode 100644
index 00000000..a020a292
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/bitpack.h
@@ -0,0 +1,59 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_bitpack_H)
+# define _bitpack_H (1)
+# include <limits.h>
+
+
+
+typedef unsigned long      oc_pb_window;
+typedef struct oc_pack_buf oc_pack_buf;
+
+
+
+# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+# define OC_LOTS_OF_BITS (0x40000000)
+
+
+
+struct oc_pack_buf{
+  oc_pb_window         window;
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  int                  bits;
+  int                  eof;
+};
+
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes);
+int oc_pack_look1(oc_pack_buf *_b);
+void oc_pack_adv1(oc_pack_buf *_b);
+/*Here we assume 0<=_bits&&_bits<=32.*/
+long oc_pack_read(oc_pack_buf *_b,int _bits);
+int oc_pack_read1(oc_pack_buf *_b);
+/* returns -1 for read beyond EOF, or the number of whole bytes available */
+long oc_pack_bytes_left(oc_pack_buf *_b);
+
+/*These two functions are implemented locally in huffdec.c*/
+/*Read in bits without advancing the bitptr.
+  Here we assume 0<=_bits&&_bits<=32.*/
+/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
+/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/dct.h b/engine/code/libtheora-1.1.1/lib/dct.h
new file mode 100644
index 00000000..24ba6f11
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/dct.h
@@ -0,0 +1,31 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+/*Definitions shared by the forward and inverse DCT transforms.*/
+#if !defined(_dct_H)
+# define _dct_H (1)
+
+/*cos(n*pi/16) (resp. sin(m*pi/16)) scaled by 65536.*/
+#define OC_C1S7 ((ogg_int32_t)64277)
+#define OC_C2S6 ((ogg_int32_t)60547)
+#define OC_C3S5 ((ogg_int32_t)54491)
+#define OC_C4S4 ((ogg_int32_t)46341)
+#define OC_C5S3 ((ogg_int32_t)36410)
+#define OC_C6S2 ((ogg_int32_t)25080)
+#define OC_C7S1 ((ogg_int32_t)12785)
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/decapiwrapper.c b/engine/code/libtheora-1.1.1/lib/decapiwrapper.c
new file mode 100644
index 00000000..12ea475d
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/decapiwrapper.c
@@ -0,0 +1,193 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "decint.h"
+#include "theora/theoradec.h"
+
+static void th_dec_api_clear(th_api_wrapper *_api){
+  if(_api->setup)th_setup_free(_api->setup);
+  if(_api->decode)th_decode_free(_api->decode);
+  memset(_api,0,sizeof(*_api));
+}
+
+static void theora_decode_clear(theora_state *_td){
+  if(_td->i!=NULL)theora_info_clear(_td->i);
+  memset(_td,0,sizeof(*_td));
+}
+
+static int theora_decode_control(theora_state *_td,int _req,
+ void *_buf,size_t _buf_sz){
+  return th_decode_ctl(((th_api_wrapper *)_td->i->codec_setup)->decode,
+   _req,_buf,_buf_sz);
+}
+
+static ogg_int64_t theora_decode_granule_frame(theora_state *_td,
+ ogg_int64_t _gp){
+  return th_granule_frame(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
+}
+
+static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
+  return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
+}
+
+static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={
+  (oc_state_clear_func)theora_decode_clear,
+  (oc_state_control_func)theora_decode_control,
+  (oc_state_granule_frame_func)theora_decode_granule_frame,
+  (oc_state_granule_time_func)theora_decode_granule_time,
+};
+
+static void th_info2theora_info(theora_info *_ci,const th_info *_info){
+  _ci->version_major=_info->version_major;
+  _ci->version_minor=_info->version_minor;
+  _ci->version_subminor=_info->version_subminor;
+  _ci->width=_info->frame_width;
+  _ci->height=_info->frame_height;
+  _ci->frame_width=_info->pic_width;
+  _ci->frame_height=_info->pic_height;
+  _ci->offset_x=_info->pic_x;
+  _ci->offset_y=_info->pic_y;
+  _ci->fps_numerator=_info->fps_numerator;
+  _ci->fps_denominator=_info->fps_denominator;
+  _ci->aspect_numerator=_info->aspect_numerator;
+  _ci->aspect_denominator=_info->aspect_denominator;
+  switch(_info->colorspace){
+    case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break;
+    case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break;
+    default:_ci->colorspace=OC_CS_UNSPECIFIED;break;
+  }
+  switch(_info->pixel_fmt){
+    case TH_PF_420:_ci->pixelformat=OC_PF_420;break;
+    case TH_PF_422:_ci->pixelformat=OC_PF_422;break;
+    case TH_PF_444:_ci->pixelformat=OC_PF_444;break;
+    default:_ci->pixelformat=OC_PF_RSVD;
+  }
+  _ci->target_bitrate=_info->target_bitrate;
+  _ci->quality=_info->quality;
+  _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift;
+}
+
+int theora_decode_init(theora_state *_td,theora_info *_ci){
+  th_api_info    *apiinfo;
+  th_api_wrapper *api;
+  th_info         info;
+  api=(th_api_wrapper *)_ci->codec_setup;
+  /*Allocate our own combined API wrapper/theora_info struct.
+    We put them both in one malloc'd block so that when the API wrapper is
+     freed, the info struct goes with it.
+    This avoids having to figure out whether or not we need to free the info
+     struct in either theora_info_clear() or theora_clear().*/
+  apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
+  if(apiinfo==NULL)return OC_FAULT;
+  /*Make our own copy of the info struct, since its lifetime should be
+     independent of the one we were passed in.*/
+  *&apiinfo->info=*_ci;
+  /*Convert the info struct now instead of saving the the one we decoded with
+     theora_decode_header(), since the user might have modified values (i.e.,
+     color space, aspect ratio, etc. can be specified from a higher level).
+    The user also might be doing something "clever" with the header packets if
+     they are not using an Ogg encapsulation.*/
+  oc_theora_info2th_info(&info,_ci);
+  /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy
+     of the stuff it needs.*/
+  apiinfo->api.decode=th_decode_alloc(&info,api->setup);
+  if(apiinfo->api.decode==NULL){
+    _ogg_free(apiinfo);
+    return OC_EINVAL;
+  }
+  apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear;
+  _td->internal_encode=NULL;
+  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+  _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL;
+  _td->granulepos=0;
+  _td->i=&apiinfo->info;
+  _td->i->codec_setup=&apiinfo->api;
+  return 0;
+}
+
+int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
+  th_api_wrapper *api;
+  th_info         info;
+  int             ret;
+  api=(th_api_wrapper *)_ci->codec_setup;
+  /*Allocate an API wrapper struct on demand, since it will not also include a
+     theora_info struct like the ones that are used in a theora_state struct.*/
+  if(api==NULL){
+    _ci->codec_setup=_ogg_calloc(1,sizeof(*api));
+    if(_ci->codec_setup==NULL)return OC_FAULT;
+    api=(th_api_wrapper *)_ci->codec_setup;
+    api->clear=(oc_setup_clear_func)th_dec_api_clear;
+  }
+  /*Convert from the theora_info struct instead of saving our own th_info
+     struct between calls.
+    The user might be doing something "clever" with the header packets if they
+     are not using an Ogg encapsulation, and we don't want to break this.*/
+  oc_theora_info2th_info(&info,_ci);
+  /*We rely on the fact that theora_comment and th_comment structures are
+     actually identical.
+    Take care not to change this fact unless you change the code here as
+     well!*/
+  ret=th_decode_headerin(&info,(th_comment *)_cc,&api->setup,_op);
+  /*We also rely on the fact that the error return code values are the same,
+    and that the implementations of these two functions return the same set of
+    them.
+   Note that theora_decode_header() really can return OC_NOTFORMAT, even
+    though it is not currently documented to do so.*/
+  if(ret<0)return ret;
+  th_info2theora_info(_ci,&info);
+  return 0;
+}
+
+int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
+  th_api_wrapper *api;
+  ogg_int64_t     gp;
+  int             ret;
+  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  ret=th_decode_packetin(api->decode,_op,&gp);
+  if(ret<0)return OC_BADPACKET;
+  _td->granulepos=gp;
+  return 0;
+}
+
+int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
+  th_api_wrapper  *api;
+  th_dec_ctx      *decode;
+  th_ycbcr_buffer  buf;
+  int              ret;
+  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  decode=(th_dec_ctx *)api->decode;
+  if(!decode)return OC_FAULT;
+  ret=th_decode_ycbcr_out(decode,buf);
+  if(ret>=0){
+    _yuv->y_width=buf[0].width;
+    _yuv->y_height=buf[0].height;
+    _yuv->y_stride=buf[0].stride;
+    _yuv->uv_width=buf[1].width;
+    _yuv->uv_height=buf[1].height;
+    _yuv->uv_stride=buf[1].stride;
+    _yuv->y=buf[0].data;
+    _yuv->u=buf[1].data;
+    _yuv->v=buf[2].data;
+  }
+  return ret;
+}
diff --git a/engine/code/libtheora-1.1.1/lib/decinfo.c b/engine/code/libtheora-1.1.1/lib/decinfo.c
new file mode 100644
index 00000000..845eb136
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/decinfo.c
@@ -0,0 +1,246 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "decint.h"
+
+
+
+/*Unpacks a series of octets from a given byte array into the pack buffer.
+  No checking is done to ensure the buffer contains enough data.
+  _opb: The pack buffer to read the octets from.
+  _buf: The byte array to store the unpacked bytes in.
+  _len: The number of octets to unpack.*/
+static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){
+  while(_len-->0){
+    long val;
+    val=oc_pack_read(_opb,8);
+    *_buf++=(char)val;
+  }
+}
+
+/*Unpacks a 32-bit integer encoded by octets in little-endian form.*/
+static long oc_unpack_length(oc_pack_buf *_opb){
+  long ret[4];
+  int  i;
+  for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8);
+  return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24;
+}
+
+static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){
+  long val;
+  /*Check the codec bitstream version.*/
+  val=oc_pack_read(_opb,8);
+  _info->version_major=(unsigned char)val;
+  val=oc_pack_read(_opb,8);
+  _info->version_minor=(unsigned char)val;
+  val=oc_pack_read(_opb,8);
+  _info->version_subminor=(unsigned char)val;
+  /*verify we can parse this bitstream version.
+     We accept earlier minors and all subminors, by spec*/
+  if(_info->version_major>TH_VERSION_MAJOR||
+   _info->version_major==TH_VERSION_MAJOR&&
+   _info->version_minor>TH_VERSION_MINOR){
+    return TH_EVERSION;
+  }
+  /*Read the encoded frame description.*/
+  val=oc_pack_read(_opb,16);
+  _info->frame_width=(ogg_uint32_t)val<<4;
+  val=oc_pack_read(_opb,16);
+  _info->frame_height=(ogg_uint32_t)val<<4;
+  val=oc_pack_read(_opb,24);
+  _info->pic_width=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,24);
+  _info->pic_height=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->pic_x=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->pic_y=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,32);
+  _info->fps_numerator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,32);
+  _info->fps_denominator=(ogg_uint32_t)val;
+  if(_info->frame_width==0||_info->frame_height==0||
+   _info->pic_width+_info->pic_x>_info->frame_width||
+   _info->pic_height+_info->pic_y>_info->frame_height||
+   _info->fps_numerator==0||_info->fps_denominator==0){
+    return TH_EBADHEADER;
+  }
+  /*Note: The sense of pic_y is inverted in what we pass back to the
+     application compared to how it is stored in the bitstream.
+    This is because the bitstream uses a right-handed coordinate system, while
+     applications expect a left-handed one.*/
+  _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+  val=oc_pack_read(_opb,24);
+  _info->aspect_numerator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,24);
+  _info->aspect_denominator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->colorspace=(th_colorspace)val;
+  val=oc_pack_read(_opb,24);
+  _info->target_bitrate=(int)val;
+  val=oc_pack_read(_opb,6);
+  _info->quality=(int)val;
+  val=oc_pack_read(_opb,5);
+  _info->keyframe_granule_shift=(int)val;
+  val=oc_pack_read(_opb,2);
+  _info->pixel_fmt=(th_pixel_fmt)val;
+  if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
+  val=oc_pack_read(_opb,3);
+  if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+  return 0;
+}
+
+static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){
+  long len;
+  int  i;
+  /*Read the vendor string.*/
+  len=oc_unpack_length(_opb);
+  if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER;
+  _tc->vendor=_ogg_malloc((size_t)len+1);
+  if(_tc->vendor==NULL)return TH_EFAULT;
+  oc_unpack_octets(_opb,_tc->vendor,len);
+  _tc->vendor[len]='\0';
+  /*Read the user comments.*/
+  _tc->comments=(int)oc_unpack_length(_opb);
+  len=_tc->comments;
+  if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){
+    _tc->comments=0;
+    return TH_EBADHEADER;
+  }
+  _tc->comment_lengths=(int *)_ogg_malloc(
+   _tc->comments*sizeof(_tc->comment_lengths[0]));
+  _tc->user_comments=(char **)_ogg_malloc(
+   _tc->comments*sizeof(_tc->user_comments[0]));
+  for(i=0;i<_tc->comments;i++){
+    len=oc_unpack_length(_opb);
+    if(len<0||len>oc_pack_bytes_left(_opb)){
+      _tc->comments=i;
+      return TH_EBADHEADER;
+    }
+    _tc->comment_lengths[i]=len;
+    _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
+    if(_tc->user_comments[i]==NULL){
+      _tc->comments=i;
+      return TH_EFAULT;
+    }
+    oc_unpack_octets(_opb,_tc->user_comments[i],len);
+    _tc->user_comments[i][len]='\0';
+  }
+  return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0;
+}
+
+static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){
+  int ret;
+  /*Read the quantizer tables.*/
+  ret=oc_quant_params_unpack(_opb,&_setup->qinfo);
+  if(ret<0)return ret;
+  /*Read the Huffman trees.*/
+  return oc_huff_trees_unpack(_opb,_setup->huff_tables);
+}
+
+static void oc_setup_clear(th_setup_info *_setup){
+  oc_quant_params_clear(&_setup->qinfo);
+  oc_huff_trees_clear(_setup->huff_tables);
+}
+
+static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info,
+ th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
+  char buffer[6];
+  long val;
+  int  packtype;
+  int  ret;
+  val=oc_pack_read(_opb,8);
+  packtype=(int)val;
+  /*If we're at a data packet and we have received all three headers, we're
+     done.*/
+  if(!(packtype&0x80)&&_info->frame_width>0&&_tc->vendor!=NULL&&*_setup!=NULL){
+    return 0;
+  }
+  /*Check the codec string.*/
+  oc_unpack_octets(_opb,buffer,6);
+  if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT;
+  switch(packtype){
+    /*Codec info header.*/
+    case 0x80:{
+      /*This should be the first packet, and we should not already be
+         initialized.*/
+      if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER;
+      ret=oc_info_unpack(_opb,_info);
+      if(ret<0)th_info_clear(_info);
+      else ret=3;
+    }break;
+    /*Comment header.*/
+    case 0x81:{
+      if(_tc==NULL)return TH_EFAULT;
+      /*We shoud have already decoded the info header, and should not yet have
+         decoded the comment header.*/
+      if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER;
+      ret=oc_comment_unpack(_opb,_tc);
+      if(ret<0)th_comment_clear(_tc);
+      else ret=2;
+    }break;
+    /*Codec setup header.*/
+    case 0x82:{
+      oc_setup_info *setup;
+      if(_tc==NULL||_setup==NULL)return TH_EFAULT;
+      /*We should have already decoded the info header and the comment header,
+         and should not yet have decoded the setup header.*/
+      if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){
+        return TH_EBADHEADER;
+      }
+      setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
+      if(setup==NULL)return TH_EFAULT;
+      ret=oc_setup_unpack(_opb,setup);
+      if(ret<0){
+        oc_setup_clear(setup);
+        _ogg_free(setup);
+      }
+      else{
+        *_setup=setup;
+        ret=1;
+      }
+    }break;
+    default:{
+      /*We don't know what this header is.*/
+      return TH_EBADHEADER;
+    }break;
+  }
+  return ret;
+}
+
+
+/*Decodes one header packet.
+  This should be called repeatedly with the packets at the beginning of the
+   stream until it returns 0.*/
+int th_decode_headerin(th_info *_info,th_comment *_tc,
+ th_setup_info **_setup,ogg_packet *_op){
+  oc_pack_buf opb;
+  if(_op==NULL)return TH_EBADHEADER;
+  if(_info==NULL)return TH_EFAULT;
+  oc_pack_readinit(&opb,_op->packet,_op->bytes);
+  return oc_dec_headerin(&opb,_info,_tc,_setup,_op);
+}
+
+void th_setup_free(th_setup_info *_setup){
+  if(_setup!=NULL){
+    oc_setup_clear(_setup);
+    _ogg_free(_setup);
+  }
+}
diff --git a/engine/code/libtheora-1.1.1/lib/decint.h b/engine/code/libtheora-1.1.1/lib/decint.h
new file mode 100644
index 00000000..261b6763
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/decint.h
@@ -0,0 +1,107 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <limits.h>
+#if !defined(_decint_H)
+# define _decint_H (1)
+# include "theora/theoradec.h"
+# include "internal.h"
+# include "bitpack.h"
+
+typedef struct th_setup_info oc_setup_info;
+typedef struct th_dec_ctx    oc_dec_ctx;
+
+# include "huffdec.h"
+# include "dequant.h"
+
+/*Constants for the packet-in state machine specific to the decoder.*/
+
+/*Next packet to read: Data packet.*/
+#define OC_PACKET_DATA (0)
+
+
+
+struct th_setup_info{
+  /*The Huffman codes.*/
+  oc_huff_node      *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The quantization parameters.*/
+  th_quant_info  qinfo;
+};
+
+
+
+struct th_dec_ctx{
+  /*Shared encoder/decoder state.*/
+  oc_theora_state      state;
+  /*Whether or not packets are ready to be emitted.
+    This takes on negative values while there are remaining header packets to
+     be emitted, reaches 0 when the codec is ready for input, and goes to 1
+     when a frame has been processed and a data packet is ready.*/
+  int                  packet_state;
+  /*Buffer in which to assemble packets.*/
+  oc_pack_buf          opb;
+  /*Huffman decode trees.*/
+  oc_huff_node        *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The index of the first token in each plane for each coefficient.*/
+  ptrdiff_t            ti0[3][64];
+  /*The number of outstanding EOB runs at the start of each coefficient in each
+     plane.*/
+  ptrdiff_t            eob_runs[3][64];
+  /*The DCT token lists.*/
+  unsigned char       *dct_tokens;
+  /*The extra bits associated with DCT tokens.*/
+  unsigned char       *extra_bits;
+  /*The number of dct tokens unpacked so far.*/
+  int                  dct_tokens_count;
+  /*The out-of-loop post-processing level.*/
+  int                  pp_level;
+  /*The DC scale used for out-of-loop deblocking.*/
+  int                  pp_dc_scale[64];
+  /*The sharpen modifier used for out-of-loop deringing.*/
+  int                  pp_sharp_mod[64];
+  /*The DC quantization index of each block.*/
+  unsigned char       *dc_qis;
+  /*The variance of each block.*/
+  int                 *variances;
+  /*The storage for the post-processed frame buffer.*/
+  unsigned char       *pp_frame_data;
+  /*Whether or not the post-processsed frame buffer has space for chroma.*/
+  int                  pp_frame_state;
+  /*The buffer used for the post-processed frame.
+    Note that this is _not_ guaranteed to have the same strides and offsets as
+     the reference frame buffers.*/
+  th_ycbcr_buffer      pp_frame_buf;
+  /*The striped decode callback function.*/
+  th_stripe_callback   stripe_cb;
+# if defined(HAVE_CAIRO)
+  /*Output metrics for debugging.*/
+  int                  telemetry;
+  int                  telemetry_mbmode;
+  int                  telemetry_mv;
+  int                  telemetry_qi;
+  int                  telemetry_bits;
+  int                  telemetry_frame_bytes;
+  int                  telemetry_coding_bytes;
+  int                  telemetry_mode_bytes;
+  int                  telemetry_mv_bytes;
+  int                  telemetry_qi_bytes;
+  int                  telemetry_dc_bytes;
+  unsigned char       *telemetry_frame_data;
+# endif
+};
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/decode.c b/engine/code/libtheora-1.1.1/lib/decode.c
new file mode 100644
index 00000000..7be66463
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/decode.c
@@ -0,0 +1,2943 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "decint.h"
+#if defined(OC_DUMP_IMAGES)
+# include <stdio.h>
+# include "png.h"
+#endif
+#if defined(HAVE_CAIRO)
+# include <cairo.h>
+#endif
+
+
+/*No post-processing.*/
+#define OC_PP_LEVEL_DISABLED  (0)
+/*Keep track of DC qi for each block only.*/
+#define OC_PP_LEVEL_TRACKDCQI (1)
+/*Deblock the luma plane.*/
+#define OC_PP_LEVEL_DEBLOCKY  (2)
+/*Dering the luma plane.*/
+#define OC_PP_LEVEL_DERINGY   (3)
+/*Stronger luma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGY  (4)
+/*Deblock the chroma planes.*/
+#define OC_PP_LEVEL_DEBLOCKC  (5)
+/*Dering the chroma planes.*/
+#define OC_PP_LEVEL_DERINGC   (6)
+/*Stronger chroma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGC  (7)
+/*Maximum valid post-processing level.*/
+#define OC_PP_LEVEL_MAX       (7)
+
+
+
+/*The mode alphabets for the various mode coding schemes.
+  Scheme 0 uses a custom alphabet, which is not stored in this table.*/
+static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
+  /*Last MV dominates */
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
+    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
+  },
+  /*No MV dominates.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  /*Default ordering.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  }
+};
+
+
+/*The original DCT tokens are extended and reordered during the construction of
+   the Huffman tables.
+  The extension means more bits can be read with fewer calls to the bitpacker
+   during the Huffman decoding process (at the cost of larger Huffman tables),
+   and fewer tokens require additional extra bits (reducing the average storage
+   per decoded token).
+  The revised ordering reveals essential information in the token value
+   itself; specifically, whether or not there are additional extra bits to read
+   and the parameter to which those extra bits are applied.
+  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
+  The extra bits are added into code word at the bit position inferred from the
+   token value, giving the final code word from which all required parameters
+   are derived.
+  The number of EOBs and the leading zero run length can be extracted directly.
+  The coefficient magnitude is optionally negated before extraction, according
+   to a 'flip' bit.*/
+
+/*The number of additional extra bits that are decoded with each of the
+   internal DCT tokens.*/
+static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
+  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
+};
+
+/*Whether or not an internal token needs any additional extra bits.*/
+#define OC_DCT_TOKEN_NEEDS_MORE(token) \
+ (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
+  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
+
+/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
+#define OC_DCT_TOKEN_FAT_EOB (0)
+
+/*The number of EOBs to use for an end-of-frame token.
+  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
+   is not yet available everywhere; this should be equivalent.*/
+#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
+
+/*The location of the (6) run legth bits in the code word.
+  These are placed at index 0 and given 8 bits (even though 6 would suffice)
+   because it may be faster to extract the lower byte on some platforms.*/
+#define OC_DCT_CW_RLEN_SHIFT (0)
+/*The location of the (12) EOB bits in the code word.*/
+#define OC_DCT_CW_EOB_SHIFT  (8)
+/*The location of the (1) flip bit in the code word.
+  This must be right under the magnitude bits.*/
+#define OC_DCT_CW_FLIP_BIT   (20)
+/*The location of the (11) token magnitude bits in the code word.
+  These must be last, and rely on a sign-extending right shift.*/
+#define OC_DCT_CW_MAG_SHIFT  (21)
+
+/*Pack the given fields into a code word.*/
+#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
+ ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
+ (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
+ (_flip)<<OC_DCT_CW_FLIP_BIT| \
+ (_mag)-(_flip)<<OC_DCT_CW_MAG_SHIFT)
+
+/*A special code word value that signals the end of the frame (a long EOB run
+   of zero).*/
+#define OC_DCT_CW_FINISH (0)
+
+/*The position at which to insert the extra bits in the code word.
+  We use this formulation because Intel has no useful cmov.
+  A real architecture would probably do better with two of those.
+  This translates to 11 instructions(!), and is _still_ faster than either a
+   table lookup (just barely) or the naive double-ternary implementation (which
+   gcc translates to a jump and a cmov).
+  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
+   you want to make one of the other shifts zero.*/
+#define OC_DCT_TOKEN_EB_POS(_token) \
+ ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
+ +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
+
+/*The code words for each internal token.
+  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
+   order.*/
+static const ogg_int32_t OC_DCT_CODE_WORD[92]={
+  /*These tokens require additional extra bits for the EOB count.*/
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+  OC_DCT_CW_FINISH,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+  OC_DCT_CW_PACK(16, 0,  0,0),
+  /*These tokens require additional extra bits for the magnitude.*/
+  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 13,0),
+  OC_DCT_CW_PACK( 0, 0, 13,1),
+  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 21,0),
+  OC_DCT_CW_PACK( 0, 0, 21,1),
+  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 37,0),
+  OC_DCT_CW_PACK( 0, 0, 37,1),
+  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 69,0),
+  OC_DCT_CW_PACK( 0, 0,325,0),
+  OC_DCT_CW_PACK( 0, 0, 69,1),
+  OC_DCT_CW_PACK( 0, 0,325,1),
+  /*These tokens require additional extra bits for the run length.*/
+  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0,10, +1,0),
+  OC_DCT_CW_PACK( 0,10, -1,0),
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)
+    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  /*The remaining tokens require no additional extra bits.*/
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 1, 0,  0,0),
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 2, 0,  0,0),
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 3, 0,  0,0),
+  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
+  OC_DCT_CW_PACK( 0, 1, +1,0),
+  OC_DCT_CW_PACK( 0, 1, -1,0),
+  OC_DCT_CW_PACK( 0, 2, +1,0),
+  OC_DCT_CW_PACK( 0, 2, -1,0),
+  OC_DCT_CW_PACK( 0, 3, +1,0),
+  OC_DCT_CW_PACK( 0, 3, -1,0),
+  OC_DCT_CW_PACK( 0, 4, +1,0),
+  OC_DCT_CW_PACK( 0, 4, -1,0),
+  OC_DCT_CW_PACK( 0, 5, +1,0),
+  OC_DCT_CW_PACK( 0, 5, -1,0),
+  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 1, +2,0),
+  OC_DCT_CW_PACK( 0, 1, +3,0),
+  OC_DCT_CW_PACK( 0, 1, -2,0),
+  OC_DCT_CW_PACK( 0, 1, -3,0),
+  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 6, +1,0),
+  OC_DCT_CW_PACK( 0, 7, +1,0),
+  OC_DCT_CW_PACK( 0, 8, +1,0),
+  OC_DCT_CW_PACK( 0, 9, +1,0),
+  OC_DCT_CW_PACK( 0, 6, -1,0),
+  OC_DCT_CW_PACK( 0, 7, -1,0),
+  OC_DCT_CW_PACK( 0, 8, -1,0),
+  OC_DCT_CW_PACK( 0, 9, -1,0),
+  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 2, +2,0),
+  OC_DCT_CW_PACK( 0, 3, +2,0),
+  OC_DCT_CW_PACK( 0, 2, +3,0),
+  OC_DCT_CW_PACK( 0, 3, +3,0),
+  OC_DCT_CW_PACK( 0, 2, -2,0),
+  OC_DCT_CW_PACK( 0, 3, -2,0),
+  OC_DCT_CW_PACK( 0, 2, -3,0),
+  OC_DCT_CW_PACK( 0, 3, -3,0),
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
+    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  OC_DCT_CW_PACK( 0, 1,  0,0),
+  OC_DCT_CW_PACK( 0, 2,  0,0),
+  OC_DCT_CW_PACK( 0, 3,  0,0),
+  OC_DCT_CW_PACK( 0, 4,  0,0),
+  OC_DCT_CW_PACK( 0, 5,  0,0),
+  OC_DCT_CW_PACK( 0, 6,  0,0),
+  OC_DCT_CW_PACK( 0, 7,  0,0),
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +1,0),
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -1,0),
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +2,0),
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -2,0),
+  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
+  OC_DCT_CW_PACK( 0, 0, +3,0),
+  OC_DCT_CW_PACK( 0, 0, -3,0),
+  OC_DCT_CW_PACK( 0, 0, +4,0),
+  OC_DCT_CW_PACK( 0, 0, -4,0),
+  OC_DCT_CW_PACK( 0, 0, +5,0),
+  OC_DCT_CW_PACK( 0, 0, -5,0),
+  OC_DCT_CW_PACK( 0, 0, +6,0),
+  OC_DCT_CW_PACK( 0, 0, -6,0),
+  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +7,0),
+  OC_DCT_CW_PACK( 0, 0, +8,0),
+  OC_DCT_CW_PACK( 0, 0, -7,0),
+  OC_DCT_CW_PACK( 0, 0, -8,0),
+  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +9,0),
+  OC_DCT_CW_PACK( 0, 0,+10,0),
+  OC_DCT_CW_PACK( 0, 0,+11,0),
+  OC_DCT_CW_PACK( 0, 0,+12,0),
+  OC_DCT_CW_PACK( 0, 0, -9,0),
+  OC_DCT_CW_PACK( 0, 0,-10,0),
+  OC_DCT_CW_PACK( 0, 0,-11,0),
+  OC_DCT_CW_PACK( 0, 0,-12,0),
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 8, 0,  0,0),
+  OC_DCT_CW_PACK( 9, 0,  0,0),
+  OC_DCT_CW_PACK(10, 0,  0,0),
+  OC_DCT_CW_PACK(11, 0,  0,0),
+  OC_DCT_CW_PACK(12, 0,  0,0),
+  OC_DCT_CW_PACK(13, 0,  0,0),
+  OC_DCT_CW_PACK(14, 0,  0,0),
+  OC_DCT_CW_PACK(15, 0,  0,0),
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 4, 0,  0,0),
+  OC_DCT_CW_PACK( 5, 0,  0,0),
+  OC_DCT_CW_PACK( 6, 0,  0,0),
+  OC_DCT_CW_PACK( 7, 0,  0,0),
+};
+
+
+
+static int oc_sb_run_unpack(oc_pack_buf *_opb){
+  long bits;
+  int ret;
+  /*Coding scheme:
+       Codeword            Run Length
+     0                       1
+     10x                     2-3
+     110x                    4-5
+     1110xx                  6-9
+     11110xxx                10-17
+     111110xxxx              18-33
+     111111xxxxxxxxxxxx      34-4129*/
+  bits=oc_pack_read1(_opb);
+  if(bits==0)return 1;
+  bits=oc_pack_read(_opb,2);
+  if((bits&2)==0)return 2+(int)bits;
+  else if((bits&1)==0){
+    bits=oc_pack_read1(_opb);
+    return 4+(int)bits;
+  }
+  bits=oc_pack_read(_opb,3);
+  if((bits&4)==0)return 6+(int)bits;
+  else if((bits&2)==0){
+    ret=10+((bits&1)<<2);
+    bits=oc_pack_read(_opb,2);
+    return ret+(int)bits;
+  }
+  else if((bits&1)==0){
+    bits=oc_pack_read(_opb,4);
+    return 18+(int)bits;
+  }
+  bits=oc_pack_read(_opb,12);
+  return 34+(int)bits;
+}
+
+static int oc_block_run_unpack(oc_pack_buf *_opb){
+  long bits;
+  long bits2;
+  /*Coding scheme:
+     Codeword             Run Length
+     0x                      1-2
+     10x                     3-4
+     110x                    5-6
+     1110xx                  7-10
+     11110xx                 11-14
+     11111xxxx               15-30*/
+  bits=oc_pack_read(_opb,2);
+  if((bits&2)==0)return 1+(int)bits;
+  else if((bits&1)==0){
+    bits=oc_pack_read1(_opb);
+    return 3+(int)bits;
+  }
+  bits=oc_pack_read(_opb,2);
+  if((bits&2)==0)return 5+(int)bits;
+  else if((bits&1)==0){
+    bits=oc_pack_read(_opb,2);
+    return 7+(int)bits;
+  }
+  bits=oc_pack_read(_opb,3);
+  if((bits&4)==0)return 11+bits;
+  bits2=oc_pack_read(_opb,2);
+  return 15+((bits&3)<<2)+bits2;
+}
+
+
+
+static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
+ const th_setup_info *_setup){
+  int qti;
+  int pli;
+  int qi;
+  int ret;
+  ret=oc_state_init(&_dec->state,_info,3);
+  if(ret<0)return ret;
+  ret=oc_huff_trees_copy(_dec->huff_tables,
+   (const oc_huff_node *const *)_setup->huff_tables);
+  if(ret<0){
+    oc_state_clear(&_dec->state);
+    return ret;
+  }
+  /*For each fragment, allocate one byte for every DCT coefficient token, plus
+     one byte for extra-bits for each token, plus one more byte for the long
+     EOB run, just in case it's the very last token and has a run length of
+     one.*/
+  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
+   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
+  if(_dec->dct_tokens==NULL){
+    oc_huff_trees_clear(_dec->huff_tables);
+    oc_state_clear(&_dec->state);
+    return TH_EFAULT;
+  }
+  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
+    _dec->state.dequant_tables[qi][pli][qti]=
+     _dec->state.dequant_table_data[qi][pli][qti];
+  }
+  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
+   &_setup->qinfo);
+  for(qi=0;qi<64;qi++){
+    int qsum;
+    qsum=0;
+    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+      qsum+=_dec->state.dequant_tables[qti][pli][qi][12]+
+       _dec->state.dequant_tables[qti][pli][qi][17]+
+       _dec->state.dequant_tables[qti][pli][qi][18]+
+       _dec->state.dequant_tables[qti][pli][qi][24]<<(pli==0);
+    }
+    _dec->pp_sharp_mod[qi]=-(qsum>>11);
+  }
+  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
+   sizeof(_dec->state.loop_filter_limits));
+  _dec->pp_level=OC_PP_LEVEL_DISABLED;
+  _dec->dc_qis=NULL;
+  _dec->variances=NULL;
+  _dec->pp_frame_data=NULL;
+  _dec->stripe_cb.ctx=NULL;
+  _dec->stripe_cb.stripe_decoded=NULL;
+#if defined(HAVE_CAIRO)
+  _dec->telemetry=0;
+  _dec->telemetry_bits=0;
+  _dec->telemetry_qi=0;
+  _dec->telemetry_mbmode=0;
+  _dec->telemetry_mv=0;
+  _dec->telemetry_frame_data=NULL;
+#endif
+  return 0;
+}
+
+static void oc_dec_clear(oc_dec_ctx *_dec){
+#if defined(HAVE_CAIRO)
+  _ogg_free(_dec->telemetry_frame_data);
+#endif
+  _ogg_free(_dec->pp_frame_data);
+  _ogg_free(_dec->variances);
+  _ogg_free(_dec->dc_qis);
+  _ogg_free(_dec->dct_tokens);
+  oc_huff_trees_clear(_dec->huff_tables);
+  oc_state_clear(&_dec->state);
+}
+
+
+static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
+  long val;
+  /*Check to make sure this is a data packet.*/
+  val=oc_pack_read1(&_dec->opb);
+  if(val!=0)return TH_EBADPACKET;
+  /*Read in the frame type (I or P).*/
+  val=oc_pack_read1(&_dec->opb);
+  _dec->state.frame_type=(int)val;
+  /*Read in the qi list.*/
+  val=oc_pack_read(&_dec->opb,6);
+  _dec->state.qis[0]=(unsigned char)val;
+  val=oc_pack_read1(&_dec->opb);
+  if(!val)_dec->state.nqis=1;
+  else{
+    val=oc_pack_read(&_dec->opb,6);
+    _dec->state.qis[1]=(unsigned char)val;
+    val=oc_pack_read1(&_dec->opb);
+    if(!val)_dec->state.nqis=2;
+    else{
+      val=oc_pack_read(&_dec->opb,6);
+      _dec->state.qis[2]=(unsigned char)val;
+      _dec->state.nqis=3;
+    }
+  }
+  if(_dec->state.frame_type==OC_INTRA_FRAME){
+    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
+      Most of the other unused bits in the VP3 headers were eliminated.
+      I don't know why these remain.*/
+    /*I wanted to eliminate wasted bits, but not all config wiggle room
+       --Monty.*/
+    val=oc_pack_read(&_dec->opb,3);
+    if(val!=0)return TH_EIMPL;
+  }
+  return 0;
+}
+
+/*Mark all fragments as coded and in OC_MODE_INTRA.
+  This also builds up the coded fragment list (in coded order), and clears the
+   uncoded fragment list.
+  It does not update the coded macro block list nor the super block flags, as
+   those are not used when decoding INTRA frames.*/
+static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          prev_ncoded_fragis;
+  unsigned           nsbs;
+  unsigned           sbi;
+  int                pli;
+  coded_fragis=_dec->state.coded_fragis;
+  prev_ncoded_fragis=ncoded_fragis=0;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  sbi=nsbs=0;
+  for(pli=0;pli<3;pli++){
+    nsbs+=_dec->state.fplanes[pli].nsbs;
+    for(;sbi<nsbs;sbi++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        int bi;
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
+          if(fragi>=0){
+            frags[fragi].coded=1;
+            frags[fragi].mb_mode=OC_MODE_INTRA;
+            coded_fragis[ncoded_fragis++]=fragi;
+          }
+        }
+      }
+    }
+    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+    prev_ncoded_fragis=ncoded_fragis;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded_fragis;
+}
+
+/*Decodes the bit flags indicating whether each super block is partially coded
+   or not.
+  Return: The number of partially coded super blocks.*/
+static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     npartial;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  val=oc_pack_read1(&_dec->opb);
+  flag=(int)val;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  sbi=npartial=0;
+  while(sbi<nsbs){
+    int full_run;
+    run_count=oc_sb_run_unpack(&_dec->opb);
+    full_run=run_count>=4129;
+    do{
+      sb_flags[sbi].coded_partially=flag;
+      sb_flags[sbi].coded_fully=0;
+      npartial+=flag;
+      sbi++;
+    }
+    while(--run_count>0&&sbi<nsbs);
+    if(full_run&&sbi<nsbs){
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+    }
+    else flag=!flag;
+  }
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+  return npartial;
+}
+
+/*Decodes the bit flags for whether or not each non-partially-coded super
+   block is fully coded or not.
+  This function should only be called if there is at least one
+   non-partially-coded super block.
+  Return: The number of partially coded super blocks.*/
+static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  /*Skip partially coded super blocks.*/
+  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
+  val=oc_pack_read1(&_dec->opb);
+  flag=(int)val;
+  do{
+    int full_run;
+    run_count=oc_sb_run_unpack(&_dec->opb);
+    full_run=run_count>=4129;
+    for(;sbi<nsbs;sbi++){
+      if(sb_flags[sbi].coded_partially)continue;
+      if(run_count--<=0)break;
+      sb_flags[sbi].coded_fully=flag;
+    }
+    if(full_run&&sbi<nsbs){
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+    }
+    else flag=!flag;
+  }
+  while(sbi<nsbs);
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  unsigned           npartial;
+  long               val;
+  int                pli;
+  int                flag;
+  int                run_count;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t         *uncoded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          nuncoded_fragis;
+  ptrdiff_t          prev_ncoded_fragis;
+  npartial=oc_dec_partial_sb_flags_unpack(_dec);
+  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
+  if(npartial>0){
+    val=oc_pack_read1(&_dec->opb);
+    flag=!(int)val;
+  }
+  else flag=0;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  sbi=nsbs=run_count=0;
+  coded_fragis=_dec->state.coded_fragis;
+  uncoded_fragis=coded_fragis+_dec->state.nfrags;
+  prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
+  for(pli=0;pli<3;pli++){
+    nsbs+=_dec->state.fplanes[pli].nsbs;
+    for(;sbi<nsbs;sbi++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        int bi;
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
+          if(fragi>=0){
+            int coded;
+            if(sb_flags[sbi].coded_fully)coded=1;
+            else if(!sb_flags[sbi].coded_partially)coded=0;
+            else{
+              if(run_count<=0){
+                run_count=oc_block_run_unpack(&_dec->opb);
+                flag=!flag;
+              }
+              run_count--;
+              coded=flag;
+            }
+            if(coded)coded_fragis[ncoded_fragis++]=fragi;
+            else *(uncoded_fragis-++nuncoded_fragis)=fragi;
+            frags[fragi].coded=coded;
+          }
+        }
+      }
+    }
+    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+    prev_ncoded_fragis=ncoded_fragis;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded_fragis;
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+
+
+typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb);
+
+static int oc_vlc_mode_unpack(oc_pack_buf *_opb){
+  long val;
+  int  i;
+  for(i=0;i<7;i++){
+    val=oc_pack_read1(_opb);
+    if(!val)break;
+  }
+  return i;
+}
+
+static int oc_clc_mode_unpack(oc_pack_buf *_opb){
+  long val;
+  val=oc_pack_read(_opb,3);
+  return (int)val;
+}
+
+/*Unpacks the list of macro block modes for INTER frames.*/
+static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
+  const oc_mb_map     *mb_maps;
+  signed char         *mb_modes;
+  const oc_fragment   *frags;
+  const unsigned char *alphabet;
+  unsigned char        scheme0_alphabet[8];
+  oc_mode_unpack_func  mode_unpack;
+  size_t               nmbs;
+  size_t               mbi;
+  long                 val;
+  int                  mode_scheme;
+  val=oc_pack_read(&_dec->opb,3);
+  mode_scheme=(int)val;
+  if(mode_scheme==0){
+    int mi;
+    /*Just in case, initialize the modes to something.
+      If the bitstream doesn't contain each index exactly once, it's likely
+       corrupt and the rest of the packet is garbage anyway, but this way we
+       won't crash, and we'll decode SOMETHING.*/
+    /*LOOP VECTORIZES*/
+    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
+    for(mi=0;mi<OC_NMODES;mi++){
+      val=oc_pack_read(&_dec->opb,3);
+      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
+    }
+    alphabet=scheme0_alphabet;
+  }
+  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
+  if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
+  else mode_unpack=oc_vlc_mode_unpack;
+  mb_modes=_dec->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  nmbs=_dec->state.nmbs;
+  frags=_dec->state.frags;
+  for(mbi=0;mbi<nmbs;mbi++){
+    if(mb_modes[mbi]!=OC_MODE_INVALID){
+      int bi;
+      /*Check for a coded luma block in this macro block.*/
+      for(bi=0;bi<4&&!frags[mb_maps[mbi][0][bi]].coded;bi++);
+      /*We found one, decode a mode.*/
+      if(bi<4)mb_modes[mbi]=alphabet[(*mode_unpack)(&_dec->opb)];
+      /*There were none: INTER_NOMV is forced.*/
+      else mb_modes[mbi]=OC_MODE_INTER_NOMV;
+    }
+  }
+}
+
+
+
+typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb);
+
+static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){
+  long bits;
+  int  mask;
+  int  mv;
+  bits=oc_pack_read(_opb,3);
+  switch(bits){
+    case  0:return 0;
+    case  1:return 1;
+    case  2:return -1;
+    case  3:
+    case  4:{
+      mv=(int)(bits-1);
+      bits=oc_pack_read1(_opb);
+    }break;
+    /*case  5:
+    case  6:
+    case  7:*/
+    default:{
+      mv=1<<bits-3;
+      bits=oc_pack_read(_opb,bits-2);
+      mv+=(int)(bits>>1);
+      bits&=1;
+    }break;
+  }
+  mask=-(int)bits;
+  return mv+mask^mask;
+}
+
+static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){
+  long bits;
+  int  mask;
+  int  mv;
+  bits=oc_pack_read(_opb,6);
+  mv=(int)bits>>1;
+  mask=-((int)bits&1);
+  return mv+mask^mask;
+}
+
+/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro
+   block modes and motion vectors to the individual fragments.*/
+static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
+  const oc_mb_map        *mb_maps;
+  const signed char      *mb_modes;
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  oc_mv_comp_unpack_func  mv_comp_unpack;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  const unsigned char    *map_idxs;
+  int                     map_nidxs;
+  oc_mv                   last_mv[2];
+  oc_mv                   cbmvs[4];
+  size_t                  nmbs;
+  size_t                  mbi;
+  long                    val;
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
+  val=oc_pack_read1(&_dec->opb);
+  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
+  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
+  memset(last_mv,0,sizeof(last_mv));
+  frags=_dec->state.frags;
+  frag_mvs=_dec->state.frag_mvs;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  mb_modes=_dec->state.mb_modes;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
+    int          mb_mode;
+    mb_mode=mb_modes[mbi];
+    if(mb_mode!=OC_MODE_INVALID){
+      oc_mv        mbmv;
+      ptrdiff_t    fragi;
+      int          coded[13];
+      int          codedi;
+      int          ncoded;
+      int          mapi;
+      int          mapii;
+      /*Search for at least one coded fragment.*/
+      ncoded=mapii=0;
+      do{
+        mapi=map_idxs[mapii];
+        fragi=mb_maps[mbi][mapi>>2][mapi&3];
+        if(frags[fragi].coded)coded[ncoded++]=mapi;
+      }
+      while(++mapii<map_nidxs);
+      if(ncoded<=0)continue;
+      switch(mb_mode){
+        case OC_MODE_INTER_MV_FOUR:{
+          oc_mv       lbmvs[4];
+          int         bi;
+          /*Mark the tail of the list, so we don't accidentally go past it.*/
+          coded[ncoded]=-1;
+          for(bi=codedi=0;bi<4;bi++){
+            if(coded[codedi]==bi){
+              codedi++;
+              fragi=mb_maps[mbi][0][bi];
+              frags[fragi].mb_mode=mb_mode;
+              lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
+            }
+            else lbmvs[bi][0]=lbmvs[bi][1]=0;
+          }
+          if(codedi>0){
+            memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+            memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0]));
+          }
+          if(codedi<ncoded){
+            (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+            for(;codedi<ncoded;codedi++){
+              mapi=coded[codedi];
+              bi=mapi&3;
+              fragi=mb_maps[mbi][mapi>>2][bi];
+              frags[fragi].mb_mode=mb_mode;
+              memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
+            }
+          }
+        }break;
+        case OC_MODE_INTER_MV:{
+          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+          mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
+        case OC_MODE_INTER_MV_LAST2:{
+          memcpy(mbmv,last_mv[1],sizeof(mbmv));
+          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+          memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
+        }break;
+        case OC_MODE_GOLDEN_MV:{
+          mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+        }break;
+        default:memset(mbmv,0,sizeof(mbmv));break;
+      }
+      /*4MV mode fills in the fragments itself.
+        For all other modes we can use this common code.*/
+      if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+        for(codedi=0;codedi<ncoded;codedi++){
+          mapi=coded[codedi];
+          fragi=mb_maps[mbi][mapi>>2][mapi&3];
+          frags[fragi].mb_mode=mb_mode;
+          memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
+        }
+      }
+    }
+  }
+}
+
+static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t        fragii;
+  ptrdiff_t        fragi;
+  ncoded_fragis=_dec->state.ntotal_coded_fragis;
+  if(ncoded_fragis<=0)return;
+  frags=_dec->state.frags;
+  coded_fragis=_dec->state.coded_fragis;
+  if(_dec->state.nqis==1){
+    /*If this frame has only a single qi value, then just use it for all coded
+       fragments.*/
+    for(fragii=0;fragii<ncoded_fragis;fragii++){
+      frags[coded_fragis[fragii]].qii=0;
+    }
+  }
+  else{
+    long val;
+    int  flag;
+    int  nqi1;
+    int  run_count;
+    /*Otherwise, we decode a qi index for each fragment, using two passes of
+      the same binary RLE scheme used for super-block coded bits.
+     The first pass marks each fragment as having a qii of 0 or greater than
+      0, and the second pass (if necessary), distinguishes between a qii of
+      1 and 2.
+     At first we just store the qii in the fragment.
+     After all the qii's are decoded, we make a final pass to replace them
+      with the corresponding qi's for this frame.*/
+    val=oc_pack_read1(&_dec->opb);
+    flag=(int)val;
+    nqi1=0;
+    fragii=0;
+    while(fragii<ncoded_fragis){
+      int full_run;
+      run_count=oc_sb_run_unpack(&_dec->opb);
+      full_run=run_count>=4129;
+      do{
+        frags[coded_fragis[fragii++]].qii=flag;
+        nqi1+=flag;
+      }
+      while(--run_count>0&&fragii<ncoded_fragis);
+      if(full_run&&fragii<ncoded_fragis){
+        val=oc_pack_read1(&_dec->opb);
+        flag=(int)val;
+      }
+      else flag=!flag;
+    }
+    /*TODO: run_count should be 0 here.
+      If it's not, we should issue a warning of some kind.*/
+    /*If we have 3 different qi's for this frame, and there was at least one
+       fragment with a non-zero qi, make the second pass.*/
+    if(_dec->state.nqis==3&&nqi1>0){
+      /*Skip qii==0 fragments.*/
+      for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+      do{
+        int full_run;
+        run_count=oc_sb_run_unpack(&_dec->opb);
+        full_run=run_count>=4129;
+        for(;fragii<ncoded_fragis;fragii++){
+          fragi=coded_fragis[fragii];
+          if(frags[fragi].qii==0)continue;
+          if(run_count--<=0)break;
+          frags[fragi].qii+=flag;
+        }
+        if(full_run&&fragii<ncoded_fragis){
+          val=oc_pack_read1(&_dec->opb);
+          flag=(int)val;
+        }
+        else flag=!flag;
+      }
+      while(fragii<ncoded_fragis);
+      /*TODO: run_count should be 0 here.
+        If it's not, we should issue a warning of some kind.*/
+    }
+  }
+}
+
+
+
+/*Unpacks the DC coefficient tokens.
+  Unlike when unpacking the AC coefficient tokens, we actually need to decode
+   the DC coefficient values now so that we can do DC prediction.
+  _huff_idx:   The index of the Huffman table to use for each color plane.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  Return: The length of any outstanding EOB run.*/
+static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
+ ptrdiff_t _ntoks_left[3][64]){
+  unsigned char   *dct_tokens;
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t        fragii;
+  ptrdiff_t        eobs;
+  ptrdiff_t        ti;
+  int              pli;
+  dct_tokens=_dec->dct_tokens;
+  frags=_dec->state.frags;
+  coded_fragis=_dec->state.coded_fragis;
+  ncoded_fragis=fragii=eobs=ti=0;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    ptrdiff_t eobi;
+    int       rli;
+    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
+    memset(run_counts,0,sizeof(run_counts));
+    _dec->eob_runs[pli][0]=eobs;
+    _dec->ti0[pli][0]=ti;
+    /*Continue any previous EOB run, if there was one.*/
+    eobi=eobs;
+    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
+    eob_count=eobi;
+    eobs-=eobi;
+    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+    while(fragii<ncoded_fragis){
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
+      if(eobs){
+        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
+        eob_count+=eobi;
+        eobs-=eobi;
+        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+      }
+      else{
+        int coeff;
+        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        if(skip)coeff=0;
+        run_counts[skip]++;
+        frags[coded_fragis[fragii++]].dc=coeff;
+      }
+    }
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return eobs;
+}
+
+/*Unpacks the AC coefficient tokens.
+  This can completely discard coefficient values while unpacking, and so is
+   somewhat simpler than unpacking the DC coefficient tokens.
+  _huff_idx:   The index of the Huffman table to use for each color plane.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  _eobs:       The length of any outstanding EOB run from previous
+                coefficients.
+  Return: The length of any outstanding EOB run.*/
+static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
+ ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
+  unsigned char *dct_tokens;
+  ptrdiff_t      ti;
+  int            pli;
+  dct_tokens=_dec->dct_tokens;
+  ti=_dec->dct_tokens_count;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    size_t    ntoks_left;
+    size_t    ntoks;
+    int       rli;
+    _dec->eob_runs[pli][_zzi]=_eobs;
+    _dec->ti0[pli][_zzi]=ti;
+    ntoks_left=_ntoks_left[pli][_zzi];
+    memset(run_counts,0,sizeof(run_counts));
+    eob_count=0;
+    ntoks=0;
+    while(ntoks+_eobs<ntoks_left){
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      ntoks+=_eobs;
+      eob_count+=_eobs;
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
+      if(_eobs==0){
+        run_counts[skip]++;
+        ntoks++;
+      }
+    }
+    /*Add the portion of the last EOB run actually used by this coefficient.*/
+    eob_count+=ntoks_left-ntoks;
+    /*And remove it from the remaining EOB count.*/
+    _eobs-=ntoks_left-ntoks;
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return _eobs;
+}
+
+/*Tokens describing the DCT coefficients that belong to each fragment are
+   stored in the bitstream grouped by coefficient, not by fragment.
+
+  This means that we either decode all the tokens in order, building up a
+   separate coefficient list for each fragment as we go, and then go back and
+   do the iDCT on each fragment, or we have to create separate lists of tokens
+   for each coefficient, so that we can pull the next token required off the
+   head of the appropriate list when decoding a specific fragment.
+
+  The former was VP3's choice, and it meant 2*w*h extra storage for all the
+   decoded coefficient values.
+
+  We take the second option, which lets us store just one to three bytes per
+   token (generally far fewer than the number of coefficients, due to EOB
+   tokens and zero runs), and which requires us to only maintain a counter for
+   each of the 64 coefficients, instead of a counter for every fragment to
+   determine where the next token goes.
+
+  We actually use 3 counters per coefficient, one for each color plane, so we
+   can decode all color planes simultaneously.
+  This lets color conversion, etc., be done as soon as a full MCU (one or
+   two super block rows) is decoded, while the image data is still in cache.*/
+
+static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
+  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
+  ptrdiff_t  ntoks_left[3][64];
+  int        huff_idxs[2];
+  ptrdiff_t  eobs;
+  long       val;
+  int        pli;
+  int        zzi;
+  int        hgi;
+  for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
+    ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
+  }
+  val=oc_pack_read(&_dec->opb,4);
+  huff_idxs[0]=(int)val;
+  val=oc_pack_read(&_dec->opb,4);
+  huff_idxs[1]=(int)val;
+  _dec->eob_runs[0][0]=0;
+  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
+#if defined(HAVE_CAIRO)
+  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+  val=oc_pack_read(&_dec->opb,4);
+  huff_idxs[0]=(int)val;
+  val=oc_pack_read(&_dec->opb,4);
+  huff_idxs[1]=(int)val;
+  zzi=1;
+  for(hgi=1;hgi<5;hgi++){
+    huff_idxs[0]+=16;
+    huff_idxs[1]+=16;
+    for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
+      eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
+    }
+  }
+  /*TODO: eobs should be exactly zero, or 4096 or greater.
+    The second case occurs when an EOB run of size zero is encountered, which
+     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
+    If neither of these conditions holds, then a warning should be issued.*/
+}
+
+
+static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
+  /*pp_level 0: disabled; free any memory used and return*/
+  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
+    if(_dec->dc_qis!=NULL){
+      _ogg_free(_dec->dc_qis);
+      _dec->dc_qis=NULL;
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->dc_qis==NULL){
+    /*If we haven't been tracking DC quantization indices, there's no point in
+       starting now.*/
+    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
+    _dec->dc_qis=(unsigned char *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
+    if(_dec->dc_qis==NULL)return 1;
+    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
+  }
+  else{
+    unsigned char   *dc_qis;
+    const ptrdiff_t *coded_fragis;
+    ptrdiff_t        ncoded_fragis;
+    ptrdiff_t        fragii;
+    unsigned char    qi0;
+    /*Update the DC quantization index of each coded block.*/
+    dc_qis=_dec->dc_qis;
+    coded_fragis=_dec->state.coded_fragis;
+    ncoded_fragis=_dec->state.ncoded_fragis[0]+
+     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
+    qi0=(unsigned char)_dec->state.qis[0];
+    for(fragii=0;fragii<ncoded_fragis;fragii++){
+      dc_qis[coded_fragis[fragii]]=qi0;
+    }
+  }
+  /*pp_level 1: Stop after updating DC quantization indices.*/
+  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
+    if(_dec->variances!=NULL){
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->variances==NULL){
+    size_t frame_sz;
+    size_t c_sz;
+    int    c_w;
+    int    c_h;
+    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+    c_sz=c_w*(size_t)c_h;
+    /*Allocate space for the chroma planes, even if we're not going to use
+       them; this simplifies allocation state management, though it may waste
+       memory on the few systems that don't overcommit pages.*/
+    frame_sz+=c_sz<<1;
+    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
+     frame_sz*sizeof(_dec->pp_frame_data[0]));
+    _dec->variances=(int *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->variances[0]));
+    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
+      _ogg_free(_dec->pp_frame_data);
+      _dec->pp_frame_data=NULL;
+      _ogg_free(_dec->variances);
+      _dec->variances=NULL;
+      return 1;
+    }
+    /*Force an update of the PP buffer pointers.*/
+    _dec->pp_frame_state=0;
+  }
+  /*Update the PP buffer pointers if necessary.*/
+  if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
+    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
+      /*If chroma processing is disabled, just use the PP luma plane.*/
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
+       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
+    }
+    else{
+      size_t y_sz;
+      size_t c_sz;
+      int    c_w;
+      int    c_h;
+      /*Otherwise, set up pointers to all three PP planes.*/
+      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+      c_sz=c_w*(size_t)c_h;
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
+      _dec->pp_frame_buf[1].width=c_w;
+      _dec->pp_frame_buf[1].height=c_h;
+      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
+      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
+      _dec->pp_frame_buf[2].width=c_w;
+      _dec->pp_frame_buf[2].height=c_h;
+      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
+      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
+      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
+    }
+    _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
+  }
+  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
+  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
+    memcpy(_dec->pp_frame_buf+1,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
+     sizeof(_dec->pp_frame_buf[1])*2);
+  }
+  return 0;
+}
+
+
+
+typedef struct{
+  int                 bounding_values[256];
+  ptrdiff_t           ti[3][64];
+  ptrdiff_t           eob_runs[3][64];
+  const ptrdiff_t    *coded_fragis[3];
+  const ptrdiff_t    *uncoded_fragis[3];
+  ptrdiff_t           ncoded_fragis[3];
+  ptrdiff_t           nuncoded_fragis[3];
+  const ogg_uint16_t *dequant[3][3][2];
+  int                 fragy0[3];
+  int                 fragy_end[3];
+  int                 pred_last[3][3];
+  int                 mcu_nvfrags;
+  int                 loop_filter;
+  int                 pp_level;
+}oc_dec_pipeline_state;
+
+
+
+/*Initialize the main decoding pipeline.*/
+static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe){
+  const ptrdiff_t *coded_fragis;
+  const ptrdiff_t *uncoded_fragis;
+  int              pli;
+  int              qii;
+  int              qti;
+  /*If chroma is sub-sampled in the vertical direction, we have to decode two
+     super block rows of Y' for each super block row of Cb and Cr.*/
+  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
+  /*Initialize the token and extra bits indices for each plane and
+     coefficient.*/
+  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
+  /*Also copy over the initial the EOB run counts.*/
+  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
+  /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
+  coded_fragis=_dec->state.coded_fragis;
+  uncoded_fragis=coded_fragis+_dec->state.nfrags;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t ncoded_fragis;
+    _pipe->coded_fragis[pli]=coded_fragis;
+    _pipe->uncoded_fragis[pli]=uncoded_fragis;
+    ncoded_fragis=_dec->state.ncoded_fragis[pli];
+    coded_fragis+=ncoded_fragis;
+    uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
+  }
+  /*Set up condensed quantizer tables.*/
+  for(pli=0;pli<3;pli++){
+    for(qii=0;qii<_dec->state.nqis;qii++){
+      for(qti=0;qti<2;qti++){
+        _pipe->dequant[pli][qii][qti]=
+         _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
+      }
+    }
+  }
+  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
+  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
+  /*Initialize the bounding value array for the loop filter.*/
+  _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state,
+   _pipe->bounding_values);
+  /*Initialize any buffers needed for post-processing.
+    We also save the current post-processing level, to guard against the user
+     changing it from a callback.*/
+  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
+  /*If we don't have enough information to post-process, disable it, regardless
+     of the user-requested level.*/
+  else{
+    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
+    memcpy(_dec->pp_frame_buf,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
+     sizeof(_dec->pp_frame_buf[0])*3);
+  }
+}
+
+/*Undo the DC prediction in a single plane of an MCU (one or two super block
+   rows).
+  As a side effect, the number of coded and uncoded fragments in this plane of
+   the MCU is also computed.*/
+static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  const oc_fragment_plane *fplane;
+  oc_fragment             *frags;
+  int                     *pred_last;
+  ptrdiff_t                ncoded_fragis;
+  ptrdiff_t                fragi;
+  int                      fragx;
+  int                      fragy;
+  int                      fragy0;
+  int                      fragy_end;
+  int                      nhfrags;
+  /*Compute the first and last fragment row of the current MCU for this
+     plane.*/
+  fplane=_dec->state.fplanes+_pli;
+  fragy0=_pipe->fragy0[_pli];
+  fragy_end=_pipe->fragy_end[_pli];
+  nhfrags=fplane->nhfrags;
+  pred_last=_pipe->pred_last[_pli];
+  frags=_dec->state.frags;
+  ncoded_fragis=0;
+  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
+  for(fragy=fragy0;fragy<fragy_end;fragy++){
+    if(fragy==0){
+      /*For the first row, all of the cases reduce to just using the previous
+         predictor for the same reference frame.*/
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        if(frags[fragi].coded){
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          pred_last[ref]=frags[fragi].dc+=pred_last[ref];
+          ncoded_fragis++;
+        }
+      }
+    }
+    else{
+      oc_fragment *u_frags;
+      int          l_ref;
+      int          ul_ref;
+      int          u_ref;
+      u_frags=frags-nhfrags;
+      l_ref=-1;
+      ul_ref=-1;
+      u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        int ur_ref;
+        if(fragx+1>=nhfrags)ur_ref=-1;
+        else{
+          ur_ref=u_frags[fragi+1].coded?
+           OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
+        }
+        if(frags[fragi].coded){
+          int pred;
+          int ref;
+          ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+          /*We break out a separate case based on which of our neighbors use
+             the same reference frames.
+            This is somewhat faster than trying to make a generic case which
+             handles all of them, since it reduces lots of poorly predicted
+             jumps to one switch statement, and also lets a number of the
+             multiplications be optimized out by strength reduction.*/
+          switch((l_ref==ref)|(ul_ref==ref)<<1|
+           (u_ref==ref)<<2|(ur_ref==ref)<<3){
+            default:pred=pred_last[ref];break;
+            case  1:
+            case  3:pred=frags[fragi-1].dc;break;
+            case  2:pred=u_frags[fragi-1].dc;break;
+            case  4:
+            case  6:
+            case 12:pred=u_frags[fragi].dc;break;
+            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
+            case  8:pred=u_frags[fragi+1].dc;break;
+            case  9:
+            case 11:
+            case 13:{
+              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
+            }break;
+            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
+            case 14:{
+              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
+               +10*u_frags[fragi].dc)/16;
+            }break;
+            case  7:
+            case 15:{
+              int p0;
+              int p1;
+              int p2;
+              p0=frags[fragi-1].dc;
+              p1=u_frags[fragi-1].dc;
+              p2=u_frags[fragi].dc;
+              pred=(29*(p0+p2)-26*p1)/32;
+              if(abs(pred-p2)>128)pred=p2;
+              else if(abs(pred-p0)>128)pred=p0;
+              else if(abs(pred-p1)>128)pred=p1;
+            }break;
+          }
+          pred_last[ref]=frags[fragi].dc+=pred;
+          ncoded_fragis++;
+          l_ref=ref;
+        }
+        else l_ref=-1;
+        ul_ref=u_ref;
+        u_ref=ur_ref;
+      }
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+  /*Also save the number of uncoded fragments so we know how many to copy.*/
+  _pipe->nuncoded_fragis[_pli]=
+   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
+}
+
+/*Reconstructs all coded fragments in a single MCU (one or two super block
+   rows).
+  This requires that each coded fragment have a proper macro block mode and
+   motion vector (if not in INTRA mode), and have it's DC value decoded, with
+   the DC prediction process reversed, and the number of coded and uncoded
+   fragments in this plane of the MCU be counted.
+  The token lists for each color plane and coefficient should also be filled
+   in, along with initial token offsets, extra bits offsets, and EOB run
+   counts.*/
+static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  unsigned char       *dct_tokens;
+  const unsigned char *dct_fzig_zag;
+  ogg_uint16_t         dc_quant[2];
+  const oc_fragment   *frags;
+  const ptrdiff_t     *coded_fragis;
+  ptrdiff_t            ncoded_fragis;
+  ptrdiff_t            fragii;
+  ptrdiff_t           *ti;
+  ptrdiff_t           *eob_runs;
+  int                  qti;
+  dct_tokens=_dec->dct_tokens;
+  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
+  frags=_dec->state.frags;
+  coded_fragis=_pipe->coded_fragis[_pli];
+  ncoded_fragis=_pipe->ncoded_fragis[_pli];
+  ti=_pipe->ti[_pli];
+  eob_runs=_pipe->eob_runs[_pli];
+  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
+  for(fragii=0;fragii<ncoded_fragis;fragii++){
+    /*This array is made one element larger because the zig-zag index array
+       uses the final element as a dumping ground for out-of-range indices
+       to protect us from buffer overflow.*/
+    OC_ALIGN8(ogg_int16_t dct_coeffs[65]);
+    const ogg_uint16_t *ac_quant;
+    ptrdiff_t           fragi;
+    int                 last_zzi;
+    int                 zzi;
+    fragi=coded_fragis[fragii];
+    for(zzi=0;zzi<64;zzi++)dct_coeffs[zzi]=0;
+    qti=frags[fragi].mb_mode!=OC_MODE_INTRA;
+    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
+    /*Decode the AC coefficients.*/
+    for(zzi=0;zzi<64;){
+      int token;
+      last_zzi=zzi;
+      if(eob_runs[zzi]){
+        eob_runs[zzi]--;
+        break;
+      }
+      else{
+        ptrdiff_t eob;
+        int       cw;
+        int       rlen;
+        int       coeff;
+        int       lti;
+        lti=ti[zzi];
+        token=dct_tokens[lti++];
+        cw=OC_DCT_CODE_WORD[token];
+        /*These parts could be done branchless, but the branches are fairly
+           predictable and the C code translates into more than a few
+           instructions, so it's worth it to avoid them.*/
+        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
+        }
+        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+        if(token==OC_DCT_TOKEN_FAT_EOB){
+          eob+=dct_tokens[lti++]<<8;
+          if(eob==0)eob=OC_DCT_EOB_FINISH;
+        }
+        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        eob_runs[zzi]=eob;
+        ti[zzi]=lti;
+        zzi+=rlen;
+        dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]);
+        zzi+=!eob;
+      }
+    }
+    /*TODO: zzi should be exactly 64 here.
+      If it's not, we should report some kind of warning.*/
+    zzi=OC_MINI(zzi,64);
+    dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
+    /*last_zzi is always initialized.
+      If your compiler thinks otherwise, it is dumb.*/
+    oc_state_frag_recon(&_dec->state,fragi,_pli,
+     dct_coeffs,last_zzi,dc_quant[qti]);
+  }
+  _pipe->coded_fragis[_pli]+=ncoded_fragis;
+  /*Right now the reconstructed MCU has only the coded blocks in it.*/
+  /*TODO: We make the decision here to always copy the uncoded blocks into it
+     from the reference frame.
+    We could also copy the coded blocks back over the reference frame, if we
+     wait for an additional MCU to be decoded, which might be faster if only a
+     small number of blocks are coded.
+    However, this introduces more latency, creating a larger cache footprint.
+    It's unknown which decision is better, but this one results in simpler
+     code, and the hard case (high bitrate, high resolution) is handled
+     correctly.*/
+  /*Copy the uncoded blocks from the previous reference frame.*/
+  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+  oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli],
+   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
+}
+
+/*Filter a horizontal block edge.*/
+static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
+ int *_variance0,int *_variance1){
+  unsigned char       *rdst;
+  const unsigned char *rsrc;
+  unsigned char       *cdst;
+  const unsigned char *csrc;
+  int                  r[10];
+  int                  sum0;
+  int                  sum1;
+  int                  bx;
+  int                  by;
+  rdst=_dst;
+  rsrc=_src;
+  for(bx=0;bx<8;bx++){
+    cdst=rdst;
+    csrc=rsrc;
+    for(by=0;by<10;by++){
+      r[by]=*csrc;
+      csrc+=_src_ystride;
+    }
+    sum0=sum1=0;
+    for(by=0;by<4;by++){
+      sum0+=abs(r[by+1]-r[by]);
+      sum1+=abs(r[by+5]-r[by+6]);
+    }
+    *_variance0+=OC_MINI(255,sum0);
+    *_variance1+=OC_MINI(255,sum1);
+    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
+      *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
+      cdst+=_dst_ystride;
+      *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
+      cdst+=_dst_ystride;
+      for(by=0;by<4;by++){
+        *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+
+         r[by+4]+r[by+5]+r[by+6]+4>>3);
+        cdst+=_dst_ystride;
+      }
+      *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
+      cdst+=_dst_ystride;
+      *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
+    }
+    else{
+      for(by=1;by<=8;by++){
+        *cdst=(unsigned char)r[by];
+        cdst+=_dst_ystride;
+      }
+    }
+    rdst++;
+    rsrc++;
+  }
+}
+
+/*Filter a vertical block edge.*/
+static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
+ int _qstep,int _flimit,int *_variances){
+  unsigned char       *rdst;
+  const unsigned char *rsrc;
+  unsigned char       *cdst;
+  int                  r[10];
+  int                  sum0;
+  int                  sum1;
+  int                  bx;
+  int                  by;
+  cdst=_dst;
+  for(by=0;by<8;by++){
+    rsrc=cdst-1;
+    rdst=cdst;
+    for(bx=0;bx<10;bx++)r[bx]=*rsrc++;
+    sum0=sum1=0;
+    for(bx=0;bx<4;bx++){
+      sum0+=abs(r[bx+1]-r[bx]);
+      sum1+=abs(r[bx+5]-r[bx+6]);
+    }
+    _variances[0]+=OC_MINI(255,sum0);
+    _variances[1]+=OC_MINI(255,sum1);
+    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
+      *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
+      *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
+      for(bx=0;bx<4;bx++){
+        *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+
+         r[bx+4]+r[bx+5]+r[bx+6]+4>>3);
+      }
+      *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3);
+      *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
+    }
+    cdst+=_dst_ystride;
+  }
+}
+
+static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
+ th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
+ int _fragy_end){
+  oc_fragment_plane   *fplane;
+  int                 *variance;
+  unsigned char       *dc_qi;
+  unsigned char       *dst;
+  const unsigned char *src;
+  ptrdiff_t            froffset;
+  int                  dst_ystride;
+  int                  src_ystride;
+  int                  nhfrags;
+  int                  width;
+  int                  notstart;
+  int                  notdone;
+  int                  flimit;
+  int                  qstep;
+  int                  y_end;
+  int                  y;
+  int                  x;
+  _dst+=_pli;
+  _src+=_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  dc_qi=_dec->dc_qis+froffset;
+  notstart=_fragy0>0;
+  notdone=_fragy_end<fplane->nvfrags;
+  /*We want to clear an extra row of variances, except at the end.*/
+  memset(variance+(nhfrags&-notstart),0,
+   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
+  /*Except for the first time, we want to point to the middle of the row.*/
+  y=(_fragy0<<3)+(notstart<<2);
+  dst_ystride=_dst->stride;
+  src_ystride=_src->stride;
+  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
+  src=_src->data+y*(ptrdiff_t)src_ystride;
+  width=_dst->width;
+  for(;y<4;y++){
+    memcpy(dst,src,width*sizeof(dst[0]));
+    dst+=dst_ystride;
+    src+=src_ystride;
+  }
+  /*We also want to skip the last row in the frame for this loop.*/
+  y_end=_fragy_end-!notdone<<3;
+  for(;y<y_end;y+=8){
+    qstep=_dec->pp_dc_scale[*dc_qi];
+    flimit=(qstep*3)>>2;
+    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
+     qstep,flimit,variance,variance+nhfrags);
+    variance++;
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi];
+      flimit=(qstep*3)>>2;
+      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
+       qstep,flimit,variance,variance+nhfrags);
+      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
+       qstep,flimit,variance-1);
+      variance++;
+      dc_qi++;
+    }
+    dst+=dst_ystride<<3;
+    src+=src_ystride<<3;
+  }
+  /*And finally, handle the last row in the frame, if it's in the range.*/
+  if(!notdone){
+    int height;
+    height=_dst->height;
+    for(;y<height;y++){
+      memcpy(dst,src,width*sizeof(dst[0]));
+      dst+=dst_ystride;
+      src+=src_ystride;
+    }
+    /*Filter the last row of vertical block edges.*/
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi++];
+      flimit=(qstep*3)>>2;
+      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
+       qstep,flimit,variance++);
+    }
+  }
+}
+
+static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
+ int _dc_scale,int _sharp_mod,int _strong){
+  static const unsigned char OC_MOD_MAX[2]={24,32};
+  static const unsigned char OC_MOD_SHIFT[2]={1,0};
+  const unsigned char *psrc;
+  const unsigned char *src;
+  const unsigned char *nsrc;
+  unsigned char       *dst;
+  int                  vmod[72];
+  int                  hmod[72];
+  int                  mod_hi;
+  int                  by;
+  int                  bx;
+  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
+  dst=_idata;
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4));
+  for(by=0;by<9;by++){
+    for(bx=0;bx<8;bx++){
+      int mod;
+      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
+      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+    }
+    psrc=src;
+    src+=_ystride&-(!(_b&8)|by<7);
+  }
+  nsrc=dst;
+  psrc=dst-!(_b&1);
+  for(bx=0;bx<9;bx++){
+    src=nsrc;
+    for(by=0;by<8;by++){
+      int mod;
+      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
+      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+      psrc+=_ystride;
+      src+=_ystride;
+    }
+    psrc=nsrc;
+    nsrc+=!(_b&2)|bx<7;
+  }
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4));
+  nsrc=src+_ystride;
+  for(by=0;by<8;by++){
+    int a;
+    int b;
+    int w;
+    a=128;
+    b=64;
+    w=hmod[by];
+    a-=w;
+    b+=w**(src-!(_b&1));
+    w=vmod[by<<3];
+    a-=w;
+    b+=w*psrc[0];
+    w=vmod[by+1<<3];
+    a-=w;
+    b+=w*nsrc[0];
+    w=hmod[(1<<3)+by];
+    a-=w;
+    b+=w*src[1];
+    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
+    for(bx=1;bx<7;bx++){
+      a=128;
+      b=64;
+      w=hmod[(bx<<3)+by];
+      a-=w;
+      b+=w*src[bx-1];
+      w=vmod[(by<<3)+bx];
+      a-=w;
+      b+=w*psrc[bx];
+      w=vmod[(by+1<<3)+bx];
+      a-=w;
+      b+=w*nsrc[bx];
+      w=hmod[(bx+1<<3)+by];
+      a-=w;
+      b+=w*src[bx+1];
+      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
+    }
+    a=128;
+    b=64;
+    w=hmod[(7<<3)+by];
+    a-=w;
+    b+=w*src[6];
+    w=vmod[(by<<3)+7];
+    a-=w;
+    b+=w*psrc[7];
+    w=vmod[(by+1<<3)+7];
+    a-=w;
+    b+=w*nsrc[7];
+    w=hmod[(8<<3)+by];
+    a-=w;
+    b+=w*src[7+!(_b&2)];
+    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
+    dst+=_ystride;
+    psrc=src;
+    src=nsrc;
+    nsrc+=_ystride&-(!(_b&8)|by<6);
+  }
+}
+
+#define OC_DERING_THRESH1 (384)
+#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
+#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
+#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
+
+static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
+ int _pli,int _fragy0,int _fragy_end){
+  th_img_plane      *iplane;
+  oc_fragment_plane *fplane;
+  oc_fragment       *frag;
+  int               *variance;
+  unsigned char     *idata;
+  ptrdiff_t          froffset;
+  int                ystride;
+  int                nhfrags;
+  int                sthresh;
+  int                strong;
+  int                y_end;
+  int                width;
+  int                height;
+  int                y;
+  int                x;
+  iplane=_img+_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  frag=_dec->state.frags+froffset;
+  strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY);
+  sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
+  y=_fragy0<<3;
+  ystride=iplane->stride;
+  idata=iplane->data+y*(ptrdiff_t)ystride;
+  y_end=_fragy_end<<3;
+  width=iplane->width;
+  height=iplane->height;
+  for(;y<y_end;y+=8){
+    for(x=0;x<width;x+=8){
+      int b;
+      int qi;
+      int var;
+      qi=_dec->state.qis[frag->qii];
+      var=*variance;
+      b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3;
+      if(strong&&var>sthresh){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
+         !(b&2)&&variance[1]>OC_DERING_THRESH4||
+         !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
+         !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        }
+      }
+      else if(var>OC_DERING_THRESH2){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+      }
+      else if(var>OC_DERING_THRESH1){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
+      }
+      frag++;
+      variance++;
+    }
+    idata+=ystride<<3;
+  }
+}
+
+
+
+th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
+  oc_dec_ctx *dec;
+  if(_info==NULL||_setup==NULL)return NULL;
+  dec=_ogg_malloc(sizeof(*dec));
+  if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){
+    _ogg_free(dec);
+    return NULL;
+  }
+  dec->state.curframe_num=0;
+  return dec;
+}
+
+void th_decode_free(th_dec_ctx *_dec){
+  if(_dec!=NULL){
+    oc_dec_clear(_dec);
+    _ogg_free(_dec);
+  }
+}
+
+int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+ size_t _buf_sz){
+  switch(_req){
+  case TH_DECCTL_GET_PPLEVEL_MAX:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    (*(int *)_buf)=OC_PP_LEVEL_MAX;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_PPLEVEL:{
+    int pp_level;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    pp_level=*(int *)_buf;
+    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
+    _dec->pp_level=pp_level;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_GRANPOS:{
+    ogg_int64_t granpos;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
+    granpos=*(ogg_int64_t *)_buf;
+    if(granpos<0)return TH_EINVAL;
+    _dec->state.granpos=granpos;
+    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
+     -_dec->state.granpos_bias;
+    _dec->state.curframe_num=_dec->state.keyframe_num
+     +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
+    return 0;
+  }break;
+  case TH_DECCTL_SET_STRIPE_CB:{
+    th_stripe_callback *cb;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
+    cb=(th_stripe_callback *)_buf;
+    _dec->stripe_cb.ctx=cb->ctx;
+    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
+    return 0;
+  }break;
+#ifdef HAVE_CAIRO
+  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mbmode=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_MV:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mv=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_QI:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_qi=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_BITS:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_bits=*(int *)_buf;
+    return 0;
+  }break;
+#endif
+  default:return TH_EIMPL;
+  }
+}
+
+/*We're decoding an INTER frame, but have no initialized reference
+   buffers (i.e., decoding did not start on a key frame).
+  We initialize them to a solid gray here.*/
+static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
+  th_info *info;
+  size_t   yplane_sz;
+  size_t   cplane_sz;
+  int      yhstride;
+  int      yheight;
+  int      chstride;
+  int      cheight;
+  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
+  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
+  _dec->state.ref_frame_idx[OC_FRAME_SELF]=1;
+  info=&_dec->state.info;
+  yhstride=info->frame_width+2*OC_UMV_PADDING;
+  yheight=info->frame_height+2*OC_UMV_PADDING;
+  chstride=yhstride>>!(info->pixel_fmt&1);
+  cheight=yheight>>!(info->pixel_fmt&2);
+  yplane_sz=yhstride*(size_t)yheight;
+  cplane_sz=chstride*(size_t)cheight;
+  memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz);
+}
+
+int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos){
+  int ret;
+  if(_dec==NULL||_op==NULL)return TH_EFAULT;
+  /*A completely empty packet indicates a dropped frame and is treated exactly
+     like an inter frame with no coded blocks.
+    Only proceed if we have a non-empty packet.*/
+  if(_op->bytes!=0){
+    oc_dec_pipeline_state pipe;
+    th_ycbcr_buffer       stripe_buf;
+    int                   stripe_fragy;
+    int                   refi;
+    int                   pli;
+    int                   notstart;
+    int                   notdone;
+    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_frame_bytes=_op->bytes;
+#endif
+    ret=oc_dec_frame_header_unpack(_dec);
+    if(ret<0)return ret;
+    /*Select a free buffer to use for the reconstructed version of this
+       frame.*/
+    if(_dec->state.frame_type!=OC_INTRA_FRAME&&
+     (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
+      /*No reference frames yet!*/
+      oc_dec_init_dummy_frame(_dec);
+      refi=_dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    else{
+      for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
+       refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
+      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+    }
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      oc_dec_mark_all_intra(_dec);
+      _dec->state.keyframe_num=_dec->state.curframe_num;
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=
+       _dec->telemetry_mode_bytes=
+       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    else{
+      oc_dec_coded_flags_unpack(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mb_modes_unpack(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    oc_dec_block_qis_unpack(_dec);
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    oc_dec_residual_tokens_unpack(_dec);
+    /*Update granule position.
+      This must be done before the striped decode callbacks so that the
+       application knows what to do with the frame data.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    /*All of the rest of the operations -- DC prediction reversal,
+       reconstructing coded fragments, copying uncoded fragments, loop
+       filtering, extending borders, and out-of-loop post-processing -- should
+       be pipelined.
+      I.e., DC prediction reversal, reconstruction, and uncoded fragment
+       copying are done for one or two super block rows, then loop filtering is
+       run as far as it can, then bordering copying, then post-processing.
+      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
+       block rows, and one chroma.
+      Otherwise, an MCU consists of one super block row from each plane.
+      Inside each MCU, we perform all of the steps on one color plane before
+       moving on to the next.
+      After reconstruction, the additional filtering stages introduce a delay
+       since they need some pixels from the next fragment row.
+      Thus the actual number of decoded rows available is slightly smaller for
+       the first MCU, and slightly larger for the last.
+
+      This entire process allows us to operate on the data while it is still in
+       cache, resulting in big performance improvements.
+      An application callback allows further application processing (blitting
+       to video memory, color conversion, etc.) to also use the data while it's
+       in cache.*/
+    oc_dec_pipeline_init(_dec,&pipe);
+    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
+    notstart=0;
+    notdone=1;
+    for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
+      int avail_fragy0;
+      int avail_fragy_end;
+      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
+      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
+      for(pli=0;pli<3;pli++){
+        oc_fragment_plane *fplane;
+        int                frag_shift;
+        int                pp_offset;
+        int                sdelay;
+        int                edelay;
+        fplane=_dec->state.fplanes+pli;
+        /*Compute the first and last fragment row of the current MCU for this
+           plane.*/
+        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
+        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
+         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
+        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
+        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
+        sdelay=edelay=0;
+        if(pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
+           refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+        }
+        /*To fill the borders, we have an additional two pixel delay, since a
+           fragment in the next row could filter its top edge, using two pixels
+           from a fragment in this row.
+          But there's no reason to delay a full fragment between the two.*/
+        oc_state_borders_fill_rows(&_dec->state,refi,pli,
+         (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
+         (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
+        /*Out-of-loop post-processing.*/
+        pp_offset=3*(pli!=0);
+        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
+          /*Perform de-blocking in one plane.*/
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
+           _dec->state.ref_frame_bufs[refi],pli,
+           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
+            /*Perform de-ringing in one plane.*/
+            sdelay+=notstart;
+            edelay+=notdone;
+            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
+             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
+          }
+        }
+        /*If no post-processing is done, we still need to delay a row for the
+           loop filter, thanks to the strange filtering order VP3 chose.*/
+        else if(pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+        }
+        /*Compute the intersection of the available rows in all planes.
+          If chroma is sub-sampled, the effect of each of its delays is
+           doubled, but luma might have more post-processing filters enabled
+           than chroma, so we don't know up front which one is the limiting
+           factor.*/
+        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
+        avail_fragy_end=OC_MINI(avail_fragy_end,
+         pipe.fragy_end[pli]-edelay<<frag_shift);
+      }
+      if(_dec->stripe_cb.stripe_decoded!=NULL){
+        /*The callback might want to use the FPU, so let's make sure they can.
+          We violate all kinds of ABI restrictions by not doing this until
+           now, but none of them actually matter since we don't use floating
+           point ourselves.*/
+        oc_restore_fpu(&_dec->state);
+        /*Make the callback, ensuring we flip the sense of the "start" and
+           "end" of the available region upside down.*/
+        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
+         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
+         _dec->state.fplanes[0].nvfrags-avail_fragy0);
+      }
+      notstart=1;
+    }
+    /*Finish filling in the reference frame borders.*/
+    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
+    /*Update the reference frame indices.*/
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      /*The new frame becomes both the previous and gold reference frames.*/
+      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
+       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    else{
+      /*Otherwise, just replace the previous reference frame.*/
+      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+    }
+    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
+       gamma values, if nothing else).*/
+    oc_restore_fpu(&_dec->state);
+#if defined(OC_DUMP_IMAGES)
+    /*Don't dump images for dropped frames.*/
+    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
+#endif
+    return 0;
+  }
+  else{
+    if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){
+      int refi;
+      /*No reference frames yet!*/
+      oc_dec_init_dummy_frame(_dec);
+      refi=_dec->state.ref_frame_idx[OC_FRAME_PREV];
+      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+      memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi],
+       sizeof(_dec->pp_frame_buf[0])*3);
+    }
+    /*Just update the granule position and return.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    return TH_DUPFRAME;
+  }
+}
+
+int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
+  if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT;
+  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
+#if defined(HAVE_CAIRO)
+  /*If telemetry ioctls are active, we need to draw to the output buffer.
+    Stuff the plane into cairo.*/
+  if(_dec->telemetry){
+    cairo_surface_t *cs;
+    unsigned char   *data;
+    unsigned char   *y_row;
+    unsigned char   *u_row;
+    unsigned char   *v_row;
+    unsigned char   *rgb_row;
+    int              cstride;
+    int              w;
+    int              h;
+    int              x;
+    int              y;
+    int              hdec;
+    int              vdec;
+    w=_ycbcr[0].width;
+    h=_ycbcr[0].height;
+    hdec=!(_dec->state.info.pixel_fmt&1);
+    vdec=!(_dec->state.info.pixel_fmt&2);
+    /*Lazy data buffer init.
+      We could try to re-use the post-processing buffer, which would save
+       memory, but complicate the allocation logic there.
+      I don't think anyone cares about memory usage when using telemetry; it is
+       not meant for embedded devices.*/
+    if(_dec->telemetry_frame_data==NULL){
+      _dec->telemetry_frame_data=_ogg_malloc(
+       (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
+      if(_dec->telemetry_frame_data==NULL)return 0;
+    }
+    cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
+    /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
+    data=cairo_image_surface_get_data(cs);
+    if(data==NULL){
+      cairo_surface_destroy(cs);
+      return 0;
+    }
+    cstride=cairo_image_surface_get_stride(cs);
+    y_row=_ycbcr[0].data;
+    u_row=_ycbcr[1].data;
+    v_row=_ycbcr[2].data;
+    rgb_row=data;
+    for(y=0;y<h;y++){
+      for(x=0;x<w;x++){
+        int r;
+        int g;
+        int b;
+        r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
+        g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
+         -2672387*v_row[x>>hdec]+447306710)/3287200;
+        b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
+        rgb_row[4*x+0]=OC_CLAMP255(b);
+        rgb_row[4*x+1]=OC_CLAMP255(g);
+        rgb_row[4*x+2]=OC_CLAMP255(r);
+      }
+      y_row+=_ycbcr[0].stride;
+      u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
+      v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
+      rgb_row+=cstride;
+    }
+    /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
+    {
+      cairo_t           *c;
+      const oc_fragment *frags;
+      oc_mv             *frag_mvs;
+      const signed char *mb_modes;
+      oc_mb_map         *mb_maps;
+      size_t             nmbs;
+      size_t             mbi;
+      int                row2;
+      int                col2;
+      int                qim[3]={0,0,0};
+      if(_dec->state.nqis==2){
+        int bqi;
+        bqi=_dec->state.qis[0];
+        if(_dec->state.qis[1]>bqi)qim[1]=1;
+        if(_dec->state.qis[1]<bqi)qim[1]=-1;
+      }
+      if(_dec->state.nqis==3){
+        int bqi;
+        int cqi;
+        int dqi;
+        bqi=_dec->state.qis[0];
+        cqi=_dec->state.qis[1];
+        dqi=_dec->state.qis[2];
+        if(cqi>bqi&&dqi>bqi){
+          if(dqi>cqi){
+            qim[1]=1;
+            qim[2]=2;
+          }
+          else{
+            qim[1]=2;
+            qim[2]=1;
+          }
+        }
+        else if(cqi<bqi&&dqi<bqi){
+          if(dqi<cqi){
+            qim[1]=-1;
+            qim[2]=-2;
+          }
+          else{
+            qim[1]=-2;
+            qim[2]=-1;
+          }
+        }
+        else{
+          if(cqi<bqi)qim[1]=-1;
+          else qim[1]=1;
+          if(dqi<bqi)qim[2]=-1;
+          else qim[2]=1;
+        }
+      }
+      c=cairo_create(cs);
+      frags=_dec->state.frags;
+      frag_mvs=_dec->state.frag_mvs;
+      mb_modes=_dec->state.mb_modes;
+      mb_maps=_dec->state.mb_maps;
+      nmbs=_dec->state.nmbs;
+      row2=0;
+      col2=0;
+      for(mbi=0;mbi<nmbs;mbi++){
+        float x;
+        float y;
+        int   bi;
+        y=h-(row2+((col2+1>>1)&1))*16-16;
+        x=(col2>>1)*16;
+        cairo_set_line_width(c,1.);
+        /*Keyframe (all intra) red box.*/
+        if(_dec->state.frame_type==OC_INTRA_FRAME){
+          if(_dec->telemetry_mbmode&0x02){
+            cairo_set_source_rgba(c,1.,0,0,.5);
+            cairo_rectangle(c,x+2.5,y+2.5,11,11);
+            cairo_stroke_preserve(c);
+            cairo_set_source_rgba(c,1.,0,0,.25);
+            cairo_fill(c);
+          }
+        }
+        else{
+          const signed char *frag_mv;
+          ptrdiff_t          fragi;
+          for(bi=0;bi<4;bi++){
+            fragi=mb_maps[mbi][0][bi];
+            if(fragi>=0&&frags[fragi].coded){
+              frag_mv=frag_mvs[fragi];
+              break;
+            }
+          }
+          if(bi<4){
+            switch(mb_modes[mbi]){
+              case OC_MODE_INTRA:{
+                if(_dec->telemetry_mbmode&0x02){
+                  cairo_set_source_rgba(c,1.,0,0,.5);
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,0,0,.25);
+                  cairo_fill(c);
+                }
+              }break;
+              case OC_MODE_INTER_NOMV:{
+                if(_dec->telemetry_mbmode&0x01){
+                  cairo_set_source_rgba(c,0,0,1.,.5);
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,0,0,1.,.25);
+                  cairo_fill(c);
+                }
+              }break;
+              case OC_MODE_INTER_MV:{
+                if(_dec->telemetry_mbmode&0x04){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x04){
+                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+8,y+8);
+                  cairo_stroke(c);
+                }
+              }break;
+              case OC_MODE_INTER_MV_LAST:{
+                if(_dec->telemetry_mbmode&0x08){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_move_to(c,x+13.5,y+2.5);
+                  cairo_line_to(c,x+2.5,y+8);
+                  cairo_line_to(c,x+13.5,y+13.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x08){
+                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+8,y+8);
+                  cairo_stroke(c);
+                }
+              }break;
+              case OC_MODE_INTER_MV_LAST2:{
+                if(_dec->telemetry_mbmode&0x10){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_move_to(c,x+8,y+2.5);
+                  cairo_line_to(c,x+2.5,y+8);
+                  cairo_line_to(c,x+8,y+13.5);
+                  cairo_move_to(c,x+13.5,y+2.5);
+                  cairo_line_to(c,x+8,y+8);
+                  cairo_line_to(c,x+13.5,y+13.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x10){
+                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+8,y+8);
+                  cairo_stroke(c);
+                }
+              }break;
+              case OC_MODE_GOLDEN_NOMV:{
+                if(_dec->telemetry_mbmode&0x20){
+                  cairo_set_source_rgba(c,1.,1.,0,.5);
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,1.,0,.25);
+                  cairo_fill(c);
+                }
+              }break;
+              case OC_MODE_GOLDEN_MV:{
+                if(_dec->telemetry_mbmode&0x40){
+                  cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                  cairo_set_source_rgba(c,1.,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                if(_dec->telemetry_mv&0x40){
+                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+8,y+8);
+                  cairo_stroke(c);
+                }
+              }break;
+              case OC_MODE_INTER_MV_FOUR:{
+                if(_dec->telemetry_mbmode&0x80){
+                  cairo_rectangle(c,x+2.5,y+2.5,4,4);
+                  cairo_rectangle(c,x+9.5,y+2.5,4,4);
+                  cairo_rectangle(c,x+2.5,y+9.5,4,4);
+                  cairo_rectangle(c,x+9.5,y+9.5,4,4);
+                  cairo_set_source_rgba(c,0,1.,0,.5);
+                  cairo_stroke(c);
+                }
+                /*4mv is odd, coded in raster order.*/
+                fragi=mb_maps[mbi][0][0];
+                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+                  frag_mv=frag_mvs[fragi];
+                  cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+4,y+12);
+                  cairo_stroke(c);
+                }
+                fragi=mb_maps[mbi][0][1];
+                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+                  frag_mv=frag_mvs[fragi];
+                  cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+12,y+12);
+                  cairo_stroke(c);
+                }
+                fragi=mb_maps[mbi][0][2];
+                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+                  frag_mv=frag_mvs[fragi];
+                  cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+4,y+4);
+                  cairo_stroke(c);
+                }
+                fragi=mb_maps[mbi][0][3];
+                if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
+                  frag_mv=frag_mvs[fragi];
+                  cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]);
+                  cairo_set_source_rgba(c,1.,1.,1.,.9);
+                  cairo_set_line_width(c,3.);
+                  cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,2.);
+                  cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
+                  cairo_stroke_preserve(c);
+                  cairo_set_line_width(c,1.);
+                  cairo_line_to(c,x+12,y+4);
+                  cairo_stroke(c);
+                }
+              }break;
+            }
+          }
+        }
+        /*qii illustration.*/
+        if(_dec->telemetry_qi&0x2){
+          cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
+          for(bi=0;bi<4;bi++){
+            ptrdiff_t fragi;
+            int       qiv;
+            int       xp;
+            int       yp;
+            xp=x+(bi&1)*8;
+            yp=y+8-(bi&2)*4;
+            fragi=mb_maps[mbi][0][bi];
+            if(fragi>=0&&frags[fragi].coded){
+              qiv=qim[frags[fragi].qii];
+              cairo_set_line_width(c,3.);
+              cairo_set_source_rgba(c,0.,0.,0.,.5);
+              switch(qiv){
+                /*Double plus:*/
+                case 2:{
+                  if((bi&1)^((bi&2)>>1)){
+                    cairo_move_to(c,xp+2.5,yp+1.5);
+                    cairo_line_to(c,xp+2.5,yp+3.5);
+                    cairo_move_to(c,xp+1.5,yp+2.5);
+                    cairo_line_to(c,xp+3.5,yp+2.5);
+                    cairo_move_to(c,xp+5.5,yp+4.5);
+                    cairo_line_to(c,xp+5.5,yp+6.5);
+                    cairo_move_to(c,xp+4.5,yp+5.5);
+                    cairo_line_to(c,xp+6.5,yp+5.5);
+                    cairo_stroke_preserve(c);
+                    cairo_set_source_rgba(c,0.,1.,1.,1.);
+                  }
+                  else{
+                    cairo_move_to(c,xp+5.5,yp+1.5);
+                    cairo_line_to(c,xp+5.5,yp+3.5);
+                    cairo_move_to(c,xp+4.5,yp+2.5);
+                    cairo_line_to(c,xp+6.5,yp+2.5);
+                    cairo_move_to(c,xp+2.5,yp+4.5);
+                    cairo_line_to(c,xp+2.5,yp+6.5);
+                    cairo_move_to(c,xp+1.5,yp+5.5);
+                    cairo_line_to(c,xp+3.5,yp+5.5);
+                    cairo_stroke_preserve(c);
+                    cairo_set_source_rgba(c,0.,1.,1.,1.);
+                  }
+                }break;
+                /*Double minus:*/
+                case -2:{
+                  cairo_move_to(c,xp+2.5,yp+2.5);
+                  cairo_line_to(c,xp+5.5,yp+2.5);
+                  cairo_move_to(c,xp+2.5,yp+5.5);
+                  cairo_line_to(c,xp+5.5,yp+5.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,1.,1.,1.);
+                }break;
+                /*Plus:*/
+                case 1:{
+                  if(bi&2==0)yp-=2;
+                  if(bi&1==0)xp-=2;
+                  cairo_move_to(c,xp+4.5,yp+2.5);
+                  cairo_line_to(c,xp+4.5,yp+6.5);
+                  cairo_move_to(c,xp+2.5,yp+4.5);
+                  cairo_line_to(c,xp+6.5,yp+4.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,.1,1.,.3,1.);
+                  break;
+                }
+                /*Fall through.*/
+                /*Minus:*/
+                case -1:{
+                  cairo_move_to(c,xp+2.5,yp+4.5);
+                  cairo_line_to(c,xp+6.5,yp+4.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,1.,.3,.1,1.);
+                }break;
+                default:continue;
+              }
+              cairo_set_line_width(c,1.);
+              cairo_stroke(c);
+            }
+          }
+        }
+        col2++;
+        if((col2>>1)>=_dec->state.nhmbs){
+          col2=0;
+          row2+=2;
+        }
+      }
+      /*Bit usage indicator[s]:*/
+      if(_dec->telemetry_bits){
+        int widths[6];
+        int fpsn;
+        int fpsd;
+        int mult;
+        int fullw;
+        int padw;
+        int i;
+        fpsn=_dec->state.info.fps_numerator;
+        fpsd=_dec->state.info.fps_denominator;
+        mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
+        fullw=250.f*h*fpsd*mult/fpsn;
+        padw=w-24;
+        /*Header and coded block bits.*/
+        if(_dec->telemetry_frame_bytes<0||
+         _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
+          _dec->telemetry_frame_bytes=0;
+        }
+        if(_dec->telemetry_coding_bytes<0||
+         _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_coding_bytes=0;
+        }
+        if(_dec->telemetry_mode_bytes<0||
+         _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_mode_bytes=0;
+        }
+        if(_dec->telemetry_mv_bytes<0||
+         _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_mv_bytes=0;
+        }
+        if(_dec->telemetry_qi_bytes<0||
+         _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_qi_bytes=0;
+        }
+        if(_dec->telemetry_dc_bytes<0||
+         _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
+          _dec->telemetry_dc_bytes=0;
+        }
+        widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
+        widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
+        widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
+        widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
+        widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
+        widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
+        for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
+        cairo_set_source_rgba(c,.0,.0,.0,.6);
+        cairo_rectangle(c,10,h-33,widths[0]+1,5);
+        cairo_rectangle(c,10,h-29,widths[1]+1,5);
+        cairo_rectangle(c,10,h-25,widths[2]+1,5);
+        cairo_rectangle(c,10,h-21,widths[3]+1,5);
+        cairo_rectangle(c,10,h-17,widths[4]+1,5);
+        cairo_rectangle(c,10,h-13,widths[5]+1,5);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,1,0,0);
+        cairo_rectangle(c,10.5,h-32.5,widths[0],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,0,1,0);
+        cairo_rectangle(c,10.5,h-28.5,widths[1],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,0,0,1);
+        cairo_rectangle(c,10.5,h-24.5,widths[2],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.6,.4,.0);
+        cairo_rectangle(c,10.5,h-20.5,widths[3],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.3,.3,.3);
+        cairo_rectangle(c,10.5,h-16.5,widths[4],4);
+        cairo_fill(c);
+        cairo_set_source_rgb(c,.5,.5,.8);
+        cairo_rectangle(c,10.5,h-12.5,widths[5],4);
+        cairo_fill(c);
+      }
+      /*Master qi indicator[s]:*/
+      if(_dec->telemetry_qi&0x1){
+        cairo_text_extents_t extents;
+        char                 buffer[10];
+        int                  p;
+        int                  y;
+        p=0;
+        y=h-7.5;
+        if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
+        buffer[p++]=48+_dec->state.qis[0]%10;
+        if(_dec->state.nqis>=2){
+          buffer[p++]=' ';
+          if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
+          buffer[p++]=48+_dec->state.qis[1]%10;
+        }
+        if(_dec->state.nqis==3){
+          buffer[p++]=' ';
+          if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
+          buffer[p++]=48+_dec->state.qis[2]%10;
+        }
+        buffer[p++]='\0';
+        cairo_select_font_face(c,"sans",
+         CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
+        cairo_set_font_size(c,18);
+        cairo_text_extents(c,buffer,&extents);
+        cairo_set_source_rgb(c,1,1,1);
+        cairo_move_to(c,w-extents.x_advance-10,y);
+        cairo_show_text(c,buffer);
+        cairo_set_source_rgb(c,0,0,0);
+        cairo_move_to(c,w-extents.x_advance-10,y);
+        cairo_text_path(c,buffer);
+        cairo_set_line_width(c,.8);
+        cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
+        cairo_stroke(c);
+      }
+      cairo_destroy(c);
+    }
+    /*Out of the Cairo plane into the telemetry YUV buffer.*/
+    _ycbcr[0].data=_dec->telemetry_frame_data;
+    _ycbcr[0].stride=_ycbcr[0].width;
+    _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
+    _ycbcr[1].stride=_ycbcr[1].width;
+    _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
+    _ycbcr[2].stride=_ycbcr[2].width;
+    y_row=_ycbcr[0].data;
+    u_row=_ycbcr[1].data;
+    v_row=_ycbcr[2].data;
+    rgb_row=data;
+    /*This is one of the few places it's worth handling chroma on a
+       case-by-case basis.*/
+    switch(_dec->state.info.pixel_fmt){
+      case TH_PF_420:{
+        for(y=0;y<h;y+=2){
+          unsigned char *y_row2;
+          unsigned char *rgb_row2;
+          y_row2=y_row+_ycbcr[0].stride;
+          rgb_row2=rgb_row+cstride;
+          for(x=0;x<w;x+=2){
+            int y;
+            int u;
+            int v;
+            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            y_row[x]=OC_CLAMP255(y);
+            y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+             +24966*rgb_row[4*x+4]+4207500)/255000;
+            y_row[x+1]=OC_CLAMP255(y);
+            y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
+             +24966*rgb_row2[4*x+0]+4207500)/255000;
+            y_row2[x]=OC_CLAMP255(y);
+            y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
+             +24966*rgb_row2[4*x+4]+4207500)/255000;
+            y_row2[x+1]=OC_CLAMP255(y);
+            u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
+             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+             -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
+             +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+             +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
+             +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
+            v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
+             +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
+              +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+             -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
+              +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
+            u_row[x>>1]=OC_CLAMP255(u);
+            v_row[x>>1]=OC_CLAMP255(v);
+          }
+          y_row+=_ycbcr[0].stride<<1;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride<<1;
+        }
+      }break;
+      case TH_PF_422:{
+        for(y=0;y<h;y++){
+          for(x=0;x<w;x+=2){
+            int y;
+            int u;
+            int v;
+            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            y_row[x]=OC_CLAMP255(y);
+            y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+             +24966*rgb_row[4*x+4]+4207500)/255000;
+            y_row[x+1]=OC_CLAMP255(y);
+            u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
+             -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
+             +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
+            v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
+             -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
+             -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
+            u_row[x>>1]=OC_CLAMP255(u);
+            v_row[x>>1]=OC_CLAMP255(v);
+          }
+          y_row+=_ycbcr[0].stride;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride;
+        }
+      }break;
+      /*case TH_PF_444:*/
+      default:{
+        for(y=0;y<h;y++){
+          for(x=0;x<w;x++){
+            int y;
+            int u;
+            int v;
+            y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+             +24966*rgb_row[4*x+0]+4207500)/255000;
+            u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
+             +99232*rgb_row[4*x+0]+29032005)/225930;
+            v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
+             -25536*rgb_row[4*x+0]+45940035)/357510;
+            y_row[x]=OC_CLAMP255(y);
+            u_row[x]=OC_CLAMP255(u);
+            v_row[x]=OC_CLAMP255(v);
+          }
+          y_row+=_ycbcr[0].stride;
+          u_row+=_ycbcr[1].stride;
+          v_row+=_ycbcr[2].stride;
+          rgb_row+=cstride;
+        }
+      }break;
+    }
+    /*Finished.
+      Destroy the surface.*/
+    cairo_surface_destroy(cs);
+  }
+#endif
+  return 0;
+}
diff --git a/engine/code/libtheora-1.1.1/lib/dequant.c b/engine/code/libtheora-1.1.1/lib/dequant.c
new file mode 100644
index 00000000..e554872d
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/dequant.c
@@ -0,0 +1,182 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: dequant.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "dequant.h"
+#include "decint.h"
+
+int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){
+  th_quant_base *base_mats;
+  long           val;
+  int            nbase_mats;
+  int            sizes[64];
+  int            indices[64];
+  int            nbits;
+  int            bmi;
+  int            ci;
+  int            qti;
+  int            pli;
+  int            qri;
+  int            qi;
+  int            i;
+  val=oc_pack_read(_opb,3);
+  nbits=(int)val;
+  for(qi=0;qi<64;qi++){
+    val=oc_pack_read(_opb,nbits);
+    _qinfo->loop_filter_limits[qi]=(unsigned char)val;
+  }
+  val=oc_pack_read(_opb,4);
+  nbits=(int)val+1;
+  for(qi=0;qi<64;qi++){
+    val=oc_pack_read(_opb,nbits);
+    _qinfo->ac_scale[qi]=(ogg_uint16_t)val;
+  }
+  val=oc_pack_read(_opb,4);
+  nbits=(int)val+1;
+  for(qi=0;qi<64;qi++){
+    val=oc_pack_read(_opb,nbits);
+    _qinfo->dc_scale[qi]=(ogg_uint16_t)val;
+  }
+  val=oc_pack_read(_opb,9);
+  nbase_mats=(int)val+1;
+  base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
+  if(base_mats==NULL)return TH_EFAULT;
+  for(bmi=0;bmi<nbase_mats;bmi++){
+    for(ci=0;ci<64;ci++){
+      val=oc_pack_read(_opb,8);
+      base_mats[bmi][ci]=(unsigned char)val;
+    }
+  }
+  nbits=oc_ilog(nbase_mats-1);
+  for(i=0;i<6;i++){
+    th_quant_ranges *qranges;
+    th_quant_base   *qrbms;
+    int             *qrsizes;
+    qti=i/3;
+    pli=i%3;
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    if(i>0){
+      val=oc_pack_read1(_opb);
+      if(!val){
+        int qtj;
+        int plj;
+        if(qti>0){
+          val=oc_pack_read1(_opb);
+          if(val){
+            qtj=qti-1;
+            plj=pli;
+          }
+          else{
+            qtj=(i-1)/3;
+            plj=(i-1)%3;
+          }
+        }
+        else{
+          qtj=(i-1)/3;
+          plj=(i-1)%3;
+        }
+        *qranges=*(_qinfo->qi_ranges[qtj]+plj);
+        continue;
+      }
+    }
+    val=oc_pack_read(_opb,nbits);
+    indices[0]=(int)val;
+    for(qi=qri=0;qi<63;){
+      val=oc_pack_read(_opb,oc_ilog(62-qi));
+      sizes[qri]=(int)val+1;
+      qi+=(int)val+1;
+      val=oc_pack_read(_opb,nbits);
+      indices[++qri]=(int)val;
+    }
+    /*Note: The caller is responsible for cleaning up any partially
+       constructed qinfo.*/
+    if(qi>63){
+      _ogg_free(base_mats);
+      return TH_EBADHEADER;
+    }
+    qranges->nranges=qri;
+    qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
+    if(qranges->sizes==NULL){
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      _ogg_free(base_mats);
+      return TH_EFAULT;
+    }
+    memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
+    qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
+    if(qrbms==NULL){
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      _ogg_free(base_mats);
+      return TH_EFAULT;
+    }
+    qranges->base_matrices=(const th_quant_base *)qrbms;
+    do{
+      bmi=indices[qri];
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      if(bmi>=nbase_mats){
+        _ogg_free(base_mats);
+        return TH_EBADHEADER;
+      }
+      memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri]));
+    }
+    while(qri-->0);
+  }
+  _ogg_free(base_mats);
+  return 0;
+}
+
+void oc_quant_params_clear(th_quant_info *_qinfo){
+  int i;
+  for(i=6;i-->0;){
+    int qti;
+    int pli;
+    qti=i/3;
+    pli=i%3;
+    /*Clear any duplicate pointer references.*/
+    if(i>0){
+      int qtj;
+      int plj;
+      qtj=(i-1)/3;
+      plj=(i-1)%3;
+      if(_qinfo->qi_ranges[qti][pli].sizes==
+       _qinfo->qi_ranges[qtj][plj].sizes){
+        _qinfo->qi_ranges[qti][pli].sizes=NULL;
+      }
+      if(_qinfo->qi_ranges[qti][pli].base_matrices==
+       _qinfo->qi_ranges[qtj][plj].base_matrices){
+        _qinfo->qi_ranges[qti][pli].base_matrices=NULL;
+      }
+    }
+    if(qti>0){
+      if(_qinfo->qi_ranges[1][pli].sizes==
+       _qinfo->qi_ranges[0][pli].sizes){
+        _qinfo->qi_ranges[1][pli].sizes=NULL;
+      }
+      if(_qinfo->qi_ranges[1][pli].base_matrices==
+       _qinfo->qi_ranges[0][pli].base_matrices){
+        _qinfo->qi_ranges[1][pli].base_matrices=NULL;
+      }
+    }
+    /*Now free all the non-duplicate storage.*/
+    _ogg_free((void *)_qinfo->qi_ranges[qti][pli].sizes);
+    _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices);
+  }
+}
diff --git a/engine/code/libtheora-1.1.1/lib/dequant.h b/engine/code/libtheora-1.1.1/lib/dequant.h
new file mode 100644
index 00000000..ef25838e
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/dequant.h
@@ -0,0 +1,27 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#if !defined(_dequant_H)
+# define _dequant_H (1)
+# include "quant.h"
+# include "bitpack.h"
+
+int oc_quant_params_unpack(oc_pack_buf *_opb,
+ th_quant_info *_qinfo);
+void oc_quant_params_clear(th_quant_info *_qinfo);
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/fragment.c b/engine/code/libtheora-1.1.1/lib/fragment.c
new file mode 100644
index 00000000..15372e9d
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/fragment.c
@@ -0,0 +1,87 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: fragment.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <string.h>
+#include "internal.h"
+
+void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride){
+  (*_state->opt_vtable.frag_copy)(_dst,_src,_ystride);
+}
+
+void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){
+  int i;
+  for(i=8;i-->0;){
+    memcpy(_dst,_src,8*sizeof(*_dst));
+    _dst+=_ystride;
+    _src+=_ystride;
+  }
+}
+
+void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst,
+ int _ystride,const ogg_int16_t _residue[64]){
+  _state->opt_vtable.frag_recon_intra(_dst,_ystride,_residue);
+}
+
+void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride,
+ const ogg_int16_t _residue[64]){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+128);
+    _dst+=_ystride;
+  }
+}
+
+void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  _state->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue);
+}
+
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+_src[j]);
+    _dst+=_ystride;
+    _src+=_ystride;
+  }
+}
+
+void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride,
+ const ogg_int16_t _residue[64]){
+  _state->opt_vtable.frag_recon_inter2(_dst,_src1,_src2,_ystride,_residue);
+}
+
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+(_src1[j]+_src2[j]>>1));
+    _dst+=_ystride;
+    _src1+=_ystride;
+    _src2+=_ystride;
+  }
+}
+
+void oc_restore_fpu(const oc_theora_state *_state){
+  _state->opt_vtable.restore_fpu();
+}
+
+void oc_restore_fpu_c(void){}
diff --git a/engine/code/libtheora-1.1.1/lib/huffdec.c b/engine/code/libtheora-1.1.1/lib/huffdec.c
new file mode 100644
index 00000000..8cf27f03
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/huffdec.c
@@ -0,0 +1,489 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: huffdec.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "huffdec.h"
+#include "decint.h"
+
+
+/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/
+#define _ogg_offsetof(_type,_field)\
+ ((size_t)((char *)&((_type *)0)->_field-(char *)0))
+
+/*The number of internal tokens associated with each of the spec tokens.*/
+static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={
+  1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8
+};
+
+/*The map from external spec-defined tokens to internal tokens.
+  This is constructed so that any extra bits read with the original token value
+   can be masked off the least significant bits of its internal token index.
+  In addition, all of the tokens which require additional extra bits are placed
+   at the start of the list, and grouped by type.
+  OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
+   giving it index 0 may simplify comparisons on some architectures.
+  These requirements require some substantial reordering.*/
+static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  15,
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  16,
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  17,
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
+  88,
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
+  80,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+   1,
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+   0,
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
+  48,
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
+  14,
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  56,
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  57,
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  58,
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  59,
+  /*OC_DCT_VAL_CAT2 (1 extra bit)*/
+  60,
+  62,
+  64,
+  66,
+  /*OC_DCT_VAL_CAT3 (2 extra bits)*/
+  68,
+  /*OC_DCT_VAL_CAT4 (3 extra bits)*/
+  72,
+  /*OC_DCT_VAL_CAT5 (4 extra bits)*/
+   2,
+  /*OC_DCT_VAL_CAT6 (5 extra bits)*/
+   4,
+  /*OC_DCT_VAL_CAT7 (6 extra bits)*/
+   6,
+  /*OC_DCT_VAL_CAT8 (10 extra bits)*/
+   8,
+  /*OC_DCT_RUN_CAT1A (1 extra bit)*/
+  18,
+  20,
+  22,
+  24,
+  26,
+  /*OC_DCT_RUN_CAT1B (3 extra bits)*/
+  32,
+  /*OC_DCT_RUN_CAT1C (4 extra bits)*/
+  12,
+  /*OC_DCT_RUN_CAT2A (2 extra bits)*/
+  28,
+  /*OC_DCT_RUN_CAT2B (3 extra bits)*/
+  40
+};
+
+/*These three functions are really part of the bitpack.c module, but
+   they are only used here.
+  Declaring local static versions so they can be inlined saves considerable
+   function call overhead.*/
+
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  oc_pb_window         window;
+  int                  available;
+  window=_b->window;
+  available=_b->bits;
+  ptr=_b->ptr;
+  stop=_b->stop;
+  /*This version of _refill() doesn't bother setting eof because we won't
+     check for it after we've started decoding DCT tokens.*/
+  if(ptr>=stop)available=OC_LOTS_OF_BITS;
+  while(available<=OC_PB_WINDOW_SIZE-8){
+    available+=8;
+    window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+    if(ptr>=stop)available=OC_LOTS_OF_BITS;
+  }
+  _b->ptr=ptr;
+  if(_bits>available)window|=*ptr>>(available&7);
+  _b->bits=available;
+  return window;
+}
+
+
+/*Read in bits without advancing the bit pointer.
+  Here we assume 0<=_bits&&_bits<=32.*/
+static long oc_pack_look(oc_pack_buf *_b,int _bits){
+  oc_pb_window window;
+  int          available;
+  long         result;
+  window=_b->window;
+  available=_b->bits;
+  if(_bits==0)return 0;
+  if(_bits>available)_b->window=window=oc_pack_refill(_b,_bits);
+  result=window>>OC_PB_WINDOW_SIZE-_bits;
+  return result;
+}
+
+/*Advance the bit pointer.*/
+static void oc_pack_adv(oc_pack_buf *_b,int _bits){
+  /*We ignore the special cases for _bits==0 and _bits==32 here, since they are
+     never used actually used.
+    OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read
+     32 bits in a single go, and would require a 32 GB lookup table (assuming
+     8 byte pointers, since 4 byte pointers couldn't fit such a table).*/
+  _b->window<<=_bits;
+  _b->bits-=_bits;
+}
+
+
+/*The log_2 of the size of a lookup table is allowed to grow to relative to
+   the number of unique nodes it contains.
+  E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
+   wasted (each node will have an amortized cost of at most 20 bytes when using
+   4-byte pointers).
+  Larger numbers can decode tokens with fewer read operations, while smaller
+   numbers may save more space (requiring as little as 8 bytes amortized per
+   node, though there will be more nodes).
+  With a sample file:
+  32233473 read calls are required when no tree collapsing is done (100.0%).
+  19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
+  11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
+  10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
+  10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
+  Since a value of 1 gets us the vast majority of the speed-up with only a
+   small amount of wasted memory, this is what we use.*/
+#define OC_HUFF_SLUSH (1)
+
+
+/*Determines the size in bytes of a Huffman tree node that represents a
+   subtree of depth _nbits.
+  _nbits: The depth of the subtree.
+          If this is 0, the node is a leaf node.
+          Otherwise 1<<_nbits pointers are allocated for children.
+  Return: The number of bytes required to store the node.*/
+static size_t oc_huff_node_size(int _nbits){
+  size_t size;
+  size=_ogg_offsetof(oc_huff_node,nodes);
+  if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits);
+  return size;
+}
+
+static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){
+  oc_huff_node *ret;
+  ret=(oc_huff_node *)*_storage;
+  ret->nbits=(unsigned char)_nbits;
+  (*_storage)+=_size;
+  return ret;
+}
+
+
+/*Determines the size in bytes of a Huffman tree.
+  _nbits: The depth of the subtree.
+          If this is 0, the node is a leaf node.
+          Otherwise storage for 1<<_nbits pointers are added for children.
+  Return: The number of bytes required to store the tree.*/
+static size_t oc_huff_tree_size(const oc_huff_node *_node){
+  size_t size;
+  size=oc_huff_node_size(_node->nbits);
+  if(_node->nbits){
+    int nchildren;
+    int i;
+    nchildren=1<<_node->nbits;
+    for(i=0;i<nchildren;i+=1<<_node->nbits-_node->nodes[i]->depth){
+      size+=oc_huff_tree_size(_node->nodes[i]);
+    }
+  }
+  return size;
+}
+
+
+/*Unpacks a sub-tree from the given buffer.
+  _opb:      The buffer to unpack from.
+  _binodes:  The nodes to store the sub-tree in.
+  _nbinodes: The number of nodes available for the sub-tree.
+  Return: 0 on success, or a negative value on error.*/
+static int oc_huff_tree_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_binodes,int _nbinodes){
+  oc_huff_node *binode;
+  long          bits;
+  int           nused;
+  if(_nbinodes<1)return TH_EBADHEADER;
+  binode=_binodes;
+  nused=0;
+  bits=oc_pack_read1(_opb);
+  if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+  /*Read an internal node:*/
+  if(!bits){
+    int ret;
+    nused++;
+    binode->nbits=1;
+    binode->depth=1;
+    binode->nodes[0]=_binodes+nused;
+    ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
+    if(ret>=0){
+      nused+=ret;
+      binode->nodes[1]=_binodes+nused;
+      ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
+    }
+    if(ret<0)return ret;
+    nused+=ret;
+  }
+  /*Read a leaf node:*/
+  else{
+    int ntokens;
+    int token;
+    int i;
+    bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
+    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+    /*Find out how many internal tokens we translate this external token into.*/
+    ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits];
+    if(_nbinodes<2*ntokens-1)return TH_EBADHEADER;
+    /*Fill in a complete binary tree pointing to the internal tokens.*/
+    for(i=1;i<ntokens;i<<=1){
+      int j;
+      binode=_binodes+nused;
+      nused+=i;
+      for(j=0;j<i;j++){
+        binode[j].nbits=1;
+        binode[j].depth=1;
+        binode[j].nodes[0]=_binodes+nused+2*j;
+        binode[j].nodes[1]=_binodes+nused+2*j+1;
+      }
+    }
+    /*And now the leaf nodes with those tokens.*/
+    token=OC_DCT_TOKEN_MAP[bits];
+    for(i=0;i<ntokens;i++){
+      binode=_binodes+nused++;
+      binode->nbits=0;
+      binode->depth=1;
+      binode->token=token+i;
+    }
+  }
+  return nused;
+}
+
+/*Finds the depth of shortest branch of the given sub-tree.
+  The tree must be binary.
+  _binode: The root of the given sub-tree.
+           _binode->nbits must be 0 or 1.
+  Return: The smallest depth of a leaf node in this sub-tree.
+          0 indicates this sub-tree is a leaf node.*/
+static int oc_huff_tree_mindepth(oc_huff_node *_binode){
+  int depth0;
+  int depth1;
+  if(_binode->nbits==0)return 0;
+  depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
+  depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
+  return OC_MINI(depth0,depth1)+1;
+}
+
+/*Finds the number of internal nodes at a given depth, plus the number of
+   leaves at that depth or shallower.
+  The tree must be binary.
+  _binode: The root of the given sub-tree.
+           _binode->nbits must be 0 or 1.
+  Return: The number of entries that would be contained in a jump table of the
+           given depth.*/
+static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
+  if(_binode->nbits==0||_depth<=0)return 1;
+  else{
+    return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
+     oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
+  }
+}
+
+/*Makes a copy of the given Huffman tree.
+  _node: The Huffman tree to copy.
+  Return: The copy of the Huffman tree.*/
+static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node,
+ char **_storage){
+  oc_huff_node *ret;
+  ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits);
+  ret->depth=_node->depth;
+  if(_node->nbits){
+    int nchildren;
+    int i;
+    int inext;
+    nchildren=1<<_node->nbits;
+    for(i=0;i<nchildren;){
+      ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage);
+      inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
+      while(++i<inext)ret->nodes[i]=ret->nodes[i-1];
+    }
+  }
+  else ret->token=_node->token;
+  return ret;
+}
+
+static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){
+  size_t size;
+  int    mindepth;
+  int    depth;
+  int    loccupancy;
+  int    occupancy;
+  if(_binode->nbits!=0&&_depth>0){
+    return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+
+     oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1);
+  }
+  depth=mindepth=oc_huff_tree_mindepth(_binode);
+  occupancy=1<<mindepth;
+  do{
+    loccupancy=occupancy;
+    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+  }
+  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+  depth--;
+  size=oc_huff_node_size(depth);
+  if(depth>0){
+    size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1);
+    size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1);
+  }
+  return size;
+}
+
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
+ char **_storage);
+
+/*Fills the given nodes table with all the children in the sub-tree at the
+   given depth.
+  The nodes in the sub-tree with a depth less than that stored in the table
+   are freed.
+  The sub-tree must be binary and complete up until the given depth.
+  _nodes:  The nodes table to fill.
+  _binode: The root of the sub-tree to fill it with.
+           _binode->nbits must be 0 or 1.
+  _level:  The current level in the table.
+           0 indicates that the current node should be stored, regardless of
+            whether it is a leaf node or an internal node.
+  _depth:  The depth of the nodes to fill the table with, relative to their
+            parent.*/
+static void oc_huff_node_fill(oc_huff_node **_nodes,
+ oc_huff_node *_binode,int _level,int _depth,char **_storage){
+  if(_level<=0||_binode->nbits==0){
+    int i;
+    _binode->depth=(unsigned char)(_depth-_level);
+    _nodes[0]=oc_huff_tree_collapse(_binode,_storage);
+    for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
+  }
+  else{
+    _level--;
+    oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage);
+    _nodes+=1<<_level;
+    oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage);
+  }
+}
+
+/*Finds the largest complete sub-tree rooted at the current node and collapses
+   it into a single node.
+  This procedure is then applied recursively to all the children of that node.
+  _binode: The root of the sub-tree to collapse.
+           _binode->nbits must be 0 or 1.
+  Return: The new root of the collapsed sub-tree.*/
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
+ char **_storage){
+  oc_huff_node *root;
+  size_t        size;
+  int           mindepth;
+  int           depth;
+  int           loccupancy;
+  int           occupancy;
+  depth=mindepth=oc_huff_tree_mindepth(_binode);
+  occupancy=1<<mindepth;
+  do{
+    loccupancy=occupancy;
+    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+  }
+  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+  depth--;
+  if(depth<=1)return oc_huff_tree_copy(_binode,_storage);
+  size=oc_huff_node_size(depth);
+  root=oc_huff_node_init(_storage,size,depth);
+  root->depth=_binode->depth;
+  oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage);
+  return root;
+}
+
+/*Unpacks a set of Huffman trees, and reduces them to a collapsed
+   representation.
+  _opb:   The buffer to unpack the trees from.
+  _nodes: The table to fill with the Huffman trees.
+  Return: 0 on success, or a negative value on error.*/
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    oc_huff_node  nodes[511];
+    char         *storage;
+    size_t        size;
+    int           ret;
+    /*Unpack the full tree into a temporary buffer.*/
+    ret=oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes));
+    if(ret<0)return ret;
+    /*Figure out how big the collapsed tree will be.*/
+    size=oc_huff_tree_collapse_size(nodes,0);
+    storage=(char *)_ogg_calloc(1,size);
+    if(storage==NULL)return TH_EFAULT;
+    /*And collapse it.*/
+    _nodes[i]=oc_huff_tree_collapse(nodes,&storage);
+  }
+  return 0;
+}
+
+/*Makes a copy of the given set of Huffman trees.
+  _dst: The array to store the copy in.
+  _src: The array of trees to copy.*/
+int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+ const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    size_t  size;
+    char   *storage;
+    size=oc_huff_tree_size(_src[i]);
+    storage=(char *)_ogg_calloc(1,size);
+    if(storage==NULL){
+      while(i-->0)_ogg_free(_dst[i]);
+      return TH_EFAULT;
+    }
+    _dst[i]=oc_huff_tree_copy(_src[i],&storage);
+  }
+  return 0;
+}
+
+/*Frees the memory used by a set of Huffman trees.
+  _nodes: The array of trees to free.*/
+void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
+}
+
+/*Unpacks a single token using the given Huffman tree.
+  _opb:  The buffer to unpack the token from.
+  _node: The tree to unpack the token with.
+  Return: The token value.*/
+int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node){
+  long bits;
+  while(_node->nbits!=0){
+    bits=oc_pack_look(_opb,_node->nbits);
+    _node=_node->nodes[bits];
+    oc_pack_adv(_opb,_node->depth);
+  }
+  return _node->token;
+}
diff --git a/engine/code/libtheora-1.1.1/lib/huffdec.h b/engine/code/libtheora-1.1.1/lib/huffdec.h
new file mode 100644
index 00000000..d7ffa0e9
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/huffdec.h
@@ -0,0 +1,92 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: huffdec.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#if !defined(_huffdec_H)
+# define _huffdec_H (1)
+# include "huffman.h"
+# include "bitpack.h"
+
+
+
+typedef struct oc_huff_node oc_huff_node;
+
+/*A node in the Huffman tree.
+  Instead of storing every branching in the tree, subtrees can be collapsed
+   into one node, with a table of size 1<<nbits pointing directly to its
+   descedents nbits levels down.
+  This allows more than one bit to be read at a time, and avoids following all
+   the intermediate branches with next to no increased code complexity once
+   the collapsed tree has been built.
+  We do _not_ require that a subtree be complete to be collapsed, but instead
+   store duplicate pointers in the table, and record the actual depth of the
+   node below its parent.
+  This tells us the number of bits to advance the stream after reaching it.
+
+  This turns out to be equivalent to the method described in \cite{Hash95},
+   without the requirement that codewords be sorted by length.
+  If the codewords were sorted by length (so-called ``canonical-codes''), they
+   could be decoded much faster via either Lindell and Moffat's approach or
+   Hashemian's Condensed Huffman Code approach, the latter of which has an
+   extremely small memory footprint.
+  We can't use Choueka et al.'s finite state machine approach, which is
+   extremely fast, because we can't allow multiple symbols to be output at a
+   time; the codebook can and does change between symbols.
+  It also has very large memory requirements, which impairs cache coherency.
+
+  @ARTICLE{Hash95,
+    author="Reza Hashemian",
+    title="Memory Efficient and High-Speed Search {Huffman} Coding",
+    journal="{IEEE} Transactions on Communications",
+    volume=43,
+    number=10,
+    pages="2576--2581",
+    month=Oct,
+    year=1995
+  }*/
+struct oc_huff_node{
+  /*The number of bits of the code needed to descend through this node.
+    0 indicates a leaf node.
+    Otherwise there are 1<<nbits nodes in the nodes table, which can be
+     indexed by reading nbits bits from the stream.*/
+  unsigned char  nbits;
+  /*The value of a token stored in a leaf node.
+    The value in non-leaf nodes is undefined.*/
+  unsigned char  token;
+  /*The depth of the current node, relative to its parent in the collapsed
+     tree.
+    This can be less than its parent's nbits value, in which case there are
+     1<<nbits-depth copies of this node in the table, and the bitstream should
+     only be advanced depth bits after reaching this node.*/
+  unsigned char  depth;
+  /*The table of child nodes.
+    The ACTUAL size of this array is 1<<nbits, despite what the declaration
+     below claims.
+    The exception is that for leaf nodes the size is 0.*/
+  oc_huff_node  *nodes[2];
+};
+
+
+
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+ const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
+void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node);
+
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/huffman.h b/engine/code/libtheora-1.1.1/lib/huffman.h
new file mode 100644
index 00000000..36cf7572
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/huffman.h
@@ -0,0 +1,70 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: huffman.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#if !defined(_huffman_H)
+# define _hufffman_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+/*The range of valid quantized DCT coefficient values.
+  VP3 used 511 in the encoder, but the bitstream is capable of 580.*/
+#define OC_DCT_VAL_RANGE         (580)
+
+#define OC_NDCT_TOKEN_BITS       (5)
+
+#define OC_DCT_EOB1_TOKEN        (0)
+#define OC_DCT_EOB2_TOKEN        (1)
+#define OC_DCT_EOB3_TOKEN        (2)
+#define OC_DCT_REPEAT_RUN0_TOKEN (3)
+#define OC_DCT_REPEAT_RUN1_TOKEN (4)
+#define OC_DCT_REPEAT_RUN2_TOKEN (5)
+#define OC_DCT_REPEAT_RUN3_TOKEN (6)
+
+#define OC_DCT_SHORT_ZRL_TOKEN   (7)
+#define OC_DCT_ZRL_TOKEN         (8)
+
+#define OC_ONE_TOKEN             (9)
+#define OC_MINUS_ONE_TOKEN       (10)
+#define OC_TWO_TOKEN             (11)
+#define OC_MINUS_TWO_TOKEN       (12)
+
+#define OC_DCT_VAL_CAT2          (13)
+#define OC_DCT_VAL_CAT3          (17)
+#define OC_DCT_VAL_CAT4          (18)
+#define OC_DCT_VAL_CAT5          (19)
+#define OC_DCT_VAL_CAT6          (20)
+#define OC_DCT_VAL_CAT7          (21)
+#define OC_DCT_VAL_CAT8          (22)
+
+#define OC_DCT_RUN_CAT1A         (23)
+#define OC_DCT_RUN_CAT1B         (28)
+#define OC_DCT_RUN_CAT1C         (29)
+#define OC_DCT_RUN_CAT2A         (30)
+#define OC_DCT_RUN_CAT2B         (31)
+
+#define OC_NDCT_EOB_TOKEN_MAX    (7)
+#define OC_NDCT_ZRL_TOKEN_MAX    (9)
+#define OC_NDCT_VAL_MAX          (23)
+#define OC_NDCT_VAL_CAT1_MAX     (13)
+#define OC_NDCT_VAL_CAT2_MAX     (17)
+#define OC_NDCT_VAL_CAT2_SIZE    (OC_NDCT_VAL_CAT2_MAX-OC_DCT_VAL_CAT2)
+#define OC_NDCT_RUN_MAX          (32)
+#define OC_NDCT_RUN_CAT1A_MAX    (28)
+
+extern const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/idct.c b/engine/code/libtheora-1.1.1/lib/idct.c
new file mode 100644
index 00000000..0e68ac76
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/idct.c
@@ -0,0 +1,335 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: idct.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <string.h>
+#include "internal.h"
+#include "dct.h"
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      The first 8 entries are used (e.g., from a row of an 8x8 block).*/
+static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  /*0-1 butterfly.*/
+  t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
+  t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
+  /*2-3 rotation by 6pi/16.*/
+  t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
+  t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
+  /*4-7 rotation by 7pi/16.*/
+  t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
+  /*5-6 rotation by 3pi/16.*/
+  t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
+  t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
+  t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
+  /*Stage 2:*/
+  /*4-5 butterfly.*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  /*7-6 butterfly.*/
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:*/
+  /*0-3 butterfly.*/
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  /*1-2 butterfly.*/
+  r=t[1]+t[2];
+  t[2]=t[1]-t[2];
+  t[1]=r;
+  /*6-5 butterfly.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  /*0-7 butterfly.*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  /*1-6 butterfly.*/
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  /*2-5 butterfly.*/
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  /*3-4 butterfly.*/
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 4 entries are used.
+      The other 4 are assumed to be 0.*/
+static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[5]=-(OC_C5S3*_x[3]>>16);
+  t[6]=OC_C3S5*_x[3]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 3 entries are used.
+      The other 5 are assumed to be 0.*/
+static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 2 entries are used.
+      The other 6 are assumed to be 0.*/
+static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first entry is used.
+      The other 7 are assumed to be 0.*/
+static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
+  _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]=
+   _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  All coefficients but the first 3 in zig-zag scan order are assumed to be 0:
+   x  x  0  0  0  0  0  0
+   x  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.*/
+static void oc_idct8x8_3(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  const ogg_int16_t *in;
+  ogg_int16_t       *end;
+  ogg_int16_t       *out;
+  ogg_int16_t        w[64];
+  /*Transform rows of x into columns of w.*/
+  idct8_2(w,_x);
+  idct8_1(w+1,_x+8);
+  /*Transform rows of w into columns of y.*/
+  for(in=w,out=_y,end=out+8;out<end;in+=8,out++)idct8_2(out,in);
+  /*Adjust for the scale factor.*/
+  for(out=_y,end=out+64;out<end;out++)*out=(ogg_int16_t)(*out+8>>4);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
+   x  x  x  x  0  0  0  0
+   x  x  x  0  0  0  0  0
+   x  x  0  0  0  0  0  0
+   x  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.*/
+static void oc_idct8x8_10(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  const ogg_int16_t *in;
+  ogg_int16_t       *end;
+  ogg_int16_t       *out;
+  ogg_int16_t        w[64];
+  /*Transform rows of x into columns of w.*/
+  idct8_4(w,_x);
+  idct8_3(w+1,_x+8);
+  idct8_2(w+2,_x+16);
+  idct8_1(w+3,_x+24);
+  /*Transform rows of w into columns of y.*/
+  for(in=w,out=_y,end=out+8;out<end;in+=8,out++)idct8_4(out,in);
+  /*Adjust for the scale factor.*/
+  for(out=_y,end=out+64;out<end;out++)*out=(ogg_int16_t)(*out+8>>4);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.*/
+static void oc_idct8x8_slow(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  const ogg_int16_t *in;
+  ogg_int16_t       *end;
+  ogg_int16_t       *out;
+  ogg_int16_t        w[64];
+  /*Transform rows of x into columns of w.*/
+  for(in=_x,out=w,end=out+8;out<end;in+=8,out++)idct8(out,in);
+  /*Transform rows of w into columns of y.*/
+  for(in=w,out=_y,end=out+8;out<end;in+=8,out++)idct8(out,in);
+  /*Adjust for the scale factor.*/
+  for(out=_y,end=out+64;out<end;out++)*out=(ogg_int16_t)(*out+8>>4);
+}
+
+void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],
+ int _last_zzi){
+  (*_state->opt_vtable.idct8x8)(_y,_last_zzi);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.*/
+void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi){
+  /*_last_zzi is subtly different from an actual count of the number of
+     coefficients we decoded for this block.
+    It contains the value of zzi BEFORE the final token in the block was
+     decoded.
+    In most cases this is an EOB token (the continuation of an EOB run from a
+     previous block counts), and so this is the same as the coefficient count.
+    However, in the case that the last token was NOT an EOB token, but filled
+     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
+    Provided the last token was not a pure zero run, the minimum value it can
+     be is 46, and so that doesn't affect any of the cases in this routine.
+    However, if the last token WAS a pure zero run of length 63, then _last_zzi
+     will be 1 while the number of coefficients decoded is 64.
+    Thus, we will trigger the following special case, where the real
+     coefficient count would not.
+    Note also that a zero run of length 64 will give _last_zzi a value of 0,
+     but we still process the DC coefficient, which might have a non-zero value
+     due to DC prediction.
+    Although convoluted, this is arguably the correct behavior: it allows us to
+     use a smaller transform when the block ends with a long zero run instead
+     of a normal EOB token.
+    It could be smarter... multiple separate zero runs at the end of a block
+     will fool it, but an encoder that generates these really deserves what it
+     gets.
+    Needless to say we inherited this approach from VP3.*/
+  /*Then perform the iDCT.*/
+  if(_last_zzi<3)oc_idct8x8_3(_y,_y);
+  else if(_last_zzi<10)oc_idct8x8_10(_y,_y);
+  else oc_idct8x8_slow(_y,_y);
+}
diff --git a/engine/code/libtheora-1.1.1/lib/info.c b/engine/code/libtheora-1.1.1/lib/info.c
new file mode 100644
index 00000000..6b976297
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/info.c
@@ -0,0 +1,131 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: info.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "internal.h"
+
+
+
+/*This is more or less the same as strncasecmp, but that doesn't exist
+   everywhere, and this is a fairly trivial function, so we include it.
+  Note: We take advantage of the fact that we know _n is less than or equal to
+   the length of at least one of the strings.*/
+static int oc_tagcompare(const char *_s1,const char *_s2,int _n){
+  int c;
+  for(c=0;c<_n;c++){
+    if(toupper(_s1[c])!=toupper(_s2[c]))return !0;
+  }
+  return _s1[c]!='=';
+}
+
+
+
+void th_info_init(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+  _info->version_major=TH_VERSION_MAJOR;
+  _info->version_minor=TH_VERSION_MINOR;
+  _info->version_subminor=TH_VERSION_SUB;
+  _info->keyframe_granule_shift=6;
+}
+
+void th_info_clear(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+}
+
+
+
+void th_comment_init(th_comment *_tc){
+  memset(_tc,0,sizeof(*_tc));
+}
+
+void th_comment_add(th_comment *_tc,char *_comment){
+  char **user_comments;
+  int   *comment_lengths;
+  int    comment_len;
+  user_comments=_ogg_realloc(_tc->user_comments,
+   (_tc->comments+2)*sizeof(*_tc->user_comments));
+  if(user_comments==NULL)return;
+  _tc->user_comments=user_comments;
+  comment_lengths=_ogg_realloc(_tc->comment_lengths,
+   (_tc->comments+2)*sizeof(*_tc->comment_lengths));
+  if(comment_lengths==NULL)return;
+  _tc->comment_lengths=comment_lengths;
+  comment_len=strlen(_comment);
+  comment_lengths[_tc->comments]=comment_len;
+  user_comments[_tc->comments]=_ogg_malloc(comment_len+1);
+  if(user_comments[_tc->comments]==NULL)return;
+  memcpy(_tc->user_comments[_tc->comments],_comment,comment_len+1);
+  _tc->comments++;
+  _tc->user_comments[_tc->comments]=NULL;
+}
+
+void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){
+  char *comment;
+  int   tag_len;
+  int   val_len;
+  tag_len=strlen(_tag);
+  val_len=strlen(_val);
+  /*+2 for '=' and '\0'.*/
+  comment=_ogg_malloc(tag_len+val_len+2);
+  if(comment==NULL)return;
+  memcpy(comment,_tag,tag_len);
+  comment[tag_len]='=';
+  memcpy(comment+tag_len+1,_val,val_len+1);
+  th_comment_add(_tc,comment);
+  _ogg_free(comment);
+}
+
+char *th_comment_query(th_comment *_tc,char *_tag,int _count){
+  long i;
+  int  found;
+  int  tag_len;
+  tag_len=strlen(_tag);
+  found=0;
+  for(i=0;i<_tc->comments;i++){
+    if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len)){
+      /*We return a pointer to the data, not a copy.*/
+      if(_count==found++)return _tc->user_comments[i]+tag_len+1;
+    }
+  }
+  /*Didn't find anything.*/
+  return NULL;
+}
+
+int th_comment_query_count(th_comment *_tc,char *_tag){
+  long i;
+  int  tag_len;
+  int  count;
+  tag_len=strlen(_tag);
+  count=0;
+  for(i=0;i<_tc->comments;i++){
+    if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len))count++;
+  }
+  return count;
+}
+
+void th_comment_clear(th_comment *_tc){
+  if(_tc!=NULL){
+    long i;
+    for(i=0;i<_tc->comments;i++)_ogg_free(_tc->user_comments[i]);
+    _ogg_free(_tc->user_comments);
+    _ogg_free(_tc->comment_lengths);
+    _ogg_free(_tc->vendor);
+    memset(_tc,0,sizeof(*_tc));
+  }
+}
diff --git a/engine/code/libtheora-1.1.1/lib/internal.c b/engine/code/libtheora-1.1.1/lib/internal.c
new file mode 100644
index 00000000..0fe4f63e
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/internal.c
@@ -0,0 +1,262 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: internal.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "internal.h"
+
+
+
+/*A map from the index in the zig zag scan to the coefficient number in a
+   block.
+  All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs
+   past the end of a block in bogus streams get mapped to a known location.*/
+const unsigned char OC_FZIG_ZAG[128]={
+   0, 1, 8,16, 9, 2, 3,10,
+  17,24,32,25,18,11, 4, 5,
+  12,19,26,33,40,48,41,34,
+  27,20,13, 6, 7,14,21,28,
+  35,42,49,56,57,50,43,36,
+  29,22,15,23,30,37,44,51,
+  58,59,52,45,38,31,39,46,
+  53,60,61,54,47,55,62,63,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+
+/*A map from the coefficient number in a block to its index in the zig zag
+   scan.*/
+const unsigned char OC_IZIG_ZAG[64]={
+   0, 1, 5, 6,14,15,27,28,
+   2, 4, 7,13,16,26,29,42,
+   3, 8,12,17,25,30,41,43,
+   9,11,18,24,31,40,44,53,
+  10,19,23,32,39,45,52,54,
+  20,22,33,38,46,51,55,60,
+  21,34,37,47,50,56,59,61,
+  35,36,48,49,57,58,62,63
+};
+
+/*A map from physical macro block ordering to bitstream macro block
+   ordering within a super block.*/
+const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}};
+
+/*A list of the indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
+  {0,1,2,3,4,8},
+  {0,1,2,3,4,5,8,9},
+  {0,1,2,3,4,6,8,10},
+  {0,1,2,3,4,5,6,7,8,9,10,11}
+};
+
+/*The number of indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
+
+/*The number of extra bits that are coded with each of the DCT tokens.
+  Each DCT token has some fixed number of additional bits (possibly 0) stored
+   after the token itself, containing, for example, coefficient magnitude,
+   sign bits, etc.*/
+const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={
+  0,0,0,2,3,4,12,3,6,
+  0,0,0,0,
+  1,1,1,1,2,3,4,5,6,10,
+  1,1,1,1,1,3,4,
+  2,3
+};
+
+
+
+int oc_ilog(unsigned _v){
+  int ret;
+  for(ret=0;_v;ret++)_v>>=1;
+  return ret;
+}
+
+
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the Y direction.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[2][0];
+  dy=_lbmvs[0][1]+_lbmvs[2][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[1][0]+_lbmvs[3][0];
+  dy=_lbmvs[1][1]+_lbmvs[3][1];
+  _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X direction (4:2:2).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with no chroma decimation (4:4:4).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lmbmv: The luma macro-block level motion vector to fill in for use in
+           prediction.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0]));
+}
+
+/*A table of functions used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
+};
+
+
+
+void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  rowsz;
+  size_t  colsz;
+  size_t  datsz;
+  char   *ret;
+  colsz=_height*sizeof(void *);
+  rowsz=_sz*_width;
+  datsz=rowsz*_height;
+  /*Alloc array and row pointers.*/
+  ret=(char *)_ogg_malloc(datsz+colsz);
+  if(ret==NULL)return NULL;
+  /*Initialize the array.*/
+  if(ret!=NULL){
+    size_t   i;
+    void   **p;
+    char    *datptr;
+    p=(void **)ret;
+    i=_height;
+    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
+  }
+  return (void **)ret;
+}
+
+void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  colsz;
+  size_t  rowsz;
+  size_t  datsz;
+  char   *ret;
+  colsz=_height*sizeof(void *);
+  rowsz=_sz*_width;
+  datsz=rowsz*_height;
+  /*Alloc array and row pointers.*/
+  ret=(char *)_ogg_calloc(datsz+colsz,1);
+  if(ret==NULL)return NULL;
+  /*Initialize the array.*/
+  if(ret!=NULL){
+    size_t   i;
+    void   **p;
+    char    *datptr;
+    p=(void **)ret;
+    i=_height;
+    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
+  }
+  return (void **)ret;
+}
+
+void oc_free_2d(void *_ptr){
+  _ogg_free(_ptr);
+}
+
+/*Fills in a Y'CbCr buffer with a pointer to the image data in the first
+   buffer, but with the opposite vertical orientation.
+  _dst: The destination buffer.
+        This can be the same as _src.
+  _src: The source buffer.*/
+void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
+ const th_ycbcr_buffer _src){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    _dst[pli].width=_src[pli].width;
+    _dst[pli].height=_src[pli].height;
+    _dst[pli].stride=-_src[pli].stride;
+    _dst[pli].data=_src[pli].data
+     +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride;
+  }
+}
+
+const char *th_version_string(void){
+  return OC_VENDOR_STRING;
+}
+
+ogg_uint32_t th_version_number(void){
+  return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB;
+}
+
+/*Determines the packet type.
+  Note that this correctly interprets a 0-byte packet as a video data packet.
+  Return: 1 for a header packet, 0 for a data packet.*/
+int th_packet_isheader(ogg_packet *_op){
+  return _op->bytes>0?_op->packet[0]>>7:0;
+}
+
+/*Determines the frame type of a video data packet.
+  Note that this correctly interprets a 0-byte packet as a delta frame.
+  Return: 1 for a key frame, 0 for a delta frame, and -1 for a header
+           packet.*/
+int th_packet_iskeyframe(ogg_packet *_op){
+  return _op->bytes<=0?0:_op->packet[0]&0x80?-1:!(_op->packet[0]&0x40);
+}
diff --git a/engine/code/libtheora-1.1.1/lib/internal.h b/engine/code/libtheora-1.1.1/lib/internal.h
new file mode 100644
index 00000000..d81263e1
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/internal.h
@@ -0,0 +1,509 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: internal.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#if !defined(_internal_H)
+# define _internal_H (1)
+# include <stdlib.h>
+# include <limits.h>
+# if defined(HAVE_CONFIG_H)
+#  include <config.h>
+# endif
+# include "theora/codec.h"
+# include "theora/theora.h"
+
+# if defined(_MSC_VER)
+/*Disable missing EMMS warnings.*/
+#  pragma warning(disable:4799)
+/*Thank you Microsoft, I know the order of operations.*/
+#  pragma warning(disable:4554)
+# endif
+/*You, too, gcc.*/
+# if defined(__GNUC_PREREQ)
+#  if __GNUC_PREREQ(4,2)
+#   pragma GCC diagnostic ignored "-Wparentheses"
+#  endif
+# endif
+
+# include "ocintrin.h"
+# include "huffman.h"
+# include "quant.h"
+
+/*Some assembly constructs require aligned operands.*/
+# if defined(OC_X86_ASM)
+#  if defined(__GNUC__)
+#   define OC_ALIGN8(expr) expr __attribute__((aligned(8)))
+#   define OC_ALIGN16(expr) expr __attribute__((aligned(16)))
+#  elif defined(_MSC_VER)
+#   define OC_ALIGN8(expr) __declspec (align(8)) expr
+#   define OC_ALIGN16(expr) __declspec (align(16)) expr
+#  endif
+# endif
+# if !defined(OC_ALIGN8)
+#  define OC_ALIGN8(expr) expr
+# endif
+# if !defined(OC_ALIGN16)
+#  define OC_ALIGN16(expr) expr
+# endif
+
+
+
+typedef struct oc_sb_flags              oc_sb_flags;
+typedef struct oc_border_info           oc_border_info;
+typedef struct oc_fragment              oc_fragment;
+typedef struct oc_fragment_plane        oc_fragment_plane;
+typedef struct oc_base_opt_vtable       oc_base_opt_vtable;
+typedef struct oc_base_opt_data         oc_base_opt_data;
+typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
+typedef struct oc_theora_state          oc_theora_state;
+
+
+
+/*This library's version.*/
+# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)"
+
+/*Theora bitstream version.*/
+# define TH_VERSION_MAJOR (3)
+# define TH_VERSION_MINOR (2)
+# define TH_VERSION_SUB   (1)
+# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \
+ ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \
+ ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \
+ (_info)->version_subminor>=(_sub)))
+
+/*A keyframe.*/
+#define OC_INTRA_FRAME (0)
+/*A predicted frame.*/
+#define OC_INTER_FRAME (1)
+/*A frame of unknown type (frame type decision has not yet been made).*/
+#define OC_UNKWN_FRAME (-1)
+
+/*The amount of padding to add to the reconstructed frame buffers on all
+   sides.
+  This is used to allow unrestricted motion vectors without special casing.
+  This must be a multiple of 2.*/
+#define OC_UMV_PADDING (16)
+
+/*Frame classification indices.*/
+/*The previous golden frame.*/
+#define OC_FRAME_GOLD (0)
+/*The previous frame.*/
+#define OC_FRAME_PREV (1)
+/*The current frame.*/
+#define OC_FRAME_SELF (2)
+
+/*The input or output buffer.*/
+#define OC_FRAME_IO   (3)
+
+/*Macroblock modes.*/
+/*Macro block is invalid: It is never coded.*/
+#define OC_MODE_INVALID        (-1)
+/*Encoded difference from the same macro block in the previous frame.*/
+#define OC_MODE_INTER_NOMV     (0)
+/*Encoded with no motion compensated prediction.*/
+#define OC_MODE_INTRA          (1)
+/*Encoded difference from the previous frame offset by the given motion 
+  vector.*/
+#define OC_MODE_INTER_MV       (2)
+/*Encoded difference from the previous frame offset by the last coded motion 
+  vector.*/
+#define OC_MODE_INTER_MV_LAST  (3)
+/*Encoded difference from the previous frame offset by the second to last 
+  coded motion vector.*/
+#define OC_MODE_INTER_MV_LAST2 (4)
+/*Encoded difference from the same macro block in the previous golden 
+  frame.*/
+#define OC_MODE_GOLDEN_NOMV    (5)
+/*Encoded difference from the previous golden frame offset by the given motion 
+  vector.*/
+#define OC_MODE_GOLDEN_MV      (6)
+/*Encoded difference from the previous frame offset by the individual motion 
+  vectors given for each block.*/
+#define OC_MODE_INTER_MV_FOUR  (7)
+/*The number of (coded) modes.*/
+#define OC_NMODES              (8)
+
+/*Determines the reference frame used for a given MB mode.*/
+#define OC_FRAME_FOR_MODE(_x) \
+ OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
+  OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
+
+/*Constants for the packet state machine common between encoder and decoder.*/
+
+/*Next packet to emit/read: Codec info header.*/
+#define OC_PACKET_INFO_HDR    (-3)
+/*Next packet to emit/read: Comment header.*/
+#define OC_PACKET_COMMENT_HDR (-2)
+/*Next packet to emit/read: Codec setup header.*/
+#define OC_PACKET_SETUP_HDR   (-1)
+/*No more packets to emit/read.*/
+#define OC_PACKET_DONE        (INT_MAX)
+
+
+
+/*Super blocks are 32x32 segments of pixels in a single color plane indexed
+   in image order.
+  Internally, super blocks are broken up into four quadrants, each of which
+   contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
+  Quadrants, and the blocks within them, are indexed in a special order called
+   a "Hilbert curve" within the super block.
+
+  In order to differentiate between the Hilbert-curve indexing strategy and
+   the regular image order indexing strategy, blocks indexed in image order
+   are called "fragments".
+  Fragments are indexed in image order, left to right, then bottom to top,
+   from Y' plane to Cb plane to Cr plane.
+
+  The co-located fragments in all image planes corresponding to the location
+   of a single quadrant of a luma plane super block form a macro block.
+  Thus there is only a single set of macro blocks for all planes, each of which
+   contains between 6 and 12 fragments, depending on the pixel format.
+  Therefore macro block information is kept in a separate set of arrays from
+   super blocks to avoid unused space in the other planes.
+  The lists are indexed in super block order.
+  That is, the macro block corresponding to the macro block mbi in (luma plane)
+   super block sbi is at index (sbi<<2|mbi).
+  Thus the number of macro blocks in each dimension is always twice the number
+   of super blocks, even when only an odd number fall inside the coded frame.
+  These "extra" macro blocks are just an artifact of our internal data layout,
+   and not part of the coded stream; they are flagged with a negative MB mode.*/
+
+
+
+/*A single quadrant of the map from a super block to fragment numbers.*/
+typedef ptrdiff_t       oc_sb_map_quad[4];
+/*A map from a super block to fragment numbers.*/
+typedef oc_sb_map_quad  oc_sb_map[4];
+/*A single plane of the map from a macro block to fragment numbers.*/
+typedef ptrdiff_t       oc_mb_map_plane[4];
+/*A map from a macro block to fragment numbers.*/
+typedef oc_mb_map_plane oc_mb_map[3];
+/*A motion vector.*/
+typedef signed char     oc_mv[2];
+
+
+
+/*Super block information.*/
+struct oc_sb_flags{
+  unsigned char coded_fully:1;
+  unsigned char coded_partially:1;
+  unsigned char quad_valid:4;
+};
+
+
+
+/*Information about a fragment which intersects the border of the displayable
+   region.
+  This marks which pixels belong to the displayable region.*/
+struct oc_border_info{
+  /*A bit mask marking which pixels are in the displayable region.
+    Pixel (x,y) corresponds to bit (y<<3|x).*/
+  ogg_int64_t mask;
+  /*The number of pixels in the displayable region.
+    This is always positive, and always less than 64.*/
+  int         npixels;
+};
+
+
+
+/*Fragment information.*/
+struct oc_fragment{
+  /*A flag indicating whether or not this fragment is coded.*/
+  unsigned   coded:1;
+  /*A flag indicating that this entire fragment lies outside the displayable
+     region of the frame.
+    Note the contrast with an invalid macro block, which is outside the coded
+     frame, not just the displayable one.
+    There are no fragments outside the coded frame by construction.*/
+  unsigned   invalid:1;
+  /*The index of the quality index used for this fragment's AC coefficients.*/
+  unsigned   qii:6;
+  /*The mode of the macroblock this fragment belongs to.*/
+  unsigned   mb_mode:3;
+  /*The index of the associated border information for fragments which lie
+     partially outside the displayable region.
+    For fragments completely inside or outside this region, this is -1.
+    Note that the C standard requires an explicit signed keyword for bitfield
+     types, since some compilers may treat them as unsigned without it.*/
+  signed int borderi:5;
+  /*The prediction-corrected DC component.
+    Note that the C standard requires an explicit signed keyword for bitfield
+     types, since some compilers may treat them as unsigned without it.*/
+  signed int dc:16;
+};
+
+
+
+/*A description of each fragment plane.*/
+struct oc_fragment_plane{
+  /*The number of fragments in the horizontal direction.*/
+  int       nhfrags;
+  /*The number of fragments in the vertical direction.*/
+  int       nvfrags;
+  /*The offset of the first fragment in the plane.*/
+  ptrdiff_t froffset;
+  /*The total number of fragments in the plane.*/
+  ptrdiff_t nfrags;
+  /*The number of super blocks in the horizontal direction.*/
+  unsigned  nhsbs;
+  /*The number of super blocks in the vertical direction.*/
+  unsigned  nvsbs;
+  /*The offset of the first super block in the plane.*/
+  unsigned  sboffset;
+  /*The total number of super blocks in the plane.*/
+  unsigned  nsbs;
+};
+
+
+
+/*The shared (encoder and decoder) functions that have accelerated variants.*/
+struct oc_base_opt_vtable{
+  void (*frag_copy)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride);
+  void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+   const ogg_int16_t _residue[64]);
+  void (*frag_recon_inter)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+  void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
+   const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+  void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
+  void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
+   int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+  void (*state_frag_copy_list)(const oc_theora_state *_state,
+   const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+   int _dst_frame,int _src_frame,int _pli);
+  void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
+   int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);  
+  void (*restore_fpu)(void);
+};
+
+/*The shared (encoder and decoder) tables that vary according to which variants
+   of the above functions are used.*/
+struct oc_base_opt_data{
+  const unsigned char *dct_fzig_zag;
+};
+
+
+/*State information common to both the encoder and decoder.*/
+struct oc_theora_state{
+  /*The stream information.*/
+  th_info             info;
+  /*Table for shared accelerated functions.*/
+  oc_base_opt_vtable  opt_vtable;
+  /*Table for shared data used by accelerated functions.*/
+  oc_base_opt_data    opt_data;
+  /*CPU flags to detect the presence of extended instruction sets.*/
+  ogg_uint32_t        cpu_flags;
+  /*The fragment plane descriptions.*/
+  oc_fragment_plane   fplanes[3];
+  /*The list of fragments, indexed in image order.*/
+  oc_fragment        *frags;
+  /*The the offset into the reference frame buffer to the upper-left pixel of
+     each fragment.*/
+  ptrdiff_t          *frag_buf_offs;
+  /*The motion vector for each fragment.*/
+  oc_mv              *frag_mvs;
+  /*The total number of fragments in a single frame.*/
+  ptrdiff_t           nfrags;
+  /*The list of super block maps, indexed in image order.*/
+  oc_sb_map          *sb_maps;
+  /*The list of super block flags, indexed in image order.*/
+  oc_sb_flags        *sb_flags;
+  /*The total number of super blocks in a single frame.*/
+  unsigned            nsbs;
+  /*The fragments from each color plane that belong to each macro block.
+    Fragments are stored in image order (left to right then top to bottom).
+    When chroma components are decimated, the extra fragments have an index of
+     -1.*/
+  oc_mb_map          *mb_maps;
+  /*The list of macro block modes.
+    A negative number indicates the macro block lies entirely outside the
+     coded frame.*/
+  signed char        *mb_modes;
+  /*The number of macro blocks in the X direction.*/
+  unsigned            nhmbs;
+  /*The number of macro blocks in the Y direction.*/
+  unsigned            nvmbs;
+  /*The total number of macro blocks.*/
+  size_t              nmbs;
+  /*The list of coded fragments, in coded order.
+    Uncoded fragments are stored in reverse order from the end of the list.*/
+  ptrdiff_t          *coded_fragis;
+  /*The number of coded fragments in each plane.*/
+  ptrdiff_t           ncoded_fragis[3];
+  /*The total number of coded fragments.*/
+  ptrdiff_t           ntotal_coded_fragis;
+  /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
+  int                 ref_frame_idx[4];
+  /*The actual buffers used for the previously decoded frames.*/
+  th_ycbcr_buffer     ref_frame_bufs[4];
+  /*The storage for the reference frame buffers.*/
+  unsigned char      *ref_frame_data[4];
+  /*The strides for each plane in the reference frames.*/
+  int                 ref_ystride[3];
+  /*The number of unique border patterns.*/
+  int                 nborders;
+  /*The unique border patterns for all border fragments.
+    The borderi field of fragments which straddle the border indexes this
+     list.*/
+  oc_border_info      borders[16];
+  /*The frame number of the last keyframe.*/
+  ogg_int64_t         keyframe_num;
+  /*The frame number of the current frame.*/
+  ogg_int64_t         curframe_num;
+  /*The granpos of the current frame.*/
+  ogg_int64_t         granpos;
+  /*The type of the current frame.*/
+  unsigned char       frame_type;
+  /*The bias to add to the frame count when computing granule positions.*/
+  unsigned char       granpos_bias;
+  /*The number of quality indices used in the current frame.*/
+  unsigned char       nqis;
+  /*The quality indices of the current frame.*/
+  unsigned char       qis[3];
+  /*The dequantization tables, stored in zig-zag order, and indexed by
+     qi, pli, qti, and zzi.*/
+  ogg_uint16_t       *dequant_tables[64][3][2];
+  OC_ALIGN16(oc_quant_table      dequant_table_data[64][3][2]);
+  /*Loop filter strength parameters.*/
+  unsigned char       loop_filter_limits[64];
+};
+
+
+
+/*The function type used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lmbmv: The luma macro-block level motion vector to fill in for use in
+           prediction.
+  _lbmvs: The luma block-level motion vectors.*/
+typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
+
+
+
+/*A map from the index in the zig zag scan to the coefficient number in a
+   block.*/
+extern const unsigned char OC_FZIG_ZAG[128];
+/*A map from the coefficient number in a block to its index in the zig zag
+   scan.*/
+extern const unsigned char OC_IZIG_ZAG[64];
+/*A map from physical macro block ordering to bitstream macro block
+   ordering within a super block.*/
+extern const unsigned char OC_MB_MAP[2][2];
+/*A list of the indices in the oc_mb_map array that can be valid for each of
+   the various chroma decimation types.*/
+extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
+/*The number of indices in the oc_mb_map array that can be valid for each of
+   the various chroma decimation types.*/
+extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
+/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
+
+
+
+int oc_ilog(unsigned _v);
+void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz);
+void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz);
+void oc_free_2d(void *_ptr);
+
+void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
+ const th_ycbcr_buffer _src);
+
+int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
+void oc_state_clear(oc_theora_state *_state);
+void oc_state_vtable_init_c(oc_theora_state *_state);
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend);
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
+void oc_state_borders_fill(oc_theora_state *_state,int _refi);
+void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
+ th_ycbcr_buffer _img);
+int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
+int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
+ int _pli,int _dx,int _dy);
+
+int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
+void oc_state_loop_filter(oc_theora_state *_state,int _frame);
+#if defined(OC_DUMP_IMAGES)
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf);
+#endif
+
+/*Shared accelerated functions.*/
+void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride);
+void oc_frag_recon_intra(const oc_theora_state *_state,
+ unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter2(const oc_theora_state *_state,
+ unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2,
+ int _ystride,const ogg_int16_t _residue[64]);
+void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi);
+void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_frag_copy_list(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu(const oc_theora_state *_state);
+
+/*Default pure-C implementations.*/
+void oc_frag_copy_c(unsigned char *_dst,
+ const unsigned char *_src,int _src_ystride);
+void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi);
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_frag_copy_list_c(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu_c(void);
+
+/*We need a way to call a few encoder functions without introducing a link-time
+   dependency into the decoder, while still allowing the old alpha API which
+   does not distinguish between encoder and decoder objects to be used.
+  We do this by placing a function table at the start of the encoder object
+   which can dispatch into the encoder library.
+  We do a similar thing for the decoder in case we ever decide to split off a
+   common base library.*/
+typedef void (*oc_state_clear_func)(theora_state *_th);
+typedef int (*oc_state_control_func)(theora_state *th,int _req,
+ void *_buf,size_t _buf_sz);
+typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+typedef double (*oc_state_granule_time_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+
+
+struct oc_state_dispatch_vtable{
+  oc_state_clear_func         clear;
+  oc_state_control_func       control;
+  oc_state_granule_frame_func granule_frame;
+  oc_state_granule_time_func  granule_time;
+};
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/ocintrin.h b/engine/code/libtheora-1.1.1/lib/ocintrin.h
new file mode 100644
index 00000000..d49ebb21
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/ocintrin.h
@@ -0,0 +1,128 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: ocintrin.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include <math.h>
+#if !defined(_ocintrin_H)
+# define _ocintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or inline assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Note that we do not provide a macro for abs(), because it is provided as a
+   library function, which we assume is translated into an intrinsic to avoid
+   the function call overhead and then implemented in the smartest way for the
+   target platform.
+  With modern gcc (4.x), this is true: it uses cmov instructions if the
+   architecture supports it and branchless bit-twiddling if it does not (the
+   speed difference between the two approaches is not measurable).
+  Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
+   by Sun Microsystems, despite prior art dating back to at least 1996:
+   http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
+  On gcc 3.x, however, our assumption is not true, as abs() is translated to a
+   conditional jump, which is horrible on deeply piplined architectures (e.g.,
+   all consumer architectures for the past decade or more).
+  Also be warned that -C*abs(x) where C is a constant is mis-optimized as
+   abs(C*x) on every gcc release before 4.2.3.
+  See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */
+
+/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+   given an appropriate architecture, but the branchless bit-twiddling versions
+   are just as fast, and do not require any special target architecture.
+  Earlier gcc versions (3.x) compiled both code to the same assembly
+   instructions, because of the way they represented ((_b)>(_a)) internally.*/
+#define OC_MAXI(_a,_b)      ((_a)-((_a)-(_b)&-((_b)>(_a))))
+#define OC_MINI(_a,_b)      ((_a)+((_b)-(_a)&-((_b)<(_a))))
+/*Clamps an integer into the given range.
+  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
+   behavior is required to meet our documented API behavior).
+  _a: The lower bound.
+  _b: The value to clamp.
+  _c: The upper boud.*/
+#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
+#define OC_CLAMP255(_x)     ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
+/*This has a chance of compiling branchless, and is just as fast as the
+   bit-twiddling method, which is slightly less portable, since it relies on a
+   sign-extended rightshift, which is not guaranteed by ANSI (but present on
+   every relevant platform).*/
+#define OC_SIGNI(_a)        (((_a)>0)-((_a)<0))
+/*Slightly more portable than relying on a sign-extended right-shift (which is
+   not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
+   compile it into the right-shift anyway.*/
+#define OC_SIGNMASK(_a)     (-((_a)<0))
+/*Divides an integer by a power of two, truncating towards 0.
+  _dividend: The integer to divide.
+  _shift:    The non-negative power of two to divide by.
+  _rmask:    (1<<_shift)-1*/
+#define OC_DIV_POW2(_dividend,_shift,_rmask)\
+  ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift))
+/*Divides _x by 65536, truncating towards 0.*/
+#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
+/*Divides _x by 2, truncating towards 0.*/
+#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
+/*Divides _x by 8, truncating towards 0.*/
+#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
+/*Divides _x by 16, truncating towards 0.*/
+#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
+/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
+   negative dividends first.
+  When _rval is (1<<_shift-1), this is equivalent to division with rounding
+   ties away from zero.*/
+#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
+  ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift))
+/*Divides a _x by 2, rounding towards even numbers.*/
+#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1)
+/*Divides a _x by (1<<(_shift)), rounding towards even numbers.*/
+#define OC_DIV_POW2_RE(_x,_shift) \
+  ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift))
+/*Swaps two integers _a and _b if _a>_b.*/
+#define OC_SORT2I(_a,_b) \
+  do{ \
+    int t__; \
+    t__=((_a)^(_b))&-((_b)<(_a)); \
+    (_a)^=t__; \
+    (_b)^=t__; \
+  } \
+  while(0)
+
+/*Accesses one of four (signed) bytes given an index.
+  This can be used to avoid small lookup tables.*/
+#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \
+  ((signed char) \
+   (((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8))
+/*Accesses one of eight (unsigned) nibbles given an index.
+  This can be used to avoid small lookup tables.*/
+#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \
+  ((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \
+   ((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF)
+
+
+
+/*All of these macros should expect floats as arguments.*/
+#define OC_MAXF(_a,_b)      ((_a)<(_b)?(_b):(_a))
+#define OC_MINF(_a,_b)      ((_a)>(_b)?(_b):(_a))
+#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c)))
+#define OC_FABSF(_f)        ((float)fabs(_f))
+#define OC_SQRTF(_f)        ((float)sqrt(_f))
+#define OC_POWF(_b,_e)      ((float)pow(_b,_e))
+#define OC_LOGF(_f)         ((float)log(_f))
+#define OC_IFLOORF(_f)      ((int)floor(_f))
+#define OC_ICEILF(_f)       ((int)ceil(_f))
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/quant.c b/engine/code/libtheora-1.1.1/lib/quant.c
new file mode 100644
index 00000000..8359f5ab
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/quant.c
@@ -0,0 +1,119 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: quant.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "quant.h"
+#include "decint.h"
+
+static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
+static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
+
+/*Initializes the dequantization tables from a set of quantizer info.
+  Currently the dequantizer (and elsewhere enquantizer) tables are expected to
+   be initialized as pointing to the storage reserved for them in the
+   oc_theora_state (resp. oc_enc_ctx) structure.
+  If some tables are duplicates of others, the pointers will be adjusted to
+   point to a single copy of the tables, but the storage for them will not be
+   freed.
+  If you're concerned about the memory footprint, the obvious thing to do is
+   to move the storage out of its fixed place in the structures and allocate
+   it on demand.
+  However, a much, much better option is to only store the quantization
+   matrices being used for the current frame, and to recalculate these as the
+   qi values change between frames (this is what VP3 did).*/
+void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ int _pp_dc_scale[64],const th_quant_info *_qinfo){
+  /*Coding mode: intra or inter.*/
+  int          qti;
+  /*Y', C_b, C_r*/
+  int          pli;
+  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+    /*Quality index.*/
+    int qi;
+    /*Range iterator.*/
+    int qri;
+    for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
+      th_quant_base base;
+      ogg_uint32_t  q;
+      int           qi_start;
+      int           qi_end;
+      memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
+       sizeof(base));
+      qi_start=qi;
+      if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
+      else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
+      /*Iterate over quality indicies in this range.*/
+      for(;;){
+        ogg_uint32_t qfac;
+        int          zzi;
+        int          ci;
+        /*In the original VP3.2 code, the rounding offset and the size of the
+           dead zone around 0 were controlled by a "sharpness" parameter.
+          The size of our dead zone is now controlled by the per-coefficient
+           quality thresholds returned by our HVS module.
+          We round down from a more accurate value when the quality of the
+           reconstruction does not fall below our threshold and it saves bits.
+          Hence, all of that VP3.2 code is gone from here, and the remaining
+           floating point code has been implemented as equivalent integer code
+           with exact precision.*/
+        qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
+        /*For postprocessing, not dequantization.*/
+        if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
+        /*Scale DC the coefficient from the proper table.*/
+        q=(qfac/100)<<2;
+        q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+        _dequant[qi][pli][qti][0]=(ogg_uint16_t)q;
+        /*Now scale AC coefficients from the proper table.*/
+        for(zzi=1;zzi<64;zzi++){
+          q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2;
+          q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+          _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q;
+        }
+        /*If this is a duplicate of a previous matrix, use that instead.
+          This simple check helps us improve cache coherency later.*/
+        {
+          int dupe;
+          int qtj;
+          int plj;
+          dupe=0;
+          for(qtj=0;qtj<=qti;qtj++){
+            for(plj=0;plj<(qtj<qti?3:pli);plj++){
+              if(!memcmp(_dequant[qi][pli][qti],_dequant[qi][plj][qtj],
+               sizeof(oc_quant_table))){
+                dupe=1;
+                break;
+              }
+            }
+            if(dupe)break;
+          }
+          if(dupe)_dequant[qi][pli][qti]=_dequant[qi][plj][qtj];
+        }
+        if(++qi>=qi_end)break;
+        /*Interpolate the next base matrix.*/
+        for(ci=0;ci<64;ci++){
+          base[ci]=(unsigned char)(
+           (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
+           (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
+           +_qinfo->qi_ranges[qti][pli].sizes[qri])/
+           (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
+        }
+      }
+    }
+  }
+}
diff --git a/engine/code/libtheora-1.1.1/lib/quant.h b/engine/code/libtheora-1.1.1/lib/quant.h
new file mode 100644
index 00000000..49ce13a6
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/quant.h
@@ -0,0 +1,33 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: quant.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#if !defined(_quant_H)
+# define _quant_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+typedef ogg_uint16_t   oc_quant_table[64];
+
+
+/*Maximum scaled quantizer value.*/
+#define OC_QUANT_MAX          (1024<<2)
+
+
+void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ int _pp_dc_scale[64],const th_quant_info *_qinfo);
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/state.c b/engine/code/libtheora-1.1.1/lib/state.c
new file mode 100644
index 00000000..42ed33a9
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/state.c
@@ -0,0 +1,1227 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: state.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include "internal.h"
+#if defined(OC_X86_ASM)
+#if defined(_MSC_VER)
+# include "x86_vc/x86int.h"
+#else
+# include "x86/x86int.h"
+#endif
+#endif
+#if defined(OC_DUMP_IMAGES)
+# include <stdio.h>
+# include "png.h"
+#endif
+
+/*Returns the fragment index of the top-left block in a macro block.
+  This can be used to test whether or not the whole macro block is valid.
+  _sb_map: The super block map.
+  _quadi:  The quadrant number.
+  Return: The index of the fragment of the upper left block in the macro
+   block, or -1 if the block lies outside the coded frame.*/
+static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
+  /*It so happens that under the Hilbert curve ordering described below, the
+     upper-left block in each macro block is at index 0, except in macro block
+     3, where it is at index 2.*/
+  return _sb_map[_quadi][_quadi&_quadi<<1];
+}
+
+/*Fills in the mapping from block positions to fragment numbers for a single
+   color plane.
+  This function also fills in the "valid" flag of each quadrant in the super
+   block flags.
+  _sb_maps:  The array of super block maps for the color plane.
+  _sb_flags: The array of super block flags for the color plane.
+  _frag0:    The index of the first fragment in the plane.
+  _hfrags:   The number of horizontal fragments in a coded frame.
+  _vfrags:   The number of vertical fragments in a coded frame.*/
+static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
+ oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
+  /*Contains the (macro_block,block) indices for a 4x4 grid of
+     fragments.
+    The pattern is a 4x4 Hilbert space-filling curve.
+    A Hilbert curve has the nice property that as the curve grows larger, its
+     fractal dimension approaches 2.
+    The intuition is that nearby blocks in the curve are also close spatially,
+     with the previous element always an immediate neighbor, so that runs of
+     blocks should be well correlated.*/
+  static const int SB_MAP[4][4][2]={
+    {{0,0},{0,1},{3,2},{3,3}},
+    {{0,3},{0,2},{3,1},{3,0}},
+    {{1,0},{1,3},{2,0},{2,3}},
+    {{1,1},{1,2},{2,1},{2,2}}
+  };
+  ptrdiff_t  yfrag;
+  unsigned   sbi;
+  int        y;
+  sbi=0;
+  yfrag=_frag0;
+  for(y=0;;y+=4){
+    int imax;
+    int x;
+    /*Figure out how many columns of blocks in this super block lie within the
+       image.*/
+    imax=_vfrags-y;
+    if(imax>4)imax=4;
+    else if(imax<=0)break;
+    for(x=0;;x+=4,sbi++){
+      ptrdiff_t xfrag;
+      int       jmax;
+      int       quadi;
+      int       i;
+      /*Figure out how many rows of blocks in this super block lie within the
+         image.*/
+      jmax=_hfrags-x;
+      if(jmax>4)jmax=4;
+      else if(jmax<=0)break;
+      /*By default, set all fragment indices to -1.*/
+      memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi]));
+      /*Fill in the fragment map for this super block.*/
+      xfrag=yfrag+x;
+      for(i=0;i<imax;i++){
+        int j;
+        for(j=0;j<jmax;j++){
+          _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
+        }
+        xfrag+=_hfrags;
+      }
+      /*Mark which quadrants of this super block lie within the image.*/
+      for(quadi=0;quadi<4;quadi++){
+        _sb_flags[sbi].quad_valid|=
+         (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
+      }
+    }
+    yfrag+=_hfrags<<2;
+  }
+}
+
+/*Fills in the Y plane fragment map for a macro block given the fragment
+   coordinates of its upper-left hand corner.
+  _mb_map:    The macro block map to fill.
+  _fplane: The description of the Y plane.
+  _xfrag0: The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
+  int i;
+  int j;
+  for(i=0;i<2;i++)for(j=0;j<2;j++){
+    _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
+  }
+}
+
+/*Fills in the chroma plane fragment maps for a macro block.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0).
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  _xfrag0>>=1;
+  _yfrag0>>=1;
+  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
+  _mb_map[1][0]=fragi+_fplanes[1].froffset;
+  _mb_map[2][0]=fragi+_fplanes[2].froffset;
+}
+
+/*Fills in the chroma plane fragment maps for a macro block.
+  This version is for use with chroma decimated in the Y direction.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  int       j;
+  _yfrag0>>=1;
+  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
+  for(j=0;j<2;j++){
+    _mb_map[1][j]=fragi+_fplanes[1].froffset;
+    _mb_map[2][j]=fragi+_fplanes[2].froffset;
+    fragi++;
+  }
+}
+
+/*Fills in the chroma plane fragment maps for a macro block.
+  This version is for use with chroma decimated in the X direction (4:2:2).
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  int       i;
+  _xfrag0>>=1;
+  fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
+  for(i=0;i<2;i++){
+    _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
+    _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
+    fragi+=_fplanes[1].nhfrags;
+  }
+}
+
+/*Fills in the chroma plane fragment maps for a macro block.
+  This version is for use with no chroma decimation (4:4:4).
+  This uses the already filled-in luma plane values.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.*/
+static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3]){
+  int k;
+  for(k=0;k<4;k++){
+    _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
+    _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
+  }
+}
+
+/*The function type used to fill in the chroma plane fragment maps for a
+   macro block.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
+
+/*A table of functions used to fill in the chroma plane fragment maps for a
+   macro block for each type of chrominance decimation.*/
+static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
+  oc_mb_fill_cmapping00,
+  oc_mb_fill_cmapping01,
+  oc_mb_fill_cmapping10,
+  (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11
+};
+
+/*Fills in the mapping from macro blocks to their corresponding fragment
+   numbers in each plane.
+  _mb_maps:   The list of macro block maps.
+  _mb_modes:  The list of macro block modes; macro blocks completely outside
+               the coded region are marked invalid.
+  _fplanes:   The descriptions of the fragment planes.
+  _pixel_fmt: The chroma decimation type.*/
+static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
+ signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
+  oc_mb_fill_cmapping_func  mb_fill_cmapping;
+  unsigned                  sbi;
+  int                       y;
+  mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
+  /*Loop through the luma plane super blocks.*/
+  for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
+    int x;
+    for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
+      int ymb;
+      /*Loop through the macro blocks in each super block in display order.*/
+      for(ymb=0;ymb<2;ymb++){
+        int xmb;
+        for(xmb=0;xmb<2;xmb++){
+          unsigned mbi;
+          int      mbx;
+          int      mby;
+          mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
+          mbx=x|xmb<<1;
+          mby=y|ymb<<1;
+          /*Initialize fragment indices to -1.*/
+          memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
+          /*Make sure this macro block is within the encoded region.*/
+          if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
+            _mb_modes[mbi]=OC_MODE_INVALID;
+            continue;
+          }
+          /*Fill in the fragment indices for the luma plane.*/
+          oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
+          /*Fill in the fragment indices for the chroma planes.*/
+          (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
+        }
+      }
+    }
+  }
+}
+
+/*Marks the fragments which fall all or partially outside the displayable
+   region of the frame.
+  _state: The Theora state containing the fragments to be marked.*/
+static void oc_state_border_init(oc_theora_state *_state){
+  oc_fragment       *frag;
+  oc_fragment       *yfrag_end;
+  oc_fragment       *xfrag_end;
+  oc_fragment_plane *fplane;
+  int                crop_x0;
+  int                crop_y0;
+  int                crop_xf;
+  int                crop_yf;
+  int                pli;
+  int                y;
+  int                x;
+  /*The method we use here is slow, but the code is dead simple and handles
+     all the special cases easily.
+    We only ever need to do it once.*/
+  /*Loop through the fragments, marking those completely outside the
+     displayable region and constructing a border mask for those that straddle
+     the border.*/
+  _state->nborders=0;
+  yfrag_end=frag=_state->frags;
+  for(pli=0;pli<3;pli++){
+    fplane=_state->fplanes+pli;
+    /*Set up the cropping rectangle for this plane.*/
+    crop_x0=_state->info.pic_x;
+    crop_xf=_state->info.pic_x+_state->info.pic_width;
+    crop_y0=_state->info.pic_y;
+    crop_yf=_state->info.pic_y+_state->info.pic_height;
+    if(pli>0){
+      if(!(_state->info.pixel_fmt&1)){
+        crop_x0=crop_x0>>1;
+        crop_xf=crop_xf+1>>1;
+      }
+      if(!(_state->info.pixel_fmt&2)){
+        crop_y0=crop_y0>>1;
+        crop_yf=crop_yf+1>>1;
+      }
+    }
+    y=0;
+    for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
+      x=0;
+      for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
+        /*First check to see if this fragment is completely outside the
+           displayable region.*/
+        /*Note the special checks for an empty cropping rectangle.
+          This guarantees that if we count a fragment as straddling the
+           border below, at least one pixel in the fragment will be inside
+           the displayable region.*/
+        if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
+         crop_x0>=crop_xf||crop_y0>=crop_yf){
+          frag->invalid=1;
+        }
+        /*Otherwise, check to see if it straddles the border.*/
+        else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
+         y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
+          ogg_int64_t mask;
+          int         npixels;
+          int         i;
+          mask=npixels=0;
+          for(i=0;i<8;i++){
+            int j;
+            for(j=0;j<8;j++){
+              if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
+                mask|=(ogg_int64_t)1<<(i<<3|j);
+                npixels++;
+              }
+            }
+          }
+          /*Search the fragment array for border info with the same pattern.
+            In general, there will be at most 8 different patterns (per
+             plane).*/
+          for(i=0;;i++){
+            if(i>=_state->nborders){
+              _state->nborders++;
+              _state->borders[i].mask=mask;
+              _state->borders[i].npixels=npixels;
+            }
+            else if(_state->borders[i].mask!=mask)continue;
+            frag->borderi=i;
+            break;
+          }
+        }
+        else frag->borderi=-1;
+      }
+    }
+  }
+}
+
+static int oc_state_frarray_init(oc_theora_state *_state){
+  int       yhfrags;
+  int       yvfrags;
+  int       chfrags;
+  int       cvfrags;
+  ptrdiff_t yfrags;
+  ptrdiff_t cfrags;
+  ptrdiff_t nfrags;
+  unsigned  yhsbs;
+  unsigned  yvsbs;
+  unsigned  chsbs;
+  unsigned  cvsbs;
+  unsigned  ysbs;
+  unsigned  csbs;
+  unsigned  nsbs;
+  size_t    nmbs;
+  int       hdec;
+  int       vdec;
+  int       pli;
+  /*Figure out the number of fragments in each plane.*/
+  /*These parameters have already been validated to be multiples of 16.*/
+  yhfrags=_state->info.frame_width>>3;
+  yvfrags=_state->info.frame_height>>3;
+  hdec=!(_state->info.pixel_fmt&1);
+  vdec=!(_state->info.pixel_fmt&2);
+  chfrags=yhfrags+hdec>>hdec;
+  cvfrags=yvfrags+vdec>>vdec;
+  yfrags=yhfrags*(ptrdiff_t)yvfrags;
+  cfrags=chfrags*(ptrdiff_t)cvfrags;
+  nfrags=yfrags+2*cfrags;
+  /*Figure out the number of super blocks in each plane.*/
+  yhsbs=yhfrags+3>>2;
+  yvsbs=yvfrags+3>>2;
+  chsbs=chfrags+3>>2;
+  cvsbs=cvfrags+3>>2;
+  ysbs=yhsbs*yvsbs;
+  csbs=chsbs*cvsbs;
+  nsbs=ysbs+2*csbs;
+  nmbs=(size_t)ysbs<<2;
+  /*Check for overflow.
+    We support the ridiculous upper limits of the specification (1048560 by
+     1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
+     but for those with 32-bit pointers (or smaller!) we have to check.
+    If the caller wants to prevent denial-of-service by imposing a more
+     reasonable upper limit on the size of attempted allocations, they must do
+     so themselves; we have no platform independent way to determine how much
+     system memory there is nor an application-independent way to decide what a
+     "reasonable" allocation is.*/
+  if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
+   ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
+    return TH_EIMPL;
+  }
+  /*Initialize the fragment array.*/
+  _state->fplanes[0].nhfrags=yhfrags;
+  _state->fplanes[0].nvfrags=yvfrags;
+  _state->fplanes[0].froffset=0;
+  _state->fplanes[0].nfrags=yfrags;
+  _state->fplanes[0].nhsbs=yhsbs;
+  _state->fplanes[0].nvsbs=yvsbs;
+  _state->fplanes[0].sboffset=0;
+  _state->fplanes[0].nsbs=ysbs;
+  _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
+  _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
+  _state->fplanes[1].froffset=yfrags;
+  _state->fplanes[2].froffset=yfrags+cfrags;
+  _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
+  _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
+  _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
+  _state->fplanes[1].sboffset=ysbs;
+  _state->fplanes[2].sboffset=ysbs+csbs;
+  _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
+  _state->nfrags=nfrags;
+  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
+  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
+  _state->nsbs=nsbs;
+  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
+  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
+  _state->nhmbs=yhsbs<<1;
+  _state->nvmbs=yvsbs<<1;
+  _state->nmbs=nmbs;
+  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
+  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
+  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
+  if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
+   _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
+   _state->coded_fragis==NULL){
+    return TH_EFAULT;
+  }
+  /*Create the mapping from super blocks to fragments.*/
+  for(pli=0;pli<3;pli++){
+    oc_fragment_plane *fplane;
+    fplane=_state->fplanes+pli;
+    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
+     _state->sb_flags+fplane->sboffset,fplane->froffset,
+     fplane->nhfrags,fplane->nvfrags);
+  }
+  /*Create the mapping from macro blocks to fragments.*/
+  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
+   _state->fplanes,_state->info.pixel_fmt);
+  /*Initialize the invalid and borderi fields of each fragment.*/
+  oc_state_border_init(_state);
+  return 0;
+}
+
+static void oc_state_frarray_clear(oc_theora_state *_state){
+  _ogg_free(_state->coded_fragis);
+  _ogg_free(_state->mb_modes);
+  _ogg_free(_state->mb_maps);
+  _ogg_free(_state->sb_flags);
+  _ogg_free(_state->sb_maps);
+  _ogg_free(_state->frag_mvs);
+  _ogg_free(_state->frags);
+}
+
+
+/*Initializes the buffers used for reconstructed frames.
+  These buffers are padded with 16 extra pixels on each side, to allow
+   unrestricted motion vectors without special casing the boundary.
+  If chroma is decimated in either direction, the padding is reduced by a
+   factor of 2 on the appropriate sides.
+  _nrefs: The number of reference buffers to init; must be 3 or 4.*/
+static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
+  th_info       *info;
+  unsigned char *ref_frame_data;
+  size_t         ref_frame_data_sz;
+  size_t         ref_frame_sz;
+  size_t         yplane_sz;
+  size_t         cplane_sz;
+  int            yhstride;
+  int            yheight;
+  int            chstride;
+  int            cheight;
+  ptrdiff_t      yoffset;
+  ptrdiff_t      coffset;
+  ptrdiff_t     *frag_buf_offs;
+  ptrdiff_t      fragi;
+  int            hdec;
+  int            vdec;
+  int            rfi;
+  int            pli;
+  if(_nrefs<3||_nrefs>4)return TH_EINVAL;
+  info=&_state->info;
+  /*Compute the image buffer parameters for each plane.*/
+  hdec=!(info->pixel_fmt&1);
+  vdec=!(info->pixel_fmt&2);
+  yhstride=info->frame_width+2*OC_UMV_PADDING;
+  yheight=info->frame_height+2*OC_UMV_PADDING;
+  chstride=yhstride>>hdec;
+  cheight=yheight>>vdec;
+  yplane_sz=yhstride*(size_t)yheight;
+  cplane_sz=chstride*(size_t)cheight;
+  yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
+  coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
+  ref_frame_sz=yplane_sz+2*cplane_sz;
+  ref_frame_data_sz=_nrefs*ref_frame_sz;
+  /*Check for overflow.
+    The same caveats apply as for oc_state_frarray_init().*/
+  if(yplane_sz/yhstride!=yheight||2*cplane_sz<cplane_sz||
+   ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
+    return TH_EIMPL;
+  }
+  ref_frame_data=_ogg_malloc(ref_frame_data_sz);
+  frag_buf_offs=_state->frag_buf_offs=
+   _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
+  if(ref_frame_data==NULL||frag_buf_offs==NULL){
+    _ogg_free(frag_buf_offs);
+    _ogg_free(ref_frame_data);
+    return TH_EFAULT;
+  }
+  /*Set up the width, height and stride for the image buffers.*/
+  _state->ref_frame_bufs[0][0].width=info->frame_width;
+  _state->ref_frame_bufs[0][0].height=info->frame_height;
+  _state->ref_frame_bufs[0][0].stride=yhstride;
+  _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
+   info->frame_width>>hdec;
+  _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
+   info->frame_height>>vdec;
+  _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
+   chstride;
+  for(rfi=1;rfi<_nrefs;rfi++){
+    memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
+     sizeof(_state->ref_frame_bufs[0]));
+  }
+  /*Set up the data pointers for the image buffers.*/
+  for(rfi=0;rfi<_nrefs;rfi++){
+    _state->ref_frame_data[rfi]=ref_frame_data;
+    _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
+    ref_frame_data+=yplane_sz;
+    _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
+    ref_frame_data+=cplane_sz;
+    _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
+    ref_frame_data+=cplane_sz;
+    /*Flip the buffer upside down.
+      This allows us to decode Theora's bottom-up frames in their natural
+       order, yet return a top-down buffer with a positive stride to the user.*/
+    oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
+     _state->ref_frame_bufs[rfi]);
+  }
+  _state->ref_ystride[0]=-yhstride;
+  _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
+  /*Initialize the fragment buffer offsets.*/
+  ref_frame_data=_state->ref_frame_data[0];
+  fragi=0;
+  for(pli=0;pli<3;pli++){
+    th_img_plane      *iplane;
+    oc_fragment_plane *fplane;
+    unsigned char     *vpix;
+    ptrdiff_t          stride;
+    ptrdiff_t          vfragi_end;
+    int                nhfrags;
+    iplane=_state->ref_frame_bufs[0]+pli;
+    fplane=_state->fplanes+pli;
+    vpix=iplane->data;
+    vfragi_end=fplane->froffset+fplane->nfrags;
+    nhfrags=fplane->nhfrags;
+    stride=iplane->stride;
+    while(fragi<vfragi_end){
+      ptrdiff_t      hfragi_end;
+      unsigned char *hpix;
+      hpix=vpix;
+      for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
+        frag_buf_offs[fragi]=hpix-ref_frame_data;
+        hpix+=8;
+      }
+      vpix+=stride<<3;
+    }
+  }
+  /*Initialize the reference frame indices.*/
+  _state->ref_frame_idx[OC_FRAME_GOLD]=
+   _state->ref_frame_idx[OC_FRAME_PREV]=
+   _state->ref_frame_idx[OC_FRAME_SELF]=-1;
+  _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1;
+  return 0;
+}
+
+static void oc_state_ref_bufs_clear(oc_theora_state *_state){
+  _ogg_free(_state->frag_buf_offs);
+  _ogg_free(_state->ref_frame_data[0]);
+}
+
+
+void oc_state_vtable_init_c(oc_theora_state *_state){
+  _state->opt_vtable.frag_copy=oc_frag_copy_c;
+  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
+  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
+  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
+  _state->opt_vtable.idct8x8=oc_idct8x8_c;
+  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
+  _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
+  _state->opt_vtable.state_loop_filter_frag_rows=
+   oc_state_loop_filter_frag_rows_c;
+  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
+  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
+}
+
+/*Initialize the accelerated function pointers.*/
+void oc_state_vtable_init(oc_theora_state *_state){
+#if defined(OC_X86_ASM)
+  oc_state_vtable_init_x86(_state);
+#else
+  oc_state_vtable_init_c(_state);
+#endif
+}
+
+
+int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
+  int ret;
+  /*First validate the parameters.*/
+  if(_info==NULL)return TH_EFAULT;
+  /*The width and height of the encoded frame must be multiples of 16.
+    They must also, when divided by 16, fit into a 16-bit unsigned integer.
+    The displayable frame offset coordinates must fit into an 8-bit unsigned
+     integer.
+    Note that the offset Y in the API is specified on the opposite side from
+     how it is specified in the bitstream, because the Y axis is flipped in
+     the bitstream.
+    The displayable frame must fit inside the encoded frame.
+    The color space must be one known by the encoder.*/
+  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
+   _info->frame_width<=0||_info->frame_width>=0x100000||
+   _info->frame_height<=0||_info->frame_height>=0x100000||
+   _info->pic_x+_info->pic_width>_info->frame_width||
+   _info->pic_y+_info->pic_height>_info->frame_height||
+   _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
+   /*Note: the following <0 comparisons may generate spurious warnings on
+      platforms where enums are unsigned.
+     We could cast them to unsigned and just use the following >= comparison,
+      but there are a number of compilers which will mis-optimize this.
+     It's better to live with the spurious warnings.*/
+   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
+   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){
+    return TH_EINVAL;
+  }
+  memset(_state,0,sizeof(*_state));
+  memcpy(&_state->info,_info,sizeof(*_info));
+  /*Invert the sense of pic_y to match Theora's right-handed coordinate
+     system.*/
+  _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+  _state->frame_type=OC_UNKWN_FRAME;
+  oc_state_vtable_init(_state);
+  ret=oc_state_frarray_init(_state);
+  if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
+  if(ret<0){
+    oc_state_frarray_clear(_state);
+    return ret;
+  }
+  /*If the keyframe_granule_shift is out of range, use the maximum allowable
+     value.*/
+  if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
+    _state->info.keyframe_granule_shift=31;
+  }
+  _state->keyframe_num=0;
+  _state->curframe_num=-1;
+  /*3.2.0 streams mark the frame index instead of the frame count.
+    This was changed with stream version 3.2.1 to conform to other Ogg
+     codecs.
+    We add an extra bias when computing granule positions for new streams.*/
+  _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
+  return 0;
+}
+
+void oc_state_clear(oc_theora_state *_state){
+  oc_state_ref_bufs_clear(_state);
+  oc_state_frarray_clear(_state);
+}
+
+
+/*Duplicates the pixels on the border of the image plane out into the
+   surrounding padding for use by unrestricted motion vectors.
+  This function only adds the left and right borders, and only for the fragment
+   rows specified.
+  _refi: The index of the reference buffer to pad.
+  _pli:  The color plane.
+  _y0:   The Y coordinate of the first row to pad.
+  _yend: The Y coordinate of the row to stop padding at.*/
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend){
+  th_img_plane  *iplane;
+  unsigned char *apix;
+  unsigned char *bpix;
+  unsigned char *epix;
+  int            stride;
+  int            hpadding;
+  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  stride=iplane->stride;
+  apix=iplane->data+_y0*(ptrdiff_t)stride;
+  bpix=apix+iplane->width-1;
+  epix=iplane->data+_yend*(ptrdiff_t)stride;
+  /*Note the use of != instead of <, which allows the stride to be negative.*/
+  while(apix!=epix){
+    memset(apix-hpadding,apix[0],hpadding);
+    memset(bpix+1,bpix[0],hpadding);
+    apix+=stride;
+    bpix+=stride;
+  }
+}
+
+/*Duplicates the pixels on the border of the image plane out into the
+   surrounding padding for use by unrestricted motion vectors.
+  This function only adds the top and bottom borders, and must be called after
+   the left and right borders are added.
+  _refi:      The index of the reference buffer to pad.
+  _pli:       The color plane.*/
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
+  th_img_plane  *iplane;
+  unsigned char *apix;
+  unsigned char *bpix;
+  unsigned char *epix;
+  int            stride;
+  int            hpadding;
+  int            vpadding;
+  int            fullw;
+  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
+  vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  stride=iplane->stride;
+  fullw=iplane->width+(hpadding<<1);
+  apix=iplane->data-hpadding;
+  bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
+  epix=apix-stride*(ptrdiff_t)vpadding;
+  while(apix!=epix){
+    memcpy(apix-stride,apix,fullw);
+    memcpy(bpix+stride,bpix,fullw);
+    apix-=stride;
+    bpix+=stride;
+  }
+}
+
+/*Duplicates the pixels on the border of the given reference image out into
+   the surrounding padding for use by unrestricted motion vectors.
+  _state: The context containing the reference buffers.
+  _refi:  The index of the reference buffer to pad.*/
+void oc_state_borders_fill(oc_theora_state *_state,int _refi){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    oc_state_borders_fill_rows(_state,_refi,pli,0,
+     _state->ref_frame_bufs[_refi][pli].height);
+    oc_state_borders_fill_caps(_state,_refi,pli);
+  }
+}
+
+/*Determines the offsets in an image buffer to use for motion compensation.
+  _state:   The Theora state the offsets are to be computed with.
+  _offsets: Returns the offset for the buffer(s).
+            _offsets[0] is always set.
+            _offsets[1] is set if the motion vector has non-zero fractional
+             components.
+  _pli:     The color plane index.
+  _dx:      The X component of the motion vector.
+  _dy:      The Y component of the motion vector.
+  Return: The number of offsets returned: 1 or 2.*/
+int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
+ int _pli,int _dx,int _dy){
+  /*Here is a brief description of how Theora handles motion vectors:
+    Motion vector components are specified to half-pixel accuracy in
+     undecimated directions of each plane, and quarter-pixel accuracy in
+     decimated directions.
+    Integer parts are extracted by dividing (not shifting) by the
+     appropriate amount, with truncation towards zero.
+    These integer values are used to calculate the first offset.
+
+    If either of the fractional parts are non-zero, then a second offset is
+     computed.
+    No third or fourth offsets are computed, even if both components have
+     non-zero fractional parts.
+    The second offset is computed by dividing (not shifting) by the
+     appropriate amount, always truncating _away_ from zero.*/
+#if 0
+  /*This version of the code doesn't use any tables, but is slower.*/
+  int ystride;
+  int xprec;
+  int yprec;
+  int xfrac;
+  int yfrac;
+  int offs;
+  ystride=_state->ref_ystride[_pli];
+  /*These two variables decide whether we are in half- or quarter-pixel
+     precision in each component.*/
+  xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
+  yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
+  /*These two variables are either 0 if all the fractional bits are zero or -1
+     if any of them are non-zero.*/
+  xfrac=OC_SIGNMASK(-(_dx&(xprec|1)));
+  yfrac=OC_SIGNMASK(-(_dy&(yprec|1)));
+  offs=(_dx>>xprec)+(_dy>>yprec)*ystride;
+  if(xfrac||yfrac){
+    int xmask;
+    int ymask;
+    xmask=OC_SIGNMASK(_dx);
+    ymask=OC_SIGNMASK(_dy);
+    yfrac&=ystride;
+    _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
+    _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
+    return 2;
+  }
+  else{
+    _offsets[0]=offs;
+    return 1;
+  }
+#else
+  /*Using tables simplifies the code, and there's enough arithmetic to hide the
+     latencies of the memory references.*/
+  static const signed char OC_MVMAP[2][64]={
+    {
+          -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
+       -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1,  0,
+        0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
+        8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
+    },
+    {
+           -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
+       -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,  0,  0,  0,
+        0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+        4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7
+    }
+  };
+  static const signed char OC_MVMAP2[2][64]={
+    {
+        -1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
+      0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
+      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,
+      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1
+    },
+    {
+        -1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
+      0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
+      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
+      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1
+    }
+  };
+  int ystride;
+  int qpx;
+  int qpy;
+  int mx;
+  int my;
+  int mx2;
+  int my2;
+  int offs;
+  ystride=_state->ref_ystride[_pli];
+  qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
+  my=OC_MVMAP[qpy][_dy+31];
+  my2=OC_MVMAP2[qpy][_dy+31];
+  qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
+  mx=OC_MVMAP[qpx][_dx+31];
+  mx2=OC_MVMAP2[qpx][_dx+31];
+  offs=my*ystride+mx;
+  if(mx2||my2){
+    _offsets[1]=offs+my2*ystride+mx2;
+    _offsets[0]=offs;
+    return 2;
+  }
+  _offsets[0]=offs;
+  return 1;
+#endif
+}
+
+void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
+  _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs,
+   _last_zzi,_dc_quant);
+}
+
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
+  unsigned char *dst;
+  ptrdiff_t      frag_buf_off;
+  int            ystride;
+  int            mb_mode;
+  /*Apply the inverse transform.*/
+  /*Special case only having a DC component.*/
+  if(_last_zzi<2){
+    ogg_int16_t p;
+    int         ci;
+    /*We round this dequant product (and not any of the others) because there's
+       no iDCT rounding.*/
+    p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    /*LOOP VECTORIZES.*/
+    for(ci=0;ci<64;ci++)_dct_coeffs[ci]=p;
+  }
+  else{
+    /*First, dequantize the DC coefficient.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8(_state,_dct_coeffs,_last_zzi);
+  }
+  /*Fill in the target buffer.*/
+  frag_buf_off=_state->frag_buf_offs[_fragi];
+  mb_mode=_state->frags[_fragi].mb_mode;
+  ystride=_state->ref_ystride[_pli];
+  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+  if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs);
+  else{
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    ref=
+     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
+     +frag_buf_off;
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+      oc_frag_recon_inter2(_state,
+       dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs);
+    }
+    else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs);
+  }
+}
+
+/*Copies the fragments specified by the lists of fragment indices from one
+   frame to another.
+  _fragis:    A pointer to a list of fragment indices.
+  _nfragis:   The number of fragment indices to copy.
+  _dst_frame: The reference frame to copy to.
+  _src_frame: The reference frame to copy from.
+  _pli:       The color plane the fragments lie in.*/
+void oc_state_frag_copy_list(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame,
+   _src_frame,_pli);
+}
+
+void oc_state_frag_copy_list_c(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  const ptrdiff_t     *frag_buf_offs;
+  const unsigned char *src_frame_data;
+  unsigned char       *dst_frame_data;
+  ptrdiff_t            fragii;
+  int                  ystride;
+  dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
+  src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
+  ystride=_state->ref_ystride[_pli];
+  frag_buf_offs=_state->frag_buf_offs;
+  for(fragii=0;fragii<_nfragis;fragii++){
+    ptrdiff_t frag_buf_off;
+    frag_buf_off=frag_buf_offs[_fragis[fragii]];
+    oc_frag_copy(_state,dst_frame_data+frag_buf_off,
+     src_frame_data+frag_buf_off,ystride);
+  }
+}
+
+static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
+  int y;
+  _pix-=2;
+  for(y=0;y<8;y++){
+    int f;
+    f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]);
+    /*The _bv array is used to compute the function
+      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
+      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
+    f=*(_bv+(f+4>>3));
+    _pix[1]=OC_CLAMP255(_pix[1]+f);
+    _pix[2]=OC_CLAMP255(_pix[2]-f);
+    _pix+=_ystride;
+  }
+}
+
+static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
+  int x;
+  _pix-=_ystride*2;
+  for(x=0;x<8;x++){
+    int f;
+    f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]);
+    /*The _bv array is used to compute the function
+      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
+      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
+    f=*(_bv+(f+4>>3));
+    _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f);
+    _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f);
+  }
+}
+
+/*Initialize the bounding values array used by the loop filter.
+  _bv: Storage for the array.
+  Return: 0 on success, or a non-zero value if no filtering need be applied.*/
+int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
+  int flimit;
+  int i;
+  flimit=_state->loop_filter_limits[_state->qis[0]];
+  if(flimit==0)return 1;
+  memset(_bv,0,sizeof(_bv[0])*256);
+  for(i=0;i<flimit;i++){
+    if(127-i-flimit>=0)_bv[127-i-flimit]=i-flimit;
+    _bv[127-i]=-i;
+    _bv[127+i]=i;
+    if(127+i+flimit<256)_bv[127+i+flimit]=flimit-i;
+  }
+  return 0;
+}
+
+/*Apply the loop filter to a given set of fragment rows in the given plane.
+  The filter may be run on the bottom edge, affecting pixels in the next row of
+   fragments, so this row also needs to be available.
+  _bv:        The bounding values array.
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
+void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
+ int _refi,int _pli,int _fragy0,int _fragy_end){
+  _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli,
+   _fragy0,_fragy_end);
+}
+
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end){
+  const oc_fragment_plane *fplane;
+  const oc_fragment       *frags;
+  const ptrdiff_t         *frag_buf_offs;
+  unsigned char           *ref_frame_data;
+  ptrdiff_t                fragi_top;
+  ptrdiff_t                fragi_bot;
+  ptrdiff_t                fragi0;
+  ptrdiff_t                fragi0_end;
+  int                      ystride;
+  int                      nhfrags;
+  _bv+=127;
+  fplane=_state->fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  fragi_top=fplane->froffset;
+  fragi_bot=fragi_top+fplane->nfrags;
+  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
+  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
+  ystride=_state->ref_ystride[_pli];
+  frags=_state->frags;
+  frag_buf_offs=_state->frag_buf_offs;
+  ref_frame_data=_state->ref_frame_data[_refi];
+  /*The following loops are constructed somewhat non-intuitively on purpose.
+    The main idea is: if a block boundary has at least one coded fragment on
+     it, the filter is applied to it.
+    However, the order that the filters are applied in matters, and VP3 chose
+     the somewhat strange ordering used below.*/
+  while(fragi0<fragi0_end){
+    ptrdiff_t fragi;
+    ptrdiff_t fragi_end;
+    fragi=fragi0;
+    fragi_end=fragi+nhfrags;
+    while(fragi<fragi_end){
+      if(frags[fragi].coded){
+        unsigned char *ref;
+        ref=ref_frame_data+frag_buf_offs[fragi];
+        if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
+        if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
+        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
+          loop_filter_h(ref+8,ystride,_bv);
+        }
+        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
+          loop_filter_v(ref+(ystride<<3),ystride,_bv);
+        }
+      }
+      fragi++;
+    }
+    fragi0+=nhfrags;
+  }
+}
+
+#if defined(OC_DUMP_IMAGES)
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf){
+  /*Dump a PNG of the reconstructed image.*/
+  png_structp    png;
+  png_infop      info;
+  png_bytep     *image;
+  FILE          *fp;
+  char           fname[16];
+  unsigned char *y_row;
+  unsigned char *u_row;
+  unsigned char *v_row;
+  unsigned char *y;
+  unsigned char *u;
+  unsigned char *v;
+  ogg_int64_t    iframe;
+  ogg_int64_t    pframe;
+  int            y_stride;
+  int            u_stride;
+  int            v_stride;
+  int            framei;
+  int            width;
+  int            height;
+  int            imgi;
+  int            imgj;
+  width=_state->info.frame_width;
+  height=_state->info.frame_height;
+  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
+  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
+  sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
+  fp=fopen(fname,"wb");
+  if(fp==NULL)return TH_EFAULT;
+  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
+  if(image==NULL){
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
+  if(png==NULL){
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  info=png_create_info_struct(png);
+  if(info==NULL){
+    png_destroy_write_struct(&png,NULL);
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  if(setjmp(png_jmpbuf(png))){
+    png_destroy_write_struct(&png,&info);
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  framei=_state->ref_frame_idx[_frame];
+  y_row=_state->ref_frame_bufs[framei][0].data;
+  u_row=_state->ref_frame_bufs[framei][1].data;
+  v_row=_state->ref_frame_bufs[framei][2].data;
+  y_stride=_state->ref_frame_bufs[framei][0].stride;
+  u_stride=_state->ref_frame_bufs[framei][1].stride;
+  v_stride=_state->ref_frame_bufs[framei][2].stride;
+  /*Chroma up-sampling is just done with a box filter.
+    This is very likely what will actually be used in practice on a real
+     display, and also removes one more layer to search in for the source of
+     artifacts.
+    As an added bonus, it's dead simple.*/
+  for(imgi=height;imgi-->0;){
+    int dc;
+    y=y_row;
+    u=u_row;
+    v=v_row;
+    for(imgj=0;imgj<6*width;){
+      float    yval;
+      float    uval;
+      float    vval;
+      unsigned rval;
+      unsigned gval;
+      unsigned bval;
+      /*This is intentionally slow and very accurate.*/
+      yval=(*y-16)*(1.0F/219);
+      uval=(*u-128)*(2*(1-0.114F)/224);
+      vval=(*v-128)*(2*(1-0.299F)/224);
+      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
+      gval=OC_CLAMPI(0,(int)(65535*(
+       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
+      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
+      image[imgi][imgj++]=(unsigned char)(rval>>8);
+      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
+      image[imgi][imgj++]=(unsigned char)(gval>>8);
+      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
+      image[imgi][imgj++]=(unsigned char)(bval>>8);
+      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
+      dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
+      y++;
+      u+=dc;
+      v+=dc;
+    }
+    dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
+    y_row+=y_stride;
+    u_row+=dc&u_stride;
+    v_row+=dc&v_stride;
+  }
+  png_init_io(png,fp);
+  png_set_compression_level(png,Z_BEST_COMPRESSION);
+  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
+   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
+  switch(_state->info.colorspace){
+    case TH_CS_ITU_REC_470M:{
+      png_set_gAMA(png,info,2.2);
+      png_set_cHRM_fixed(png,info,31006,31616,
+       67000,32000,21000,71000,14000,8000);
+    }break;
+    case TH_CS_ITU_REC_470BG:{
+      png_set_gAMA(png,info,2.67);
+      png_set_cHRM_fixed(png,info,31271,32902,
+       64000,33000,29000,60000,15000,6000);
+    }break;
+    default:break;
+  }
+  png_set_pHYs(png,info,_state->info.aspect_numerator,
+   _state->info.aspect_denominator,0);
+  png_set_rows(png,info,image);
+  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
+  png_write_end(png,info);
+  png_destroy_write_struct(&png,&info);
+  oc_free_2d(image);
+  fclose(fp);
+  return 0;
+}
+#endif
+
+
+
+ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
+  oc_theora_state *state;
+  state=(oc_theora_state *)_encdec;
+  if(_granpos>=0){
+    ogg_int64_t iframe;
+    ogg_int64_t pframe;
+    iframe=_granpos>>state->info.keyframe_granule_shift;
+    pframe=_granpos-(iframe<<state->info.keyframe_granule_shift);
+    /*3.2.0 streams store the frame index in the granule position.
+      3.2.1 and later store the frame count.
+      We return the index, so adjust the value if we have a 3.2.1 or later
+       stream.*/
+    return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1);
+  }
+  return -1;
+}
+
+double th_granule_time(void *_encdec,ogg_int64_t _granpos){
+  oc_theora_state *state;
+  state=(oc_theora_state *)_encdec;
+  if(_granpos>=0){
+    return (th_granule_frame(_encdec, _granpos)+1)*(
+     (double)state->info.fps_denominator/state->info.fps_numerator);
+  }
+  return -1;
+}
diff --git a/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.c b/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.c
new file mode 100644
index 00000000..2c732939
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.c
@@ -0,0 +1,293 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxfrag.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+/*MMX acceleration of fragment reconstruction for motion compensation.
+  Originally written by Rudolf Marek.
+  Additional optimization by Nils Pipenbrinck.
+  Note: Loops are unrolled for best performance.
+  The iteration each instruction belongs to is marked in the comments as #i.*/
+#include <stddef.h>
+#include "x86int.h"
+#include "mmxfrag.h"
+
+#if defined(OC_X86_ASM)
+
+/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
+   between rows.*/
+void oc_frag_copy_mmx(unsigned char *_dst,
+ const unsigned char *_src,int _ystride){
+  OC_FRAG_COPY_MMX(_dst,_src,_ystride);
+}
+
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
+ const ogg_int16_t *_residue){
+  __asm__ __volatile__(
+    /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/
+    "pcmpeqw %%mm0,%%mm0\n\t"
+    /*#0 Load low residue.*/
+    "movq 0*8(%[residue]),%%mm1\n\t"
+    /*#0 Load high residue.*/
+    "movq 1*8(%[residue]),%%mm2\n\t"
+    /*Set mm0 to 0x8000800080008000.*/
+    "psllw $15,%%mm0\n\t"
+    /*#1 Load low residue.*/
+    "movq 2*8(%[residue]),%%mm3\n\t"
+    /*#1 Load high residue.*/
+    "movq 3*8(%[residue]),%%mm4\n\t"
+    /*Set mm0 to 0x0080008000800080.*/
+    "psrlw $8,%%mm0\n\t"
+    /*#2 Load low residue.*/
+    "movq 4*8(%[residue]),%%mm5\n\t"
+    /*#2 Load high residue.*/
+    "movq 5*8(%[residue]),%%mm6\n\t"
+    /*#0 Bias low  residue.*/
+    "paddsw %%mm0,%%mm1\n\t"
+    /*#0 Bias high residue.*/
+    "paddsw %%mm0,%%mm2\n\t"
+    /*#0 Pack to byte.*/
+    "packuswb %%mm2,%%mm1\n\t"
+    /*#1 Bias low  residue.*/
+    "paddsw %%mm0,%%mm3\n\t"
+    /*#1 Bias high residue.*/
+    "paddsw %%mm0,%%mm4\n\t"
+    /*#1 Pack to byte.*/
+    "packuswb %%mm4,%%mm3\n\t"
+    /*#2 Bias low  residue.*/
+    "paddsw %%mm0,%%mm5\n\t"
+    /*#2 Bias high residue.*/
+    "paddsw %%mm0,%%mm6\n\t"
+    /*#2 Pack to byte.*/
+    "packuswb %%mm6,%%mm5\n\t"
+    /*#0 Write row.*/
+    "movq %%mm1,(%[dst])\n\t"
+    /*#1 Write row.*/
+    "movq %%mm3,(%[dst],%[ystride])\n\t"
+    /*#2 Write row.*/
+    "movq %%mm5,(%[dst],%[ystride],2)\n\t"
+    /*#3 Load low residue.*/
+    "movq 6*8(%[residue]),%%mm1\n\t"
+    /*#3 Load high residue.*/
+    "movq 7*8(%[residue]),%%mm2\n\t"
+    /*#4 Load high residue.*/
+    "movq 8*8(%[residue]),%%mm3\n\t"
+    /*#4 Load high residue.*/
+    "movq 9*8(%[residue]),%%mm4\n\t"
+    /*#5 Load high residue.*/
+    "movq 10*8(%[residue]),%%mm5\n\t"
+    /*#5 Load high residue.*/
+    "movq 11*8(%[residue]),%%mm6\n\t"
+    /*#3 Bias low  residue.*/
+    "paddsw %%mm0,%%mm1\n\t"
+    /*#3 Bias high residue.*/
+    "paddsw %%mm0,%%mm2\n\t"
+    /*#3 Pack to byte.*/
+    "packuswb %%mm2,%%mm1\n\t"
+    /*#4 Bias low  residue.*/
+    "paddsw %%mm0,%%mm3\n\t"
+    /*#4 Bias high residue.*/
+    "paddsw %%mm0,%%mm4\n\t"
+    /*#4 Pack to byte.*/
+    "packuswb %%mm4,%%mm3\n\t"
+    /*#5 Bias low  residue.*/
+    "paddsw %%mm0,%%mm5\n\t"
+    /*#5 Bias high residue.*/
+    "paddsw %%mm0,%%mm6\n\t"
+    /*#5 Pack to byte.*/
+    "packuswb %%mm6,%%mm5\n\t"
+    /*#3 Write row.*/
+    "movq %%mm1,(%[dst],%[ystride3])\n\t"
+    /*#4 Write row.*/
+    "movq %%mm3,(%[dst4])\n\t"
+    /*#5 Write row.*/
+    "movq %%mm5,(%[dst4],%[ystride])\n\t"
+    /*#6 Load low residue.*/
+    "movq 12*8(%[residue]),%%mm1\n\t"
+    /*#6 Load high residue.*/
+    "movq 13*8(%[residue]),%%mm2\n\t"
+    /*#7 Load low residue.*/
+    "movq 14*8(%[residue]),%%mm3\n\t"
+    /*#7 Load high residue.*/
+    "movq 15*8(%[residue]),%%mm4\n\t"
+    /*#6 Bias low  residue.*/
+    "paddsw %%mm0,%%mm1\n\t"
+    /*#6 Bias high residue.*/
+    "paddsw %%mm0,%%mm2\n\t"
+    /*#6 Pack to byte.*/
+    "packuswb %%mm2,%%mm1\n\t"
+    /*#7 Bias low  residue.*/
+    "paddsw %%mm0,%%mm3\n\t"
+    /*#7 Bias high residue.*/
+    "paddsw %%mm0,%%mm4\n\t"
+    /*#7 Pack to byte.*/
+    "packuswb %%mm4,%%mm3\n\t"
+    /*#6 Write row.*/
+    "movq %%mm1,(%[dst4],%[ystride],2)\n\t"
+    /*#7 Write row.*/
+    "movq %%mm3,(%[dst4],%[ystride3])\n\t"
+    :
+    :[residue]"r"(_residue),
+     [dst]"r"(_dst),
+     [dst4]"r"(_dst+(_ystride<<2)),
+     [ystride]"r"((ptrdiff_t)_ystride),
+     [ystride3]"r"((ptrdiff_t)_ystride*3)
+    :"memory"
+  );
+}
+
+void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src,
+ int _ystride,const ogg_int16_t *_residue){
+  int i;
+  /*Zero mm0.*/
+  __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::);
+  for(i=4;i-->0;){
+    __asm__ __volatile__(
+      /*#0 Load source.*/
+      "movq (%[src]),%%mm3\n\t"
+      /*#1 Load source.*/
+      "movq (%[src],%[ystride]),%%mm7\n\t"
+      /*#0 Get copy of src.*/
+      "movq %%mm3,%%mm4\n\t"
+      /*#0 Expand high source.*/
+      "punpckhbw %%mm0,%%mm4\n\t"
+      /*#0 Expand low  source.*/
+      "punpcklbw %%mm0,%%mm3\n\t"
+      /*#0 Add residue high.*/
+      "paddsw 8(%[residue]),%%mm4\n\t"
+      /*#1 Get copy of src.*/
+      "movq %%mm7,%%mm2\n\t"
+      /*#0 Add residue low.*/
+      "paddsw (%[residue]), %%mm3\n\t"
+      /*#1 Expand high source.*/
+      "punpckhbw %%mm0,%%mm2\n\t"
+      /*#0 Pack final row pixels.*/
+      "packuswb %%mm4,%%mm3\n\t"
+      /*#1 Expand low  source.*/
+      "punpcklbw %%mm0,%%mm7\n\t"
+      /*#1 Add residue low.*/
+      "paddsw 16(%[residue]),%%mm7\n\t"
+      /*#1 Add residue high.*/
+      "paddsw 24(%[residue]),%%mm2\n\t"
+      /*Advance residue.*/
+      "lea 32(%[residue]),%[residue]\n\t"
+      /*#1 Pack final row pixels.*/
+      "packuswb %%mm2,%%mm7\n\t"
+      /*Advance src.*/
+      "lea (%[src],%[ystride],2),%[src]\n\t"
+      /*#0 Write row.*/
+      "movq %%mm3,(%[dst])\n\t"
+      /*#1 Write row.*/
+      "movq %%mm7,(%[dst],%[ystride])\n\t"
+      /*Advance dst.*/
+      "lea (%[dst],%[ystride],2),%[dst]\n\t"
+      :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
+      :[ystride]"r"((ptrdiff_t)_ystride)
+      :"memory"
+    );
+  }
+}
+
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){
+  int i;
+  /*Zero mm7.*/
+  __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::);
+  for(i=4;i-->0;){
+    __asm__ __volatile__(
+      /*#0 Load src1.*/
+      "movq (%[src1]),%%mm0\n\t"
+      /*#0 Load src2.*/
+      "movq (%[src2]),%%mm2\n\t"
+      /*#0 Copy src1.*/
+      "movq %%mm0,%%mm1\n\t"
+      /*#0 Copy src2.*/
+      "movq %%mm2,%%mm3\n\t"
+      /*#1 Load src1.*/
+      "movq (%[src1],%[ystride]),%%mm4\n\t"
+      /*#0 Unpack lower src1.*/
+      "punpcklbw %%mm7,%%mm0\n\t"
+      /*#1 Load src2.*/
+      "movq (%[src2],%[ystride]),%%mm5\n\t"
+      /*#0 Unpack higher src1.*/
+      "punpckhbw %%mm7,%%mm1\n\t"
+      /*#0 Unpack lower src2.*/
+      "punpcklbw %%mm7,%%mm2\n\t"
+      /*#0 Unpack higher src2.*/
+      "punpckhbw %%mm7,%%mm3\n\t"
+      /*Advance src1 ptr.*/
+      "lea (%[src1],%[ystride],2),%[src1]\n\t"
+      /*Advance src2 ptr.*/
+      "lea (%[src2],%[ystride],2),%[src2]\n\t"
+      /*#0 Lower src1+src2.*/
+      "paddsw %%mm2,%%mm0\n\t"
+      /*#0 Higher src1+src2.*/
+      "paddsw %%mm3,%%mm1\n\t"
+      /*#1 Copy src1.*/
+      "movq %%mm4,%%mm2\n\t"
+      /*#0 Build lo average.*/
+      "psraw $1,%%mm0\n\t"
+      /*#1 Copy src2.*/
+      "movq %%mm5,%%mm3\n\t"
+      /*#1 Unpack lower src1.*/
+      "punpcklbw %%mm7,%%mm4\n\t"
+      /*#0 Build hi average.*/
+      "psraw $1,%%mm1\n\t"
+      /*#1 Unpack higher src1.*/
+      "punpckhbw %%mm7,%%mm2\n\t"
+      /*#0 low+=residue.*/
+      "paddsw (%[residue]),%%mm0\n\t"
+      /*#1 Unpack lower src2.*/
+      "punpcklbw %%mm7,%%mm5\n\t"
+      /*#0 high+=residue.*/
+      "paddsw 8(%[residue]),%%mm1\n\t"
+      /*#1 Unpack higher src2.*/
+      "punpckhbw %%mm7,%%mm3\n\t"
+      /*#1 Lower src1+src2.*/
+      "paddsw %%mm4,%%mm5\n\t"
+      /*#0 Pack and saturate.*/
+      "packuswb %%mm1,%%mm0\n\t"
+      /*#1 Higher src1+src2.*/
+      "paddsw %%mm2,%%mm3\n\t"
+      /*#0 Write row.*/
+      "movq %%mm0,(%[dst])\n\t"
+      /*#1 Build lo average.*/
+      "psraw $1,%%mm5\n\t"
+      /*#1 Build hi average.*/
+      "psraw $1,%%mm3\n\t"
+      /*#1 low+=residue.*/
+      "paddsw 16(%[residue]),%%mm5\n\t"
+      /*#1 high+=residue.*/
+      "paddsw 24(%[residue]),%%mm3\n\t"
+      /*#1 Pack and saturate.*/
+      "packuswb  %%mm3,%%mm5\n\t"
+      /*#1 Write row ptr.*/
+      "movq %%mm5,(%[dst],%[ystride])\n\t"
+      /*Advance residue ptr.*/
+      "add $32,%[residue]\n\t"
+      /*Advance dest ptr.*/
+      "lea (%[dst],%[ystride],2),%[dst]\n\t"
+     :[dst]"+r"(_dst),[residue]"+r"(_residue),
+      [src1]"+%r"(_src1),[src2]"+r"(_src2)
+     :[ystride]"r"((ptrdiff_t)_ystride)
+     :"memory"
+    );
+  }
+}
+
+void oc_restore_fpu_mmx(void){
+  __asm__ __volatile__("emms\n\t");
+}
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.h b/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.h
new file mode 100644
index 00000000..a3984276
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86/mmxfrag.h
@@ -0,0 +1,64 @@
+#if !defined(_x86_mmxfrag_H)
+# define _x86_mmxfrag_H (1)
+# include <stddef.h>
+# include "x86int.h"
+
+#if defined(OC_X86_ASM)
+
+/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
+   between rows.*/
+#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \
+  do{ \
+    const unsigned char *src; \
+    unsigned char       *dst; \
+    ptrdiff_t            ystride3; \
+    src=(_src); \
+    dst=(_dst); \
+    __asm__ __volatile__( \
+      /*src+0*ystride*/ \
+      "movq (%[src]),%%mm0\n\t" \
+      /*src+1*ystride*/ \
+      "movq (%[src],%[ystride]),%%mm1\n\t" \
+      /*ystride3=ystride*3*/ \
+      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
+      /*src+2*ystride*/ \
+      "movq (%[src],%[ystride],2),%%mm2\n\t" \
+      /*src+3*ystride*/ \
+      "movq (%[src],%[ystride3]),%%mm3\n\t" \
+      /*dst+0*ystride*/ \
+      "movq %%mm0,(%[dst])\n\t" \
+      /*dst+1*ystride*/ \
+      "movq %%mm1,(%[dst],%[ystride])\n\t" \
+      /*Pointer to next 4.*/ \
+      "lea (%[src],%[ystride],4),%[src]\n\t" \
+      /*dst+2*ystride*/ \
+      "movq %%mm2,(%[dst],%[ystride],2)\n\t" \
+      /*dst+3*ystride*/ \
+      "movq %%mm3,(%[dst],%[ystride3])\n\t" \
+      /*Pointer to next 4.*/ \
+      "lea (%[dst],%[ystride],4),%[dst]\n\t" \
+      /*src+0*ystride*/ \
+      "movq (%[src]),%%mm0\n\t" \
+      /*src+1*ystride*/ \
+      "movq (%[src],%[ystride]),%%mm1\n\t" \
+      /*src+2*ystride*/ \
+      "movq (%[src],%[ystride],2),%%mm2\n\t" \
+      /*src+3*ystride*/ \
+      "movq (%[src],%[ystride3]),%%mm3\n\t" \
+      /*dst+0*ystride*/ \
+      "movq %%mm0,(%[dst])\n\t" \
+      /*dst+1*ystride*/ \
+      "movq %%mm1,(%[dst],%[ystride])\n\t" \
+      /*dst+2*ystride*/ \
+      "movq %%mm2,(%[dst],%[ystride],2)\n\t" \
+      /*dst+3*ystride*/ \
+      "movq %%mm3,(%[dst],%[ystride3])\n\t" \
+      :[dst]"+r"(dst),[src]"+r"(src),[ystride3]"=&r"(ystride3) \
+      :[ystride]"r"((ptrdiff_t)(_ystride)) \
+      :"memory" \
+    ); \
+  } \
+  while(0)
+
+# endif
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86/mmxidct.c b/engine/code/libtheora-1.1.1/lib/x86/mmxidct.c
new file mode 100644
index 00000000..76424e63
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86/mmxidct.c
@@ -0,0 +1,564 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+/*MMX acceleration of Theora's iDCT.
+  Originally written by Rudolf Marek, based on code from On2's VP3.*/
+#include "x86int.h"
+#include "../dct.h"
+
+#if defined(OC_X86_ASM)
+
+/*These are offsets into the table of constants below.*/
+/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/
+#define OC_COSINE_OFFSET (0)
+/*A row of 8's.*/
+#define OC_EIGHT_OFFSET  (56)
+
+
+
+/*A table of constants used by the MMX routines.*/
+static const ogg_uint16_t __attribute__((aligned(8),used))
+ OC_IDCT_CONSTS[(7+1)*4]={
+  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
+  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
+  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
+  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
+  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
+  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
+  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
+  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
+  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
+  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
+  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
+  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
+  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
+  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
+      8,    8,    8,    8
+};
+
+/*Converts the expression in the argument to a string.*/
+#define OC_M2STR(_s) #_s
+
+/*38 cycles*/
+#define OC_IDCT_BEGIN \
+  "#OC_IDCT_BEGIN\n\t" \
+  "movq "OC_I(3)",%%mm2\n\t" \
+  "movq "OC_C(3)",%%mm6\n\t" \
+  "movq %%mm2,%%mm4\n\t" \
+  "movq "OC_J(5)",%%mm7\n\t" \
+  "pmulhw %%mm6,%%mm4\n\t" \
+  "movq "OC_C(5)",%%mm1\n\t" \
+  "pmulhw %%mm7,%%mm6\n\t" \
+  "movq %%mm1,%%mm5\n\t" \
+  "pmulhw %%mm2,%%mm1\n\t" \
+  "movq "OC_I(1)",%%mm3\n\t" \
+  "pmulhw %%mm7,%%mm5\n\t" \
+  "movq "OC_C(1)",%%mm0\n\t" \
+  "paddw %%mm2,%%mm4\n\t" \
+  "paddw %%mm7,%%mm6\n\t" \
+  "paddw %%mm1,%%mm2\n\t" \
+  "movq "OC_J(7)",%%mm1\n\t" \
+  "paddw %%mm5,%%mm7\n\t" \
+  "movq %%mm0,%%mm5\n\t" \
+  "pmulhw %%mm3,%%mm0\n\t" \
+  "paddw %%mm7,%%mm4\n\t" \
+  "pmulhw %%mm1,%%mm5\n\t" \
+  "movq "OC_C(7)",%%mm7\n\t" \
+  "psubw %%mm2,%%mm6\n\t" \
+  "paddw %%mm3,%%mm0\n\t" \
+  "pmulhw %%mm7,%%mm3\n\t" \
+  "movq "OC_I(2)",%%mm2\n\t" \
+  "pmulhw %%mm1,%%mm7\n\t" \
+  "paddw %%mm1,%%mm5\n\t" \
+  "movq %%mm2,%%mm1\n\t" \
+  "pmulhw "OC_C(2)",%%mm2\n\t" \
+  "psubw %%mm5,%%mm3\n\t" \
+  "movq "OC_J(6)",%%mm5\n\t" \
+  "paddw %%mm7,%%mm0\n\t" \
+  "movq %%mm5,%%mm7\n\t" \
+  "psubw %%mm4,%%mm0\n\t" \
+  "pmulhw "OC_C(2)",%%mm5\n\t" \
+  "paddw %%mm1,%%mm2\n\t" \
+  "pmulhw "OC_C(6)",%%mm1\n\t" \
+  "paddw %%mm4,%%mm4\n\t" \
+  "paddw %%mm0,%%mm4\n\t" \
+  "psubw %%mm6,%%mm3\n\t" \
+  "paddw %%mm7,%%mm5\n\t" \
+  "paddw %%mm6,%%mm6\n\t" \
+  "pmulhw "OC_C(6)",%%mm7\n\t" \
+  "paddw %%mm3,%%mm6\n\t" \
+  "movq %%mm4,"OC_I(1)"\n\t" \
+  "psubw %%mm5,%%mm1\n\t" \
+  "movq "OC_C(4)",%%mm4\n\t" \
+  "movq %%mm3,%%mm5\n\t" \
+  "pmulhw %%mm4,%%mm3\n\t" \
+  "paddw %%mm2,%%mm7\n\t" \
+  "movq %%mm6,"OC_I(2)"\n\t" \
+  "movq %%mm0,%%mm2\n\t" \
+  "movq "OC_I(0)",%%mm6\n\t" \
+  "pmulhw %%mm4,%%mm0\n\t" \
+  "paddw %%mm3,%%mm5\n\t" \
+  "movq "OC_J(4)",%%mm3\n\t" \
+  "psubw %%mm1,%%mm5\n\t" \
+  "paddw %%mm0,%%mm2\n\t" \
+  "psubw %%mm3,%%mm6\n\t" \
+  "movq %%mm6,%%mm0\n\t" \
+  "pmulhw %%mm4,%%mm6\n\t" \
+  "paddw %%mm3,%%mm3\n\t" \
+  "paddw %%mm1,%%mm1\n\t" \
+  "paddw %%mm0,%%mm3\n\t" \
+  "paddw %%mm5,%%mm1\n\t" \
+  "pmulhw %%mm3,%%mm4\n\t" \
+  "paddw %%mm0,%%mm6\n\t" \
+  "psubw %%mm2,%%mm6\n\t" \
+  "paddw %%mm2,%%mm2\n\t" \
+  "movq "OC_I(1)",%%mm0\n\t" \
+  "paddw %%mm6,%%mm2\n\t" \
+  "paddw %%mm3,%%mm4\n\t" \
+  "psubw %%mm1,%%mm2\n\t" \
+  "#end OC_IDCT_BEGIN\n\t" \
+
+/*38+8=46 cycles.*/
+#define OC_ROW_IDCT \
+  "#OC_ROW_IDCT\n" \
+  OC_IDCT_BEGIN \
+  /*r3=D'*/ \
+  "movq "OC_I(2)",%%mm3\n\t" \
+  /*r4=E'=E-G*/ \
+  "psubw %%mm7,%%mm4\n\t" \
+  /*r1=H'+H'*/ \
+  "paddw %%mm1,%%mm1\n\t" \
+  /*r7=G+G*/ \
+  "paddw %%mm7,%%mm7\n\t" \
+  /*r1=R1=A''+H'*/ \
+  "paddw %%mm2,%%mm1\n\t" \
+  /*r7=G'=E+G*/ \
+  "paddw %%mm4,%%mm7\n\t" \
+  /*r4=R4=E'-D'*/ \
+  "psubw %%mm3,%%mm4\n\t" \
+  "paddw %%mm3,%%mm3\n\t" \
+  /*r6=R6=F'-B''*/ \
+  "psubw %%mm5,%%mm6\n\t" \
+  "paddw %%mm5,%%mm5\n\t" \
+  /*r3=R3=E'+D'*/ \
+  "paddw %%mm4,%%mm3\n\t" \
+  /*r5=R5=F'+B''*/ \
+  "paddw %%mm6,%%mm5\n\t" \
+  /*r7=R7=G'-C'*/ \
+  "psubw %%mm0,%%mm7\n\t" \
+  "paddw %%mm0,%%mm0\n\t" \
+  /*Save R1.*/ \
+  "movq %%mm1,"OC_I(1)"\n\t" \
+  /*r0=R0=G.+C.*/ \
+  "paddw %%mm7,%%mm0\n\t" \
+  "#end OC_ROW_IDCT\n\t" \
+
+/*The following macro does two 4x4 transposes in place.
+  At entry, we assume:
+    r0 = a3 a2 a1 a0
+  I(1) = b3 b2 b1 b0
+    r2 = c3 c2 c1 c0
+    r3 = d3 d2 d1 d0
+
+    r4 = e3 e2 e1 e0
+    r5 = f3 f2 f1 f0
+    r6 = g3 g2 g1 g0
+    r7 = h3 h2 h1 h0
+
+  At exit, we have:
+  I(0) = d0 c0 b0 a0
+  I(1) = d1 c1 b1 a1
+  I(2) = d2 c2 b2 a2
+  I(3) = d3 c3 b3 a3
+
+  J(4) = h0 g0 f0 e0
+  J(5) = h1 g1 f1 e1
+  J(6) = h2 g2 f2 e2
+  J(7) = h3 g3 f3 e3
+
+  I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
+  J(4) J(5) J(6) J(7) is the transpose of r4  r5  r6 r7.
+
+  Since r1 is free at entry, we calculate the Js first.*/
+/*19 cycles.*/
+#define OC_TRANSPOSE \
+  "#OC_TRANSPOSE\n\t" \
+  "movq %%mm4,%%mm1\n\t" \
+  "punpcklwd %%mm5,%%mm4\n\t" \
+  "movq %%mm0,"OC_I(0)"\n\t" \
+  "punpckhwd %%mm5,%%mm1\n\t" \
+  "movq %%mm6,%%mm0\n\t" \
+  "punpcklwd %%mm7,%%mm6\n\t" \
+  "movq %%mm4,%%mm5\n\t" \
+  "punpckldq %%mm6,%%mm4\n\t" \
+  "punpckhdq %%mm6,%%mm5\n\t" \
+  "movq %%mm1,%%mm6\n\t" \
+  "movq %%mm4,"OC_J(4)"\n\t" \
+  "punpckhwd %%mm7,%%mm0\n\t" \
+  "movq %%mm5,"OC_J(5)"\n\t" \
+  "punpckhdq %%mm0,%%mm6\n\t" \
+  "movq "OC_I(0)",%%mm4\n\t" \
+  "punpckldq %%mm0,%%mm1\n\t" \
+  "movq "OC_I(1)",%%mm5\n\t" \
+  "movq %%mm4,%%mm0\n\t" \
+  "movq %%mm6,"OC_J(7)"\n\t" \
+  "punpcklwd %%mm5,%%mm0\n\t" \
+  "movq %%mm1,"OC_J(6)"\n\t" \
+  "punpckhwd %%mm5,%%mm4\n\t" \
+  "movq %%mm2,%%mm5\n\t" \
+  "punpcklwd %%mm3,%%mm2\n\t" \
+  "movq %%mm0,%%mm1\n\t" \
+  "punpckldq %%mm2,%%mm0\n\t" \
+  "punpckhdq %%mm2,%%mm1\n\t" \
+  "movq %%mm4,%%mm2\n\t" \
+  "movq %%mm0,"OC_I(0)"\n\t" \
+  "punpckhwd %%mm3,%%mm5\n\t" \
+  "movq %%mm1,"OC_I(1)"\n\t" \
+  "punpckhdq %%mm5,%%mm4\n\t" \
+  "punpckldq %%mm5,%%mm2\n\t" \
+  "movq %%mm4,"OC_I(3)"\n\t" \
+  "movq %%mm2,"OC_I(2)"\n\t" \
+  "#end OC_TRANSPOSE\n\t" \
+
+/*38+19=57 cycles.*/
+#define OC_COLUMN_IDCT \
+  "#OC_COLUMN_IDCT\n" \
+  OC_IDCT_BEGIN \
+  "paddw "OC_8",%%mm2\n\t" \
+  /*r1=H'+H'*/ \
+  "paddw %%mm1,%%mm1\n\t" \
+  /*r1=R1=A''+H'*/ \
+  "paddw %%mm2,%%mm1\n\t" \
+  /*r2=NR2*/ \
+  "psraw $4,%%mm2\n\t" \
+  /*r4=E'=E-G*/ \
+  "psubw %%mm7,%%mm4\n\t" \
+  /*r1=NR1*/ \
+  "psraw $4,%%mm1\n\t" \
+  /*r3=D'*/ \
+  "movq "OC_I(2)",%%mm3\n\t" \
+  /*r7=G+G*/ \
+  "paddw %%mm7,%%mm7\n\t" \
+  /*Store NR2 at I(2).*/ \
+  "movq %%mm2,"OC_I(2)"\n\t" \
+  /*r7=G'=E+G*/ \
+  "paddw %%mm4,%%mm7\n\t" \
+  /*Store NR1 at I(1).*/ \
+  "movq %%mm1,"OC_I(1)"\n\t" \
+  /*r4=R4=E'-D'*/ \
+  "psubw %%mm3,%%mm4\n\t" \
+  "paddw "OC_8",%%mm4\n\t" \
+  /*r3=D'+D'*/ \
+  "paddw %%mm3,%%mm3\n\t" \
+  /*r3=R3=E'+D'*/ \
+  "paddw %%mm4,%%mm3\n\t" \
+  /*r4=NR4*/ \
+  "psraw $4,%%mm4\n\t" \
+  /*r6=R6=F'-B''*/ \
+  "psubw %%mm5,%%mm6\n\t" \
+  /*r3=NR3*/ \
+  "psraw $4,%%mm3\n\t" \
+  "paddw "OC_8",%%mm6\n\t" \
+  /*r5=B''+B''*/ \
+  "paddw %%mm5,%%mm5\n\t" \
+  /*r5=R5=F'+B''*/ \
+  "paddw %%mm6,%%mm5\n\t" \
+  /*r6=NR6*/ \
+  "psraw $4,%%mm6\n\t" \
+  /*Store NR4 at J(4).*/ \
+  "movq %%mm4,"OC_J(4)"\n\t" \
+  /*r5=NR5*/ \
+  "psraw $4,%%mm5\n\t" \
+  /*Store NR3 at I(3).*/ \
+  "movq %%mm3,"OC_I(3)"\n\t" \
+  /*r7=R7=G'-C'*/ \
+  "psubw %%mm0,%%mm7\n\t" \
+  "paddw "OC_8",%%mm7\n\t" \
+  /*r0=C'+C'*/ \
+  "paddw %%mm0,%%mm0\n\t" \
+  /*r0=R0=G'+C'*/ \
+  "paddw %%mm7,%%mm0\n\t" \
+  /*r7=NR7*/ \
+  "psraw $4,%%mm7\n\t" \
+  /*Store NR6 at J(6).*/ \
+  "movq %%mm6,"OC_J(6)"\n\t" \
+  /*r0=NR0*/ \
+  "psraw $4,%%mm0\n\t" \
+  /*Store NR5 at J(5).*/ \
+  "movq %%mm5,"OC_J(5)"\n\t" \
+  /*Store NR7 at J(7).*/ \
+  "movq %%mm7,"OC_J(7)"\n\t" \
+  /*Store NR0 at I(0).*/ \
+  "movq %%mm0,"OC_I(0)"\n\t" \
+  "#end OC_COLUMN_IDCT\n\t" \
+
+#define OC_MID(_m,_i) OC_M2STR(_m+(_i)*8)"(%[c])"
+#define OC_C(_i)      OC_MID(OC_COSINE_OFFSET,_i-1)
+#define OC_8          OC_MID(OC_EIGHT_OFFSET,0)
+
+static void oc_idct8x8_slow(ogg_int16_t _y[64]){
+  /*This routine accepts an 8x8 matrix, but in partially transposed form.
+    Every 4x4 block is transposed.*/
+  __asm__ __volatile__(
+#define OC_I(_k)      OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k)      OC_M2STR(((_k-4)*16)+8)"(%[y])"
+    OC_ROW_IDCT
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      OC_M2STR((_k*16)+64)"(%[y])"
+#define OC_J(_k)      OC_M2STR(((_k-4)*16)+72)"(%[y])"
+    OC_ROW_IDCT
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k)      OC_I(_k)
+    OC_COLUMN_IDCT
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      OC_M2STR((_k*16)+8)"(%[y])"
+#define OC_J(_k)      OC_I(_k)
+    OC_COLUMN_IDCT
+#undef  OC_I
+#undef  OC_J
+    :
+    :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS)
+  );
+}
+
+/*25 cycles.*/
+#define OC_IDCT_BEGIN_10 \
+ "#OC_IDCT_BEGIN_10\n\t" \
+ "movq "OC_I(3)",%%mm2\n\t" \
+ "nop\n\t" \
+ "movq "OC_C(3)",%%mm6\n\t" \
+ "movq %%mm2,%%mm4\n\t" \
+ "movq "OC_C(5)",%%mm1\n\t" \
+ "pmulhw %%mm6,%%mm4\n\t" \
+ "movq "OC_I(1)",%%mm3\n\t" \
+ "pmulhw %%mm2,%%mm1\n\t" \
+ "movq "OC_C(1)",%%mm0\n\t" \
+ "paddw %%mm2,%%mm4\n\t" \
+ "pxor %%mm6,%%mm6\n\t" \
+ "paddw %%mm1,%%mm2\n\t" \
+ "movq "OC_I(2)",%%mm5\n\t" \
+ "pmulhw %%mm3,%%mm0\n\t" \
+ "movq %%mm5,%%mm1\n\t" \
+ "paddw %%mm3,%%mm0\n\t" \
+ "pmulhw "OC_C(7)",%%mm3\n\t" \
+ "psubw %%mm2,%%mm6\n\t" \
+ "pmulhw "OC_C(2)",%%mm5\n\t" \
+ "psubw %%mm4,%%mm0\n\t" \
+ "movq "OC_I(2)",%%mm7\n\t" \
+ "paddw %%mm4,%%mm4\n\t" \
+ "paddw %%mm5,%%mm7\n\t" \
+ "paddw %%mm0,%%mm4\n\t" \
+ "pmulhw "OC_C(6)",%%mm1\n\t" \
+ "psubw %%mm6,%%mm3\n\t" \
+ "movq %%mm4,"OC_I(1)"\n\t" \
+ "paddw %%mm6,%%mm6\n\t" \
+ "movq "OC_C(4)",%%mm4\n\t" \
+ "paddw %%mm3,%%mm6\n\t" \
+ "movq %%mm3,%%mm5\n\t" \
+ "pmulhw %%mm4,%%mm3\n\t" \
+ "movq %%mm6,"OC_I(2)"\n\t" \
+ "movq %%mm0,%%mm2\n\t" \
+ "movq "OC_I(0)",%%mm6\n\t" \
+ "pmulhw %%mm4,%%mm0\n\t" \
+ "paddw %%mm3,%%mm5\n\t" \
+ "paddw %%mm0,%%mm2\n\t" \
+ "psubw %%mm1,%%mm5\n\t" \
+ "pmulhw %%mm4,%%mm6\n\t" \
+ "paddw "OC_I(0)",%%mm6\n\t" \
+ "paddw %%mm1,%%mm1\n\t" \
+ "movq %%mm6,%%mm4\n\t" \
+ "paddw %%mm5,%%mm1\n\t" \
+ "psubw %%mm2,%%mm6\n\t" \
+ "paddw %%mm2,%%mm2\n\t" \
+ "movq "OC_I(1)",%%mm0\n\t" \
+ "paddw %%mm6,%%mm2\n\t" \
+ "psubw %%mm1,%%mm2\n\t" \
+ "nop\n\t" \
+ "#end OC_IDCT_BEGIN_10\n\t" \
+
+/*25+8=33 cycles.*/
+#define OC_ROW_IDCT_10 \
+ "#OC_ROW_IDCT_10\n\t" \
+ OC_IDCT_BEGIN_10 \
+ /*r3=D'*/ \
+ "movq "OC_I(2)",%%mm3\n\t" \
+ /*r4=E'=E-G*/ \
+ "psubw %%mm7,%%mm4\n\t" \
+ /*r1=H'+H'*/ \
+ "paddw %%mm1,%%mm1\n\t" \
+ /*r7=G+G*/ \
+ "paddw %%mm7,%%mm7\n\t" \
+ /*r1=R1=A''+H'*/ \
+ "paddw %%mm2,%%mm1\n\t" \
+ /*r7=G'=E+G*/ \
+ "paddw %%mm4,%%mm7\n\t" \
+ /*r4=R4=E'-D'*/ \
+ "psubw %%mm3,%%mm4\n\t" \
+ "paddw %%mm3,%%mm3\n\t" \
+ /*r6=R6=F'-B''*/ \
+ "psubw %%mm5,%%mm6\n\t" \
+ "paddw %%mm5,%%mm5\n\t" \
+ /*r3=R3=E'+D'*/ \
+ "paddw %%mm4,%%mm3\n\t" \
+ /*r5=R5=F'+B''*/ \
+ "paddw %%mm6,%%mm5\n\t" \
+ /*r7=R7=G'-C'*/ \
+ "psubw %%mm0,%%mm7\n\t" \
+ "paddw %%mm0,%%mm0\n\t" \
+ /*Save R1.*/ \
+ "movq %%mm1,"OC_I(1)"\n\t" \
+ /*r0=R0=G'+C'*/ \
+ "paddw %%mm7,%%mm0\n\t" \
+ "#end OC_ROW_IDCT_10\n\t" \
+
+/*25+19=44 cycles'*/
+#define OC_COLUMN_IDCT_10 \
+ "#OC_COLUMN_IDCT_10\n\t" \
+ OC_IDCT_BEGIN_10 \
+ "paddw "OC_8",%%mm2\n\t" \
+ /*r1=H'+H'*/ \
+ "paddw %%mm1,%%mm1\n\t" \
+ /*r1=R1=A''+H'*/ \
+ "paddw %%mm2,%%mm1\n\t" \
+ /*r2=NR2*/ \
+ "psraw $4,%%mm2\n\t" \
+ /*r4=E'=E-G*/ \
+ "psubw %%mm7,%%mm4\n\t" \
+ /*r1=NR1*/ \
+ "psraw $4,%%mm1\n\t" \
+ /*r3=D'*/ \
+ "movq "OC_I(2)",%%mm3\n\t" \
+ /*r7=G+G*/ \
+ "paddw %%mm7,%%mm7\n\t" \
+ /*Store NR2 at I(2).*/ \
+ "movq %%mm2,"OC_I(2)"\n\t" \
+ /*r7=G'=E+G*/ \
+ "paddw %%mm4,%%mm7\n\t" \
+ /*Store NR1 at I(1).*/ \
+ "movq %%mm1,"OC_I(1)"\n\t" \
+ /*r4=R4=E'-D'*/ \
+ "psubw %%mm3,%%mm4\n\t" \
+ "paddw "OC_8",%%mm4\n\t" \
+ /*r3=D'+D'*/ \
+ "paddw %%mm3,%%mm3\n\t" \
+ /*r3=R3=E'+D'*/ \
+ "paddw %%mm4,%%mm3\n\t" \
+ /*r4=NR4*/ \
+ "psraw $4,%%mm4\n\t" \
+ /*r6=R6=F'-B''*/ \
+ "psubw %%mm5,%%mm6\n\t" \
+ /*r3=NR3*/ \
+ "psraw $4,%%mm3\n\t" \
+ "paddw "OC_8",%%mm6\n\t" \
+ /*r5=B''+B''*/ \
+ "paddw %%mm5,%%mm5\n\t" \
+ /*r5=R5=F'+B''*/ \
+ "paddw %%mm6,%%mm5\n\t" \
+ /*r6=NR6*/ \
+ "psraw $4,%%mm6\n\t" \
+ /*Store NR4 at J(4).*/ \
+ "movq %%mm4,"OC_J(4)"\n\t" \
+ /*r5=NR5*/ \
+ "psraw $4,%%mm5\n\t" \
+ /*Store NR3 at I(3).*/ \
+ "movq %%mm3,"OC_I(3)"\n\t" \
+ /*r7=R7=G'-C'*/ \
+ "psubw %%mm0,%%mm7\n\t" \
+ "paddw "OC_8",%%mm7\n\t" \
+ /*r0=C'+C'*/ \
+ "paddw %%mm0,%%mm0\n\t" \
+ /*r0=R0=G'+C'*/ \
+ "paddw %%mm7,%%mm0\n\t" \
+ /*r7=NR7*/ \
+ "psraw $4,%%mm7\n\t" \
+ /*Store NR6 at J(6).*/ \
+ "movq %%mm6,"OC_J(6)"\n\t" \
+ /*r0=NR0*/ \
+ "psraw $4,%%mm0\n\t" \
+ /*Store NR5 at J(5).*/ \
+ "movq %%mm5,"OC_J(5)"\n\t" \
+ /*Store NR7 at J(7).*/ \
+ "movq %%mm7,"OC_J(7)"\n\t" \
+ /*Store NR0 at I(0).*/ \
+ "movq %%mm0,"OC_I(0)"\n\t" \
+ "#end OC_COLUMN_IDCT_10\n\t" \
+
+static void oc_idct8x8_10(ogg_int16_t _y[64]){
+  __asm__ __volatile__(
+#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])"
+    /*Done with dequant, descramble, and partial transpose.
+      Now do the iDCT itself.*/
+    OC_ROW_IDCT_10
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k) OC_I(_k)
+    OC_COLUMN_IDCT_10
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])"
+#define OC_J(_k) OC_I(_k)
+    OC_COLUMN_IDCT_10
+#undef  OC_I
+#undef  OC_J
+    :
+    :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS)
+  );
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.*/
+void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){
+  /*_last_zzi is subtly different from an actual count of the number of
+     coefficients we decoded for this block.
+    It contains the value of zzi BEFORE the final token in the block was
+     decoded.
+    In most cases this is an EOB token (the continuation of an EOB run from a
+     previous block counts), and so this is the same as the coefficient count.
+    However, in the case that the last token was NOT an EOB token, but filled
+     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
+    Provided the last token was not a pure zero run, the minimum value it can
+     be is 46, and so that doesn't affect any of the cases in this routine.
+    However, if the last token WAS a pure zero run of length 63, then _last_zzi
+     will be 1 while the number of coefficients decoded is 64.
+    Thus, we will trigger the following special case, where the real
+     coefficient count would not.
+    Note also that a zero run of length 64 will give _last_zzi a value of 0,
+     but we still process the DC coefficient, which might have a non-zero value
+     due to DC prediction.
+    Although convoluted, this is arguably the correct behavior: it allows us to
+     use a smaller transform when the block ends with a long zero run instead
+     of a normal EOB token.
+    It could be smarter... multiple separate zero runs at the end of a block
+     will fool it, but an encoder that generates these really deserves what it
+     gets.
+    Needless to say we inherited this approach from VP3.*/
+  /*Then perform the iDCT.*/
+  if(_last_zzi<10)oc_idct8x8_10(_y);
+  else oc_idct8x8_slow(_y);
+}
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86/mmxloop.h b/engine/code/libtheora-1.1.1/lib/x86/mmxloop.h
new file mode 100644
index 00000000..2e870c79
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86/mmxloop.h
@@ -0,0 +1,215 @@
+#if !defined(_x86_mmxloop_H)
+# define _x86_mmxloop_H (1)
+# include <stddef.h>
+# include "x86int.h"
+
+#if defined(OC_X86_ASM)
+
+/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}.
+  On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and
+   mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/
+#define OC_LOOP_FILTER8_MMX \
+ "#OC_LOOP_FILTER8_MMX\n\t" \
+ /*mm7=0*/ \
+ "pxor %%mm7,%%mm7\n\t" \
+ /*mm6:mm0={a0,...,a7}*/ \
+ "movq %%mm0,%%mm6\n\t" \
+ "punpcklbw %%mm7,%%mm0\n\t" \
+ "punpckhbw %%mm7,%%mm6\n\t" \
+ /*mm3:mm5={d0,...,d7}*/ \
+ "movq %%mm3,%%mm5\n\t" \
+ "punpcklbw %%mm7,%%mm3\n\t" \
+ "punpckhbw %%mm7,%%mm5\n\t" \
+ /*mm6:mm0={a0-d0,...,a7-d7}*/ \
+ "psubw %%mm3,%%mm0\n\t" \
+ "psubw %%mm5,%%mm6\n\t" \
+ /*mm3:mm1={b0,...,b7}*/ \
+ "movq %%mm1,%%mm3\n\t" \
+ "punpcklbw %%mm7,%%mm1\n\t" \
+ "movq %%mm2,%%mm4\n\t" \
+ "punpckhbw %%mm7,%%mm3\n\t" \
+ /*mm5:mm4={c0,...,c7}*/ \
+ "movq %%mm2,%%mm5\n\t" \
+ "punpcklbw %%mm7,%%mm4\n\t" \
+ "punpckhbw %%mm7,%%mm5\n\t" \
+ /*mm7={3}x4 \
+   mm5:mm4={c0-b0,...,c7-b7}*/ \
+ "pcmpeqw %%mm7,%%mm7\n\t" \
+ "psubw %%mm1,%%mm4\n\t" \
+ "psrlw $14,%%mm7\n\t" \
+ "psubw %%mm3,%%mm5\n\t" \
+ /*Scale by 3.*/ \
+ "pmullw %%mm7,%%mm4\n\t" \
+ "pmullw %%mm7,%%mm5\n\t" \
+ /*mm7={4}x4 \
+   mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \
+ "psrlw $1,%%mm7\n\t" \
+ "paddw %%mm0,%%mm4\n\t" \
+ "psllw $2,%%mm7\n\t" \
+ "movq (%[ll]),%%mm0\n\t" \
+ "paddw %%mm6,%%mm5\n\t" \
+ /*R_i has the range [-127,128], so we compute -R_i instead. \
+   mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \
+ "psubw %%mm7,%%mm4\n\t" \
+ "psubw %%mm7,%%mm5\n\t" \
+ "psraw $3,%%mm4\n\t" \
+ "psraw $3,%%mm5\n\t" \
+ "pcmpeqb %%mm7,%%mm7\n\t" \
+ "packsswb %%mm5,%%mm4\n\t" \
+ "pxor %%mm6,%%mm6\n\t" \
+ "pxor %%mm7,%%mm4\n\t" \
+ "packuswb %%mm3,%%mm1\n\t" \
+ /*Now compute lflim of -mm4 cf. Section 7.10 of the sepc.*/ \
+ /*There's no unsigned byte+signed byte with unsigned saturation op code, so \
+    we have to split things by sign (the other option is to work in 16 bits, \
+    but working in 8 bits gives much better parallelism). \
+   We compute abs(R_i), but save a mask of which terms were negative in mm6. \
+   Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \
+   Finally, we split mm4 into positive and negative pieces using the mask in \
+    mm6, and add and subtract them as appropriate.*/ \
+ /*mm4=abs(-R_i)*/ \
+ /*mm7=255-2*L*/ \
+ "pcmpgtb %%mm4,%%mm6\n\t" \
+ "psubb %%mm0,%%mm7\n\t" \
+ "pxor %%mm6,%%mm4\n\t" \
+ "psubb %%mm0,%%mm7\n\t" \
+ "psubb %%mm6,%%mm4\n\t" \
+ /*mm7=255-max(2*L-abs(R_i),0)*/ \
+ "paddusb %%mm4,%%mm7\n\t" \
+ /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \
+ "paddusb %%mm7,%%mm4\n\t" \
+ "psubusb %%mm7,%%mm4\n\t" \
+ /*Now split mm4 by the original sign of -R_i.*/ \
+ "movq %%mm4,%%mm5\n\t" \
+ "pand %%mm6,%%mm4\n\t" \
+ "pandn %%mm5,%%mm6\n\t" \
+ /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \
+ /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \
+ "paddusb %%mm4,%%mm1\n\t" \
+ "psubusb %%mm4,%%mm2\n\t" \
+ "psubusb %%mm6,%%mm1\n\t" \
+ "paddusb %%mm6,%%mm2\n\t" \
+
+#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \
+  do{ \
+    ptrdiff_t ystride3__; \
+    __asm__ __volatile__( \
+      /*mm0={a0,...,a7}*/ \
+      "movq (%[pix]),%%mm0\n\t" \
+      /*ystride3=_ystride*3*/ \
+      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
+      /*mm3={d0,...,d7}*/ \
+      "movq (%[pix],%[ystride3]),%%mm3\n\t" \
+      /*mm1={b0,...,b7}*/ \
+      "movq (%[pix],%[ystride]),%%mm1\n\t" \
+      /*mm2={c0,...,c7}*/ \
+      "movq (%[pix],%[ystride],2),%%mm2\n\t" \
+      OC_LOOP_FILTER8_MMX \
+      /*Write it back out.*/ \
+      "movq %%mm1,(%[pix],%[ystride])\n\t" \
+      "movq %%mm2,(%[pix],%[ystride],2)\n\t" \
+      :[ystride3]"=&r"(ystride3__) \
+      :[pix]"r"(_pix-_ystride*2),[ystride]"r"((ptrdiff_t)(_ystride)), \
+       [ll]"r"(_ll) \
+      :"memory" \
+    ); \
+  } \
+  while(0)
+
+#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \
+  do{ \
+    unsigned char *pix__; \
+    ptrdiff_t      ystride3__; \
+    ptrdiff_t      d__; \
+    pix__=(_pix)-2; \
+    __asm__ __volatile__( \
+      /*x x x x d0 c0 b0 a0*/ \
+      "movd (%[pix]),%%mm0\n\t" \
+      /*x x x x d1 c1 b1 a1*/ \
+      "movd (%[pix],%[ystride]),%%mm1\n\t" \
+      /*ystride3=_ystride*3*/ \
+      "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \
+      /*x x x x d2 c2 b2 a2*/ \
+      "movd (%[pix],%[ystride],2),%%mm2\n\t" \
+      /*x x x x d3 c3 b3 a3*/ \
+      "lea (%[pix],%[ystride],4),%[d]\n\t" \
+      "movd (%[pix],%[ystride3]),%%mm3\n\t" \
+      /*x x x x d4 c4 b4 a4*/ \
+      "movd (%[d]),%%mm4\n\t" \
+      /*x x x x d5 c5 b5 a5*/ \
+      "movd (%[d],%[ystride]),%%mm5\n\t" \
+      /*x x x x d6 c6 b6 a6*/ \
+      "movd (%[d],%[ystride],2),%%mm6\n\t" \
+      /*x x x x d7 c7 b7 a7*/ \
+      "movd (%[d],%[ystride3]),%%mm7\n\t" \
+      /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \
+      "punpcklbw %%mm1,%%mm0\n\t" \
+      /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \
+      "punpcklbw %%mm3,%%mm2\n\t" \
+      /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \
+      "movq %%mm0,%%mm3\n\t" \
+      /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \
+      "punpcklwd %%mm2,%%mm0\n\t" \
+      /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \
+      "punpckhwd %%mm2,%%mm3\n\t" \
+      /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \
+      "movq %%mm0,%%mm1\n\t" \
+      /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \
+      "punpcklbw %%mm5,%%mm4\n\t" \
+      /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \
+      "punpcklbw %%mm7,%%mm6\n\t" \
+      /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \
+      "movq %%mm4,%%mm5\n\t" \
+      /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \
+      "punpcklwd %%mm6,%%mm4\n\t" \
+      /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \
+      "punpckhwd %%mm6,%%mm5\n\t" \
+      /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \
+      "movq %%mm3,%%mm2\n\t" \
+      /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \
+      "punpckldq %%mm4,%%mm0\n\t" \
+      /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \
+      "punpckhdq %%mm4,%%mm1\n\t" \
+      /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \
+      "punpckldq %%mm5,%%mm2\n\t" \
+      /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \
+      "punpckhdq %%mm5,%%mm3\n\t" \
+      OC_LOOP_FILTER8_MMX \
+      /*mm2={b0+R_0'',...,b7+R_7''}*/ \
+      "movq %%mm1,%%mm0\n\t" \
+      /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \
+      "punpcklbw %%mm2,%%mm1\n\t" \
+      /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \
+      "punpckhbw %%mm2,%%mm0\n\t" \
+      /*[d]=c1 b1 c0 b0*/ \
+      "movd %%mm1,%[d]\n\t" \
+      "movw %w[d],1(%[pix])\n\t" \
+      "psrlq $32,%%mm1\n\t" \
+      "shr $16,%[d]\n\t" \
+      "movw %w[d],1(%[pix],%[ystride])\n\t" \
+      /*[d]=c3 b3 c2 b2*/ \
+      "movd %%mm1,%[d]\n\t" \
+      "movw %w[d],1(%[pix],%[ystride],2)\n\t" \
+      "shr $16,%[d]\n\t" \
+      "movw %w[d],1(%[pix],%[ystride3])\n\t" \
+      "lea (%[pix],%[ystride],4),%[pix]\n\t" \
+      /*[d]=c5 b5 c4 b4*/ \
+      "movd %%mm0,%[d]\n\t" \
+      "movw %w[d],1(%[pix])\n\t" \
+      "psrlq $32,%%mm0\n\t" \
+      "shr $16,%[d]\n\t" \
+      "movw %w[d],1(%[pix],%[ystride])\n\t" \
+      /*[d]=c7 b7 c6 b6*/ \
+      "movd %%mm0,%[d]\n\t" \
+      "movw %w[d],1(%[pix],%[ystride],2)\n\t" \
+      "shr $16,%[d]\n\t" \
+      "movw %w[d],1(%[pix],%[ystride3])\n\t" \
+      :[pix]"+r"(pix__),[ystride3]"=&r"(ystride3__),[d]"=&r"(d__) \
+      :[ystride]"r"((ptrdiff_t)(_ystride)),[ll]"r"(_ll) \
+      :"memory" \
+    ); \
+  } \
+  while(0)
+
+# endif
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86/mmxstate.c b/engine/code/libtheora-1.1.1/lib/x86/mmxstate.c
new file mode 100644
index 00000000..808b0a78
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86/mmxstate.c
@@ -0,0 +1,188 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxstate.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+/*MMX acceleration of complete fragment reconstruction algorithm.
+  Originally written by Rudolf Marek.*/
+#include <string.h>
+#include "x86int.h"
+#include "mmxfrag.h"
+#include "mmxloop.h"
+
+#if defined(OC_X86_ASM)
+
+void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
+  unsigned char *dst;
+  ptrdiff_t      frag_buf_off;
+  int            ystride;
+  int            mb_mode;
+  /*Apply the inverse transform.*/
+  /*Special case only having a DC component.*/
+  if(_last_zzi<2){
+    /*Note that this value must be unsigned, to keep the __asm__ block from
+       sign-extending it when it puts it in a register.*/
+    ogg_uint16_t p;
+    /*We round this dequant product (and not any of the others) because there's
+       no iDCT rounding.*/
+    p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    /*Fill _dct_coeffs with p.*/
+    __asm__ __volatile__(
+      /*mm0=0000 0000 0000 AAAA*/
+      "movd %[p],%%mm0\n\t"
+      /*mm0=0000 0000 AAAA AAAA*/
+      "punpcklwd %%mm0,%%mm0\n\t"
+      /*mm0=AAAA AAAA AAAA AAAA*/
+      "punpckldq %%mm0,%%mm0\n\t"
+      "movq %%mm0,(%[y])\n\t"
+      "movq %%mm0,8(%[y])\n\t"
+      "movq %%mm0,16(%[y])\n\t"
+      "movq %%mm0,24(%[y])\n\t"
+      "movq %%mm0,32(%[y])\n\t"
+      "movq %%mm0,40(%[y])\n\t"
+      "movq %%mm0,48(%[y])\n\t"
+      "movq %%mm0,56(%[y])\n\t"
+      "movq %%mm0,64(%[y])\n\t"
+      "movq %%mm0,72(%[y])\n\t"
+      "movq %%mm0,80(%[y])\n\t"
+      "movq %%mm0,88(%[y])\n\t"
+      "movq %%mm0,96(%[y])\n\t"
+      "movq %%mm0,104(%[y])\n\t"
+      "movq %%mm0,112(%[y])\n\t"
+      "movq %%mm0,120(%[y])\n\t"
+      :
+      :[y]"r"(_dct_coeffs),[p]"r"((unsigned)p)
+      :"memory"
+    );
+  }
+  else{
+    /*Dequantize the DC coefficient.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8_mmx(_dct_coeffs,_last_zzi);
+  }
+  /*Fill in the target buffer.*/
+  frag_buf_off=_state->frag_buf_offs[_fragi];
+  mb_mode=_state->frags[_fragi].mb_mode;
+  ystride=_state->ref_ystride[_pli];
+  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+  if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs);
+  else{
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    ref=
+     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
+     +frag_buf_off;
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+      oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
+       _dct_coeffs);
+    }
+    else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs);
+  }
+}
+
+/*We copy these entire function to inline the actual MMX routines so that we
+   use only a single indirect call.*/
+
+/*Copies the fragments specified by the lists of fragment indices from one
+   frame to another.
+  _fragis:    A pointer to a list of fragment indices.
+  _nfragis:   The number of fragment indices to copy.
+  _dst_frame: The reference frame to copy to.
+  _src_frame: The reference frame to copy from.
+  _pli:       The color plane the fragments lie in.*/
+void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  const ptrdiff_t     *frag_buf_offs;
+  const unsigned char *src_frame_data;
+  unsigned char       *dst_frame_data;
+  ptrdiff_t            fragii;
+  int                  ystride;
+  dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
+  src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
+  ystride=_state->ref_ystride[_pli];
+  frag_buf_offs=_state->frag_buf_offs;
+  for(fragii=0;fragii<_nfragis;fragii++){
+    ptrdiff_t frag_buf_off;
+    frag_buf_off=frag_buf_offs[_fragis[fragii]];
+    OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off,
+     src_frame_data+frag_buf_off,ystride);
+  }
+}
+
+/*Apply the loop filter to a given set of fragment rows in the given plane.
+  The filter may be run on the bottom edge, affecting pixels in the next row of
+   fragments, so this row also needs to be available.
+  _bv:        The bounding values array.
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
+void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
+  OC_ALIGN8(unsigned char   ll[8]);
+  const oc_fragment_plane *fplane;
+  const oc_fragment       *frags;
+  const ptrdiff_t         *frag_buf_offs;
+  unsigned char           *ref_frame_data;
+  ptrdiff_t                fragi_top;
+  ptrdiff_t                fragi_bot;
+  ptrdiff_t                fragi0;
+  ptrdiff_t                fragi0_end;
+  int                      ystride;
+  int                      nhfrags;
+  memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll));
+  fplane=_state->fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  fragi_top=fplane->froffset;
+  fragi_bot=fragi_top+fplane->nfrags;
+  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
+  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
+  ystride=_state->ref_ystride[_pli];
+  frags=_state->frags;
+  frag_buf_offs=_state->frag_buf_offs;
+  ref_frame_data=_state->ref_frame_data[_refi];
+  /*The following loops are constructed somewhat non-intuitively on purpose.
+    The main idea is: if a block boundary has at least one coded fragment on
+     it, the filter is applied to it.
+    However, the order that the filters are applied in matters, and VP3 chose
+     the somewhat strange ordering used below.*/
+  while(fragi0<fragi0_end){
+    ptrdiff_t fragi;
+    ptrdiff_t fragi_end;
+    fragi=fragi0;
+    fragi_end=fragi+nhfrags;
+    while(fragi<fragi_end){
+      if(frags[fragi].coded){
+        unsigned char *ref;
+        ref=ref_frame_data+frag_buf_offs[fragi];
+        if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll);
+        if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll);
+        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
+          OC_LOOP_FILTER_H_MMX(ref+8,ystride,ll);
+        }
+        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
+          OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,ll);
+        }
+      }
+      fragi++;
+    }
+    fragi0+=nhfrags;
+  }
+}
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86/x86int.h b/engine/code/libtheora-1.1.1/lib/x86/x86int.h
new file mode 100644
index 00000000..ede724f5
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86/x86int.h
@@ -0,0 +1,42 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#if !defined(_x86_x86int_H)
+# define _x86_x86int_H (1)
+# include "../internal.h"
+
+void oc_state_vtable_init_x86(oc_theora_state *_state);
+
+void oc_frag_copy_mmx(unsigned char *_dst,
+ const unsigned char *_src,int _ystride);
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
+ const ogg_int16_t *_residue);
+void oc_frag_recon_inter_mmx(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
+void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi);
+void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu_mmx(void);
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86/x86state.c b/engine/code/libtheora-1.1.1/lib/x86/x86state.c
new file mode 100644
index 00000000..a786bec2
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86/x86state.c
@@ -0,0 +1,62 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include "x86int.h"
+
+#if defined(OC_X86_ASM)
+
+#include "../cpu.c"
+
+/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into
+   each quadrant of the destination.*/
+static const unsigned char OC_FZIG_ZAG_MMX[128]={
+   0, 8, 1, 2, 9,16,24,17,
+  10, 3,32,11,18,25, 4,12,
+   5,26,19,40,33,34,41,48,
+  27, 6,13,20,28,21,14, 7,
+  56,49,42,35,43,50,57,36,
+  15,22,29,30,23,44,37,58,
+  51,59,38,45,52,31,60,53,
+  46,39,47,54,61,62,55,63,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+};
+
+void oc_state_vtable_init_x86(oc_theora_state *_state){
+  _state->cpu_flags=oc_cpu_flags_get();
+  if(_state->cpu_flags&OC_CPU_X86_MMX){
+    _state->opt_vtable.frag_copy=oc_frag_copy_mmx;
+    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
+    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
+    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
+    _state->opt_vtable.idct8x8=oc_idct8x8_mmx;
+    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
+    _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx;
+    _state->opt_vtable.state_loop_filter_frag_rows=
+     oc_state_loop_filter_frag_rows_mmx;
+    _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
+    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX;
+  }
+  else oc_state_vtable_init_c(_state);
+}
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.c b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.c
new file mode 100644
index 00000000..4eb2084d
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.c
@@ -0,0 +1,337 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxfrag.c 16578 2009-09-25 19:50:48Z cristianadam $
+
+ ********************************************************************/
+
+/*MMX acceleration of fragment reconstruction for motion compensation.
+  Originally written by Rudolf Marek.
+  Additional optimization by Nils Pipenbrinck.
+  Note: Loops are unrolled for best performance.
+  The iteration each instruction belongs to is marked in the comments as #i.*/
+#include <stddef.h>
+#include "x86int.h"
+#include "mmxfrag.h"
+
+#if defined(OC_X86_ASM)
+
+/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
+   between rows.*/
+void oc_frag_copy_mmx(unsigned char *_dst,
+ const unsigned char *_src,int _ystride){
+#define SRC edx
+#define DST eax
+#define YSTRIDE ecx
+#define YSTRIDE3 esi
+  OC_FRAG_COPY_MMX(_dst,_src,_ystride);
+#undef SRC
+#undef DST
+#undef YSTRIDE
+#undef YSTRIDE3
+}
+
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
+ const ogg_int16_t *_residue){
+  __asm{
+#define DST edx
+#define DST4 esi
+#define YSTRIDE eax
+#define YSTRIDE3 edi
+#define RESIDUE ecx
+    mov DST,_dst
+    mov YSTRIDE,_ystride
+    mov RESIDUE,_residue
+    lea DST4,[DST+YSTRIDE*4]
+    lea YSTRIDE3,[YSTRIDE+YSTRIDE*2]
+    /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/
+    pcmpeqw mm0,mm0
+    /*#0 Load low residue.*/
+    movq mm1,[0*8+RESIDUE]
+    /*#0 Load high residue.*/
+    movq mm2,[1*8+RESIDUE]
+    /*Set mm0 to 0x8000800080008000.*/
+    psllw mm0,15
+    /*#1 Load low residue.*/
+    movq mm3,[2*8+RESIDUE]
+    /*#1 Load high residue.*/
+    movq mm4,[3*8+RESIDUE]
+    /*Set mm0 to 0x0080008000800080.*/
+    psrlw mm0,8
+    /*#2 Load low residue.*/
+    movq mm5,[4*8+RESIDUE]
+    /*#2 Load high residue.*/
+    movq mm6,[5*8+RESIDUE]
+    /*#0 Bias low  residue.*/
+    paddsw mm1,mm0
+    /*#0 Bias high residue.*/
+    paddsw mm2,mm0
+    /*#0 Pack to byte.*/
+    packuswb mm1,mm2
+    /*#1 Bias low  residue.*/
+    paddsw mm3,mm0
+    /*#1 Bias high residue.*/
+    paddsw mm4,mm0
+    /*#1 Pack to byte.*/
+    packuswb mm3,mm4
+    /*#2 Bias low  residue.*/
+    paddsw mm5,mm0
+    /*#2 Bias high residue.*/
+    paddsw mm6,mm0
+    /*#2 Pack to byte.*/
+    packuswb mm5,mm6
+    /*#0 Write row.*/
+    movq [DST],mm1
+    /*#1 Write row.*/
+    movq [DST+YSTRIDE],mm3
+    /*#2 Write row.*/
+    movq [DST+YSTRIDE*2],mm5
+    /*#3 Load low residue.*/
+    movq mm1,[6*8+RESIDUE]
+    /*#3 Load high residue.*/
+    movq mm2,[7*8+RESIDUE]
+    /*#4 Load high residue.*/
+    movq mm3,[8*8+RESIDUE]
+    /*#4 Load high residue.*/
+    movq mm4,[9*8+RESIDUE]
+    /*#5 Load high residue.*/
+    movq mm5,[10*8+RESIDUE]
+    /*#5 Load high residue.*/
+    movq mm6,[11*8+RESIDUE]
+    /*#3 Bias low  residue.*/
+    paddsw mm1,mm0
+    /*#3 Bias high residue.*/
+    paddsw mm2,mm0
+    /*#3 Pack to byte.*/
+    packuswb mm1,mm2
+    /*#4 Bias low  residue.*/
+    paddsw mm3,mm0
+    /*#4 Bias high residue.*/
+    paddsw mm4,mm0
+    /*#4 Pack to byte.*/
+    packuswb mm3,mm4
+    /*#5 Bias low  residue.*/
+    paddsw mm5,mm0
+    /*#5 Bias high residue.*/
+    paddsw mm6,mm0
+    /*#5 Pack to byte.*/
+    packuswb mm5,mm6
+    /*#3 Write row.*/
+    movq [DST+YSTRIDE3],mm1
+    /*#4 Write row.*/
+    movq [DST4],mm3
+    /*#5 Write row.*/
+    movq [DST4+YSTRIDE],mm5
+    /*#6 Load low residue.*/
+    movq mm1,[12*8+RESIDUE]
+    /*#6 Load high residue.*/
+    movq mm2,[13*8+RESIDUE]
+    /*#7 Load low residue.*/
+    movq mm3,[14*8+RESIDUE]
+    /*#7 Load high residue.*/
+    movq mm4,[15*8+RESIDUE]
+    /*#6 Bias low  residue.*/
+    paddsw mm1,mm0
+    /*#6 Bias high residue.*/
+    paddsw mm2,mm0
+    /*#6 Pack to byte.*/
+    packuswb mm1,mm2
+    /*#7 Bias low  residue.*/
+    paddsw mm3,mm0
+    /*#7 Bias high residue.*/
+    paddsw mm4,mm0
+    /*#7 Pack to byte.*/
+    packuswb mm3,mm4
+    /*#6 Write row.*/
+    movq [DST4+YSTRIDE*2],mm1
+    /*#7 Write row.*/
+    movq [DST4+YSTRIDE3],mm3
+#undef DST
+#undef DST4
+#undef YSTRIDE
+#undef YSTRIDE3
+#undef RESIDUE
+  }
+}
+
+void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src,
+ int _ystride,const ogg_int16_t *_residue){
+  int i;
+  /*Zero mm0.*/
+  __asm pxor mm0,mm0;
+  for(i=4;i-->0;){
+    __asm{
+#define DST edx
+#define SRC ecx
+#define YSTRIDE edi
+#define RESIDUE eax
+      mov DST,_dst
+      mov SRC,_src
+      mov YSTRIDE,_ystride
+      mov RESIDUE,_residue
+      /*#0 Load source.*/
+      movq mm3,[SRC]
+      /*#1 Load source.*/
+      movq mm7,[SRC+YSTRIDE]
+      /*#0 Get copy of src.*/
+      movq mm4,mm3
+      /*#0 Expand high source.*/
+      punpckhbw mm4,mm0
+      /*#0 Expand low  source.*/
+      punpcklbw mm3,mm0
+      /*#0 Add residue high.*/
+      paddsw mm4,[8+RESIDUE]
+      /*#1 Get copy of src.*/
+      movq mm2,mm7
+      /*#0 Add residue low.*/
+      paddsw  mm3,[RESIDUE]
+      /*#1 Expand high source.*/
+      punpckhbw mm2,mm0
+      /*#0 Pack final row pixels.*/
+      packuswb mm3,mm4
+      /*#1 Expand low  source.*/
+      punpcklbw mm7,mm0
+      /*#1 Add residue low.*/
+      paddsw mm7,[16+RESIDUE]
+      /*#1 Add residue high.*/
+      paddsw mm2,[24+RESIDUE]
+      /*Advance residue.*/
+      lea RESIDUE,[32+RESIDUE]
+      /*#1 Pack final row pixels.*/
+      packuswb mm7,mm2
+      /*Advance src.*/
+      lea SRC,[SRC+YSTRIDE*2]
+      /*#0 Write row.*/
+      movq [DST],mm3
+      /*#1 Write row.*/
+      movq [DST+YSTRIDE],mm7
+      /*Advance dst.*/
+      lea DST,[DST+YSTRIDE*2]
+      mov _residue,RESIDUE
+      mov _dst,DST
+      mov _src,SRC
+#undef DST
+#undef SRC
+#undef YSTRIDE
+#undef RESIDUE
+    }
+  }
+}
+
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){
+  int i;
+  /*Zero mm7.*/
+  __asm pxor mm7,mm7;
+  for(i=4;i-->0;){
+    __asm{
+#define SRC1 ecx
+#define SRC2 edi
+#define YSTRIDE esi
+#define RESIDUE edx
+#define DST eax
+      mov YSTRIDE,_ystride
+      mov DST,_dst
+      mov RESIDUE,_residue
+      mov SRC1,_src1
+      mov SRC2,_src2
+      /*#0 Load src1.*/
+      movq mm0,[SRC1]
+      /*#0 Load src2.*/
+      movq mm2,[SRC2]
+      /*#0 Copy src1.*/
+      movq mm1,mm0
+      /*#0 Copy src2.*/
+      movq mm3,mm2
+      /*#1 Load src1.*/
+      movq mm4,[SRC1+YSTRIDE]
+      /*#0 Unpack lower src1.*/
+      punpcklbw mm0,mm7
+      /*#1 Load src2.*/
+      movq mm5,[SRC2+YSTRIDE]
+      /*#0 Unpack higher src1.*/
+      punpckhbw mm1,mm7
+      /*#0 Unpack lower src2.*/
+      punpcklbw mm2,mm7
+      /*#0 Unpack higher src2.*/
+      punpckhbw mm3,mm7
+      /*Advance src1 ptr.*/
+      lea SRC1,[SRC1+YSTRIDE*2]
+      /*Advance src2 ptr.*/
+      lea SRC2,[SRC2+YSTRIDE*2]
+      /*#0 Lower src1+src2.*/
+      paddsw mm0,mm2
+      /*#0 Higher src1+src2.*/
+      paddsw mm1,mm3
+      /*#1 Copy src1.*/
+      movq mm2,mm4
+      /*#0 Build lo average.*/
+      psraw mm0,1
+      /*#1 Copy src2.*/
+      movq mm3,mm5
+      /*#1 Unpack lower src1.*/
+      punpcklbw mm4,mm7
+      /*#0 Build hi average.*/
+      psraw mm1,1
+      /*#1 Unpack higher src1.*/
+      punpckhbw mm2,mm7
+      /*#0 low+=residue.*/
+      paddsw mm0,[RESIDUE]
+      /*#1 Unpack lower src2.*/
+      punpcklbw mm5,mm7
+      /*#0 high+=residue.*/
+      paddsw mm1,[8+RESIDUE]
+      /*#1 Unpack higher src2.*/
+      punpckhbw mm3,mm7
+      /*#1 Lower src1+src2.*/
+      paddsw mm5,mm4
+      /*#0 Pack and saturate.*/
+      packuswb mm0,mm1
+      /*#1 Higher src1+src2.*/
+      paddsw mm3,mm2
+      /*#0 Write row.*/
+      movq [DST],mm0
+      /*#1 Build lo average.*/
+      psraw mm5,1
+      /*#1 Build hi average.*/
+      psraw mm3,1
+      /*#1 low+=residue.*/
+      paddsw mm5,[16+RESIDUE]
+      /*#1 high+=residue.*/
+      paddsw mm3,[24+RESIDUE]
+      /*#1 Pack and saturate.*/
+      packuswb  mm5,mm3
+      /*#1 Write row ptr.*/
+      movq [DST+YSTRIDE],mm5
+      /*Advance residue ptr.*/
+      add RESIDUE,32
+      /*Advance dest ptr.*/
+      lea DST,[DST+YSTRIDE*2]
+      mov _dst,DST
+      mov _residue,RESIDUE
+      mov _src1,SRC1
+      mov _src2,SRC2
+#undef SRC1
+#undef SRC2
+#undef YSTRIDE
+#undef RESIDUE
+#undef DST
+    }
+  }
+}
+
+void oc_restore_fpu_mmx(void){
+  __asm emms;
+}
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.h b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.h
new file mode 100644
index 00000000..45ee93e7
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxfrag.h
@@ -0,0 +1,61 @@
+#if !defined(_x86_vc_mmxfrag_H)
+# define _x86_vc_mmxfrag_H (1)
+# include <stddef.h>
+# include "x86int.h"
+
+#if defined(OC_X86_ASM)
+
+/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes
+   between rows.*/
+#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \
+  do{ \
+    const unsigned char *src; \
+    unsigned char       *dst; \
+    src=(_src); \
+    dst=(_dst); \
+    __asm  mov SRC,src \
+    __asm  mov DST,dst \
+    __asm  mov YSTRIDE,_ystride \
+    /*src+0*ystride*/ \
+    __asm  movq mm0,[SRC] \
+    /*src+1*ystride*/ \
+    __asm  movq mm1,[SRC+YSTRIDE] \
+    /*ystride3=ystride*3*/ \
+    __asm  lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \
+    /*src+2*ystride*/ \
+    __asm  movq mm2,[SRC+YSTRIDE*2] \
+    /*src+3*ystride*/ \
+    __asm  movq mm3,[SRC+YSTRIDE3] \
+    /*dst+0*ystride*/ \
+    __asm  movq [DST],mm0 \
+    /*dst+1*ystride*/ \
+    __asm  movq [DST+YSTRIDE],mm1 \
+    /*Pointer to next 4.*/ \
+    __asm  lea SRC,[SRC+YSTRIDE*4] \
+    /*dst+2*ystride*/ \
+    __asm  movq [DST+YSTRIDE*2],mm2 \
+    /*dst+3*ystride*/ \
+    __asm  movq [DST+YSTRIDE3],mm3 \
+    /*Pointer to next 4.*/ \
+    __asm  lea DST,[DST+YSTRIDE*4] \
+    /*src+0*ystride*/ \
+    __asm  movq mm0,[SRC] \
+    /*src+1*ystride*/ \
+    __asm  movq mm1,[SRC+YSTRIDE] \
+    /*src+2*ystride*/ \
+    __asm  movq mm2,[SRC+YSTRIDE*2] \
+    /*src+3*ystride*/ \
+    __asm  movq mm3,[SRC+YSTRIDE3] \
+    /*dst+0*ystride*/ \
+    __asm  movq [DST],mm0 \
+    /*dst+1*ystride*/ \
+    __asm  movq [DST+YSTRIDE],mm1 \
+    /*dst+2*ystride*/ \
+    __asm  movq [DST+YSTRIDE*2],mm2 \
+    /*dst+3*ystride*/ \
+    __asm  movq [DST+YSTRIDE3],mm3 \
+  } \
+  while(0)
+
+# endif
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxidct.c b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxidct.c
new file mode 100644
index 00000000..8f5ff680
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxidct.c
@@ -0,0 +1,562 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+/*MMX acceleration of Theora's iDCT.
+  Originally written by Rudolf Marek, based on code from On2's VP3.*/
+#include "x86int.h"
+#include "../dct.h"
+
+#if defined(OC_X86_ASM)
+
+/*These are offsets into the table of constants below.*/
+/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/
+#define OC_COSINE_OFFSET (0)
+/*A row of 8's.*/
+#define OC_EIGHT_OFFSET  (56)
+
+
+
+/*A table of constants used by the MMX routines.*/
+static const __declspec(align(16))ogg_uint16_t
+ OC_IDCT_CONSTS[(7+1)*4]={
+  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
+  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
+  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
+  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
+  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
+  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
+  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
+  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
+  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
+  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
+  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
+  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
+  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
+  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
+      8,    8,    8,    8
+};
+
+/*38 cycles*/
+#define OC_IDCT_BEGIN __asm{ \
+  __asm movq mm2,OC_I(3) \
+  __asm movq mm6,OC_C(3) \
+  __asm movq mm4,mm2 \
+  __asm movq mm7,OC_J(5) \
+  __asm pmulhw mm4,mm6 \
+  __asm movq mm1,OC_C(5) \
+  __asm pmulhw mm6,mm7 \
+  __asm movq mm5,mm1 \
+  __asm pmulhw mm1,mm2 \
+  __asm movq mm3,OC_I(1) \
+  __asm pmulhw mm5,mm7 \
+  __asm movq mm0,OC_C(1) \
+  __asm paddw mm4,mm2 \
+  __asm paddw mm6,mm7 \
+  __asm paddw mm2,mm1 \
+  __asm movq mm1,OC_J(7) \
+  __asm paddw mm7,mm5 \
+  __asm movq mm5,mm0 \
+  __asm pmulhw mm0,mm3 \
+  __asm paddw mm4,mm7 \
+  __asm pmulhw mm5,mm1 \
+  __asm movq mm7,OC_C(7) \
+  __asm psubw mm6,mm2 \
+  __asm paddw mm0,mm3 \
+  __asm pmulhw mm3,mm7 \
+  __asm movq mm2,OC_I(2) \
+  __asm pmulhw mm7,mm1 \
+  __asm paddw mm5,mm1 \
+  __asm movq mm1,mm2 \
+  __asm pmulhw mm2,OC_C(2) \
+  __asm psubw mm3,mm5 \
+  __asm movq mm5,OC_J(6) \
+  __asm paddw mm0,mm7 \
+  __asm movq mm7,mm5 \
+  __asm psubw mm0,mm4 \
+  __asm pmulhw mm5,OC_C(2) \
+  __asm paddw mm2,mm1 \
+  __asm pmulhw mm1,OC_C(6) \
+  __asm paddw mm4,mm4 \
+  __asm paddw mm4,mm0 \
+  __asm psubw mm3,mm6 \
+  __asm paddw mm5,mm7 \
+  __asm paddw mm6,mm6 \
+  __asm pmulhw mm7,OC_C(6) \
+  __asm paddw mm6,mm3 \
+  __asm movq OC_I(1),mm4 \
+  __asm psubw mm1,mm5 \
+  __asm movq mm4,OC_C(4) \
+  __asm movq mm5,mm3 \
+  __asm pmulhw mm3,mm4 \
+  __asm paddw mm7,mm2 \
+  __asm movq OC_I(2),mm6 \
+  __asm movq mm2,mm0 \
+  __asm movq mm6,OC_I(0) \
+  __asm pmulhw mm0,mm4 \
+  __asm paddw mm5,mm3 \
+  __asm movq mm3,OC_J(4) \
+  __asm psubw mm5,mm1 \
+  __asm paddw mm2,mm0 \
+  __asm psubw mm6,mm3 \
+  __asm movq mm0,mm6 \
+  __asm pmulhw mm6,mm4 \
+  __asm paddw mm3,mm3 \
+  __asm paddw mm1,mm1 \
+  __asm paddw mm3,mm0 \
+  __asm paddw mm1,mm5 \
+  __asm pmulhw mm4,mm3 \
+  __asm paddw mm6,mm0 \
+  __asm psubw mm6,mm2 \
+  __asm paddw mm2,mm2 \
+  __asm movq mm0,OC_I(1) \
+  __asm paddw mm2,mm6 \
+  __asm paddw mm4,mm3 \
+  __asm psubw mm2,mm1 \
+}
+
+/*38+8=46 cycles.*/
+#define OC_ROW_IDCT __asm{ \
+  OC_IDCT_BEGIN \
+  /*r3=D'*/ \
+  __asm  movq mm3,OC_I(2) \
+  /*r4=E'=E-G*/ \
+  __asm  psubw mm4,mm7 \
+  /*r1=H'+H'*/ \
+  __asm  paddw mm1,mm1 \
+  /*r7=G+G*/ \
+  __asm  paddw mm7,mm7 \
+  /*r1=R1=A''+H'*/ \
+  __asm  paddw mm1,mm2 \
+  /*r7=G'=E+G*/ \
+  __asm  paddw mm7,mm4 \
+  /*r4=R4=E'-D'*/ \
+  __asm  psubw mm4,mm3 \
+  __asm  paddw mm3,mm3 \
+  /*r6=R6=F'-B''*/ \
+  __asm  psubw mm6,mm5 \
+  __asm  paddw mm5,mm5 \
+  /*r3=R3=E'+D'*/ \
+  __asm  paddw mm3,mm4 \
+  /*r5=R5=F'+B''*/ \
+  __asm  paddw mm5,mm6 \
+  /*r7=R7=G'-C'*/ \
+  __asm  psubw mm7,mm0 \
+  __asm  paddw mm0,mm0 \
+  /*Save R1.*/ \
+  __asm  movq OC_I(1),mm1 \
+  /*r0=R0=G.+C.*/ \
+  __asm  paddw mm0,mm7 \
+}
+
+/*The following macro does two 4x4 transposes in place.
+  At entry, we assume:
+    r0 = a3 a2 a1 a0
+  I(1) = b3 b2 b1 b0
+    r2 = c3 c2 c1 c0
+    r3 = d3 d2 d1 d0
+
+    r4 = e3 e2 e1 e0
+    r5 = f3 f2 f1 f0
+    r6 = g3 g2 g1 g0
+    r7 = h3 h2 h1 h0
+
+  At exit, we have:
+  I(0) = d0 c0 b0 a0
+  I(1) = d1 c1 b1 a1
+  I(2) = d2 c2 b2 a2
+  I(3) = d3 c3 b3 a3
+
+  J(4) = h0 g0 f0 e0
+  J(5) = h1 g1 f1 e1
+  J(6) = h2 g2 f2 e2
+  J(7) = h3 g3 f3 e3
+
+  I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
+  J(4) J(5) J(6) J(7) is the transpose of r4  r5  r6 r7.
+
+  Since r1 is free at entry, we calculate the Js first.*/
+/*19 cycles.*/
+#define OC_TRANSPOSE __asm{ \
+  __asm movq mm1,mm4 \
+  __asm punpcklwd mm4,mm5 \
+  __asm movq OC_I(0),mm0 \
+  __asm punpckhwd mm1,mm5 \
+  __asm movq mm0,mm6 \
+  __asm punpcklwd mm6,mm7 \
+  __asm movq mm5,mm4 \
+  __asm punpckldq mm4,mm6 \
+  __asm punpckhdq mm5,mm6 \
+  __asm movq mm6,mm1 \
+  __asm movq OC_J(4),mm4 \
+  __asm punpckhwd mm0,mm7 \
+  __asm movq OC_J(5),mm5 \
+  __asm punpckhdq mm6,mm0 \
+  __asm movq mm4,OC_I(0) \
+  __asm punpckldq mm1,mm0 \
+  __asm movq mm5,OC_I(1) \
+  __asm movq mm0,mm4 \
+  __asm movq OC_J(7),mm6 \
+  __asm punpcklwd mm0,mm5 \
+  __asm movq OC_J(6),mm1 \
+  __asm punpckhwd mm4,mm5 \
+  __asm movq mm5,mm2 \
+  __asm punpcklwd mm2,mm3 \
+  __asm movq mm1,mm0 \
+  __asm punpckldq mm0,mm2 \
+  __asm punpckhdq mm1,mm2 \
+  __asm movq mm2,mm4 \
+  __asm movq OC_I(0),mm0 \
+  __asm punpckhwd mm5,mm3 \
+  __asm movq OC_I(1),mm1 \
+  __asm punpckhdq mm4,mm5 \
+  __asm punpckldq mm2,mm5 \
+  __asm movq OC_I(3),mm4 \
+  __asm movq OC_I(2),mm2 \
+}
+
+/*38+19=57 cycles.*/
+#define OC_COLUMN_IDCT __asm{ \
+  OC_IDCT_BEGIN \
+  __asm paddw mm2,OC_8 \
+  /*r1=H'+H'*/ \
+  __asm paddw mm1,mm1 \
+  /*r1=R1=A''+H'*/ \
+  __asm paddw mm1,mm2 \
+  /*r2=NR2*/ \
+  __asm psraw mm2,4 \
+  /*r4=E'=E-G*/ \
+  __asm psubw mm4,mm7 \
+  /*r1=NR1*/ \
+  __asm psraw mm1,4 \
+  /*r3=D'*/ \
+  __asm movq mm3,OC_I(2) \
+  /*r7=G+G*/ \
+  __asm paddw mm7,mm7 \
+  /*Store NR2 at I(2).*/ \
+  __asm movq OC_I(2),mm2 \
+  /*r7=G'=E+G*/ \
+  __asm paddw mm7,mm4 \
+  /*Store NR1 at I(1).*/ \
+  __asm movq OC_I(1),mm1 \
+  /*r4=R4=E'-D'*/ \
+  __asm psubw mm4,mm3 \
+  __asm paddw mm4,OC_8 \
+  /*r3=D'+D'*/ \
+  __asm paddw mm3,mm3 \
+  /*r3=R3=E'+D'*/ \
+  __asm paddw mm3,mm4 \
+  /*r4=NR4*/ \
+  __asm psraw mm4,4 \
+  /*r6=R6=F'-B''*/ \
+  __asm psubw mm6,mm5 \
+  /*r3=NR3*/ \
+  __asm psraw mm3,4 \
+  __asm paddw mm6,OC_8 \
+  /*r5=B''+B''*/ \
+  __asm paddw mm5,mm5 \
+  /*r5=R5=F'+B''*/ \
+  __asm paddw mm5,mm6 \
+  /*r6=NR6*/ \
+  __asm psraw mm6,4 \
+  /*Store NR4 at J(4).*/ \
+  __asm movq OC_J(4),mm4 \
+  /*r5=NR5*/ \
+  __asm psraw mm5,4 \
+  /*Store NR3 at I(3).*/ \
+  __asm movq OC_I(3),mm3 \
+  /*r7=R7=G'-C'*/ \
+  __asm psubw mm7,mm0 \
+  __asm paddw mm7,OC_8 \
+  /*r0=C'+C'*/ \
+  __asm paddw mm0,mm0 \
+  /*r0=R0=G'+C'*/ \
+  __asm paddw mm0,mm7 \
+  /*r7=NR7*/ \
+  __asm psraw mm7,4 \
+  /*Store NR6 at J(6).*/ \
+  __asm movq OC_J(6),mm6 \
+  /*r0=NR0*/ \
+  __asm psraw mm0,4 \
+  /*Store NR5 at J(5).*/ \
+  __asm movq OC_J(5),mm5 \
+  /*Store NR7 at J(7).*/ \
+  __asm movq OC_J(7),mm7 \
+  /*Store NR0 at I(0).*/ \
+  __asm movq OC_I(0),mm0 \
+}
+
+#define OC_MID(_m,_i) [CONSTS+_m+(_i)*8]
+#define OC_C(_i)      OC_MID(OC_COSINE_OFFSET,_i-1)
+#define OC_8          OC_MID(OC_EIGHT_OFFSET,0)
+
+static void oc_idct8x8_slow(ogg_int16_t _y[64]){
+  /*This routine accepts an 8x8 matrix, but in partially transposed form.
+    Every 4x4 block is transposed.*/
+  __asm{
+#define CONSTS eax
+#define Y edx
+    mov CONSTS,offset OC_IDCT_CONSTS
+    mov Y,_y
+#define OC_I(_k)      [Y+_k*16]
+#define OC_J(_k)      [Y+(_k-4)*16+8]
+    OC_ROW_IDCT
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      [Y+(_k*16)+64]
+#define OC_J(_k)      [Y+(_k-4)*16+72]
+    OC_ROW_IDCT
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      [Y+_k*16]
+#define OC_J(_k)      OC_I(_k)
+    OC_COLUMN_IDCT
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      [Y+_k*16+8]
+#define OC_J(_k)      OC_I(_k)
+    OC_COLUMN_IDCT
+#undef  OC_I
+#undef  OC_J
+#undef  CONSTS
+#undef  Y
+  }
+}
+
+/*25 cycles.*/
+#define OC_IDCT_BEGIN_10 __asm{ \
+  __asm movq mm2,OC_I(3) \
+  __asm nop \
+  __asm movq mm6,OC_C(3) \
+  __asm movq mm4,mm2 \
+  __asm movq mm1,OC_C(5) \
+  __asm pmulhw mm4,mm6 \
+  __asm movq mm3,OC_I(1) \
+  __asm pmulhw mm1,mm2 \
+  __asm movq mm0,OC_C(1) \
+  __asm paddw mm4,mm2 \
+  __asm pxor mm6,mm6 \
+  __asm paddw mm2,mm1 \
+  __asm movq mm5,OC_I(2) \
+  __asm pmulhw mm0,mm3 \
+  __asm movq mm1,mm5 \
+  __asm paddw mm0,mm3 \
+  __asm pmulhw mm3,OC_C(7) \
+  __asm psubw mm6,mm2 \
+  __asm pmulhw mm5,OC_C(2) \
+  __asm psubw mm0,mm4 \
+  __asm movq mm7,OC_I(2) \
+  __asm paddw mm4,mm4 \
+  __asm paddw mm7,mm5 \
+  __asm paddw mm4,mm0 \
+  __asm pmulhw mm1,OC_C(6) \
+  __asm psubw mm3,mm6 \
+  __asm movq OC_I(1),mm4 \
+  __asm paddw mm6,mm6 \
+  __asm movq mm4,OC_C(4) \
+  __asm paddw mm6,mm3 \
+  __asm movq mm5,mm3 \
+  __asm pmulhw mm3,mm4 \
+  __asm movq OC_I(2),mm6 \
+  __asm movq mm2,mm0 \
+  __asm movq mm6,OC_I(0) \
+  __asm pmulhw mm0,mm4 \
+  __asm paddw mm5,mm3 \
+  __asm paddw mm2,mm0 \
+  __asm psubw mm5,mm1 \
+  __asm pmulhw mm6,mm4 \
+  __asm paddw mm6,OC_I(0) \
+  __asm paddw mm1,mm1 \
+  __asm movq mm4,mm6 \
+  __asm paddw mm1,mm5 \
+  __asm psubw mm6,mm2 \
+  __asm paddw mm2,mm2 \
+  __asm movq mm0,OC_I(1) \
+  __asm paddw mm2,mm6 \
+  __asm psubw mm2,mm1 \
+  __asm nop \
+}
+
+/*25+8=33 cycles.*/
+#define OC_ROW_IDCT_10 __asm{ \
+  OC_IDCT_BEGIN_10 \
+  /*r3=D'*/ \
+   __asm movq mm3,OC_I(2) \
+  /*r4=E'=E-G*/ \
+   __asm psubw mm4,mm7 \
+  /*r1=H'+H'*/ \
+   __asm paddw mm1,mm1 \
+  /*r7=G+G*/ \
+   __asm paddw mm7,mm7 \
+  /*r1=R1=A''+H'*/ \
+   __asm paddw mm1,mm2 \
+  /*r7=G'=E+G*/ \
+   __asm paddw mm7,mm4 \
+  /*r4=R4=E'-D'*/ \
+   __asm psubw mm4,mm3 \
+   __asm paddw mm3,mm3 \
+  /*r6=R6=F'-B''*/ \
+   __asm psubw mm6,mm5 \
+   __asm paddw mm5,mm5 \
+  /*r3=R3=E'+D'*/ \
+   __asm paddw mm3,mm4 \
+  /*r5=R5=F'+B''*/ \
+   __asm paddw mm5,mm6 \
+  /*r7=R7=G'-C'*/ \
+   __asm psubw mm7,mm0 \
+   __asm paddw mm0,mm0 \
+  /*Save R1.*/ \
+   __asm movq OC_I(1),mm1 \
+  /*r0=R0=G'+C'*/ \
+   __asm paddw mm0,mm7 \
+}
+
+/*25+19=44 cycles'*/
+#define OC_COLUMN_IDCT_10 __asm{ \
+  OC_IDCT_BEGIN_10 \
+  __asm paddw mm2,OC_8 \
+  /*r1=H'+H'*/ \
+  __asm paddw mm1,mm1 \
+  /*r1=R1=A''+H'*/ \
+  __asm paddw mm1,mm2 \
+  /*r2=NR2*/ \
+  __asm psraw mm2,4 \
+  /*r4=E'=E-G*/ \
+  __asm psubw mm4,mm7 \
+  /*r1=NR1*/ \
+  __asm psraw mm1,4 \
+  /*r3=D'*/ \
+  __asm movq mm3,OC_I(2) \
+  /*r7=G+G*/ \
+  __asm paddw mm7,mm7 \
+  /*Store NR2 at I(2).*/ \
+  __asm movq OC_I(2),mm2 \
+  /*r7=G'=E+G*/ \
+  __asm paddw mm7,mm4 \
+  /*Store NR1 at I(1).*/ \
+  __asm movq OC_I(1),mm1 \
+  /*r4=R4=E'-D'*/ \
+  __asm psubw mm4,mm3 \
+  __asm paddw mm4,OC_8 \
+  /*r3=D'+D'*/ \
+  __asm paddw mm3,mm3 \
+  /*r3=R3=E'+D'*/ \
+  __asm paddw mm3,mm4 \
+  /*r4=NR4*/ \
+  __asm psraw mm4,4 \
+  /*r6=R6=F'-B''*/ \
+  __asm psubw mm6,mm5 \
+  /*r3=NR3*/ \
+  __asm psraw mm3,4 \
+  __asm paddw mm6,OC_8 \
+  /*r5=B''+B''*/ \
+  __asm paddw mm5,mm5 \
+  /*r5=R5=F'+B''*/ \
+  __asm paddw mm5,mm6 \
+  /*r6=NR6*/ \
+  __asm psraw mm6,4 \
+  /*Store NR4 at J(4).*/ \
+  __asm movq OC_J(4),mm4 \
+  /*r5=NR5*/ \
+  __asm psraw mm5,4 \
+  /*Store NR3 at I(3).*/ \
+  __asm movq OC_I(3),mm3 \
+  /*r7=R7=G'-C'*/ \
+  __asm psubw mm7,mm0 \
+  __asm paddw mm7,OC_8 \
+  /*r0=C'+C'*/ \
+  __asm paddw mm0,mm0 \
+  /*r0=R0=G'+C'*/ \
+  __asm paddw mm0,mm7 \
+  /*r7=NR7*/ \
+  __asm psraw mm7,4 \
+  /*Store NR6 at J(6).*/ \
+  __asm movq OC_J(6),mm6 \
+  /*r0=NR0*/ \
+  __asm psraw mm0,4 \
+  /*Store NR5 at J(5).*/ \
+  __asm movq OC_J(5),mm5 \
+  /*Store NR7 at J(7).*/ \
+  __asm movq OC_J(7),mm7 \
+  /*Store NR0 at I(0).*/ \
+  __asm movq OC_I(0),mm0 \
+}
+
+static void oc_idct8x8_10(ogg_int16_t _y[64]){
+  __asm{
+#define CONSTS eax
+#define Y edx
+    mov CONSTS,offset OC_IDCT_CONSTS
+    mov Y,_y
+#define OC_I(_k) [Y+_k*16]
+#define OC_J(_k) [Y+(_k-4)*16+8]
+    /*Done with dequant, descramble, and partial transpose.
+      Now do the iDCT itself.*/
+    OC_ROW_IDCT_10
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k) [Y+_k*16]
+#define OC_J(_k) OC_I(_k)
+    OC_COLUMN_IDCT_10
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k) [Y+_k*16+8]
+#define OC_J(_k) OC_I(_k)
+    OC_COLUMN_IDCT_10
+#undef  OC_I
+#undef  OC_J
+#undef  CONSTS
+#undef  Y
+  }
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.*/
+void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){
+  /*_last_zzi is subtly different from an actual count of the number of
+     coefficients we decoded for this block.
+    It contains the value of zzi BEFORE the final token in the block was
+     decoded.
+    In most cases this is an EOB token (the continuation of an EOB run from a
+     previous block counts), and so this is the same as the coefficient count.
+    However, in the case that the last token was NOT an EOB token, but filled
+     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
+    Provided the last token was not a pure zero run, the minimum value it can
+     be is 46, and so that doesn't affect any of the cases in this routine.
+    However, if the last token WAS a pure zero run of length 63, then _last_zzi
+     will be 1 while the number of coefficients decoded is 64.
+    Thus, we will trigger the following special case, where the real
+     coefficient count would not.
+    Note also that a zero run of length 64 will give _last_zzi a value of 0,
+     but we still process the DC coefficient, which might have a non-zero value
+     due to DC prediction.
+    Although convoluted, this is arguably the correct behavior: it allows us to
+     use a smaller transform when the block ends with a long zero run instead
+     of a normal EOB token.
+    It could be smarter... multiple separate zero runs at the end of a block
+     will fool it, but an encoder that generates these really deserves what it
+     gets.
+    Needless to say we inherited this approach from VP3.*/
+  /*Perform the iDCT.*/
+  if(_last_zzi<10)oc_idct8x8_10(_y);
+  else oc_idct8x8_slow(_y);
+}
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxloop.h b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxloop.h
new file mode 100644
index 00000000..2561fca2
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxloop.h
@@ -0,0 +1,219 @@
+#if !defined(_x86_vc_mmxloop_H)
+# define _x86_vc_mmxloop_H (1)
+# include <stddef.h>
+# include "x86int.h"
+
+#if defined(OC_X86_ASM)
+
+/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}.
+  On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and
+   mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/
+#define OC_LOOP_FILTER8_MMX __asm{ \
+  /*mm7=0*/ \
+  __asm pxor mm7,mm7 \
+  /*mm6:mm0={a0,...,a7}*/ \
+  __asm movq mm6,mm0 \
+  __asm punpcklbw mm0,mm7 \
+  __asm punpckhbw mm6,mm7 \
+  /*mm3:mm5={d0,...,d7}*/ \
+  __asm movq mm5,mm3 \
+  __asm punpcklbw mm3,mm7 \
+  __asm punpckhbw mm5,mm7 \
+  /*mm6:mm0={a0-d0,...,a7-d7}*/ \
+  __asm psubw mm0,mm3 \
+  __asm psubw mm6,mm5 \
+  /*mm3:mm1={b0,...,b7}*/ \
+  __asm movq mm3,mm1 \
+  __asm punpcklbw mm1,mm7 \
+  __asm movq mm4,mm2 \
+  __asm punpckhbw mm3,mm7 \
+  /*mm5:mm4={c0,...,c7}*/ \
+  __asm movq mm5,mm2 \
+  __asm punpcklbw mm4,mm7 \
+  __asm punpckhbw mm5,mm7 \
+  /*mm7={3}x4 \
+    mm5:mm4={c0-b0,...,c7-b7}*/ \
+  __asm pcmpeqw mm7,mm7 \
+  __asm psubw mm4,mm1 \
+  __asm psrlw mm7,14 \
+  __asm psubw mm5,mm3 \
+  /*Scale by 3.*/ \
+  __asm pmullw mm4,mm7 \
+  __asm pmullw mm5,mm7 \
+  /*mm7={4}x4 \
+    mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \
+  __asm psrlw mm7,1 \
+  __asm paddw mm4,mm0 \
+  __asm psllw mm7,2 \
+  __asm movq mm0,[LL] \
+  __asm paddw mm5,mm6 \
+  /*R_i has the range [-127,128], so we compute -R_i instead. \
+    mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \
+  __asm psubw mm4,mm7 \
+  __asm psubw mm5,mm7 \
+  __asm psraw mm4,3 \
+  __asm psraw mm5,3 \
+  __asm pcmpeqb mm7,mm7 \
+  __asm packsswb mm4,mm5 \
+  __asm pxor mm6,mm6 \
+  __asm pxor mm4,mm7 \
+  __asm packuswb mm1,mm3 \
+  /*Now compute lflim of -mm4 cf. Section 7.10 of the sepc.*/ \
+  /*There's no unsigned byte+signed byte with unsigned saturation op code, so \
+     we have to split things by sign (the other option is to work in 16 bits, \
+     but working in 8 bits gives much better parallelism). \
+    We compute abs(R_i), but save a mask of which terms were negative in mm6. \
+    Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \
+    Finally, we split mm4 into positive and negative pieces using the mask in \
+     mm6, and add and subtract them as appropriate.*/ \
+  /*mm4=abs(-R_i)*/ \
+  /*mm7=255-2*L*/ \
+  __asm pcmpgtb mm6,mm4 \
+  __asm psubb mm7,mm0 \
+  __asm pxor mm4,mm6 \
+  __asm psubb mm7,mm0 \
+  __asm psubb mm4,mm6 \
+  /*mm7=255-max(2*L-abs(R_i),0)*/ \
+  __asm paddusb mm7,mm4 \
+  /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \
+  __asm paddusb mm4,mm7 \
+  __asm psubusb mm4,mm7 \
+  /*Now split mm4 by the original sign of -R_i.*/ \
+  __asm movq mm5,mm4 \
+  __asm pand mm4,mm6 \
+  __asm pandn mm6,mm5 \
+  /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \
+  /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \
+  __asm paddusb mm1,mm4 \
+  __asm psubusb mm2,mm4 \
+  __asm psubusb mm1,mm6 \
+  __asm paddusb mm2,mm6 \
+}
+
+#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \
+  do{ \
+    /*Used local variable pix__ in order to fix compilation errors like: \
+       "error C2425: 'SHL' : non-constant expression in 'second operand'".*/ \
+    unsigned char *pix__; \
+    unsigned char *ll__; \
+    ll__=(_ll); \
+    pix__=(_pix); \
+    __asm mov YSTRIDE,_ystride \
+    __asm mov LL,ll__ \
+    __asm mov PIX,pix__ \
+    __asm sub PIX,YSTRIDE \
+    __asm sub PIX,YSTRIDE \
+    /*mm0={a0,...,a7}*/ \
+    __asm movq mm0,[PIX] \
+    /*ystride3=_ystride*3*/ \
+    __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \
+    /*mm3={d0,...,d7}*/ \
+    __asm movq mm3,[PIX+YSTRIDE3] \
+    /*mm1={b0,...,b7}*/ \
+    __asm movq mm1,[PIX+YSTRIDE] \
+    /*mm2={c0,...,c7}*/ \
+    __asm movq mm2,[PIX+YSTRIDE*2] \
+    OC_LOOP_FILTER8_MMX \
+    /*Write it back out.*/ \
+    __asm movq [PIX+YSTRIDE],mm1 \
+    __asm movq [PIX+YSTRIDE*2],mm2 \
+  } \
+  while(0)
+
+#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \
+  do{ \
+    /*Used local variable ll__ in order to fix compilation errors like: \
+       "error C2443: operand size conflict".*/ \
+    unsigned char *ll__; \
+    unsigned char *pix__; \
+    ll__=(_ll); \
+    pix__=(_pix)-2; \
+    __asm mov PIX,pix__ \
+    __asm mov YSTRIDE,_ystride \
+    __asm mov LL,ll__ \
+    /*x x x x d0 c0 b0 a0*/ \
+    __asm movd mm0,[PIX] \
+    /*x x x x d1 c1 b1 a1*/ \
+    __asm movd mm1,[PIX+YSTRIDE] \
+    /*ystride3=_ystride*3*/ \
+    __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \
+    /*x x x x d2 c2 b2 a2*/ \
+    __asm movd mm2,[PIX+YSTRIDE*2] \
+    /*x x x x d3 c3 b3 a3*/ \
+    __asm lea D,[PIX+YSTRIDE*4] \
+    __asm movd mm3,[PIX+YSTRIDE3] \
+    /*x x x x d4 c4 b4 a4*/ \
+    __asm movd mm4,[D] \
+    /*x x x x d5 c5 b5 a5*/ \
+    __asm movd mm5,[D+YSTRIDE] \
+    /*x x x x d6 c6 b6 a6*/ \
+    __asm movd mm6,[D+YSTRIDE*2] \
+    /*x x x x d7 c7 b7 a7*/ \
+    __asm movd mm7,[D+YSTRIDE3] \
+    /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \
+    __asm punpcklbw mm0,mm1 \
+    /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \
+    __asm punpcklbw mm2,mm3 \
+    /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \
+    __asm movq mm3,mm0 \
+    /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \
+    __asm punpcklwd mm0,mm2 \
+    /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \
+    __asm punpckhwd mm3,mm2 \
+    /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \
+    __asm movq mm1,mm0 \
+    /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \
+    __asm punpcklbw mm4,mm5 \
+    /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \
+    __asm punpcklbw mm6,mm7 \
+    /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \
+    __asm movq mm5,mm4 \
+    /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \
+    __asm punpcklwd mm4,mm6 \
+    /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \
+    __asm punpckhwd mm5,mm6 \
+    /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \
+    __asm movq mm2,mm3 \
+    /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \
+    __asm punpckldq mm0,mm4 \
+    /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \
+    __asm punpckhdq mm1,mm4 \
+    /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \
+    __asm punpckldq mm2,mm5 \
+    /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \
+    __asm punpckhdq mm3,mm5 \
+    OC_LOOP_FILTER8_MMX \
+    /*mm2={b0+R_0'',...,b7+R_7''}*/ \
+    __asm movq mm0,mm1 \
+    /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \
+    __asm punpcklbw mm1,mm2 \
+    /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \
+    __asm punpckhbw mm0,mm2 \
+    /*[d]=c1 b1 c0 b0*/ \
+    __asm movd D,mm1 \
+    __asm mov [PIX+1],D_WORD \
+    __asm psrlq mm1,32 \
+    __asm shr D,16 \
+    __asm mov [PIX+YSTRIDE+1],D_WORD \
+    /*[d]=c3 b3 c2 b2*/ \
+    __asm movd D,mm1 \
+    __asm mov [PIX+YSTRIDE*2+1],D_WORD \
+    __asm shr D,16 \
+    __asm mov [PIX+YSTRIDE3+1],D_WORD \
+    __asm lea PIX,[PIX+YSTRIDE*4] \
+    /*[d]=c5 b5 c4 b4*/ \
+    __asm movd D,mm0 \
+    __asm mov [PIX+1],D_WORD \
+    __asm psrlq mm0,32 \
+    __asm shr D,16 \
+    __asm mov [PIX+YSTRIDE+1],D_WORD \
+    /*[d]=c7 b7 c6 b6*/ \
+    __asm movd D,mm0 \
+    __asm mov [PIX+YSTRIDE*2+1],D_WORD \
+    __asm shr D,16 \
+    __asm mov [PIX+YSTRIDE3+1],D_WORD \
+  } \
+  while(0)
+
+# endif
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/mmxstate.c b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxstate.c
new file mode 100644
index 00000000..73bd1981
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86_vc/mmxstate.c
@@ -0,0 +1,211 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxstate.c 16584 2009-09-26 19:35:55Z tterribe $
+
+ ********************************************************************/
+
+/*MMX acceleration of complete fragment reconstruction algorithm.
+  Originally written by Rudolf Marek.*/
+#include <string.h>
+#include "x86int.h"
+#include "mmxfrag.h"
+#include "mmxloop.h"
+
+#if defined(OC_X86_ASM)
+
+void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
+  unsigned char *dst;
+  ptrdiff_t      frag_buf_off;
+  int            ystride;
+  int            mb_mode;
+  /*Apply the inverse transform.*/
+  /*Special case only having a DC component.*/
+  if(_last_zzi<2){
+    /*Note that this value must be unsigned, to keep the __asm__ block from
+       sign-extending it when it puts it in a register.*/
+    ogg_uint16_t p;
+    /*We round this dequant product (and not any of the others) because there's
+       no iDCT rounding.*/
+    p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    /*Fill _dct_coeffs with p.*/
+    __asm{
+#define Y eax
+#define P ecx
+      mov Y,_dct_coeffs
+      movzx P,p
+      /*mm0=0000 0000 0000 AAAA*/
+      movd mm0,P
+      /*mm0=0000 0000 AAAA AAAA*/
+      punpcklwd mm0,mm0
+      /*mm0=AAAA AAAA AAAA AAAA*/
+      punpckldq mm0,mm0
+      movq [Y],mm0
+      movq [8+Y],mm0
+      movq [16+Y],mm0
+      movq [24+Y],mm0
+      movq [32+Y],mm0
+      movq [40+Y],mm0
+      movq [48+Y],mm0
+      movq [56+Y],mm0
+      movq [64+Y],mm0
+      movq [72+Y],mm0
+      movq [80+Y],mm0
+      movq [88+Y],mm0
+      movq [96+Y],mm0
+      movq [104+Y],mm0
+      movq [112+Y],mm0
+      movq [120+Y],mm0
+#undef Y
+#undef P
+    }
+  }
+  else{
+    /*Dequantize the DC coefficient.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8_mmx(_dct_coeffs,_last_zzi);
+  }
+  /*Fill in the target buffer.*/
+  frag_buf_off=_state->frag_buf_offs[_fragi];
+  mb_mode=_state->frags[_fragi].mb_mode;
+  ystride=_state->ref_ystride[_pli];
+  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+  if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs);
+  else{
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    ref=
+     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
+     +frag_buf_off;
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+     _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+      oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
+       _dct_coeffs);
+    }
+    else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs);
+  }
+}
+
+/*We copy these entire function to inline the actual MMX routines so that we
+   use only a single indirect call.*/
+
+/*Copies the fragments specified by the lists of fragment indices from one
+   frame to another.
+  _fragis:    A pointer to a list of fragment indices.
+  _nfragis:   The number of fragment indices to copy.
+  _dst_frame: The reference frame to copy to.
+  _src_frame: The reference frame to copy from.
+  _pli:       The color plane the fragments lie in.*/
+void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli){
+  const ptrdiff_t     *frag_buf_offs;
+  const unsigned char *src_frame_data;
+  unsigned char       *dst_frame_data;
+  ptrdiff_t            fragii;
+  int                  ystride;
+  dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]];
+  src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]];
+  ystride=_state->ref_ystride[_pli];
+  frag_buf_offs=_state->frag_buf_offs;
+  for(fragii=0;fragii<_nfragis;fragii++){
+    ptrdiff_t frag_buf_off;
+    frag_buf_off=frag_buf_offs[_fragis[fragii]];
+#define SRC edx
+#define DST eax
+#define YSTRIDE ecx
+#define YSTRIDE3 edi
+    OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off,
+     src_frame_data+frag_buf_off,ystride);
+#undef SRC
+#undef DST
+#undef YSTRIDE
+#undef YSTRIDE3
+  }
+}
+
+/*Apply the loop filter to a given set of fragment rows in the given plane.
+  The filter may be run on the bottom edge, affecting pixels in the next row of
+   fragments, so this row also needs to be available.
+  _bv:        The bounding values array.
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
+void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
+  OC_ALIGN8(unsigned char  ll[8]);
+  const oc_fragment_plane *fplane;
+  const oc_fragment       *frags;
+  const ptrdiff_t         *frag_buf_offs;
+  unsigned char           *ref_frame_data;
+  ptrdiff_t                fragi_top;
+  ptrdiff_t                fragi_bot;
+  ptrdiff_t                fragi0;
+  ptrdiff_t                fragi0_end;
+  int                      ystride;
+  int                      nhfrags;
+  memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll));
+  fplane=_state->fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  fragi_top=fplane->froffset;
+  fragi_bot=fragi_top+fplane->nfrags;
+  fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
+  fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
+  ystride=_state->ref_ystride[_pli];
+  frags=_state->frags;
+  frag_buf_offs=_state->frag_buf_offs;
+  ref_frame_data=_state->ref_frame_data[_refi];
+  /*The following loops are constructed somewhat non-intuitively on purpose.
+    The main idea is: if a block boundary has at least one coded fragment on
+     it, the filter is applied to it.
+    However, the order that the filters are applied in matters, and VP3 chose
+     the somewhat strange ordering used below.*/
+  while(fragi0<fragi0_end){
+    ptrdiff_t fragi;
+    ptrdiff_t fragi_end;
+    fragi=fragi0;
+    fragi_end=fragi+nhfrags;
+    while(fragi<fragi_end){
+      if(frags[fragi].coded){
+        unsigned char *ref;
+        ref=ref_frame_data+frag_buf_offs[fragi];
+#define PIX eax
+#define YSTRIDE3 edi
+#define YSTRIDE ecx
+#define LL edx
+#define D esi
+#define D_WORD si
+        if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll);
+        if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll);
+        if(fragi+1<fragi_end&&!frags[fragi+1].coded){
+          OC_LOOP_FILTER_H_MMX(ref+8,ystride,ll);
+        }
+        if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
+          OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,ll);
+        }
+#undef PIX
+#undef YSTRIDE3
+#undef YSTRIDE
+#undef LL
+#undef D
+#undef D_WORD
+      }
+      fragi++;
+    }
+    fragi0+=nhfrags;
+  }
+}
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/x86int.h b/engine/code/libtheora-1.1.1/lib/x86_vc/x86int.h
new file mode 100644
index 00000000..4cca4853
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86_vc/x86int.h
@@ -0,0 +1,42 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#if !defined(_x86_vc_x86int_H)
+# define _x86_vc_x86int_H (1)
+# include "../internal.h"
+
+void oc_state_vtable_init_x86(oc_theora_state *_state);
+
+void oc_frag_copy_mmx(unsigned char *_dst,
+ const unsigned char *_src,int _ystride);
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride,
+ const ogg_int16_t *_residue);
+void oc_frag_recon_inter_mmx(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
+void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi);
+void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu_mmx(void);
+
+#endif
diff --git a/engine/code/libtheora-1.1.1/lib/x86_vc/x86state.c b/engine/code/libtheora-1.1.1/lib/x86_vc/x86state.c
new file mode 100644
index 00000000..a786bec2
--- /dev/null
+++ b/engine/code/libtheora-1.1.1/lib/x86_vc/x86state.c
@@ -0,0 +1,62 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+
+#include "x86int.h"
+
+#if defined(OC_X86_ASM)
+
+#include "../cpu.c"
+
+/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into
+   each quadrant of the destination.*/
+static const unsigned char OC_FZIG_ZAG_MMX[128]={
+   0, 8, 1, 2, 9,16,24,17,
+  10, 3,32,11,18,25, 4,12,
+   5,26,19,40,33,34,41,48,
+  27, 6,13,20,28,21,14, 7,
+  56,49,42,35,43,50,57,36,
+  15,22,29,30,23,44,37,58,
+  51,59,38,45,52,31,60,53,
+  46,39,47,54,61,62,55,63,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+};
+
+void oc_state_vtable_init_x86(oc_theora_state *_state){
+  _state->cpu_flags=oc_cpu_flags_get();
+  if(_state->cpu_flags&OC_CPU_X86_MMX){
+    _state->opt_vtable.frag_copy=oc_frag_copy_mmx;
+    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
+    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
+    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
+    _state->opt_vtable.idct8x8=oc_idct8x8_mmx;
+    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
+    _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx;
+    _state->opt_vtable.state_loop_filter_frag_rows=
+     oc_state_loop_filter_frag_rows_mmx;
+    _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
+    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX;
+  }
+  else oc_state_vtable_init_c(_state);
+}
+#endif
diff --git a/engine/code/qcommon/files.c b/engine/code/qcommon/files.c
index 2c36c1eb..c5bf3b74 100644
--- a/engine/code/qcommon/files.c
+++ b/engine/code/qcommon/files.c
@@ -2438,6 +2438,12 @@ int	FS_GetFileList(  const char *path, const char *extension, char *listbuf, int
 		return FS_GetModList(listbuf, bufsize);
 	}
 
+const char *extensions[] = { "RoQ", "roq"
+#if defined(USE_CODEC_VORBIS) && (defined(USE_CIN_XVID) || defined(USE_CIN_THEORA))
+			, "ogm", "ogv"
+#endif
+			};
+
 	pFiles = FS_ListFiles(path, extension, &nFiles);
 
 	for (i =0; i < nFiles; i++) {
diff --git a/engine/code/qcommon/q_shared.h b/engine/code/qcommon/q_shared.h
index 5fe8899e..81862195 100644
--- a/engine/code/qcommon/q_shared.h
+++ b/engine/code/qcommon/q_shared.h
@@ -67,7 +67,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #define BASETA				"missionpack"
 
 #ifndef PRODUCT_VERSION
-#define PRODUCT_VERSION "v0.4_r515"
+#define PRODUCT_VERSION "v0.4_r518"
 #endif
 
 
diff --git a/q3rallycode.ppr b/q3rallycode.ppr
index ff558679..c796c51e 100644
--- a/q3rallycode.ppr
+++ b/q3rallycode.ppr
@@ -194,7 +194,8 @@ q3rallycode
 				engine\code\client\snd_openal.c
 				engine\code\client\snd_public.h
 				engine\code\client\snd_wavelet.c
-			-game
+				engine\code\client\cl_cin_ogm.c
+			+game
 				engine\code\game\ai_chat.c
 				engine\code\game\ai_chat.h
 				engine\code\game\ai_cmd.c
@@ -456,7 +457,7 @@ q3rallycode
 				engine\code\null\null_main.c
 				engine\code\null\null_net.c
 				engine\code\null\null_snddma.c
-			+q3_ui
+			-q3_ui
 				engine\code\q3_ui\ui.def
 				engine\code\q3_ui\ui_addbots.c
 				engine\code\q3_ui\ui_atoms.c
@@ -745,7 +746,7 @@ q3rallycode
 					engine\code\tools\lcc\LOG
 					engine\code\tools\lcc\README
 					engine\code\tools\lcc\README.id
-			+ui
+			-ui
 				engine\code\ui\ui_atoms.c
 				engine\code\ui\ui_gameinfo.c
 				engine\code\ui\ui_local.h
@@ -888,41 +889,29 @@ q3rallycode
 		engine\cross-make-mingw64.sh
 [Open project files]
 0=engine\code\qcommon\q_shared.h
-1=engine\code\cgame\cg_draw.c
-2=engine\code\cgame\cg_local.h
-3=engine\code\cgame\cg_event.c
-4=engine\code\ui\ui_local.h
-5=engine\code\cgame\cg_scoreboard.c
-6=engine\code\q3_ui\ui_rally_credits.c
-7=engine\code\q3_ui\ui_menu.c
-8=engine\code\q3_ui\ui_video.c
+1=engine\Makefile
+2=engine\code\client\cl_cin.c
+3=engine\code\client\client.h
+4=engine\code\qcommon\files.c
+5=engine\code\game\g_mover.c
 [Selected Project Files]
 Main=
 Selected=engine\code\qcommon\q_shared.h
 [engine\code\qcommon\q_shared.h]
-TopLine=51
+TopLine=56
 Caret=35,70
-[engine\code\cgame\cg_draw.c]
-TopLine=828
-Caret=28,848
-[engine\code\cgame\cg_local.h]
-TopLine=57
-Caret=27,76
-[engine\code\cgame\cg_event.c]
-TopLine=275
-Caret=58,297
-[engine\code\ui\ui_local.h]
-TopLine=129
-Caret=1,139
-[engine\code\cgame\cg_scoreboard.c]
-TopLine=54
-Caret=41,78
-[engine\code\q3_ui\ui_rally_credits.c]
-TopLine=84
-Caret=45,107
-[engine\code\q3_ui\ui_menu.c]
-TopLine=572
-Caret=66,592
-[engine\code\q3_ui\ui_video.c]
-TopLine=1042
-Caret=1,1085
+[engine\Makefile]
+TopLine=342
+Caret=6,347
+[engine\code\client\cl_cin.c]
+TopLine=382
+Caret=5,397
+[engine\code\client\client.h]
+TopLine=584
+Caret=31,615
+[engine\code\qcommon\files.c]
+TopLine=2426
+Caret=6,2445
+[engine\code\game\g_mover.c]
+TopLine=1857
+Caret=3,1864