From 195ed466e3be91934c73975e4e8cbd9639c42e52 Mon Sep 17 00:00:00 2001 From: helixhorned Date: Sun, 25 Sep 2011 15:11:11 +0000 Subject: [PATCH] VPX: in 3 planes -> packed format conversion code, group together the three individual loops and compile the enclosing function at -O3 (-O1 for debugging builds). Now, the time for this conversion ranges from 7 to 18 ms per frame across various tested machines, a clear improvement. git-svn-id: https://svn.eduke32.com/eduke32@2042 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/source/animvpx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/polymer/eduke32/source/animvpx.c b/polymer/eduke32/source/animvpx.c index db38077f4..acd171957 100644 --- a/polymer/eduke32/source/animvpx.c +++ b/polymer/eduke32/source/animvpx.c @@ -208,6 +208,11 @@ const char *animvpx_nextpic_errmsg[] = { // retrieves one picture-frame from the stream // pic format: lines of [Y U V 0] pixels // *picptr==NULL means EOF has been reached +#ifdef DEBUGGINGAIDS +ATTRIBUTE((optimize("O1"))) +#else +ATTRIBUTE((optimize("O3"))) +#endif int32_t animvpx_nextpic(animvpx_codec_ctx *codec, uint8_t **picptr) { int32_t ret, corrupted; @@ -277,14 +282,11 @@ read_ivf_frame: /*** 3 planes --> packed conversion ***/ for (y=0; y<img->d_h; y++) for (x=0; x<img->d_w; x++) + { codec->pic[(img->d_w*y + x)<<2] = img->planes[VPX_PLANE_Y][img->stride[VPX_PLANE_Y]*y + x]; - - for (y=0; y<img->d_h; y++) - for (x=0; x<img->d_w; x++) codec->pic[((img->d_w*y + x)<<2) + 1] = img->planes[VPX_PLANE_U][img->stride[VPX_PLANE_U]*(y>>1) + (x>>1)]; - for (y=0; y<img->d_h; y++) - for (x=0; x<img->d_w; x++) codec->pic[((img->d_w*y + x)<<2) + 2] = img->planes[VPX_PLANE_V][img->stride[VPX_PLANE_V]*(y>>1) + (x>>1)]; + } *picptr = codec->pic; return 0;