Update opus from 1.1 to 1.1.4

Define FLOAT_APPROX in Makefile and misc/msvc12/quake3.vcxproj.
This commit is contained in:
Zack Middleton 2017-05-23 10:48:01 -05:00
parent 67bfe8f8d6
commit 853110d5d4
146 changed files with 11919 additions and 2111 deletions

View File

@ -1002,7 +1002,7 @@ endif
ifeq ($(NEED_OPUS),1)
ifeq ($(USE_INTERNAL_OPUS),1)
OPUS_CFLAGS = -DOPUS_BUILD -DHAVE_LRINTF -DFLOATING_POINT -DUSE_ALLOCA \
OPUS_CFLAGS = -DOPUS_BUILD -DHAVE_LRINTF -DFLOATING_POINT -DFLOAT_APPROX -DUSE_ALLOCA \
-I$(OPUSDIR)/include -I$(OPUSDIR)/celt -I$(OPUSDIR)/silk \
-I$(OPUSDIR)/silk/float -I$(OPUSFILEDIR)/include
else

View File

@ -65,10 +65,6 @@
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
# define C_MUL4(m,a,b) \
do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
(m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
# define C_MULBYSCALAR( c, s ) \
do{ (c).r = S_MUL( (c).r , s ) ;\
(c).i = S_MUL( (c).i , s ) ; }while(0)
@ -101,6 +97,9 @@
#if defined(OPUS_ARM_INLINE_EDSP)
#include "arm/kiss_fft_armv5e.h"
#endif
#if defined(MIPSr1_ASM)
#include "mips/kiss_fft_mipsr1.h"
#endif
#else /* not FIXED_POINT*/

View File

@ -69,11 +69,8 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
#define IMUL32(a,b) ((a)*(b))
#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */
#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */
#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */
#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */
#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */
#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */
#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */
#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
@ -81,6 +78,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
#define UADD32(a,b) ((a)+(b))
#define USUB32(a,b) ((a)-(b))
/* Set this if opus_int64 is a native type of the CPU. */
/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
(which can be ILP32 for x32) and Win64 (which is LLP64). */
#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
#define OPUS_FAST_INT64 1
#else
#define OPUS_FAST_INT64 0
#endif
#define PRINT_MIPS(file)
#ifdef FIXED_POINT
@ -108,13 +114,22 @@ typedef opus_val32 celt_ener;
#define SCALEIN(a) (a)
#define SCALEOUT(a) (a)
#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
}
#ifdef FIXED_DEBUG
#include "fixed_debug.h"
#else
#include "fixed_generic.h"
#ifdef OPUS_ARM_INLINE_EDSP
#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
#include "arm/fixed_arm64.h"
#elif OPUS_ARM_INLINE_EDSP
#include "arm/fixed_armv5e.h"
#elif defined (OPUS_ARM_INLINE_ASM)
#include "arm/fixed_armv4.h"
@ -137,6 +152,22 @@ typedef float celt_sig;
typedef float celt_norm;
typedef float celt_ener;
#ifdef FLOAT_APPROX
/* This code should reliably detect NaN/inf even when -ffast-math is used.
Assumes IEEE 754 format. */
static OPUS_INLINE int celt_isnan(float x)
{
union {float f; opus_uint32 i;} in;
in.f = x;
return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
}
#else
#ifdef __FAST_MATH__
#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
#endif
#define celt_isnan(x) ((x)!=(x))
#endif
#define Q15ONE 1.0f
#define NORM_SCALING 1.f
@ -146,6 +177,10 @@ typedef float celt_ener;
#define VERY_LARGE16 1e15f
#define Q15_ONE ((opus_val16)1.f)
/* This appears to be the same speed as C99's fabsf() but it's more portable. */
#define ABS16(x) ((float)fabs(x))
#define ABS32(x) ((float)fabs(x))
#define QCONST16(x,bits) (x)
#define QCONST32(x,bits) (x)
@ -184,6 +219,7 @@ typedef float celt_ener;
#define MULT32_32_Q31(a,b) ((a)*(b))
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
#define MULT16_16_Q11_32(a,b) ((a)*(b))
#define MULT16_16_Q11(a,b) ((a)*(b))
@ -201,6 +237,8 @@ typedef float celt_ener;
#define SCALEIN(a) ((a)*CELT_SIG_SCALE)
#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE))
#define SIG2WORD16(x) (x)
#endif /* !FIXED_POINT */
#ifndef GLOBAL_STACK_SIZE

View File

@ -1,7 +1,33 @@
#!/usr/bin/perl
# Copyright (C) 2002-2013 Xiph.org Foundation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
my $bigend; # little/big endian
my $nxstack;
my $apple = 0;
my $symprefix = "";
$nxstack = 0;
@ -10,11 +36,16 @@ eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
while ($ARGV[0] =~ /^-/) {
$_ = shift;
last if /^--/;
if (/^-n/) {
last if /^--$/;
if (/^-n$/) {
$nflag++;
next;
}
if (/^--apple$/) {
$apple = 1;
$symprefix = "_";
next;
}
die "I don't recognize this switch: $_\\n";
}
$printit++ unless $nflag;
@ -25,6 +56,8 @@ $n=0;
$thumb = 0; # ARM mode by default, not Thumb.
@proc_stack = ();
printf (" .syntax unified\n");
LINE:
while (<>) {
@ -53,7 +86,7 @@ while (<>) {
s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
s/\bIMPORT\b/.extern/;
s/\bEXPORT\b/.global/;
s/\bEXPORT\b\s*/.global $symprefix/;
s/^(\s+)\[/$1IF/;
s/^(\s+)\|/$1ELSE/;
s/^(\s+)\]/$1ENDIF/;
@ -109,7 +142,7 @@ while (<>) {
# won't match the original source file (we could use the .line
# directive, which is documented to be obsolete, but then gdb will
# show the wrong line in the translated source file).
s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/;
s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/ unless ($apple);
}
}
@ -131,9 +164,13 @@ while (<>) {
$prefix = "";
if ($proc)
{
$prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc);
$prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple);
# Make sure we $prefix isn't empty here (for the $apple case).
# We handle mangling the label here, make sure it doesn't match
# the label handling below (if $prefix would be empty).
$prefix = "; ";
push(@proc_stack, $proc);
s/^[A-Za-z_\.]\w+/$&:/;
s/^[A-Za-z_\.]\w+/$symprefix$&:/;
}
$prefix = $prefix."\t.thumb_func; " if ($thumb);
s/\bPROC\b/@ $&/;
@ -146,7 +183,7 @@ while (<>) {
my $proc;
s/\bENDP\b/@ $&/;
$proc = pop(@proc_stack);
$_ = "\t.size $proc, .-$proc".$_ if ($proc);
$_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple);
}
s/\bSUBT\b/@ $&/;
s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25
@ -311,6 +348,6 @@ while (<>) {
}
#If we had a code section, mark that this object doesn't need an executable
# stack.
if ($nxstack) {
if ($nxstack && !$apple) {
printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n");
}

View File

@ -30,10 +30,15 @@
#endif
#include "pitch.h"
#include "kiss_fft.h"
#include "mdct.h"
#if defined(OPUS_HAVE_RTCD)
# if defined(FIXED_POINT)
# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int , int) = {
celt_pitch_xcorr_c, /* ARMv4 */
@ -41,9 +46,98 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
};
# else
# error "Floating-point implementation is not supported by ARM asm yet." \
"Reconfigure with --disable-rtcd or send patches."
# endif
# endif
# else /* !FIXED_POINT */
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
# endif
# endif /* FIXED_POINT */
#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* ARMv4 */
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
};
#endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# if defined(HAVE_ARM_NE10)
# if defined(CUSTOM_MODES)
int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
opus_fft_alloc_arm_neon /* Neon with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
opus_fft_free_arm_neon /* Neon with NE10 */
};
# endif /* CUSTOM_MODES */
void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout) = {
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
opus_fft_neon /* Neon with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout) = {
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
opus_ifft_neon /* Neon with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift,
int stride, int arch) = {
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
clt_mdct_forward_neon /* Neon with NE10 */
};
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift,
int stride, int arch) = {
clt_mdct_backward_c, /* ARMv4 */
clt_mdct_backward_c, /* EDSP */
clt_mdct_backward_c, /* Media */
clt_mdct_backward_neon /* Neon with NE10 */
};
# endif /* HAVE_ARM_NE10 */
# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
#endif /* OPUS_HAVE_RTCD */

View File

@ -37,11 +37,12 @@
#include "cpu_support.h"
#include "os_support.h"
#include "opus_types.h"
#include "arch.h"
#define OPUS_CPU_ARM_V4 (1)
#define OPUS_CPU_ARM_EDSP (1<<1)
#define OPUS_CPU_ARM_MEDIA (1<<2)
#define OPUS_CPU_ARM_NEON (1<<3)
#define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4)
#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
#if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@ -55,29 +56,31 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
/* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
* instructions via their assembled hex code.
* All of these instructions should be essentially nops. */
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{
/*PLD [r13]*/
__emit(0xF5DDF000);
flags|=OPUS_CPU_ARM_EDSP;
flags|=OPUS_CPU_ARM_EDSP_FLAG;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/
}
# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{
/*SHADD8 r3,r3,r3*/
__emit(0xE6333F93);
flags|=OPUS_CPU_ARM_MEDIA;
flags|=OPUS_CPU_ARM_MEDIA_FLAG;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/
}
# if defined(OPUS_ARM_MAY_HAVE_NEON)
# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{
/*VORR q0,q0,q0*/
__emit(0xF2200150);
flags|=OPUS_CPU_ARM_NEON;
flags|=OPUS_CPU_ARM_NEON_FLAG;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/
@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void)
while(fgets(buf, 512, cpuinfo) != NULL)
{
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for edsp and neon flag */
if(memcmp(buf, "Features", 8) == 0)
{
char *p;
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
p = strstr(buf, " edsp");
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_EDSP;
# endif
flags |= OPUS_CPU_ARM_EDSP_FLAG;
# if defined(OPUS_ARM_MAY_HAVE_NEON)
# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
p = strstr(buf, " neon");
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON;
flags |= OPUS_CPU_ARM_NEON_FLAG;
# endif
}
# endif
# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for media capabilities (>= ARMv6) */
if(memcmp(buf, "CPU architecture:", 17) == 0)
{
@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void)
version = atoi(buf+17);
if(version >= 6)
flags |= OPUS_CPU_ARM_MEDIA;
flags |= OPUS_CPU_ARM_MEDIA_FLAG;
}
# endif
}
@ -156,18 +159,26 @@ int opus_select_arch(void)
opus_uint32 flags = opus_cpu_capabilities();
int arch = 0;
if(!(flags & OPUS_CPU_ARM_EDSP))
if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
/* Asserts ensure arch values are sequential */
celt_assert(arch == OPUS_ARCH_ARM_V4);
return arch;
}
arch++;
if(!(flags & OPUS_CPU_ARM_MEDIA))
if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_EDSP);
return arch;
}
arch++;
if(!(flags & OPUS_CPU_ARM_NEON))
if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
return arch;
}
arch++;
celt_assert(arch == OPUS_ARCH_ARM_NEON);
return arch;
}

View File

@ -66,6 +66,12 @@
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
#define OPUS_ARCH_ARM_V4 (0)
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
# endif
#endif

View File

@ -0,0 +1,174 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_fft.c
@brief ARM Neon optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include <NE10_init.h>
#include <NE10_dsp.h>
#include "os_support.h"
#include "kiss_fft.h"
#include "stack_alloc.h"
#if !defined(FIXED_POINT)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
#if defined(CUSTOM_MODES)
/* nfft lengths in NE10 that support scaled fft */
# define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
480, 240, 120, 60
};
int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
int i;
size_t memneeded = sizeof(struct arch_fft_state);
st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
if (!st->arch_fft)
return -1;
for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
if(st->nfft == ne10_fft_scaled_support[i])
break;
}
if (i == NE10_FFTSCALED_SUPPORT_MAX) {
/* This nfft length (scaled fft) is not supported in NE10 */
st->arch_fft->is_supported = 0;
st->arch_fft->priv = NULL;
}
else {
st->arch_fft->is_supported = 1;
st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
if (st->arch_fft->priv == NULL) {
return -1;
}
}
return 0;
}
void opus_fft_free_arm_neon(kiss_fft_state *st)
{
NE10_FFT_CFG_TYPE_T cfg;
if (!st->arch_fft)
return;
cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
if (cfg)
NE10_FFT_DESTROY_C2C_TYPE(cfg);
opus_free(st->arch_fft);
}
#endif
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_fft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_forward_scaled = 1;
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0, 1);
#endif
}
RESTORE_STACK;
}
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_ifft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_backward_scaled = 0;
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1, 0);
#endif
}
RESTORE_STACK;
}

View File

@ -0,0 +1,258 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_mdct.c
@brief ARM Neon optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include "kiss_fft.h"
#include "_kiss_fft_guts.h"
#include "mdct.h"
#include "stack_alloc.h"
void clt_mdct_forward_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
SAVE_STACK;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
for(i=0;i<((overlap+3)>>2);i++)
{
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
*yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
*yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
wp1 = window;
wp2 = window+overlap-1;
for(;i<N4-((overlap+3)>>2);i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = *xp2;
*yp++ = *xp1;
xp1+=2;
xp2-=2;
}
for(;i<N4;i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
*yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
}
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
f2[i] = yc;
}
}
opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
void clt_mdct_backward_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window,
int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
const kiss_twiddle_scalar *trig;
const kiss_fft_state *st = l->kfft[shift];
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
yp[2*i] = yr;
yp[2*i+1] = yi;
xp1+=2*stride;
xp2-=2*stride;
}
}
opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
kiss_fft_scalar * yp0 = out+(overlap>>1);
kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
re = yp0[0];
im = yp0[1];
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
re = yp1[0];
im = yp1[1];
yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
}
/* Mirror on both sides for TDAC */
{
kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
const opus_val16 * OPUS_RESTRICT wp1 = window;
const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
for(i = 0; i < overlap/2; i++)
{
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
wp1++;
wp2--;
}
}
RESTORE_STACK;
}

View File

@ -0,0 +1,311 @@
/* Copyright (c) 2014-2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_neon_intr.c
@brief ARM Neon Intrinsic optimizations for celt
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "../pitch.h"
#if defined(FIXED_POINT)
void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
int32x4_t a = vld1q_s32(sum);
/* Load y[0...3] */
/* This requires len>0 to always be valid (which we assert in the C code). */
int16x4_t y0 = vld1_s16(y);
y += 4;
for (j = 0; j + 8 <= len; j += 8)
{
/* Load x[0...7] */
int16x8_t xx = vld1q_s16(x);
int16x4_t x0 = vget_low_s16(xx);
int16x4_t x4 = vget_high_s16(xx);
/* Load y[4...11] */
int16x8_t yy = vld1q_s16(y);
int16x4_t y4 = vget_low_s16(yy);
int16x4_t y8 = vget_high_s16(yy);
int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
int16x4_t y1 = vext_s16(y0, y4, 1);
int16x4_t y5 = vext_s16(y4, y8, 1);
int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
int16x4_t y2 = vext_s16(y0, y4, 2);
int16x4_t y6 = vext_s16(y4, y8, 2);
int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
int16x4_t y3 = vext_s16(y0, y4, 3);
int16x4_t y7 = vext_s16(y4, y8, 3);
int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
y0 = y8;
a = a7;
x += 8;
y += 8;
}
for (; j < len; j++)
{
int16x4_t x0 = vld1_dup_s16(x); /* load next x */
int32x4_t a0 = vmlal_s16(a, y0, x0);
int16x4_t y4 = vld1_dup_s16(y); /* load next y */
y0 = vext_s16(y0, y4, 1);
a = a0;
x++;
y++;
}
vst1q_s32(sum, a);
}
#else
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------
* Computes 4 correlation values and stores them in sum[4]
*/
static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
float32_t sum[4], int len) {
float32x4_t YY[3];
float32x4_t YEXT[3];
float32x4_t XX[2];
float32x2_t XX_2;
float32x4_t SUMM;
const float32_t *xi = x;
const float32_t *yi = y;
celt_assert(len>0);
YY[0] = vld1q_f32(yi);
SUMM = vdupq_n_f32(0);
/* Consume 8 elements in x vector and 12 elements in y
* vector. However, the 12'th element never really gets
* touched in this loop. So, if len == 8, then we only
* must access y[0] to y[10]. y[11] must not be accessed
* hence make sure len > 8 and not len >= 8
*/
while (len > 8) {
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
YEXT[0] = vextq_f32(YY[0], YY[1], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
YEXT[1] = vextq_f32(YY[0], YY[1], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
YEXT[2] = vextq_f32(YY[0], YY[1], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0);
YEXT[0] = vextq_f32(YY[1], YY[2], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1);
YEXT[1] = vextq_f32(YY[1], YY[2], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0);
YEXT[2] = vextq_f32(YY[1], YY[2], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1);
YY[0] = YY[2];
len -= 8;
}
/* Consume 4 elements in x vector and 8 elements in y
* vector. However, the 8'th element in y never really gets
* touched in this loop. So, if len == 4, then we only
* must access y[0] to y[6]. y[7] must not be accessed
* hence make sure len>4 and not len>=4
*/
if (len > 4) {
yi += 4;
YY[1] = vld1q_f32(yi);
XX[0] = vld1q_f32(xi);
xi += 4;
SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
YEXT[0] = vextq_f32(YY[0], YY[1], 1);
SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
YEXT[1] = vextq_f32(YY[0], YY[1], 2);
SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
YEXT[2] = vextq_f32(YY[0], YY[1], 3);
SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
YY[0] = YY[1];
len -= 4;
}
while (--len > 0) {
XX_2 = vld1_dup_f32(xi++);
SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
YY[0]= vld1q_f32(++yi);
}
XX_2 = vld1_dup_f32(xi);
SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
vst1q_f32(sum, SUMM);
}
/*
* Function: xcorr_kernel_neon_float_process1
* ---------------------------------
* Computes single correlation values and stores in *sum
*/
static void xcorr_kernel_neon_float_process1(const float32_t *x,
const float32_t *y, float32_t *sum, int len) {
float32x4_t XX[4];
float32x4_t YY[4];
float32x2_t XX_2;
float32x2_t YY_2;
float32x4_t SUMM;
float32x2_t SUMM_2[2];
const float32_t *xi = x;
const float32_t *yi = y;
SUMM = vdupq_n_f32(0);
/* Work on 16 values per iteration */
while (len >= 16) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
XX[2] = vld1q_f32(xi);
xi += 4;
XX[3] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
yi += 4;
YY[3] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
SUMM = vmlaq_f32(SUMM, YY[2], XX[2]);
SUMM = vmlaq_f32(SUMM, YY[3], XX[3]);
len -= 16;
}
/* Work on 8 values */
if (len >= 8) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
len -= 8;
}
/* Work on 4 values */
if (len >= 4) {
XX[0] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
len -= 4;
}
/* Start accumulating results */
SUMM_2[0] = vget_low_f32(SUMM);
if (len >= 2) {
/* While at it, consume 2 more values if available */
XX_2 = vld1_f32(xi);
xi += 2;
YY_2 = vld1_f32(yi);
yi += 2;
SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2);
len -= 2;
}
SUMM_2[1] = vget_high_f32(SUMM);
SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]);
SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]);
/* Ok, now we have result accumulated in SUMM_2[0].0 */
if (len > 0) {
/* Case when you have one value left */
XX_2 = vld1_dup_f32(xi);
YY_2 = vld1_dup_f32(yi);
SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2);
}
vst1_lane_f32(sum, SUMM_2[0], 0);
}
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch) {
int i;
celt_assert(max_pitch > 0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i = 0; i < (max_pitch-3); i += 4) {
xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
(float32_t *)xcorr+i, len);
}
/* In case max_pitch isn't multiple of 4
* compute single correlation value per iteration
*/
for (; i < max_pitch; i++) {
xcorr_kernel_neon_float_process1((const float32_t *)_x,
(const float32_t *)_y+i, (float32_t *)xcorr+i, len);
}
}
#endif

View File

@ -0,0 +1,551 @@
.syntax unified
@ Copyright (c) 2007-2008 CSIRO
@ Copyright (c) 2007-2009 Xiph.Org Foundation
@ Copyright (c) 2013 Parrot
@ Written by Aurélien Zanelli
@
@ Redistribution and use in source and binary forms, with or without
@ modification, are permitted provided that the following conditions
@ are met:
@
@ - Redistributions of source code must retain the above copyright
@ notice, this list of conditions and the following disclaimer.
@
@ - Redistributions in binary form must reproduce the above copyright
@ notice, this list of conditions and the following disclaimer in the
@ documentation and/or other materials provided with the distribution.
@
@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.text; .p2align 2; .arch armv7-a
.fpu neon
.object_arch armv4t
.include "celt/arm/armopts-gnu.S"
.if OPUS_ARM_MAY_HAVE_EDSP
.global celt_pitch_xcorr_edsp
.endif
.if OPUS_ARM_MAY_HAVE_NEON
.global celt_pitch_xcorr_neon
.endif
.if OPUS_ARM_MAY_HAVE_NEON
@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
; xcorr_kernel_neon: @ PROC
xcorr_kernel_neon_start:
@ input:
@ r3 = int len
@ r4 = opus_val16 *x
@ r5 = opus_val16 *y
@ q0 = opus_val32 sum[4]
@ output:
@ q0 = opus_val32 sum[4]
@ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
@ internal usage:
@ r12 = int j
@ d3 = y_3|y_2|y_1|y_0
@ q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
@ q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
@ q8 = scratch
@
@ Load y[0...3]
@ This requires len>0 to always be valid (which we assert in the C code).
VLD1.16 {d5}, [r5]!
SUBS r12, r3, #8
BLE xcorr_kernel_neon_process4
@ Process 8 samples at a time.
@ This loop loads one y value more than we actually need. Therefore we have to
@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
@ reading past the end of the array.
xcorr_kernel_neon_process8:
@ This loop has 19 total instructions (10 cycles to issue, minimum), with
@ - 2 cycles of ARM insrtuctions,
@ - 10 cycles of load/store/byte permute instructions, and
@ - 9 cycles of data processing instructions.
@ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
@ latter two categories, meaning the whole loop should run in 10 cycles per
@ iteration, barring cache misses.
@
@ Load x[0...7]
VLD1.16 {d6, d7}, [r4]!
@ Unlike VMOV, VAND is a data processsing instruction (and doesn't get
@ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
VAND d3, d5, d5
SUBS r12, r12, #8
@ Load y[4...11]
VLD1.16 {d4, d5}, [r5]!
VMLAL.S16 q0, d3, d6[0]
VEXT.16 d16, d3, d4, #1
VMLAL.S16 q0, d4, d7[0]
VEXT.16 d17, d4, d5, #1
VMLAL.S16 q0, d16, d6[1]
VEXT.16 d16, d3, d4, #2
VMLAL.S16 q0, d17, d7[1]
VEXT.16 d17, d4, d5, #2
VMLAL.S16 q0, d16, d6[2]
VEXT.16 d16, d3, d4, #3
VMLAL.S16 q0, d17, d7[2]
VEXT.16 d17, d4, d5, #3
VMLAL.S16 q0, d16, d6[3]
VMLAL.S16 q0, d17, d7[3]
BGT xcorr_kernel_neon_process8
@ Process 4 samples here if we have > 4 left (still reading one extra y value).
xcorr_kernel_neon_process4:
ADDS r12, r12, #4
BLE xcorr_kernel_neon_process2
@ Load x[0...3]
VLD1.16 d6, [r4]!
@ Use VAND since it's a data processing instruction again.
VAND d4, d5, d5
SUB r12, r12, #4
@ Load y[4...7]
VLD1.16 d5, [r5]!
VMLAL.S16 q0, d4, d6[0]
VEXT.16 d16, d4, d5, #1
VMLAL.S16 q0, d16, d6[1]
VEXT.16 d16, d4, d5, #2
VMLAL.S16 q0, d16, d6[2]
VEXT.16 d16, d4, d5, #3
VMLAL.S16 q0, d16, d6[3]
@ Process 2 samples here if we have > 2 left (still reading one extra y value).
xcorr_kernel_neon_process2:
ADDS r12, r12, #2
BLE xcorr_kernel_neon_process1
@ Load x[0...1]
VLD2.16 {d6[],d7[]}, [r4]!
@ Use VAND since it's a data processing instruction again.
VAND d4, d5, d5
SUB r12, r12, #2
@ Load y[4...5]
VLD1.32 {d5[]}, [r5]!
VMLAL.S16 q0, d4, d6
VEXT.16 d16, d4, d5, #1
@ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
@ instead of VEXT, since it's a data-processing instruction.
VSRI.64 d5, d4, #32
VMLAL.S16 q0, d16, d7
@ Process 1 sample using the extra y value we loaded above.
xcorr_kernel_neon_process1:
@ Load next *x
VLD1.16 {d6[]}, [r4]!
ADDS r12, r12, #1
@ y[0...3] are left in d5 from prior iteration(s) (if any)
VMLAL.S16 q0, d5, d6
MOVLE pc, lr
@ Now process 1 last sample, not reading ahead.
@ Load last *y
VLD1.16 {d4[]}, [r5]!
VSRI.64 d4, d5, #16
@ Load last *x
VLD1.16 {d6[]}, [r4]!
VMLAL.S16 q0, d4, d6
MOV pc, lr
.size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP
@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
@ opus_val32 *xcorr, int len, int max_pitch)
; celt_pitch_xcorr_neon: @ PROC
@ input:
@ r0 = opus_val16 *_x
@ r1 = opus_val16 *_y
@ r2 = opus_val32 *xcorr
@ r3 = int len
@ output:
@ r0 = int maxcorr
@ internal usage:
@ r4 = opus_val16 *x (for xcorr_kernel_neon())
@ r5 = opus_val16 *y (for xcorr_kernel_neon())
@ r6 = int max_pitch
@ r12 = int j
@ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
STMFD sp!, {r4-r6, lr}
LDR r6, [sp, #16]
VMOV.S32 q15, #1
@ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
SUBS r6, r6, #4
BLT celt_pitch_xcorr_neon_process4_done
celt_pitch_xcorr_neon_process4:
@ xcorr_kernel_neon parameters:
@ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
MOV r4, r0
MOV r5, r1
VEOR q0, q0, q0
@ xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
@ So we don't save/restore any other registers.
BL xcorr_kernel_neon_start
SUBS r6, r6, #4
VST1.32 {q0}, [r2]!
@ _y += 4
ADD r1, r1, #8
VMAX.S32 q15, q15, q0
@ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
BGE celt_pitch_xcorr_neon_process4
@ We have less than 4 sums left to compute.
celt_pitch_xcorr_neon_process4_done:
ADDS r6, r6, #4
@ Reduce maxcorr to a single value
VMAX.S32 d30, d30, d31
VPMAX.S32 d30, d30, d30
@ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
BLE celt_pitch_xcorr_neon_done
@ Now compute each remaining sum one at a time.
celt_pitch_xcorr_neon_process_remaining:
MOV r4, r0
MOV r5, r1
VMOV.I32 q0, #0
SUBS r12, r3, #8
BLT celt_pitch_xcorr_neon_process_remaining4
@ Sum terms 8 at a time.
celt_pitch_xcorr_neon_process_remaining_loop8:
@ Load x[0...7]
VLD1.16 {q1}, [r4]!
@ Load y[0...7]
VLD1.16 {q2}, [r5]!
SUBS r12, r12, #8
VMLAL.S16 q0, d4, d2
VMLAL.S16 q0, d5, d3
BGE celt_pitch_xcorr_neon_process_remaining_loop8
@ Sum terms 4 at a time.
celt_pitch_xcorr_neon_process_remaining4:
ADDS r12, r12, #4
BLT celt_pitch_xcorr_neon_process_remaining4_done
@ Load x[0...3]
VLD1.16 {d2}, [r4]!
@ Load y[0...3]
VLD1.16 {d3}, [r5]!
SUB r12, r12, #4
VMLAL.S16 q0, d3, d2
celt_pitch_xcorr_neon_process_remaining4_done:
@ Reduce the sum to a single value.
VADD.S32 d0, d0, d1
VPADDL.S32 d0, d0
ADDS r12, r12, #4
BLE celt_pitch_xcorr_neon_process_remaining_loop_done
@ Sum terms 1 at a time.
celt_pitch_xcorr_neon_process_remaining_loop1:
VLD1.16 {d2[]}, [r4]!
VLD1.16 {d3[]}, [r5]!
SUBS r12, r12, #1
VMLAL.S16 q0, d2, d3
BGT celt_pitch_xcorr_neon_process_remaining_loop1
celt_pitch_xcorr_neon_process_remaining_loop_done:
VST1.32 {d0[0]}, [r2]!
VMAX.S32 d30, d30, d0
SUBS r6, r6, #1
@ _y++
ADD r1, r1, #2
@ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
BGT celt_pitch_xcorr_neon_process_remaining
celt_pitch_xcorr_neon_done:
VMOV.32 r0, d30[0]
LDMFD sp!, {r4-r6, pc}
.size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon @ ENDP
.endif
.if OPUS_ARM_MAY_HAVE_EDSP
@ This will get used on ARMv7 devices without NEON, so it has been optimized
@ to take advantage of dual-issuing where possible.
; xcorr_kernel_edsp: @ PROC
xcorr_kernel_edsp_start:
@ input:
@ r3 = int len
@ r4 = opus_val16 *_x (must be 32-bit aligned)
@ r5 = opus_val16 *_y (must be 32-bit aligned)
@ r6...r9 = opus_val32 sum[4]
@ output:
@ r6...r9 = opus_val32 sum[4]
@ preserved: r0-r5
@ internal usage
@ r2 = int j
@ r12,r14 = opus_val16 x[4]
@ r10,r11 = opus_val16 y[4]
STMFD sp!, {r2,r4,r5,lr}
LDR r10, [r5], #4 @ Load y[0...1]
SUBS r2, r3, #4 @ j = len-4
LDR r11, [r5], #4 @ Load y[2...3]
BLE xcorr_kernel_edsp_process4_done
LDR r12, [r4], #4 @ Load x[0...1]
@ Stall
xcorr_kernel_edsp_process4:
@ The multiplies must issue from pipeline 0, and can't dual-issue with each
@ other. Every other instruction here dual-issues with a multiply, and is
@ thus "free". There should be no stalls in the body of the loop.
SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_0,y_0)
LDR r14, [r4], #4 @ Load x[2...3]
SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x_0,y_1)
SUBS r2, r2, #4 @ j-=4
SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_0,y_2)
SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x_0,y_3)
SMLATT r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_1,y_1)
LDR r10, [r5], #4 @ Load y[4...5]
SMLATB r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],x_1,y_2)
SMLATT r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_1,y_3)
SMLATB r9, r12, r10, r9 @ sum[3] = MAC16_16(sum[3],x_1,y_4)
LDRGT r12, [r4], #4 @ Load x[0...1]
SMLABB r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_2,y_2)
SMLABT r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x_2,y_3)
SMLABB r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_2,y_4)
SMLABT r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x_2,y_5)
SMLATT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_3,y_3)
LDR r11, [r5], #4 @ Load y[6...7]
SMLATB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],x_3,y_4)
SMLATT r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_3,y_5)
SMLATB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],x_3,y_6)
BGT xcorr_kernel_edsp_process4
xcorr_kernel_edsp_process4_done:
ADDS r2, r2, #4
BLE xcorr_kernel_edsp_done
LDRH r12, [r4], #2 @ r12 = *x++
SUBS r2, r2, #1 @ j--
@ Stall
SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_0)
LDRHGT r14, [r4], #2 @ r14 = *x++
SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x,y_1)
SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_2)
SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x,y_3)
BLE xcorr_kernel_edsp_done
SMLABT r6, r14, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_1)
SUBS r2, r2, #1 @ j--
SMLABB r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x,y_2)
LDRH r10, [r5], #2 @ r10 = y_4 = *y++
SMLABT r8, r14, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_3)
LDRHGT r12, [r4], #2 @ r12 = *x++
SMLABB r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x,y_4)
BLE xcorr_kernel_edsp_done
SMLABB r6, r12, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_2)
CMP r2, #1 @ j--
SMLABT r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_3)
LDRH r2, [r5], #2 @ r2 = y_5 = *y++
SMLABB r8, r12, r10, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_4)
LDRHGT r14, [r4] @ r14 = *x
SMLABB r9, r12, r2, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_5)
BLE xcorr_kernel_edsp_done
SMLABT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_3)
LDRH r11, [r5] @ r11 = y_6 = *y
SMLABB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_4)
SMLABB r8, r14, r2, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_5)
SMLABB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_6)
xcorr_kernel_edsp_done:
LDMFD sp!, {r2,r4,r5,pc}
.size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP
; celt_pitch_xcorr_edsp: @ PROC
@ input:
@ r0 = opus_val16 *_x (must be 32-bit aligned)
@ r1 = opus_val16 *_y (only needs to be 16-bit aligned)
@ r2 = opus_val32 *xcorr
@ r3 = int len
@ output:
@ r0 = maxcorr
@ internal usage
@ r4 = opus_val16 *x
@ r5 = opus_val16 *y
@ r6 = opus_val32 sum0
@ r7 = opus_val32 sum1
@ r8 = opus_val32 sum2
@ r9 = opus_val32 sum3
@ r1 = int max_pitch
@ r12 = int j
STMFD sp!, {r4-r11, lr}
MOV r5, r1
LDR r1, [sp, #36]
MOV r4, r0
TST r5, #3
@ maxcorr = 1
MOV r0, #1
BEQ celt_pitch_xcorr_edsp_process1u_done
@ Compute one sum at the start to make y 32-bit aligned.
SUBS r12, r3, #4
@ r14 = sum = 0
MOV r14, #0
LDRH r8, [r5], #2
BLE celt_pitch_xcorr_edsp_process1u_loop4_done
LDR r6, [r4], #4
MOV r8, r8, LSL #16
celt_pitch_xcorr_edsp_process1u_loop4:
LDR r9, [r5], #4
SMLABT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
LDR r7, [r4], #4
SMLATB r14, r6, r9, r14 @ sum = MAC16_16(sum, x_1, y_1)
LDR r8, [r5], #4
SMLABT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2)
SUBS r12, r12, #4 @ j-=4
SMLATB r14, r7, r8, r14 @ sum = MAC16_16(sum, x_3, y_3)
LDRGT r6, [r4], #4
BGT celt_pitch_xcorr_edsp_process1u_loop4
MOV r8, r8, LSR #16
celt_pitch_xcorr_edsp_process1u_loop4_done:
ADDS r12, r12, #4
celt_pitch_xcorr_edsp_process1u_loop1:
LDRHGE r6, [r4], #2
@ Stall
SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y)
SUBSGE r12, r12, #1
LDRHGT r8, [r5], #2
BGT celt_pitch_xcorr_edsp_process1u_loop1
@ Restore _x
SUB r4, r4, r3, LSL #1
@ Restore and advance _y
SUB r5, r5, r3, LSL #1
@ maxcorr = max(maxcorr, sum)
CMP r0, r14
ADD r5, r5, #2
MOVLT r0, r14
SUBS r1, r1, #1
@ xcorr[i] = sum
STR r14, [r2], #4
BLE celt_pitch_xcorr_edsp_done
celt_pitch_xcorr_edsp_process1u_done:
@ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
SUBS r1, r1, #4
BLT celt_pitch_xcorr_edsp_process2
celt_pitch_xcorr_edsp_process4:
@ xcorr_kernel_edsp parameters:
@ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
MOV r6, #0
MOV r7, #0
MOV r8, #0
MOV r9, #0
BL xcorr_kernel_edsp_start @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
@ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
CMP r0, r6
@ _y+=4
ADD r5, r5, #8
MOVLT r0, r6
CMP r0, r7
MOVLT r0, r7
CMP r0, r8
MOVLT r0, r8
CMP r0, r9
MOVLT r0, r9
STMIA r2!, {r6-r9}
SUBS r1, r1, #4
BGE celt_pitch_xcorr_edsp_process4
celt_pitch_xcorr_edsp_process2:
ADDS r1, r1, #2
BLT celt_pitch_xcorr_edsp_process1a
SUBS r12, r3, #4
@ {r10, r11} = {sum0, sum1} = {0, 0}
MOV r10, #0
MOV r11, #0
LDR r8, [r5], #4
BLE celt_pitch_xcorr_edsp_process2_loop_done
LDR r6, [r4], #4
LDR r9, [r5], #4
celt_pitch_xcorr_edsp_process2_loop4:
SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
LDR r7, [r4], #4
SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
SUBS r12, r12, #4 @ j-=4
SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1)
LDR r8, [r5], #4
SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2)
LDRGT r6, [r4], #4
SMLABB r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_2, y_2)
SMLABT r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_2, y_3)
SMLATT r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_3, y_3)
LDRGT r9, [r5], #4
SMLATB r11, r7, r8, r11 @ sum1 = MAC16_16(sum1, x_3, y_4)
BGT celt_pitch_xcorr_edsp_process2_loop4
celt_pitch_xcorr_edsp_process2_loop_done:
ADDS r12, r12, #2
BLE celt_pitch_xcorr_edsp_process2_1
LDR r6, [r4], #4
@ Stall
SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
LDR r9, [r5], #4
SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
SUB r12, r12, #2
SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1)
MOV r8, r9
SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2)
celt_pitch_xcorr_edsp_process2_1:
LDRH r6, [r4], #2
ADDS r12, r12, #1
@ Stall
SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0)
LDRHGT r7, [r4], #2
SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1)
BLE celt_pitch_xcorr_edsp_process2_done
LDRH r9, [r5], #2
SMLABT r10, r7, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_1)
SMLABB r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_0, y_2)
celt_pitch_xcorr_edsp_process2_done:
@ Restore _x
SUB r4, r4, r3, LSL #1
@ Restore and advance _y
SUB r5, r5, r3, LSL #1
@ maxcorr = max(maxcorr, sum0)
CMP r0, r10
ADD r5, r5, #2
MOVLT r0, r10
SUB r1, r1, #2
@ maxcorr = max(maxcorr, sum1)
CMP r0, r11
@ xcorr[i] = sum
STR r10, [r2], #4
MOVLT r0, r11
STR r11, [r2], #4
celt_pitch_xcorr_edsp_process1a:
ADDS r1, r1, #1
BLT celt_pitch_xcorr_edsp_done
SUBS r12, r3, #4
@ r14 = sum = 0
MOV r14, #0
BLT celt_pitch_xcorr_edsp_process1a_loop_done
LDR r6, [r4], #4
LDR r8, [r5], #4
LDR r7, [r4], #4
LDR r9, [r5], #4
celt_pitch_xcorr_edsp_process1a_loop4:
SMLABB r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
SUBS r12, r12, #4 @ j-=4
SMLATT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1)
LDRGE r6, [r4], #4
SMLABB r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2)
LDRGE r8, [r5], #4
SMLATT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_3, y_3)
LDRGE r7, [r4], #4
LDRGE r9, [r5], #4
BGE celt_pitch_xcorr_edsp_process1a_loop4
celt_pitch_xcorr_edsp_process1a_loop_done:
ADDS r12, r12, #2
LDRGE r6, [r4], #4
LDRGE r8, [r5], #4
@ Stall
SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0)
SUBGE r12, r12, #2
SMLATTGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1)
ADDS r12, r12, #1
LDRHGE r6, [r4], #2
LDRHGE r8, [r5], #2
@ Stall
SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y)
@ maxcorr = max(maxcorr, sum)
CMP r0, r14
@ xcorr[i] = sum
STR r14, [r2], #4
MOVLT r0, r14
celt_pitch_xcorr_edsp_done:
LDMFD sp!, {r4-r11, pc}
.size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp @ ENDP
.endif
@ END:
.section .note.GNU-stack,"",%progbits

View File

@ -42,6 +42,7 @@ IF OPUS_ARM_MAY_HAVE_NEON
; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
xcorr_kernel_neon PROC
xcorr_kernel_neon_start
; input:
; r3 = int len
; r4 = opus_val16 *x
@ -181,7 +182,7 @@ celt_pitch_xcorr_neon_process4
VEOR q0, q0, q0
; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
; So we don't save/restore any other registers.
BL xcorr_kernel_neon
BL xcorr_kernel_neon_start
SUBS r6, r6, #4
VST1.32 {q0}, [r2]!
; _y += 4
@ -257,6 +258,7 @@ IF OPUS_ARM_MAY_HAVE_EDSP
; This will get used on ARMv7 devices without NEON, so it has been optimized
; to take advantage of dual-issuing where possible.
xcorr_kernel_edsp PROC
xcorr_kernel_edsp_start
; input:
; r3 = int len
; r4 = opus_val16 *_x (must be 32-bit aligned)
@ -309,7 +311,7 @@ xcorr_kernel_edsp_process4_done
SUBS r2, r2, #1 ; j--
; Stall
SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0)
LDRGTH r14, [r4], #2 ; r14 = *x++
LDRHGT r14, [r4], #2 ; r14 = *x++
SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1)
SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2)
SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3)
@ -319,7 +321,7 @@ xcorr_kernel_edsp_process4_done
SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2)
LDRH r10, [r5], #2 ; r10 = y_4 = *y++
SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3)
LDRGTH r12, [r4], #2 ; r12 = *x++
LDRHGT r12, [r4], #2 ; r12 = *x++
SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4)
BLE xcorr_kernel_edsp_done
SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2)
@ -327,7 +329,7 @@ xcorr_kernel_edsp_process4_done
SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3)
LDRH r2, [r5], #2 ; r2 = y_5 = *y++
SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4)
LDRGTH r14, [r4] ; r14 = *x
LDRHGT r14, [r4] ; r14 = *x
SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5)
BLE xcorr_kernel_edsp_done
SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3)
@ -387,11 +389,11 @@ celt_pitch_xcorr_edsp_process1u_loop4
celt_pitch_xcorr_edsp_process1u_loop4_done
ADDS r12, r12, #4
celt_pitch_xcorr_edsp_process1u_loop1
LDRGEH r6, [r4], #2
LDRHGE r6, [r4], #2
; Stall
SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
SUBGES r12, r12, #1
LDRGTH r8, [r5], #2
SUBSGE r12, r12, #1
LDRHGT r8, [r5], #2
BGT celt_pitch_xcorr_edsp_process1u_loop1
; Restore _x
SUB r4, r4, r3, LSL #1
@ -416,7 +418,7 @@ celt_pitch_xcorr_edsp_process4
MOV r7, #0
MOV r8, #0
MOV r9, #0
BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
CMP r0, r6
; _y+=4
@ -474,7 +476,7 @@ celt_pitch_xcorr_edsp_process2_1
ADDS r12, r12, #1
; Stall
SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
LDRGTH r7, [r4], #2
LDRHGT r7, [r4], #2
SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
BLE celt_pitch_xcorr_edsp_process2_done
LDRH r9, [r5], #2
@ -527,8 +529,8 @@ celt_pitch_xcorr_edsp_process1a_loop_done
SUBGE r12, r12, #2
SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1)
ADDS r12, r12, #1
LDRGEH r6, [r4], #2
LDRGEH r8, [r5], #2
LDRHGE r6, [r4], #2
LDRHGE r8, [r5], #2
; Stall
SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
; maxcorr = max(maxcorr, sum)

View File

@ -0,0 +1,72 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file fft_arm.h
@brief ARM Neon Intrinsic optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(FFT_ARM_H)
#define FFT_ARM_H
#include "config.h"
#include "kiss_fft.h"
#if defined(HAVE_ARM_NE10)
int opus_fft_alloc_arm_neon(kiss_fft_state *st);
void opus_fft_free_arm_neon(kiss_fft_state *st);
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_FFT (1)
#define opus_fft_alloc_arch(_st, arch) \
((void)(arch), opus_fft_alloc_arm_neon(_st))
#define opus_fft_free_arch(_st, arch) \
((void)(arch), opus_fft_free_arm_neon(_st))
#define opus_fft(_st, _fin, _fout, arch) \
((void)(arch), opus_fft_neon(_st, _fin, _fout))
#define opus_ifft(_st, _fin, _fout, arch) \
((void)(arch), opus_ifft_neon(_st, _fin, _fout))
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif

View File

@ -0,0 +1,35 @@
/* Copyright (C) 2015 Vidyo */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FIXED_ARM64_H
#define FIXED_ARM64_H
#include <arm_neon.h>
#undef SIG2WORD16
#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT)))
#endif

View File

@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
#undef MAC16_32_Q15
#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
Result fits in 32 bits. */
#undef MAC16_32_Q16
#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#undef MULT32_32_Q31

View File

@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
}
#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
Result fits in 32 bits. */
#undef MAC16_32_Q16
static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
opus_val32 b)
{
int res;
__asm__(
"#MAC16_32_Q16\n\t"
"smlawb %0, %1, %2, %3;\n"
: "=r"(res)
: "r"(b), "r"(a), "r"(c)
);
return res;
}
#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
/** 16x16 multiply-add where the result fits in 32 bits */
#undef MAC16_16
static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
}
#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
#ifdef OPUS_ARM_INLINE_MEDIA
#undef SIG2WORD16
static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x)
{
celt_sig res;
__asm__(
"#SIG2WORD16\n\t"
"ssat %0, #16, %1, ASR #12\n\t"
: "=r"(res)
: "r"(x+2048)
);
return EXTRACT16(res);
}
#define SIG2WORD16(x) (SIG2WORD16_armv6(x))
#endif /* OPUS_ARM_INLINE_MEDIA */
#endif

View File

@ -0,0 +1,60 @@
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file arm_mdct.h
@brief ARM Neon Intrinsic optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(MDCT_ARM_H)
#define MDCT_ARM_H
#include "config.h"
#include "mdct.h"
#if defined(HAVE_ARM_NE10)
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_MDCT (1)
#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif

View File

@ -46,12 +46,81 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
# endif
# if !defined(OPUS_HAVE_RTCD)
# if defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
extern opus_val32
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_EDSP) || \
defined(OPUS_ARM_PRESUME_MEDIA) || \
defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
# endif
# endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void xcorr_kernel_neon_fixed(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
# endif
#else /* Start !FIXED_POINT */
/* Float case */
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
#endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
# endif
#endif /* end !FIXED_POINT */
#endif

View File

@ -92,11 +92,11 @@ static int bitexact_log2tan(int isin,int icos)
#ifdef FIXED_POINT
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
{
@ -104,18 +104,23 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
opus_val32 maxval=0;
opus_val32 sum = 0;
j=M*eBands[i]; do {
maxval = MAX32(maxval, X[j+c*N]);
maxval = MAX32(maxval, -X[j+c*N]);
} while (++j<M*eBands[i+1]);
maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
if (maxval > 0)
{
int shift = celt_ilog2(maxval)-10;
j=M*eBands[i]; do {
sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
EXTRACT16(VSHR32(X[j+c*N],shift)));
} while (++j<M*eBands[i+1]);
int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
j=eBands[i]<<LM;
if (shift>0)
{
do {
sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
EXTRACT16(SHR32(X[j+c*N],shift)));
} while (++j<eBands[i+1]<<LM);
} else {
do {
sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
EXTRACT16(SHL32(X[j+c*N],-shift)));
} while (++j<eBands[i+1]<<LM);
}
/* We're adding one here to ensure the normalized band isn't larger than unity norm */
bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
} else {
@ -150,18 +155,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
#else /* FIXED_POINT */
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
{
int j;
opus_val32 sum = 1e-27f;
for (j=M*eBands[i];j<M*eBands[i+1];j++)
sum += X[j+c*N]*X[j+c*N];
opus_val32 sum;
sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
bandE[i+c*m->nbEBands] = celt_sqrt(sum);
/*printf ("%f ", bandE[i+c*m->nbEBands]);*/
}
@ -190,74 +193,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
/* De-normalise the energy to produce the synthesis from the unit-energy bands */
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
int end, int M, int downsample, int silence)
{
int i, c, N;
int i, N;
int bound;
celt_sig * OPUS_RESTRICT f;
const celt_norm * OPUS_RESTRICT x;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
c=0; do {
celt_sig * OPUS_RESTRICT f;
const celt_norm * OPUS_RESTRICT x;
f = freq+c*N;
x = X+c*N+M*eBands[start];
for (i=0;i<M*eBands[start];i++)
*f++ = 0;
for (i=start;i<end;i++)
{
int j, band_end;
opus_val16 g;
opus_val16 lg;
bound = M*eBands[end];
if (downsample!=1)
bound = IMIN(bound, N/downsample);
if (silence)
{
bound = 0;
start = end = 0;
}
f = freq;
x = X+M*eBands[start];
for (i=0;i<M*eBands[start];i++)
*f++ = 0;
for (i=start;i<end;i++)
{
int j, band_end;
opus_val16 g;
opus_val16 lg;
#ifdef FIXED_POINT
int shift;
int shift;
#endif
j=M*eBands[i];
band_end = M*eBands[i+1];
lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
j=M*eBands[i];
band_end = M*eBands[i+1];
lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
#ifndef FIXED_POINT
g = celt_exp2(lg);
g = celt_exp2(lg);
#else
/* Handle the integer part of the log energy */
shift = 16-(lg>>DB_SHIFT);
if (shift>31)
{
shift=0;
g=0;
} else {
/* Handle the fractional part. */
g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
}
/* Handle extreme gains with negative shift. */
if (shift<0)
{
/* For shift < -2 we'd be likely to overflow, so we're capping
/* Handle the integer part of the log energy */
shift = 16-(lg>>DB_SHIFT);
if (shift>31)
{
shift=0;
g=0;
} else {
/* Handle the fractional part. */
g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
}
/* Handle extreme gains with negative shift. */
if (shift<0)
{
/* For shift < -2 we'd be likely to overflow, so we're capping
the gain here. This shouldn't happen unless the bitstream is
already corrupted. */
if (shift < -2)
{
g = 32767;
shift = -2;
}
do {
*f++ = SHL32(MULT16_16(*x++, g), -shift);
} while (++j<band_end);
} else
if (shift < -2)
{
g = 32767;
shift = -2;
}
do {
*f++ = SHL32(MULT16_16(*x++, g), -shift);
} while (++j<band_end);
} else
#endif
/* Be careful of the fixed-point "else" just above when changing this code */
do {
*f++ = SHR32(MULT16_16(*x++, g), shift);
} while (++j<band_end);
}
celt_assert(start <= end);
for (i=M*eBands[end];i<N;i++)
*f++ = 0;
} while (++c<C);
}
celt_assert(start <= end);
OPUS_CLEAR(&freq[bound], N-bound);
}
/* This prevents energy collapse for transients with multiple short MDCTs */
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
{
int c, i, j, k;
for (i=start;i<end;i++)
@ -272,7 +281,8 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
N0 = m->eBands[i+1]-m->eBands[i];
/* depth in 1/8 bits */
depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
celt_assert(pulses[i]>=0);
depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
#ifdef FIXED_POINT
thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
@ -345,12 +355,12 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
}
/* We just added some energy, so we need to renormalise */
if (renormalize)
renormalise_vector(X, N0<<LM, Q15ONE);
renormalise_vector(X, N0<<LM, Q15ONE, arch);
} while (++c<C);
}
}
static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
{
int i = bandID;
int j;
@ -370,25 +380,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons
celt_norm r, l;
l = X[j];
r = Y[j];
X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
/* Side is not encoded, no need to calculate */
}
}
static void stereo_split(celt_norm *X, celt_norm *Y, int N)
static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
{
int j;
for (j=0;j<N;j++)
{
celt_norm r, l;
l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
X[j] = l+r;
Y[j] = r-l;
opus_val32 r, l;
l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
}
}
static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
{
int j;
opus_val32 xp=0, side=0;
@ -400,17 +410,16 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
opus_val32 t, lgain, rgain;
/* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
dual_inner_prod(Y, X, Y, N, &xp, &side);
dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
/* Compensating for the mid normalization */
xp = MULT16_32_Q15(mid, xp);
/* mid and side are in Q15, not Q14 like X and Y */
mid2 = SHR32(mid, 1);
mid2 = SHR16(mid, 1);
El = MULT16_16(mid2, mid2) + side - 2*xp;
Er = MULT16_16(mid2, mid2) + side + 2*xp;
if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
{
for (j=0;j<N;j++)
Y[j] = X[j];
OPUS_COPY(Y, X, N);
return;
}
@ -434,7 +443,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
{
celt_norm r, l;
/* Apply mid scaling (side is already scaled) */
l = MULT16_16_Q15(mid, X[j]);
l = MULT16_16_P15(mid, X[j]);
r = Y[j];
X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
@ -442,7 +451,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
}
/* Decide whether we should spread the pulses in the current frame */
int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M)
{
@ -463,7 +472,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
{
int j, N, tmp=0;
int tcount[3] = {0,0,0};
celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
N = M*(eBands[i+1]-eBands[i]);
if (N<=8)
continue;
@ -483,7 +492,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
/* Only include four last bands (8 kHz and up) */
if (i>m->nbEBands-4)
hf_sum += 32*(tcount[1]+tcount[0])/N;
hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
sum += tmp*256;
nbBands++;
@ -493,7 +502,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
if (update_hf)
{
if (hf_sum)
hf_sum /= C*(4-m->nbEBands+end);
hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
*hf_average = (*hf_average+hf_sum)>>1;
hf_sum = *hf_average;
if (*tapset_decision==2)
@ -509,7 +518,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
}
/*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
celt_assert(nbBands>0); /* end has to be non-zero */
sum /= nbBands;
celt_assert(sum>=0);
sum = celt_udiv(sum, nbBands);
/* Recursive averaging */
sum = (sum+*average)>>1;
*average = sum;
@ -567,8 +577,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard
for (j=0;j<N0;j++)
tmp[i*N0+j] = X[j*stride+i];
}
for (j=0;j<N;j++)
X[j] = tmp[j];
OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@ -591,8 +600,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
for (j=0;j<N0;j++)
tmp[j*stride+i] = X[i*N0+j];
}
for (j=0;j<N;j++)
X[j] = tmp[j];
OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@ -603,11 +611,11 @@ void haar1(celt_norm *X, int N0, int stride)
for (i=0;i<stride;i++)
for (j=0;j<N0;j++)
{
celt_norm tmp1, tmp2;
tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
X[stride*2*j+i] = tmp1 + tmp2;
X[stride*(2*j+1)+i] = tmp1 - tmp2;
opus_val32 tmp1, tmp2;
tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
}
}
@ -622,7 +630,8 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
/* The upper limit ensures that in a stereo split with itheta==16384, we'll
always have enough bits left over to code at least one pulse in the
side; otherwise it would collapse, since it doesn't get folded. */
qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
qb = celt_sudiv(b+N2*offset, N2);
qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
qb = IMIN(8<<BITRES, qb);
@ -647,6 +656,7 @@ struct band_ctx {
opus_int32 remaining_bits;
const celt_ener *bandE;
opus_uint32 seed;
int arch;
};
struct split_ctx {
@ -698,13 +708,13 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
side and mid. With just that parameter, we can re-scale both
mid and side because we know that 1) they have unit norm and
2) they are orthogonal. */
itheta = stereo_itheta(X, Y, stereo, N);
itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
}
tell = ec_tell_frac(ec);
if (qn!=1)
{
if (encode)
itheta = (itheta*qn+8192)>>14;
itheta = (itheta*(opus_int32)qn+8192)>>14;
/* Entropy coding of the angle. We use a uniform pdf for the
time split, a step for stereo, and a triangular one for the rest. */
@ -769,7 +779,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
ec_dec_update(ec, fl, fl+fs, ft);
}
}
itheta = (opus_int32)itheta*16384/qn;
celt_assert(itheta>=0);
itheta = celt_udiv((opus_int32)itheta*16384, qn);
if (encode && stereo)
{
if (itheta==0)
@ -1021,8 +1032,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
fill &= cm_mask;
if (!fill)
{
for (j=0;j<N;j++)
X[j] = 0;
OPUS_CLEAR(X, N);
} else {
if (lowband == NULL)
{
@ -1046,7 +1056,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
}
cm = fill;
}
renormalise_vector(X, N, gain);
renormalise_vector(X, N, gain, ctx->arch);
}
}
}
@ -1084,7 +1094,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
longBlocks = B0==1;
N_B /= B;
N_B = celt_udiv(N_B, B);
/* Special case for one sample */
if (N==1)
@ -1098,9 +1108,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
{
int j;
for (j=0;j<N;j++)
lowband_scratch[j] = lowband[j];
OPUS_COPY(lowband_scratch, lowband, N);
lowband = lowband_scratch;
}
@ -1340,7 +1348,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
if (resynth)
{
if (N!=2)
stereo_merge(X, Y, mid, N);
stereo_merge(X, Y, mid, N, ctx->arch);
if (inv)
{
int j;
@ -1353,9 +1361,11 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed)
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
opus_uint32 *seed, int arch)
{
int i;
opus_int32 remaining_bits;
@ -1397,6 +1407,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ctx.m = m;
ctx.seed = *seed;
ctx.spread = spread;
ctx.arch = arch;
for (i=start;i<end;i++)
{
opus_int32 tell;
@ -1428,7 +1439,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ctx.remaining_bits = remaining_bits;
if (i <= codedBands-1)
{
curr_balance = balance / IMIN(3, codedBands-i);
curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
} else {
b = 0;

View File

@ -41,7 +41,7 @@
* @param X Spectrum
* @param bandE Square root of the energy for each band (returned)
*/
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
@ -59,14 +59,15 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
* @param bandE Square root of the energy for each band
*/
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
int end, int M, int downsample, int silence);
#define SPREAD_NONE (0)
#define SPREAD_LIGHT (1)
#define SPREAD_NORMAL (2)
#define SPREAD_AGGRESSIVE (3)
int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M);
@ -97,15 +98,20 @@ void haar1(celt_norm *X, int N0, int stride);
* @param LM log2() of the number of 2.5 subframes in the frame
* @param codedBands Last band to receive bits + 1
* @param seed Random generator seed
* @param arch Run-time architecture (see opus_select_arch())
*/
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
int arch);
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
opus_val16 *prev2logE, int *pulses, opus_uint32 seed);
void anti_collapse(const CELTMode *m, celt_norm *X_,
unsigned char *collapse_masks, int LM, int C, int size, int start,
int end, const opus_val16 *logE, const opus_val16 *prev1logE,
const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed,
int arch);
opus_uint32 celt_lcg_rand(opus_uint32 seed);

View File

@ -54,6 +54,10 @@
#define PACKAGE_VERSION "unknown"
#endif
#if defined(MIPSr1_ASM)
#include "mips/celt_mipsr1.h"
#endif
int resampling_factor(opus_int32 rate)
{
@ -85,8 +89,71 @@ int resampling_factor(opus_int32 rate)
return ret;
}
#ifndef OVERRIDE_COMB_FILTER_CONST
static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
/* This version should be faster on ARM */
#ifdef OPUS_ARM_ASM
#ifndef NON_STATIC_COMB_FILTER_CONST_C
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
opus_val32 x0, x1, x2, x3, x4;
int i;
x4 = SHL32(x[-T-2], 1);
x3 = SHL32(x[-T-1], 1);
x2 = SHL32(x[-T], 1);
x1 = SHL32(x[-T+1], 1);
for (i=0;i<N-4;i+=5)
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
y[i] = t;
x4=SHL32(x[i-T+3],1);
t = MAC16_32_Q16(x[i+1], g10, x1);
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
y[i+1] = t;
x3=SHL32(x[i-T+4],1);
t = MAC16_32_Q16(x[i+2], g10, x0);
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
y[i+2] = t;
x2=SHL32(x[i-T+5],1);
t = MAC16_32_Q16(x[i+3], g10, x4);
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
y[i+3] = t;
x1=SHL32(x[i-T+6],1);
t = MAC16_32_Q16(x[i+4], g10, x3);
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
y[i+4] = t;
}
#ifdef CUSTOM_MODES
for (;i<N;i++)
{
opus_val32 t;
x0=SHL32(x[i-T+2],1);
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
y[i] = t;
x4=x3;
x3=x2;
x2=x1;
x1=x0;
}
#endif
}
#else
#ifndef NON_STATIC_COMB_FILTER_CONST_C
static
#endif
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
opus_val32 x0, x1, x2, x3, x4;
@ -110,10 +177,12 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
}
#endif
#endif
#ifndef OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap)
const opus_val16 *window, int overlap, int arch)
{
int i;
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
@ -131,16 +200,19 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
OPUS_MOVE(y, x, N);
return;
}
g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
x1 = x[-T1+1];
x2 = x[-T1 ];
x3 = x[-T1-1];
x4 = x[-T1-2];
/* If the filter didn't change, we don't need the overlap */
if (g0==g1 && T0==T1 && tapset0==tapset1)
overlap=0;
for (i=0;i<overlap;i++)
{
opus_val16 f;
@ -168,8 +240,9 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
}
/* Compute the part with the constant filter. */
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch);
}
#endif /* OVERRIDE_comb_filter */
const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1},
@ -213,6 +286,9 @@ const char *opus_strerror(int error)
const char *opus_get_version_string(void)
{
return "libopus " PACKAGE_VERSION
/* Applications may rely on the presence of this substring in the version
string to determine if they have a fixed-point or floating-point build
at runtime. */
#ifdef FIXED_POINT
"-fixed"
#endif

View File

@ -134,7 +134,8 @@ int celt_decoder_get_size(int channels);
int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
#define celt_encoder_ctl opus_custom_encoder_ctl
#define celt_decoder_ctl opus_custom_decoder_ctl
@ -200,15 +201,25 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap);
const opus_val16 *window, int overlap, int arch);
#ifdef NON_STATIC_COMB_FILTER_CONST_C
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12);
#endif
#ifndef OVERRIDE_COMB_FILTER_CONST
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
#endif
void init_caps(const CELTMode *m,int *cap,int LM,int C);
#ifdef RESYNTH
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
int LM, int downsample, int silence);
#endif
#ifdef __cplusplus

View File

@ -51,6 +51,9 @@
#include "celt_lpc.h"
#include "vq.h"
#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
#define NORM_ALIASING_HACK
#endif
/**********************************************************************/
/* */
/* DECODER */
@ -79,6 +82,7 @@ struct OpusCustomDecoder {
int error;
int last_pitch_index;
int loss_count;
int skip_plc;
int postfilter_period;
int postfilter_period_old;
opus_val16 postfilter_gain;
@ -161,8 +165,6 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
st->signalling = 1;
st->arch = opus_select_arch();
st->loss_count = 0;
opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
return OPUS_OK;
@ -175,28 +177,24 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
}
#endif /* CUSTOM_MODES */
static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
{
#ifdef FIXED_POINT
x = PSHR32(x, SIG_SHIFT);
x = MAX32(x, -32768);
x = MIN32(x, 32767);
return EXTRACT16(x);
#else
return (opus_val16)x;
#endif
}
#ifndef RESYNTH
static
#endif
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
celt_sig *mem, int accum)
{
int c;
int Nd;
int apply_downsampling=0;
opus_val16 coef0;
VARDECL(celt_sig, scratch);
SAVE_STACK;
#ifndef FIXED_POINT
(void)accum;
celt_assert(accum==0);
#endif
ALLOC(scratch, N, celt_sig);
coef0 = coef[0];
Nd = N/downsample;
c=0; do {
@ -234,11 +232,24 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
apply_downsampling=1;
} else {
/* Shortcut for the standard (non-custom modes) case */
for (j=0;j<N;j++)
#ifdef FIXED_POINT
if (accum)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
}
} else
#endif
{
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
}
}
}
mem[c] = m;
@ -246,41 +257,95 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
if (apply_downsampling)
{
/* Perform down-sampling */
for (j=0;j<Nd;j++)
y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
#ifdef FIXED_POINT
if (accum)
{
for (j=0;j<Nd;j++)
y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
} else
#endif
{
for (j=0;j<Nd;j++)
y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
}
}
} while (++c<C);
RESTORE_STACK;
}
/** Compute the IMDCT and apply window for all sub-frames and
all channels in a frame */
#ifndef RESYNTH
static
#endif
void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
opus_val16 *oldBandE, int start, int effEnd, int C, int CC,
int isTransient, int LM, int downsample,
int silence, int arch)
{
int b, c;
int c, i;
int M;
int b;
int B;
int N;
int N, NB;
int shift;
const int overlap = OVERLAP(mode);
int nbEBands;
int overlap;
VARDECL(celt_sig, freq);
SAVE_STACK;
if (shortBlocks)
overlap = mode->overlap;
nbEBands = mode->nbEBands;
N = mode->shortMdctSize<<LM;
ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */
M = 1<<LM;
if (isTransient)
{
B = shortBlocks;
N = mode->shortMdctSize;
B = M;
NB = mode->shortMdctSize;
shift = mode->maxLM;
} else {
B = 1;
N = mode->shortMdctSize<<LM;
NB = mode->shortMdctSize<<LM;
shift = mode->maxLM-LM;
}
c=0; do {
/* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
if (CC==2&&C==1)
{
/* Copying a mono streams to two channels */
celt_sig *freq2;
denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
downsample, silence);
/* Store a temporary copy in the output buffer because the IMDCT destroys its input. */
freq2 = out_syn[1]+overlap/2;
OPUS_COPY(freq2, freq, N);
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
} while (++c<C);
clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch);
} else if (CC==1&&C==2)
{
/* Downmixing a stereo stream to mono */
celt_sig *freq2;
freq2 = out_syn[0]+overlap/2;
denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
downsample, silence);
/* Use the output buffer as temp array before downmixing. */
denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
downsample, silence);
for (i=0;i<N;i++)
freq[i] = HALF32(ADD32(freq[i],freq2[i]));
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
} else {
/* Normal case (mono or stereo) */
c=0; do {
denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
downsample, silence);
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
} while (++c<CC);
}
RESTORE_STACK;
}
static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
@ -330,7 +395,23 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM,
pitch of 480 Hz. */
#define PLC_PITCH_LAG_MIN (100)
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
{
int pitch_index;
VARDECL( opus_val16, lp_pitch_buf );
SAVE_STACK;
ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
pitch_downsample(decode_mem, lp_pitch_buf,
DECODE_BUFFER_SIZE, C, arch);
pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch);
pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
RESTORE_STACK;
return pitch_index;
}
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
{
int c;
int i;
@ -343,11 +424,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
int nbEBands;
int overlap;
int start;
int downsample;
int loss_count;
int noise_based;
const opus_int16 *eBands;
VARDECL(celt_sig, scratch);
SAVE_STACK;
mode = st->mode;
@ -367,40 +446,37 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
loss_count = st->loss_count;
start = st->start;
downsample = st->downsample;
noise_based = loss_count >= 5 || start != 0;
ALLOC(scratch, noise_based?N*C:N, celt_sig);
noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
if (noise_based)
{
/* Noise-based PLC/CNG */
celt_sig *freq;
#ifdef NORM_ALIASING_HACK
celt_norm *X;
#else
VARDECL(celt_norm, X);
#endif
opus_uint32 seed;
opus_val16 *plcLogE;
int end;
int effEnd;
opus_val16 decay;
end = st->end;
effEnd = IMAX(start, IMIN(end, mode->effEBands));
/* Share the interleaved signal MDCT coefficient buffer with the
deemphasis scratch buffer. */
freq = scratch;
#ifdef NORM_ALIASING_HACK
/* This is an ugly hack that breaks aliasing rules and would be easily broken,
but it saves almost 4kB of stack. */
X = (celt_norm*)(out_syn[C-1]+overlap/2);
#else
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
#endif
if (loss_count >= 5)
plcLogE = backgroundLogE;
else {
/* Energy decay */
opus_val16 decay = loss_count==0 ?
QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
c=0; do
{
for (i=start;i<end;i++)
oldBandE[c*nbEBands+i] -= decay;
} while (++c<C);
plcLogE = oldBandE;
}
/* Energy decay */
decay = loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
c=0; do
{
for (i=start;i<end;i++)
oldBandE[c*nbEBands+i] = MAX16(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay);
} while (++c<C);
seed = st->rng;
for (c=0;c<C;c++)
{
@ -416,25 +492,17 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
seed = celt_lcg_rand(seed);
X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
}
renormalise_vector(X+boffs, blen, Q15ONE);
renormalise_vector(X+boffs, blen, Q15ONE, st->arch);
}
}
st->rng = seed;
denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
c=0; do {
int bound = eBands[effEnd]<<LM;
if (downsample!=1)
bound = IMIN(bound, N/downsample);
for (i=bound;i<N;i++)
freq[c*N+i] = 0;
} while (++c<C);
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
DECODE_BUFFER_SIZE-N+(overlap>>1));
} while (++c<C);
compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
} else {
/* Pitch-based PLC */
const opus_val16 *window;
@ -445,15 +513,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
if (loss_count == 0)
{
VARDECL( opus_val16, lp_pitch_buf );
ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
pitch_downsample(decode_mem, lp_pitch_buf,
DECODE_BUFFER_SIZE, C, st->arch);
pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
st->last_pitch_index = pitch_index;
st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
} else {
pitch_index = st->last_pitch_index;
fade = QCONST16(.8f,15);
@ -516,7 +576,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
}
/* Compute the excitation for exc_length samples before the loss. */
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch);
}
/* Check if the waveform is decaying, and if so how fast.
@ -583,7 +643,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
the signal domain. */
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
lpc_mem);
lpc_mem, st->arch);
}
/* Check if the synthesis energy is higher than expected, which can
@ -631,7 +691,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
st->postfilter_period, st->postfilter_period, overlap,
-st->postfilter_gain, -st->postfilter_gain,
st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
/* Simulate TDAC on the concealed audio so that it blends with the
MDCT of the next frame. */
@ -644,22 +704,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
} while (++c<C);
}
deemphasis(out_syn, pcm, N, C, downsample,
mode->preemph, st->preemph_memD, scratch);
st->loss_count = loss_count+1;
RESTORE_STACK;
}
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
{
int c, i, N;
int spread_decision;
opus_int32 bits;
ec_dec _dec;
VARDECL(celt_sig, freq);
#ifdef NORM_ALIASING_HACK
celt_norm *X;
#else
VARDECL(celt_norm, X);
#endif
VARDECL(int, fine_quant);
VARDECL(int, pulses);
VARDECL(int, cap);
@ -677,6 +738,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
int intra_ener;
const int CC = st->channels;
int LM, M;
int start;
int end;
int effEnd;
int codedBands;
int alloc_trim;
@ -703,11 +766,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
nbEBands = mode->nbEBands;
overlap = mode->overlap;
eBands = mode->eBands;
start = st->start;
end = st->end;
frame_size *= st->downsample;
c=0; do {
decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
} while (++c<CC);
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
oldBandE = lpc+CC*LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
@ -725,7 +787,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
if (data0<0)
return OPUS_INVALID_PACKET;
}
st->end = IMAX(1, mode->effEBands-2*(data0>>5));
st->end = end = IMAX(1, mode->effEBands-2*(data0>>5));
LM = (data0>>3)&0x3;
C = 1 + ((data0>>2)&0x1);
data++;
@ -752,18 +814,27 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
return OPUS_BAD_ARG;
N = M*mode->shortMdctSize;
c=0; do {
decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<CC);
effEnd = st->end;
effEnd = end;
if (effEnd > mode->effEBands)
effEnd = mode->effEBands;
if (data == NULL || len<=1)
{
celt_decode_lost(st, pcm, N, LM);
celt_decode_lost(st, N, LM);
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
RESTORE_STACK;
return frame_size/st->downsample;
}
/* Check if there are at least two packets received consecutively before
* turning on the pitch-based PLC */
st->skip_plc = st->loss_count != 0;
if (dec == NULL)
{
ec_dec_init(&_dec,(unsigned char*)data,len);
@ -795,7 +866,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
postfilter_gain = 0;
postfilter_pitch = 0;
postfilter_tapset = 0;
if (st->start==0 && tell+16 <= total_bits)
if (start==0 && tell+16 <= total_bits)
{
if(ec_dec_bit_logp(dec, 1))
{
@ -826,11 +897,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
/* Decode the global flags (first symbols in the stream) */
intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
/* Get band energies */
unquant_coarse_energy(mode, st->start, st->end, oldBandE,
unquant_coarse_energy(mode, start, end, oldBandE,
intra_ener, dec, C, LM);
ALLOC(tf_res, nbEBands, int);
tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
tf_decode(start, end, isTransient, tf_res, LM, dec);
tell = ec_tell(dec);
spread_decision = SPREAD_NORMAL;
@ -846,7 +917,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
dynalloc_logp = 6;
total_bits<<=BITRES;
tell = ec_tell_frac(dec);
for (i=st->start;i<st->end;i++)
for (i=start;i<end;i++)
{
int width, quanta;
int dynalloc_loop_logp;
@ -885,84 +956,62 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
ALLOC(pulses, nbEBands, int);
ALLOC(fine_priority, nbEBands, int);
codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
codedBands = compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
} while (++c<CC);
/* Decode fixed codebook */
ALLOC(collapse_masks, C*nbEBands, unsigned char);
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
#ifdef NORM_ALIASING_HACK
/* This is an ugly hack that breaks aliasing rules and would be easily broken,
but it saves almost 4kB of stack. */
X = (celt_norm*)(out_syn[CC-1]+overlap/2);
#else
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
#endif
quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, st->arch);
if (anti_collapse_rsv > 0)
{
anti_collapse_on = ec_dec_bits(dec, 1);
}
unquant_energy_finalise(mode, st->start, st->end, oldBandE,
unquant_energy_finalise(mode, start, end, oldBandE,
fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
if (anti_collapse_on)
anti_collapse(mode, X, collapse_masks, LM, C, N,
st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
if (silence)
{
for (i=0;i<C*nbEBands;i++)
oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
for (i=0;i<C*N;i++)
freq[i] = 0;
} else {
/* Synthesis */
denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
}
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
} while (++c<CC);
c=0; do {
int bound = M*eBands[effEnd];
if (st->downsample!=1)
bound = IMIN(bound, N/st->downsample);
for (i=bound;i<N;i++)
freq[c*N+i] = 0;
} while (++c<C);
c=0; do {
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<CC);
if (CC==2&&C==1)
{
for (i=0;i<N;i++)
freq[N+i] = freq[i];
}
if (CC==1&&C==2)
{
for (i=0;i<N;i++)
freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
}
/* Compute inverse MDCTs */
compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
C, CC, isTransient, LM, st->downsample, silence, st->arch);
c=0; do {
st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
mode->window, overlap);
mode->window, overlap, st->arch);
if (LM!=0)
comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
mode->window, overlap);
mode->window, overlap, st->arch);
} while (++c<CC);
st->postfilter_period_old = st->postfilter_period;
@ -978,32 +1027,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
st->postfilter_tapset_old = st->postfilter_tapset;
}
if (C==1) {
for (i=0;i<nbEBands;i++)
oldBandE[nbEBands+i]=oldBandE[i];
}
if (C==1)
OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
/* In case start or end were to change */
if (!isTransient)
{
opus_val16 max_background_increase;
OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands);
OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
/* In normal circumstances, we only allow the noise floor to increase by
up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB
increase for each update.*/
if (st->loss_count < 10)
max_background_increase = M*QCONST16(0.001f,DB_SHIFT);
else
max_background_increase = QCONST16(1.f,DB_SHIFT);
for (i=0;i<2*nbEBands;i++)
oldLogE2[i] = oldLogE[i];
for (i=0;i<2*nbEBands;i++)
oldLogE[i] = oldBandE[i];
for (i=0;i<2*nbEBands;i++)
backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]);
} else {
for (i=0;i<2*nbEBands;i++)
oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
}
c=0; do
{
for (i=0;i<st->start;i++)
for (i=0;i<start;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
}
for (i=st->end;i<nbEBands;i++)
for (i=end;i<nbEBands;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
@ -1011,8 +1064,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
} while (++c<2);
st->rng = dec->rng;
/* We reuse freq[] as scratch space for the de-emphasis */
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
st->loss_count = 0;
RESTORE_STACK;
if (ec_tell(dec) > 8*len)
@ -1028,7 +1080,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
#ifdef FIXED_POINT
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
{
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
}
#ifndef DISABLE_FLOAT_API
@ -1045,7 +1097,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
N = frame_size;
ALLOC(out, C*N, opus_int16);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
if (ret>0)
for (j=0;j<C*ret;j++)
pcm[j]=out[j]*(1.f/32768.f);
@ -1059,7 +1111,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
{
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
}
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
@ -1075,7 +1127,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data
N = frame_size;
ALLOC(out, C*N, celt_sig);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
if (ret>0)
for (j=0;j<C*ret;j++)
@ -1149,6 +1201,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
((char*)&st->DECODER_RESET_START - (char*)st));
for (i=0;i<2*st->mode->nbEBands;i++)
oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
st->skip_plc = 1;
}
break;
case OPUS_GET_PITCH_REQUEST:

View File

@ -57,7 +57,6 @@
*/
struct OpusCustomEncoder {
const OpusCustomMode *mode; /**< Mode used by the encoder */
int overlap;
int channels;
int stream_channels;
@ -173,7 +172,6 @@ static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
st->mode = mode;
st->overlap = mode->overlap;
st->stream_channels = st->channels = channels;
st->upsample = 1;
@ -276,8 +274,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
}
/*printf("\n");*/
/* First few samples are bad because we don't propagate the memory */
for (i=0;i<12;i++)
tmp[i] = 0;
OPUS_CLEAR(tmp, 12);
#ifdef FIXED_POINT
/* Normalize tmp to max range */
@ -346,9 +343,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
{
int id;
#ifdef FIXED_POINT
id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */
id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */
#else
id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */
id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */
#endif
unmask += inv_table[id];
}
@ -366,7 +363,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* Arbitrary metric for VBR boost */
tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
/* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
*tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
*tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
/*printf("%d %f\n", tf_max, mask_metric);*/
RESTORE_STACK;
#ifdef FUZZING
@ -378,8 +375,8 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* Looks for sudden increases of energy to decide whether we need to patch
the transient decision */
int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
int end, int C)
static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
int start, int end, int C)
{
int i, c;
opus_val32 mean_diff=0;
@ -388,28 +385,28 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
avoid false detection caused by irrelevant bands */
if (C==1)
{
spread_old[0] = oldE[0];
for (i=1;i<end;i++)
spread_old[start] = oldE[start];
for (i=start+1;i<end;i++)
spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
} else {
spread_old[0] = MAX16(oldE[0],oldE[nbEBands]);
for (i=1;i<end;i++)
spread_old[start] = MAX16(oldE[start],oldE[start+nbEBands]);
for (i=start+1;i<end;i++)
spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
MAX16(oldE[i],oldE[i+nbEBands]));
}
for (i=end-2;i>=0;i--)
for (i=end-2;i>=start;i--)
spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
/* Compute mean increase */
c=0; do {
for (i=2;i<end-1;i++)
for (i=IMAX(2,start);i<end-1;i++)
{
opus_val16 x1, x2;
x1 = MAX16(0, newE[i]);
x1 = MAX16(0, newE[i + c*nbEBands]);
x2 = MAX16(0, spread_old[i]);
mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
}
} while (++c<C);
mean_diff = DIV32(mean_diff, C*(end-3));
mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start)));
/*printf("%f %f %d\n", mean_diff, max_diff, count);*/
return mean_diff > QCONST16(1.f, DB_SHIFT);
}
@ -417,9 +414,10 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
/** Apply window and compute the MDCT for all sub-frames and
all channels in a frame */
static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
int arch)
{
const int overlap = OVERLAP(mode);
const int overlap = mode->overlap;
int N;
int B;
int shift;
@ -438,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
for (b=0;b<B;b++)
{
/* Interleaving the sub-frames while doing the MDCTs */
clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
&out[b+c*N*B], mode->window, overlap, shift, B,
arch);
}
} while (++c<CC);
if (CC==2&&C==1)
@ -453,8 +453,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
int bound = B*N/upsample;
for (i=0;i<bound;i++)
out[c*B*N+i] *= upsample;
for (;i<B*N;i++)
out[c*B*N+i] = 0;
OPUS_CLEAR(&out[c*B*N+bound], B*N-bound);
} while (++c<C);
}
}
@ -469,26 +468,30 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
int Nu;
coef0 = coef[0];
m = *mem;
/* Fast path for the normal 48kHz case and no clipping */
if (coef[1] == 0 && upsample == 1 && !clip)
{
for (i=0;i<N;i++)
{
opus_val16 x;
x = SCALEIN(pcmp[CC*i]);
/* Apply pre-emphasis */
inp[i] = SHL32(x, SIG_SHIFT) - m;
m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
}
*mem = m;
return;
}
Nu = N/upsample;
if (upsample!=1)
{
for (i=0;i<N;i++)
inp[i] = 0;
OPUS_CLEAR(inp, N);
}
for (i=0;i<Nu;i++)
{
celt_sig x;
x = SCALEIN(pcmp[CC*i]);
#ifndef FIXED_POINT
/* Replace NaNs with zeros */
if (!(x==x))
x = 0;
#endif
inp[i*upsample] = x;
}
inp[i*upsample] = SCALEIN(pcmp[CC*i]);
#ifndef FIXED_POINT
if (clip)
@ -500,7 +503,6 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
#else
(void)clip; /* Avoids a warning about clip being unused. */
#endif
m = *mem;
#ifdef CUSTOM_MODES
if (coef[1] != 0)
{
@ -520,11 +522,11 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
{
for (i=0;i<N;i++)
{
celt_sig x;
x = SHL32(inp[i], SIG_SHIFT);
opus_val16 x;
x = inp[i];
/* Apply pre-emphasis */
inp[i] = x + m;
m = - MULT16_32_Q15(coef0, x);
inp[i] = SHL32(x, SIG_SHIFT) - m;
m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
}
}
*mem = m;
@ -575,15 +577,14 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
*tf_sum = 0;
for (i=0;i<len;i++)
{
int j, k, N;
int k, N;
int narrow;
opus_val32 L1, best_L1;
int best_level=0;
N = (m->eBands[i+1]-m->eBands[i])<<LM;
/* band is too narrow to be split down to LM=-1 */
narrow = (m->eBands[i+1]-m->eBands[i])==1;
for (j=0;j<N;j++)
tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N);
/* Just add the right channel if we're in stereo */
/*if (C==2)
for (j=0;j<N;j++)
@ -593,8 +594,7 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
/* Check the -1 case for transients */
if (isTransient && !narrow)
{
for (j=0;j<N;j++)
tmp_1[j] = tmp[j];
OPUS_COPY(tmp_1, tmp, N);
haar1(tmp_1, N>>LM, 1<<LM);
L1 = l1_metric(tmp_1, N, LM+1, bias);
if (L1<best_L1)
@ -754,12 +754,12 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
const opus_val16 *bandLogE, int end, int LM, int C, int N0,
AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
int intensity, opus_val16 surround_trim)
int intensity, opus_val16 surround_trim, int arch)
{
int i;
opus_val32 diff=0;
int c;
int trim_index = 5;
int trim_index;
opus_val16 trim = QCONST16(5.f, 8);
opus_val16 logXC, logXC2;
if (C==2)
@ -769,10 +769,9 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
/* Compute inter-channel correlation for low frequencies */
for (i=0;i<8;i++)
{
int j;
opus_val32 partial = 0;
for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
partial = MAC16_16(partial, X[j], X[N0+j]);
opus_val32 partial;
partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
(m->eBands[i+1]-m->eBands[i])<<LM, arch);
sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
}
sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
@ -780,22 +779,13 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
minXC = sum;
for (i=8;i<intensity;i++)
{
int j;
opus_val32 partial = 0;
for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
partial = MAC16_16(partial, X[j], X[N0+j]);
opus_val32 partial;
partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
(m->eBands[i+1]-m->eBands[i])<<LM, arch);
minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
}
minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
/*printf ("%f\n", sum);*/
if (sum > QCONST16(.995f,10))
trim_index-=4;
else if (sum > QCONST16(.92f,10))
trim_index-=3;
else if (sum > QCONST16(.85f,10))
trim_index-=2;
else if (sum > QCONST16(.8f,10))
trim_index-=1;
/* mid-side savings estimations based on the LF average*/
logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
/* mid-side savings estimations based on min correlation */
@ -819,14 +809,6 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
} while (++c<C);
diff /= C*(end-1);
/*printf("%f\n", diff);*/
if (diff > QCONST16(2.f, DB_SHIFT))
trim_index--;
if (diff > QCONST16(8.f, DB_SHIFT))
trim_index--;
if (diff < -QCONST16(4.f, DB_SHIFT))
trim_index++;
if (diff < -QCONST16(10.f, DB_SHIFT))
trim_index++;
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
trim -= SHR16(surround_trim, DB_SHIFT-8);
trim -= 2*SHR16(tf_estimate, 14-8);
@ -836,6 +818,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
(opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
}
#else
(void)analysis;
#endif
#ifdef FIXED_POINT
@ -843,10 +827,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
#else
trim_index = (int)floor(.5f+trim);
#endif
if (trim_index<0)
trim_index = 0;
if (trim_index>10)
trim_index = 10;
trim_index = IMAX(0, IMIN(10, trim_index));
/*printf("%d\n", trim_index);*/
#ifdef FUZZING
trim_index = rand()%11;
@ -886,6 +867,66 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X,
> MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
}
#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0)
static opus_val16 median_of_5(const opus_val16 *x)
{
opus_val16 t0, t1, t2, t3, t4;
t2 = x[2];
if (x[0] > x[1])
{
t0 = x[1];
t1 = x[0];
} else {
t0 = x[0];
t1 = x[1];
}
if (x[3] > x[4])
{
t3 = x[4];
t4 = x[3];
} else {
t3 = x[3];
t4 = x[4];
}
if (t0 > t3)
{
MSWAP(t0, t3);
MSWAP(t1, t4);
}
if (t2 > t1)
{
if (t1 < t3)
return MIN16(t2, t3);
else
return MIN16(t4, t1);
} else {
if (t2 < t3)
return MIN16(t1, t3);
else
return MIN16(t2, t4);
}
}
static opus_val16 median_of_3(const opus_val16 *x)
{
opus_val16 t0, t1, t2;
if (x[0] > x[1])
{
t0 = x[1];
t1 = x[0];
} else {
t0 = x[0];
t1 = x[1];
}
t2 = x[2];
if (t1 < t2)
return t1;
else if (t0 < t2)
return t2;
else
return t0;
}
static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
@ -899,8 +940,7 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
SAVE_STACK;
ALLOC(follower, C*nbEBands, opus_val16);
ALLOC(noise_floor, C*nbEBands, opus_val16);
for (i=0;i<nbEBands;i++)
offsets[i] = 0;
OPUS_CLEAR(offsets, nbEBands);
/* Dynamic allocation code */
maxDepth=-QCONST16(31.9f, DB_SHIFT);
for (i=0;i<end;i++)
@ -922,7 +962,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
int last=0;
c=0;do
{
follower[c*nbEBands] = bandLogE2[c*nbEBands];
opus_val16 offset;
opus_val16 tmp;
opus_val16 *f;
f = &follower[c*nbEBands];
f[0] = bandLogE2[c*nbEBands];
for (i=1;i<end;i++)
{
/* The last band to be at least 3 dB higher than the previous one
@ -930,12 +974,26 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
bandlimited signals. */
if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
last=i;
follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
}
for (i=last-1;i>=0;i--)
follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
/* Combine with a median filter to avoid dynalloc triggering unnecessarily.
The "offset" value controls how conservative we are -- a higher offset
reduces the impact of the median filter and makes dynalloc use more bits. */
offset = QCONST16(1.f, DB_SHIFT);
for (i=2;i<end-2;i++)
f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset);
tmp = median_of_3(&bandLogE2[c*nbEBands])-offset;
f[0] = MAX16(f[0], tmp);
f[1] = MAX16(f[1], tmp);
tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset;
f[end-2] = MAX16(f[end-2], tmp);
f[end-1] = MAX16(f[end-1], tmp);
for (i=0;i<end;i++)
follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]);
f[i] = MAX16(f[i], noise_floor[i]);
} while (++c<C);
if (C==2)
{
@ -1016,9 +1074,11 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
opus_val16 pf_threshold;
int pf_on;
int qg;
int overlap;
SAVE_STACK;
mode = st->mode;
overlap = mode->overlap;
ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
pre[0] = _pre;
@ -1027,7 +1087,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
c=0; do {
OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N);
} while (++c<CC);
if (enabled)
@ -1044,7 +1104,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
N, &pitch_index, st->prefilter_period, st->prefilter_gain);
N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch);
if (pitch_index > COMBFILTER_MAXPERIOD-2)
pitch_index = COMBFILTER_MAXPERIOD-2;
gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
@ -1100,25 +1160,25 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
/*printf("%d %f\n", pitch_index, gain1);*/
c=0; do {
int offset = mode->shortMdctSize-st->overlap;
int offset = mode->shortMdctSize-overlap;
st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap);
if (offset)
comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD,
st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch);
comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap);
OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch);
OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap);
if (N>COMBFILTER_MAXPERIOD)
{
OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
} else {
OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
}
} while (++c<CC);
@ -1196,6 +1256,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
/*printf("%f %f ", analysis->tonality, tonal);*/
target = tonal_target;
}
#else
(void)analysis;
(void)pitch_change;
#endif
if (has_surround_mask&&!lfe)
@ -1218,12 +1281,15 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
{
opus_val16 rate_factor;
opus_val16 rate_factor = Q15ONE;
if (bitrate < 64000)
{
#ifdef FIXED_POINT
rate_factor = MAX16(0,(bitrate-32000));
rate_factor = MAX16(0,(bitrate-32000));
#else
rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
#endif
}
if (constrained_vbr)
rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);
@ -1273,6 +1339,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
int LM, M;
int tf_select;
int nbFilledBytes, nbAvailableBytes;
int start;
int end;
int effEnd;
int codedBands;
int tf_sum;
@ -1316,6 +1384,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
nbEBands = mode->nbEBands;
overlap = mode->overlap;
eBands = mode->eBands;
start = st->start;
end = st->end;
tf_estimate = 0;
if (nbCompressedBytes<2 || pcm==NULL)
{
@ -1335,8 +1405,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
M=1<<LM;
N = M*mode->shortMdctSize;
prefilter_mem = st->in_mem+CC*(st->overlap);
oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD));
prefilter_mem = st->in_mem+CC*(overlap);
oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD));
oldLogE = oldBandE + CC*nbEBands;
oldLogE2 = oldLogE + CC*nbEBands;
@ -1352,8 +1422,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
#ifdef CUSTOM_MODES
if (st->signalling && enc==NULL)
{
int tmp = (mode->effEBands-st->end)>>1;
st->end = IMAX(1, mode->effEBands-tmp);
int tmp = (mode->effEBands-end)>>1;
end = st->end = IMAX(1, mode->effEBands-tmp);
compressed[0] = tmp<<5;
compressed[0] |= LM<<3;
compressed[0] |= (C==2)<<2;
@ -1436,11 +1506,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
}
total_bits = nbCompressedBytes*8;
effEnd = st->end;
effEnd = end;
if (effEnd > mode->effEBands)
effEnd = mode->effEBands;
ALLOC(in, CC*(N+st->overlap), celt_sig);
ALLOC(in, CC*(N+overlap), celt_sig);
sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
@ -1474,8 +1544,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
enc->nbits_total+=tell-ec_tell(enc);
}
c=0; do {
celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
mode->preemph, st->preemph_memE+c, st->clip);
int need_clip=0;
#ifndef FIXED_POINT
need_clip = st->clip && sample_max>65536.f;
#endif
celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample,
mode->preemph, st->preemph_memE+c, need_clip);
} while (++c<CC);
@ -1484,7 +1558,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
int enabled;
int qg;
enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf
&& st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
prefilter_tapset = st->tapset_decision;
@ -1494,7 +1568,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
pitch_change = 1;
if (pf_on==0)
{
if(st->start==0 && tell+16<=total_bits)
if(start==0 && tell+16<=total_bits)
ec_enc_bit_logp(enc, 0, 1);
} else {
/*This block is not gated by a total bits check only because
@ -1515,7 +1589,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
shortBlocks = 0;
if (st->complexity >= 1 && !st->lfe)
{
isTransient = transient_analysis(in, N+st->overlap, CC,
isTransient = transient_analysis(in, N+overlap, CC,
&tf_estimate, &tf_chan);
}
if (LM>0 && ec_tell(enc)+3<=total_bits)
@ -1535,33 +1609,32 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(bandLogE2, C*nbEBands, opus_val16);
if (secondMdct)
{
compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
compute_band_energies(mode, freq, bandE, effEnd, C, M);
amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C);
compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
for (i=0;i<C*nbEBands;i++)
bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
}
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
if (CC==2&&C==1)
tf_chan = 0;
compute_band_energies(mode, freq, bandE, effEnd, C, M);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
if (st->lfe)
{
for (i=2;i<st->end;i++)
for (i=2;i<end;i++)
{
bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
bandE[i] = MAX32(bandE[i], EPSILON);
}
}
amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
for(i=0;i<st->end;i++)
surround_dynalloc[i] = 0;
OPUS_CLEAR(surround_dynalloc, end);
/* This computes how much masking takes place between surround channels */
if (st->start==0&&st->energy_mask&&!st->lfe)
if (start==0&&st->energy_mask&&!st->lfe)
{
int mask_end;
int midband;
@ -1584,6 +1657,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
diff += MULT16_16(mask, 1+2*i-mask_end);
}
}
celt_assert(count>0);
mask_avg = DIV32_16(mask_avg,count);
mask_avg += QCONST16(.2f, DB_SHIFT);
diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
@ -1621,8 +1695,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
disabling masking. */
mask_avg = 0;
diff = 0;
for(i=0;i<mask_end;i++)
surround_dynalloc[i] = 0;
OPUS_CLEAR(surround_dynalloc, mask_end);
} else {
for(i=0;i<mask_end;i++)
surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
@ -1640,14 +1713,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
opus_val32 frame_avg=0;
opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
for(i=st->start;i<st->end;i++)
for(i=start;i<end;i++)
{
follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
if (C==2)
follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
frame_avg += follow;
}
frame_avg /= (st->end-st->start);
frame_avg /= (end-start);
temporal_vbr = SUB16(frame_avg,st->spec_avg);
temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr));
st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);
@ -1658,21 +1731,20 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (!secondMdct)
{
for (i=0;i<C*nbEBands;i++)
bandLogE2[i] = bandLogE[i];
OPUS_COPY(bandLogE2, bandLogE, C*nbEBands);
}
/* Last chance to catch any transient we might have missed in the
time-domain analysis */
if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe)
{
if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C))
if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C))
{
isTransient = 1;
shortBlocks = M;
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
compute_band_energies(mode, freq, bandE, effEnd, C, M);
amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
/* Compensate for the scaling of short vs long mdcts */
for (i=0;i<C*nbEBands;i++)
bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
@ -1690,7 +1762,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(tf_res, nbEBands, int);
/* Disable variable tf resolution for hybrid and at very low bitrate */
if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe)
if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe)
{
int lambda;
if (effectiveBytes<40)
@ -1703,22 +1775,22 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
lambda = 3;
lambda*=2;
tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
for (i=effEnd;i<st->end;i++)
for (i=effEnd;i<end;i++)
tf_res[i] = tf_res[effEnd-1];
} else {
tf_sum = 0;
for (i=0;i<st->end;i++)
for (i=0;i<end;i++)
tf_res[i] = isTransient;
tf_select=0;
}
ALLOC(error, C*nbEBands, opus_val16);
quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE,
quant_coarse_energy(mode, start, end, effEnd, bandLogE,
oldBandE, total_bits, error, enc,
C, LM, nbAvailableBytes, st->force_intra,
&st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe);
tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc);
if (ec_tell(enc)+4<=total_bits)
{
@ -1726,7 +1798,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
st->tapset_decision = 0;
st->spread_decision = SPREAD_NORMAL;
} else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0)
} else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0)
{
if (st->complexity == 0)
st->spread_decision = SPREAD_NONE;
@ -1760,7 +1832,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(offsets, nbEBands, int);
maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
/* For LFE, everything interesting is in the first band */
@ -1773,7 +1845,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
total_bits<<=BITRES;
total_boost = 0;
tell = ec_tell_frac(enc);
for (i=st->start;i<st->end;i++)
for (i=start;i<end;i++)
{
int width, quanta;
int dynalloc_loop_logp;
@ -1818,7 +1890,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
intensity_thresholds, intensity_histeresis, 21, st->intensity);
st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
st->intensity = IMIN(end,IMAX(start, st->intensity));
}
alloc_trim = 5;
@ -1828,7 +1900,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
alloc_trim = 5;
else
alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate,
st->intensity, surround_trim, st->arch);
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
tell = ec_tell_frac(enc);
}
@ -1930,7 +2003,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
bits -= anti_collapse_rsv;
signalBandwidth = st->end-1;
signalBandwidth = end-1;
#ifndef DISABLE_FLOAT_API
if (st->analysis.valid)
{
@ -1950,7 +2023,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
#endif
if (st->lfe)
signalBandwidth = 1;
codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
codedBands = compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
if (st->lastCodedBands)
@ -1958,13 +2031,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
else
st->lastCodedBands = codedBands;
quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C);
/* Residual quantisation */
ALLOC(collapse_masks, C*nbEBands, unsigned char);
quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
bandE, pulses, shortBlocks, st->spread_decision,
dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv,
balance, enc, LM, codedBands, &st->rng, st->arch);
if (anti_collapse_rsv > 0)
{
@ -1974,7 +2048,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
#endif
ec_enc_bits(enc, anti_collapse_on, 1);
}
quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
if (silence)
{
@ -1990,40 +2064,26 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (anti_collapse_on)
{
anti_collapse(mode, X, collapse_masks, LM, C, N,
st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
}
if (silence)
{
for (i=0;i<C*N;i++)
freq[i] = 0;
} else {
/* Synthesis */
denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
}
c=0; do {
OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
} while (++c<CC);
if (CC==2&&C==1)
{
for (i=0;i<N;i++)
freq[N+i] = freq[i];
}
c=0; do {
out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
} while (++c<CC);
compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM);
celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd,
C, CC, isTransient, LM, st->upsample, silence, st->arch);
c=0; do {
st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
mode->window, st->overlap);
mode->window, overlap);
if (LM!=0)
comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
@ -2031,7 +2091,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
} while (++c<CC);
/* We reuse freq[] as scratch space for the de-emphasis */
deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq);
deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
st->prefilter_period_old = st->prefilter_period;
st->prefilter_gain_old = st->prefilter_gain;
st->prefilter_tapset_old = st->prefilter_tapset;
@ -2051,16 +2111,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
#endif
if (CC==2&&C==1) {
for (i=0;i<nbEBands;i++)
oldBandE[nbEBands+i]=oldBandE[i];
OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
}
if (!isTransient)
{
for (i=0;i<CC*nbEBands;i++)
oldLogE2[i] = oldLogE[i];
for (i=0;i<CC*nbEBands;i++)
oldLogE[i] = oldBandE[i];
OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands);
OPUS_COPY(oldLogE, oldBandE, CC*nbEBands);
} else {
for (i=0;i<CC*nbEBands;i++)
oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
@ -2068,12 +2125,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/* In case start or end were to change */
c=0; do
{
for (i=0;i<st->start;i++)
for (i=0;i<start;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
}
for (i=st->end;i<nbEBands;i++)
for (i=end;i<nbEBands;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
@ -2274,7 +2331,7 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
{
int i;
opus_val16 *oldBandE, *oldLogE, *oldLogE2;
oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD));
oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD));
oldLogE = oldBandE + st->channels*st->mode->nbEBands;
oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
OPUS_CLEAR((char*)&st->ENCODER_RESET_START,

View File

@ -49,8 +49,7 @@ int p
float *lpc = _lpc;
#endif
for (i = 0; i < p; i++)
lpc[i] = 0;
OPUS_CLEAR(lpc, p);
if (ac[0] != 0)
{
for (i = 0; i < p; i++) {
@ -88,12 +87,15 @@ int p
#endif
}
void celt_fir(const opus_val16 *_x,
void celt_fir_c(
const opus_val16 *_x,
const opus_val16 *num,
opus_val16 *_y,
int N,
int ord,
opus_val16 *mem)
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
@ -111,6 +113,7 @@ void celt_fir(const opus_val16 *_x,
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
(void)arch;
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
@ -124,7 +127,7 @@ void celt_fir(const opus_val16 *_x,
for (i=0;i<N-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
xcorr_kernel(rnum, x+i, sum, ord);
xcorr_kernel(rnum, x+i, sum, ord, arch);
_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
@ -146,10 +149,12 @@ void celt_iir(const opus_val32 *_x,
opus_val32 *_y,
int N,
int ord,
opus_val16 *mem)
opus_val16 *mem,
int arch)
{
#ifdef SMALL_FOOTPRINT
int i,j;
(void)arch;
for (i=0;i<N;i++)
{
opus_val32 sum = _x[i];
@ -187,7 +192,7 @@ void celt_iir(const opus_val32 *_x,
sum[1]=_x[i+1];
sum[2]=_x[i+2];
sum[3]=_x[i+3];
xcorr_kernel(rden, y+i, sum, ord);
xcorr_kernel(rden, y+i, sum, ord, arch);
/* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);

View File

@ -29,24 +29,37 @@
#define PLC_H
#include "arch.h"
#include "cpu_support.h"
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#include "x86/celt_lpc_sse.h"
#endif
#define LPC_ORDER 24
void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
void celt_fir(const opus_val16 *x,
void celt_fir_c(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem);
opus_val16 *mem,
int arch);
#if !defined(OVERRIDE_CELT_FIR)
#define celt_fir(x, num, y, N, ord, mem, arch) \
(celt_fir_c(x, num, y, N, ord, mem, arch))
#endif
void celt_iir(const opus_val32 *x,
const opus_val16 *den,
opus_val32 *y,
int N,
int ord,
opus_val16 *mem);
opus_val16 *mem,
int arch);
int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
const opus_val16 *window, int overlap, int lag, int n, int arch);

View File

@ -31,7 +31,8 @@
#include "opus_types.h"
#include "opus_defines.h"
#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
#if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/armcpu.h"
/* We currently support 4 ARM variants:
@ -42,6 +43,22 @@
*/
#define OPUS_ARCHMASK 3
#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
* arch[0] -> non-sse
* arch[1] -> sse
* arch[2] -> sse2
* arch[3] -> sse4.1
* arch[4] -> avx
*/
#define OPUS_ARCHMASK 7
int opus_select_arch(void);
#else
#define OPUS_ARCHMASK 0
@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void)
return 0;
}
#endif
#endif

View File

@ -74,7 +74,7 @@ int log2_frac(opus_uint32 val, int frac)
/*Although derived separately, the pulse vector coding scheme is equivalent to
a Pyramid Vector Quantizer \cite{Fis86}.
Some additional notes about an early version appear at
http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
and the definitions of some terms have evolved since that was written.
The conversion from a pulse vector to an integer index (encoding) and back
@ -460,10 +460,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
}
static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
opus_uint32 p;
int s;
int k0;
opus_int16 val;
opus_val32 yy=0;
celt_assert(_k>0);
celt_assert(_n>1);
while(_n>2){
@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
}
else for(p=row[_k];p>_i;p=row[_k])_k--;
_i-=p;
*_y++=(k0-_k+s)^s;
val=(k0-_k+s)^s;
*_y++=val;
yy=MAC16_16(yy,val,val);
}
/*Lots of dimensions case:*/
else{
@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
do p=CELT_PVQ_U_ROW[--_k][_n];
while(p>_i);
_i-=p;
*_y++=(k0-_k+s)^s;
val=(k0-_k+s)^s;
*_y++=val;
yy=MAC16_16(yy,val,val);
}
}
_n--;
@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
k0=_k;
_k=(_i+1)>>1;
if(_k)_i-=2*_k-1;
*_y++=(k0-_k+s)^s;
val=(k0-_k+s)^s;
*_y++=val;
yy=MAC16_16(yy,val,val);
/*_n==1*/
s=-(int)_i;
*_y=(_k+s)^s;
val=(_k+s)^s;
*_y=val;
yy=MAC16_16(yy,val,val);
return yy;
}
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
}
#else /* SMALL_FOOTPRINT */
@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
_y: Returns the vector of pulses.
_u: Must contain entries [0..._k+1] of row _n of U() on input.
Its contents will be destructively modified.*/
static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
int j;
opus_int16 val;
opus_val32 yy=0;
celt_assert(_n>0);
j=0;
do{
@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
while(p>_i)p=_u[--_k];
_i-=p;
yj-=_k;
_y[j]=(yj+s)^s;
val=(yj+s)^s;
_y[j]=val;
yy=MAC16_16(yy,val,val);
uprev(_u,_k+2,0);
}
while(++j<_n);
return yy;
}
/*Returns the index of the given combination of K elements chosen from a set
@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
RESTORE_STACK;
}
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
VARDECL(opus_uint32,u);
int ret;
SAVE_STACK;
celt_assert(_k>0);
ALLOC(u,_k+2U,opus_uint32);
cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
RESTORE_STACK;
return ret;
}
#endif /* SMALL_FOOTPRINT */

View File

@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac);
void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
void decode_pulses(int *_y, int N, int K, ec_dec *dec);
opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec);
#endif /* CWRS_H */

View File

@ -62,6 +62,27 @@ int ec_ilog(opus_uint32 _v){
}
#endif
#if 1
/* This is a faster version of ec_tell_frac() that takes advantage
of the low (1/8 bit) resolution to use just a linear function
followed by a lookup to determine the exact transition thresholds. */
opus_uint32 ec_tell_frac(ec_ctx *_this){
static const unsigned correction[8] =
{35733, 38967, 42495, 46340,
50535, 55109, 60097, 65535};
opus_uint32 nbits;
opus_uint32 r;
int l;
unsigned b;
nbits=_this->nbits_total<<BITRES;
l=EC_ILOG(_this->rng);
r=_this->rng>>(l-16);
b = (r>>12)-8;
b += r>correction[b];
l = (l<<3)+b;
return nbits-l;
}
#else
opus_uint32 ec_tell_frac(ec_ctx *_this){
opus_uint32 nbits;
opus_uint32 r;
@ -91,3 +112,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){
}
return nbits-l;
}
#endif
#ifdef USE_SMALL_DIV_TABLE
/* Result of 2^32/(2*i+1), except for i=0. */
const opus_uint32 SMALL_DIV_TABLE[129] = {
0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
0x01073260, 0x0105197F, 0x0103091B, 0x01010101
};
#endif

View File

@ -34,6 +34,12 @@
# include <stddef.h>
# include "ecintrin.h"
extern const opus_uint32 SMALL_DIV_TABLE[129];
#ifdef OPUS_ARM_ASM
#define USE_SMALL_DIV_TABLE
#endif
/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
larger type, you can speed up the decoder by using it here.*/
typedef opus_uint32 ec_window;
@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){
rounding error is in the positive direction).*/
opus_uint32 ec_tell_frac(ec_ctx *_this);
/* Tested exhaustively for all n and for 1<=d<=256 */
static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
celt_assert(d>0);
#ifdef USE_SMALL_DIV_TABLE
if (d>256)
return n/d;
else {
opus_uint32 t, q;
t = EC_ILOG(d&-d);
q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
return q+(n-q*d >= d);
}
#else
return n/d;
#endif
}
static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
celt_assert(d>0);
#ifdef USE_SMALL_DIV_TABLE
if (n<0)
return -(opus_int32)celt_udiv(-n, d);
else
return celt_udiv(n, d);
#else
return n/d;
#endif
}
#endif

View File

@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
unsigned ec_decode(ec_dec *_this,unsigned _ft){
unsigned s;
_this->ext=_this->rng/_ft;
_this->ext=celt_udiv(_this->rng,_ft);
s=(unsigned)(_this->val/_this->ext);
return _ft-EC_MINI(s+1,_ft);
}

View File

@ -98,7 +98,7 @@ static void ec_enc_carry_out(ec_enc *_this,int _c){
else _this->ext++;
}
static void ec_enc_normalize(ec_enc *_this){
static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){
/*If the range is too small, output some bits and rescale it.*/
while(_this->rng<=EC_CODE_BOT){
ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT));
@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
opus_uint32 r;
r=_this->rng/_ft;
r=celt_udiv(_this->rng,_ft);
if(_fl>0){
_this->val+=_this->rng-IMUL32(r,(_ft-_fl));
_this->rng=IMUL32(r,(_fh-_fl));

View File

@ -496,6 +496,7 @@ static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int
#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b))))
static OPUS_INLINE int SATURATE(int a, int b)
{
@ -767,6 +768,16 @@ static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
return res;
}
static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
{
x = PSHR32(x, SIG_SHIFT);
x = MAX32(x, -32768);
x = MIN32(x, 32767);
return EXTRACT16(x);
}
#define SIG2WORD16(x) (SIG2WORD16_generic(x))
#undef PRINT_MIPS
#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0);

View File

@ -37,16 +37,32 @@
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
#else
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
#endif
/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
#else
#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
#endif
/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
#else
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
#endif
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
#else
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
#endif
/** Compile-time conversion of float constant to 16-bit value */
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
@ -113,7 +129,11 @@
/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
b must fit in 31 bits.
Result fits in 32 bits. */
#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add.
Results fits in 32 bits */
#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)))
#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
@ -131,4 +151,17 @@
/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
#if defined(MIPSr1_ASM)
#include "mips/fixed_generic_mipsr1.h"
#endif
static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
{
x = PSHR32(x, SIG_SHIFT);
x = MAX32(x, -32768);
x = MIN32(x, 32767);
return EXTRACT16(x);
}
#define SIG2WORD16(x) (SIG2WORD16_generic(x))
#endif

View File

@ -90,14 +90,14 @@
#include <math.h>
#define float2int(x) lrint(x)
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64))
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64)
#include <xmmintrin.h>
__inline long int float2int(float value)
{
return _mm_cvtss_si32(_mm_load_ss(&value));
}
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32))
#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86)
#include <math.h>
/* Win32 doesn't seem to have these functions.

View File

@ -47,64 +47,56 @@
static void kf_bfly2(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
int N
)
{
kiss_fft_cpx * Fout2;
const kiss_twiddle_cpx * tw1;
int i,j;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
int i;
(void)m;
#ifdef CUSTOM_MODES
if (m==1)
{
Fout = Fout_beg + i*mm;
Fout2 = Fout + m;
tw1 = st->twiddles;
for(j=0;j<m;j++)
celt_assert(m==1);
for (i=0;i<N;i++)
{
kiss_fft_cpx t;
Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
C_MUL (t, *Fout2 , *tw1);
tw1 += fstride;
Fout2 = Fout + 1;
t = *Fout2;
C_SUB( *Fout2 , *Fout , t );
C_ADDTO( *Fout , t );
++Fout2;
++Fout;
Fout += 2;
}
}
}
static void ki_bfly2(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
kiss_fft_cpx * Fout2;
const kiss_twiddle_cpx * tw1;
kiss_fft_cpx t;
int i,j;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
} else
#endif
{
Fout = Fout_beg + i*mm;
Fout2 = Fout + m;
tw1 = st->twiddles;
for(j=0;j<m;j++)
opus_val16 tw;
tw = QCONST16(0.7071067812f, 15);
/* We know that m==4 here because the radix-2 is just after a radix-4 */
celt_assert(m==4);
for (i=0;i<N;i++)
{
C_MULC (t, *Fout2 , *tw1);
tw1 += fstride;
C_SUB( *Fout2 , *Fout , t );
C_ADDTO( *Fout , t );
++Fout2;
++Fout;
kiss_fft_cpx t;
Fout2 = Fout + 4;
t = Fout2[0];
C_SUB( Fout2[0] , Fout[0] , t );
C_ADDTO( Fout[0] , t );
t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
C_SUB( Fout2[1] , Fout[1] , t );
C_ADDTO( Fout[1] , t );
t.r = Fout2[2].i;
t.i = -Fout2[2].r;
C_SUB( Fout2[2] , Fout[2] , t );
C_ADDTO( Fout[2] , t );
t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
C_SUB( Fout2[3] , Fout[3] , t );
C_ADDTO( Fout[3] , t );
Fout += 8;
}
}
}
@ -118,88 +110,66 @@ static void kf_bfly4(
int mm
)
{
const kiss_twiddle_cpx *tw1,*tw2,*tw3;
kiss_fft_cpx scratch[6];
const size_t m2=2*m;
const size_t m3=3*m;
int i, j;
int i;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
if (m==1)
{
Fout = Fout_beg + i*mm;
tw3 = tw2 = tw1 = st->twiddles;
for (j=0;j<m;j++)
/* Degenerate case where all the twiddles are 1. */
for (i=0;i<N;i++)
{
C_MUL4(scratch[0],Fout[m] , *tw1 );
C_MUL4(scratch[1],Fout[m2] , *tw2 );
C_MUL4(scratch[2],Fout[m3] , *tw3 );
kiss_fft_cpx scratch0, scratch1;
Fout->r = PSHR32(Fout->r, 2);
Fout->i = PSHR32(Fout->i, 2);
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
C_SUB( scratch[4] , scratch[0] , scratch[2] );
C_SUB( Fout[m2], *Fout, scratch[3] );
tw1 += fstride;
tw2 += fstride*2;
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
C_SUB( scratch0 , *Fout, Fout[2] );
C_ADDTO(*Fout, Fout[2]);
C_ADD( scratch1 , Fout[1] , Fout[3] );
C_SUB( Fout[2], *Fout, scratch1 );
C_ADDTO( *Fout , scratch1 );
C_SUB( scratch1 , Fout[1] , Fout[3] );
Fout[m].r = scratch[5].r + scratch[4].i;
Fout[m].i = scratch[5].i - scratch[4].r;
Fout[m3].r = scratch[5].r - scratch[4].i;
Fout[m3].i = scratch[5].i + scratch[4].r;
++Fout;
Fout[1].r = scratch0.r + scratch1.i;
Fout[1].i = scratch0.i - scratch1.r;
Fout[3].r = scratch0.r - scratch1.i;
Fout[3].i = scratch0.i + scratch1.r;
Fout+=4;
}
} else {
int j;
kiss_fft_cpx scratch[6];
const kiss_twiddle_cpx *tw1,*tw2,*tw3;
const int m2=2*m;
const int m3=3*m;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw3 = tw2 = tw1 = st->twiddles;
/* m is guaranteed to be a multiple of 4. */
for (j=0;j<m;j++)
{
C_MUL(scratch[0],Fout[m] , *tw1 );
C_MUL(scratch[1],Fout[m2] , *tw2 );
C_MUL(scratch[2],Fout[m3] , *tw3 );
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
C_SUB( scratch[4] , scratch[0] , scratch[2] );
C_SUB( Fout[m2], *Fout, scratch[3] );
tw1 += fstride;
tw2 += fstride*2;
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
Fout[m].r = scratch[5].r + scratch[4].i;
Fout[m].i = scratch[5].i - scratch[4].r;
Fout[m3].r = scratch[5].r - scratch[4].i;
Fout[m3].i = scratch[5].i + scratch[4].r;
++Fout;
}
}
}
}
static void ki_bfly4(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
const kiss_twiddle_cpx *tw1,*tw2,*tw3;
kiss_fft_cpx scratch[6];
const size_t m2=2*m;
const size_t m3=3*m;
int i, j;
kiss_fft_cpx * Fout_beg = Fout;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw3 = tw2 = tw1 = st->twiddles;
for (j=0;j<m;j++)
{
C_MULC(scratch[0],Fout[m] , *tw1 );
C_MULC(scratch[1],Fout[m2] , *tw2 );
C_MULC(scratch[2],Fout[m3] , *tw3 );
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
C_SUB( scratch[4] , scratch[0] , scratch[2] );
C_SUB( Fout[m2], *Fout, scratch[3] );
tw1 += fstride;
tw2 += fstride*2;
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
Fout[m].r = scratch[5].r - scratch[4].i;
Fout[m].i = scratch[5].i + scratch[4].r;
Fout[m3].r = scratch[5].r + scratch[4].i;
Fout[m3].i = scratch[5].i - scratch[4].r;
++Fout;
}
}
}
#ifndef RADIX_TWO_ONLY
@ -220,14 +190,19 @@ static void kf_bfly3(
kiss_twiddle_cpx epi3;
kiss_fft_cpx * Fout_beg = Fout;
#ifdef FIXED_POINT
/*epi3.r = -16384;*/ /* Unused */
epi3.i = -28378;
#else
epi3 = st->twiddles[fstride*m];
#endif
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw1=tw2=st->twiddles;
/* For non-custom modes, m is guaranteed to be a multiple of 4. */
k=m;
do {
C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
C_MUL(scratch[1],Fout[m] , *tw1);
C_MUL(scratch[2],Fout[m2] , *tw2);
@ -255,56 +230,8 @@ static void kf_bfly3(
}
}
static void ki_bfly3(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
int i, k;
const size_t m2 = 2*m;
const kiss_twiddle_cpx *tw1,*tw2;
kiss_fft_cpx scratch[5];
kiss_twiddle_cpx epi3;
kiss_fft_cpx * Fout_beg = Fout;
epi3 = st->twiddles[fstride*m];
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw1=tw2=st->twiddles;
k=m;
do{
C_MULC(scratch[1],Fout[m] , *tw1);
C_MULC(scratch[2],Fout[m2] , *tw2);
C_ADD(scratch[3],scratch[1],scratch[2]);
C_SUB(scratch[0],scratch[1],scratch[2]);
tw1 += fstride;
tw2 += fstride*2;
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
C_MULBYSCALAR( scratch[0] , -epi3.i );
C_ADDTO(*Fout,scratch[3]);
Fout[m2].r = Fout[m].r + scratch[0].i;
Fout[m2].i = Fout[m].i - scratch[0].r;
Fout[m].r -= scratch[0].i;
Fout[m].i += scratch[0].r;
++Fout;
}while(--k);
}
}
#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
@ -317,13 +244,19 @@ static void kf_bfly5(
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
const kiss_twiddle_cpx * twiddles = st->twiddles;
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
ya = twiddles[fstride*m];
yb = twiddles[fstride*2*m];
#ifdef FIXED_POINT
ya.r = 10126;
ya.i = -31164;
yb.r = -26510;
yb.i = -19261;
#else
ya = st->twiddles[fstride*m];
yb = st->twiddles[fstride*2*m];
#endif
tw=st->twiddles;
for (i=0;i<N;i++)
@ -335,8 +268,8 @@ static void kf_bfly5(
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
/* For non-custom modes, m is guaranteed to be a multiple of 4. */
for ( u=0; u<m; ++u ) {
C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
scratch[0] = *Fout0;
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@ -373,74 +306,8 @@ static void kf_bfly5(
}
}
}
#endif /* OVERRIDE_kf_bfly5 */
static void ki_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
const kiss_twiddle_cpx * twiddles = st->twiddles;
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
ya = twiddles[fstride*m];
yb = twiddles[fstride*2*m];
tw=st->twiddles;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
Fout0=Fout;
Fout1=Fout0+m;
Fout2=Fout0+2*m;
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
for ( u=0; u<m; ++u ) {
scratch[0] = *Fout0;
C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
C_ADD( scratch[7],scratch[1],scratch[4]);
C_SUB( scratch[10],scratch[1],scratch[4]);
C_ADD( scratch[8],scratch[2],scratch[3]);
C_SUB( scratch[9],scratch[2],scratch[3]);
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
C_SUB(*Fout1,scratch[5],scratch[6]);
C_ADD(*Fout4,scratch[5],scratch[6]);
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
C_ADD(*Fout2,scratch[11],scratch[12]);
C_SUB(*Fout3,scratch[11],scratch[12]);
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
}
}
}
#endif
@ -488,6 +355,9 @@ static
int kf_factor(int n,opus_int16 * facbuf)
{
int p=4;
int i;
int stages=0;
int nbak = n;
/*factor out powers of 4, powers of 2, then any remaining primes */
do {
@ -509,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
{
return 0;
}
*facbuf++ = p;
*facbuf++ = n;
facbuf[2*stages] = p;
if (p==2 && stages > 1)
{
facbuf[2*stages] = 4;
facbuf[2] = 2;
}
stages++;
} while (n > 1);
n = nbak;
/* Reverse the order to get the radix 4 at the end, so we can use the
fast degenerate case. It turns out that reversing the order also
improves the noise behaviour. */
for (i=0;i<stages/2;i++)
{
int tmp;
tmp = facbuf[2*i];
facbuf[2*i] = facbuf[2*(stages-i-1)];
facbuf[2*(stages-i-1)] = tmp;
}
for (i=0;i<stages;i++)
{
n /= facbuf[2*i];
facbuf[2*i+1] = n;
}
return 1;
}
@ -532,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
#endif
}
int opus_fft_alloc_arch_c(kiss_fft_state *st) {
(void)st;
return 0;
}
/*
*
* Allocates all necessary storage space for the fft and ifft.
* The return value is a contiguous block of memory. As such,
* It can be freed with free().
* */
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
const kiss_fft_state *base, int arch)
{
kiss_fft_state *st=NULL;
size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
@ -555,14 +452,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
kiss_twiddle_cpx *twiddles;
st->nfft=nfft;
#ifndef FIXED_POINT
#ifdef FIXED_POINT
st->scale_shift = celt_ilog2(st->nfft);
if (st->nfft == 1<<st->scale_shift)
st->scale = Q15ONE;
else
st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
#else
st->scale = 1.f/nfft;
#endif
if (base != NULL)
{
st->twiddles = base->twiddles;
st->shift = 0;
while (nfft<<st->shift != base->nfft && st->shift < 32)
while (st->shift < 32 && nfft<<st->shift != base->nfft)
st->shift++;
if (st->shift>=32)
goto fail;
@ -581,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
if (st->bitrev==NULL)
goto fail;
compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
/* Initialize architecture specific fft parameters */
if (opus_fft_alloc_arch(st, arch))
goto fail;
}
return st;
fail:
opus_fft_free(st);
opus_fft_free(st, arch);
return NULL;
}
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
{
return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
}
void opus_fft_free(const kiss_fft_state *cfg)
void opus_fft_free_arch_c(kiss_fft_state *st) {
(void)st;
}
void opus_fft_free(const kiss_fft_state *cfg, int arch)
{
if (cfg)
{
opus_fft_free_arch((kiss_fft_state *)cfg, arch);
opus_free((opus_int16*)cfg->bitrev);
if (cfg->shift < 0)
opus_free((kiss_twiddle_cpx*)cfg->twiddles);
@ -606,7 +518,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
#endif /* CUSTOM_MODES */
void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
{
int m2, m;
int p;
@ -618,17 +530,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
/* st->shift can be -1 */
shift = st->shift>0 ? st->shift : 0;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
{
fout[st->bitrev[i]] = fin[i];
#ifndef FIXED_POINT
fout[st->bitrev[i]].r *= st->scale;
fout[st->bitrev[i]].i *= st->scale;
#endif
}
fstride[0] = 1;
L=0;
do {
@ -647,7 +548,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
switch (st->factors[2*i])
{
case 2:
kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
kf_bfly2(fout, m, fstride[i]);
break;
case 4:
kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@ -665,55 +566,39 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
}
}
void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
int m2, m;
int p;
int L;
int fstride[MAXFACTORS];
int i;
int shift;
opus_val16 scale;
#ifdef FIXED_POINT
/* Allows us to scale with MULT16_32_Q16(), which is faster than
MULT16_32_Q15() on ARM. */
int scale_shift = st->scale_shift-1;
#endif
scale = st->scale;
/* st->shift can be -1 */
shift = st->shift>0 ? st->shift : 0;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
{
kiss_fft_cpx x = fin[i];
fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
}
opus_fft_impl(st, fout);
}
void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
int i;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
fout[st->bitrev[i]] = fin[i];
fstride[0] = 1;
L=0;
do {
p = st->factors[2*L];
m = st->factors[2*L+1];
fstride[L+1] = fstride[L]*p;
L++;
} while(m!=1);
m = st->factors[2*L-1];
for (i=L-1;i>=0;i--)
{
if (i!=0)
m2 = st->factors[2*i-1];
else
m2 = 1;
switch (st->factors[2*i])
{
case 2:
ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
case 4:
ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
#ifndef RADIX_TWO_ONLY
case 3:
ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
case 5:
ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
#endif
}
m = m2;
}
for (i=0;i<st->nfft;i++)
fout[i].i = -fout[i].i;
opus_fft_impl(st, fout);
for (i=0;i<st->nfft;i++)
fout[i].i = -fout[i].i;
}

View File

@ -32,6 +32,7 @@
#include <stdlib.h>
#include <math.h>
#include "arch.h"
#include "cpu_support.h"
#ifdef __cplusplus
extern "C" {
@ -77,17 +78,28 @@ typedef struct {
4*4*4*2
*/
typedef struct arch_fft_state{
int is_supported;
void *priv;
} arch_fft_state;
typedef struct kiss_fft_state{
int nfft;
#ifndef FIXED_POINT
kiss_fft_scalar scale;
opus_val16 scale;
#ifdef FIXED_POINT
int scale_shift;
#endif
int shift;
opus_int16 factors[2*MAXFACTORS];
const opus_int16 *bitrev;
const kiss_twiddle_cpx *twiddles;
arch_fft_state *arch_fft;
} kiss_fft_state;
#if defined(HAVE_ARM_NE10)
#include "arm/fft_arm.h"
#endif
/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
/**
@ -113,9 +125,9 @@ typedef struct kiss_fft_state{
* buffer size in *lenmem.
* */
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
/**
* opus_fft(cfg,in_out_buf)
@ -127,10 +139,59 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
* Note that each element is complex and can be accessed like
f[k].r and f[k].i
* */
void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_fft_free(const kiss_fft_state *cfg);
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
void opus_fft_free(const kiss_fft_state *cfg, int arch);
void opus_fft_free_arch_c(kiss_fft_state *st);
int opus_fft_alloc_arch_c(kiss_fft_state *st);
#if !defined(OVERRIDE_OPUS_FFT)
/* Is run-time CPU detection enabled on this platform? */
#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
kiss_fft_state *st);
#define opus_fft_alloc_arch(_st, arch) \
((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
kiss_fft_state *st);
#define opus_fft_free_arch(_st, arch) \
((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
#define opus_fft(_cfg, _fin, _fout, arch) \
((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
#define opus_ifft(_cfg, _fin, _fout, arch) \
((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
#define opus_fft_alloc_arch(_st, arch) \
((void)(arch), opus_fft_alloc_arch_c(_st))
#define opus_fft_free_arch(_st, arch) \
((void)(arch), opus_fft_free_arch_c(_st))
#define opus_fft(_cfg, _fin, _fout, arch) \
((void)(arch), opus_fft_c(_cfg, _fin, _fout))
#define opus_ifft(_cfg, _fin, _fout, arch) \
((void)(arch), opus_ifft_c(_cfg, _fin, _fout))
#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
#ifdef __cplusplus
}

View File

@ -164,7 +164,7 @@ opus_val16 celt_cos_norm(opus_val32 x)
{
return _celt_cos_pi_2(EXTRACT16(x));
} else {
return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
}
} else {
if (x&0x0000ffff)

View File

@ -53,76 +53,100 @@
#include "mathops.h"
#include "stack_alloc.h"
#if defined(MIPSr1_ASM)
#include "mips/mdct_mipsr1.h"
#endif
#ifdef CUSTOM_MODES
int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
{
int i;
int N4;
kiss_twiddle_scalar *trig;
#if defined(FIXED_POINT)
int shift;
int N2=N>>1;
#endif
l->n = N;
N4 = N>>2;
l->maxshift = maxshift;
for (i=0;i<=maxshift;i++)
{
if (i==0)
l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
else
l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
#ifndef ENABLE_TI_DSPLIB55
if (l->kfft[i]==NULL)
return 0;
#endif
}
l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
if (l->trig==NULL)
return 0;
/* We have enough points that sine isn't necessary */
for (shift=0;shift<=maxshift;shift++)
{
/* We have enough points that sine isn't necessary */
#if defined(FIXED_POINT)
for (i=0;i<=N4;i++)
trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
#if 1
for (i=0;i<N2;i++)
trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
#else
for (i=0;i<=N4;i++)
trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
for (i=0;i<N2;i++)
trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
#endif
#else
for (i=0;i<N2;i++)
trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
#endif
trig += N2;
N2 >>= 1;
N >>= 1;
}
return 1;
}
void clt_mdct_clear(mdct_lookup *l)
void clt_mdct_clear(mdct_lookup *l, int arch)
{
int i;
for (i=0;i<=l->maxshift;i++)
opus_fft_free(l->kfft[i]);
opus_fft_free(l->kfft[i], arch);
opus_free((kiss_twiddle_scalar*)l->trig);
}
#endif /* CUSTOM_MODES */
/* Forward MDCT trashes the input array */
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride)
#ifndef OVERRIDE_clt_mdct_forward
void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
kiss_twiddle_scalar sine;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_scalar, f2);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
opus_val16 scale;
#ifdef FIXED_POINT
/* Allows us to scale with MULT16_32_Q16(), which is faster than
MULT16_32_Q15() on ARM. */
int scale_shift = st->scale_shift-1;
#endif
SAVE_STACK;
(void)arch;
scale = st->scale;
N = l->n;
N >>= shift;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N2, kiss_fft_scalar);
/* sin(x) ~= x here */
#ifdef FIXED_POINT
sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
#else
sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
#endif
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
@ -167,123 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &l->trig[0];
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
re = yp[0];
im = yp[1];
yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
/* works because the cos is nearly one */
*yp++ = yr + S_MUL(yi,sine);
*yp++ = yi - S_MUL(yr,sine);
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
f2[st->bitrev[i]] = yc;
}
}
/* N/4 complex FFT, down-scales by 4/N */
opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
/* N/4 complex FFT, does not downscale anymore */
opus_fft_impl(st, f2);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &l->trig[0];
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
/* works because the cos is nearly one */
*yp1 = yr - S_MUL(yi,sine);
*yp2 = yi + S_MUL(yr,sine);;
fp += 2;
yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
#endif /* OVERRIDE_clt_mdct_forward */
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
#ifndef OVERRIDE_clt_mdct_backward
void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
kiss_twiddle_scalar sine;
VARDECL(kiss_fft_scalar, f2);
SAVE_STACK;
const kiss_twiddle_scalar *trig;
(void) arch;
N = l->n;
N >>= shift;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f2, N2, kiss_fft_scalar);
/* sin(x) ~= x here */
#ifdef FIXED_POINT
sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
#else
sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
#endif
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
kiss_fft_scalar * OPUS_RESTRICT yp = f2;
const kiss_twiddle_scalar *t = &l->trig[0];
kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
for(i=0;i<N4;i++)
{
int rev;
kiss_fft_scalar yr, yi;
yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
/* works because the cos is nearly one */
*yp++ = yr - S_MUL(yi,sine);
*yp++ = yi + S_MUL(yr,sine);
rev = *bitrev++;
yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
/* We swap real and imag because we use an FFT instead of an IFFT. */
yp[2*rev+1] = yr;
yp[2*rev] = yi;
/* Storing the pre-rotation directly in the bitrev order. */
xp1+=2*stride;
xp2-=2*stride;
}
}
/* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &l->trig[0];
kiss_fft_scalar * yp0 = out+(overlap>>1);
kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
re = yp0[0];
im = yp0[1];
t0 = t[i<<shift];
t1 = t[(N4-i)<<shift];
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp0[1];
im = yp0[0];
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
re = yp1[0];
im = yp1[1];
/* works because the cos is nearly one */
yp0[0] = -(yr - S_MUL(yi,sine));
yp1[1] = yi + S_MUL(yr,sine);
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp1[1];
im = yp1[0];
yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)<<shift];
t1 = t[(i+1)<<shift];
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
/* works because the cos is nearly one */
yp1[0] = -(yr - S_MUL(yi,sine));
yp0[1] = yi + S_MUL(yr,sine);
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
@ -307,5 +339,5 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
wp2--;
}
}
RESTORE_STACK;
}
#endif /* OVERRIDE_clt_mdct_backward */

View File

@ -53,18 +53,60 @@ typedef struct {
const kiss_twiddle_scalar * OPUS_RESTRICT trig;
} mdct_lookup;
int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
void clt_mdct_clear(mdct_lookup *l);
#if defined(HAVE_ARM_NE10)
#include "arm/mdct_arm.h"
#endif
int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
void clt_mdct_clear(mdct_lookup *l, int arch);
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride);
void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
(scales implicitly by 1/2) */
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
const opus_val16 * OPUS_RESTRICT window,
int overlap, int shift, int stride, int arch);
#if !defined(OVERRIDE_OPUS_MDCT)
/* Is run-time CPU detection enabled on this platform? */
#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10)
extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
int overlap, int shift, int stride, int arch);
#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
((*CLT_MDCT_FORWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
_window, _overlap, _shift, \
_stride, _arch))
extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
int overlap, int shift, int stride, int arch);
#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
(*CLT_MDCT_BACKWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
_window, _overlap, _shift, \
_stride, _arch)
#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */
#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */
#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
#endif

View File

@ -0,0 +1,151 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2010 Xiph.Org Foundation
Copyright (c) 2008 Gregory Maxwell
Written by Jean-Marc Valin and Gregory Maxwell */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CELT_MIPSR1_H__
#define __CELT_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#define CELT_C
#include "os_support.h"
#include "mdct.h"
#include <math.h>
#include "celt.h"
#include "pitch.h"
#include "bands.h"
#include "modes.h"
#include "entcode.h"
#include "quant_bands.h"
#include "rate.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "float_cast.h"
#include <stdarg.h>
#include "celt_lpc.h"
#include "vq.h"
#define OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap, int arch)
{
int i;
opus_val32 x0, x1, x2, x3, x4;
(void)arch;
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
opus_val16 g00, g01, g02, g10, g11, g12;
static const opus_val16 gains[3][3] = {
{QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
{QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
{QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
if (g0==0 && g1==0)
{
/* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
if (x!=y)
OPUS_MOVE(y, x, N);
return;
}
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
x1 = x[-T1+1];
x2 = x[-T1 ];
x3 = x[-T1-1];
x4 = x[-T1-2];
/* If the filter didn't change, we don't need the overlap */
if (g0==g1 && T0==T1 && tapset0==tapset1)
overlap=0;
for (i=0;i<overlap;i++)
{
opus_val16 f;
opus_val32 res;
f = MULT16_16_Q15(window[i],window[i]);
x0= x[i-T1+2];
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
y[i] = x[i] + res;
x4=x3;
x3=x2;
x2=x1;
x1=x0;
}
x4 = x[i-T1-2];
x3 = x[i-T1-1];
x2 = x[i-T1];
x1 = x[i-T1+1];
if (g1==0)
{
/* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
if (x!=y)
OPUS_MOVE(y+overlap, x+overlap, N-overlap);
return;
}
for (i=overlap;i<N;i++)
{
opus_val32 res;
x0=x[i-T1+2];
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
y[i] = x[i] + res;
x4=x3;
x3=x2;
x2=x1;
x1=x0;
}
}
#endif /* __CELT_MIPSR1_H__ */

View File

@ -0,0 +1,126 @@
/* Copyright (C) 2007-2009 Xiph.Org Foundation
Copyright (C) 2003-2008 Jean-Marc Valin
Copyright (C) 2007-2008 CSIRO */
/**
@file fixed_generic.h
@brief Generic fixed-point operations
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CELT_FIXED_GENERIC_MIPSR1_H
#define CELT_FIXED_GENERIC_MIPSR1_H
#undef MULT16_32_Q15_ADD
static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_32_Q15_SUB
static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_16_Q15_ADD
static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_16_Q15_SUB
static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef MULT16_32_Q16
static inline int MULT16_32_Q16(int a, int b)
{
int c;
asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
return c;
}
#undef MULT16_32_P16
static inline int MULT16_32_P16(int a, int b)
{
int c;
asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
return c;
}
#undef MULT16_32_Q15
static inline int MULT16_32_Q15(int a, int b)
{
int c;
asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
return c;
}
#undef MULT32_32_Q31
static inline int MULT32_32_Q31(int a, int b)
{
int r;
asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
return r;
}
#undef PSHR32
static inline int PSHR32(int a, int shift)
{
int r;
asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
return r;
}
#undef MULT16_16_P15
static inline int MULT16_16_P15(int a, int b)
{
int r;
asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r): "0" (r), "i"(15));
return r;
}
#endif /* CELT_FIXED_GENERIC_MIPSR1_H */

View File

@ -0,0 +1,167 @@
/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.*/
#ifndef KISS_FFT_MIPSR1_H
#define KISS_FFT_MIPSR1_H
#if !defined(KISS_FFT_GUTS_H)
#error "This file should only be included from _kiss_fft_guts.h"
#endif
#ifdef FIXED_POINT
#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
#undef S_MUL_ADD
static inline int S_MUL_ADD(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef S_MUL_SUB
static inline int S_MUL_SUB(int a, int b, int c, int d) {
int m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
return m;
}
#undef C_MUL
# define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
kiss_fft_cpx m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
return m;
}
#undef C_MULC
# define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
kiss_fft_cpx m;
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
return m;
}
#endif /* FIXED_POINT */
#define OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
const kiss_fft_state *st,
int m,
int N,
int mm
)
{
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
#ifdef FIXED_POINT
ya.r = 10126;
ya.i = -31164;
yb.r = -26510;
yb.i = -19261;
#else
ya = st->twiddles[fstride*m];
yb = st->twiddles[fstride*2*m];
#endif
tw=st->twiddles;
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
Fout0=Fout;
Fout1=Fout0+m;
Fout2=Fout0+2*m;
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
/* For non-custom modes, m is guaranteed to be a multiple of 4. */
for ( u=0; u<m; ++u ) {
scratch[0] = *Fout0;
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
C_ADD( scratch[7],scratch[1],scratch[4]);
C_SUB( scratch[10],scratch[1],scratch[4]);
C_ADD( scratch[8],scratch[2],scratch[3]);
C_SUB( scratch[9],scratch[2],scratch[3]);
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
C_SUB(*Fout1,scratch[5],scratch[6]);
C_ADD(*Fout4,scratch[5],scratch[6]);
scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
C_ADD(*Fout2,scratch[11],scratch[12]);
C_SUB(*Fout3,scratch[11],scratch[12]);
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
}
}
}
#endif /* KISS_FFT_MIPSR1_H */

View File

@ -0,0 +1,288 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2008 Xiph.Org Foundation
Written by Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* This is a simple MDCT implementation that uses a N/4 complex FFT
to do most of the work. It should be relatively straightforward to
plug in pretty much and FFT here.
This replaces the Vorbis FFT (and uses the exact same API), which
was a bit too messy and that was ending up duplicating code
(might as well use the same FFT everywhere).
The algorithm is similar to (and inspired from) Fabrice Bellard's
MDCT implementation in FFMPEG, but has differences in signs, ordering
and scaling in many places.
*/
#ifndef __MDCT_MIPSR1_H__
#define __MDCT_MIPSR1_H__
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include "mdct.h"
#include "kiss_fft.h"
#include "_kiss_fft_guts.h"
#include <math.h>
#include "os_support.h"
#include "mathops.h"
#include "stack_alloc.h"
/* Forward MDCT trashes the input array */
#define OVERRIDE_clt_mdct_forward
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
opus_val16 scale;
#ifdef FIXED_POINT
/* Allows us to scale with MULT16_32_Q16(), which is faster than
MULT16_32_Q15() on ARM. */
int scale_shift = st->scale_shift-1;
#endif
(void)arch;
SAVE_STACK;
scale = st->scale;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
for(i=0;i<((overlap+3)>>2);i++)
{
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
*yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
*yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
wp1 = window;
wp2 = window+overlap-1;
for(;i<N4-((overlap+3)>>2);i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = *xp2;
*yp++ = *xp1;
xp1+=2;
xp2-=2;
}
for(;i<N4;i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
*yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
}
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL_SUB(re,t0,im,t1);
yi = S_MUL_ADD(im,t0,re,t1);
yc.r = yr;
yc.i = yi;
yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
f2[st->bitrev[i]] = yc;
}
}
/* N/4 complex FFT, does not downscale anymore */
opus_fft_impl(st, f2);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
#define OVERRIDE_clt_mdct_backward
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
const kiss_twiddle_scalar *trig;
(void)arch;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
for(i=0;i<N4;i++)
{
int rev;
kiss_fft_scalar yr, yi;
rev = *bitrev++;
yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
/* We swap real and imag because we use an FFT instead of an IFFT. */
yp[2*rev+1] = yr;
yp[2*rev] = yi;
/* Storing the pre-rotation directly in the bitrev order. */
xp1+=2*stride;
xp2-=2*stride;
}
}
opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp0[1];
im = yp0[0];
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL_ADD(re,t0 , im,t1);
yi = S_MUL_SUB(re,t1 , im,t0);
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp1[1];
im = yp1[0];
yp0[0] = yr;
yp1[1] = yi;
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL_ADD(re,t0,im,t1);
yi = S_MUL_SUB(re,t1,im,t0);
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
}
/* Mirror on both sides for TDAC */
{
kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
const opus_val16 * OPUS_RESTRICT wp1 = window;
const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
for(i = 0; i < overlap/2; i++)
{
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
wp1++;
wp2--;
}
}
}
#endif /* __MDCT_MIPSR1_H__ */

View File

@ -0,0 +1,161 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2009 Xiph.Org Foundation
Written by Jean-Marc Valin */
/**
@file pitch.h
@brief Pitch analysis
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef PITCH_MIPSR1_H
#define PITCH_MIPSR1_H
#define OVERRIDE_DUAL_INNER_PROD
static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
{
int j;
opus_val32 xy01=0;
opus_val32 xy02=0;
(void)arch;
asm volatile("MULT $ac1, $0, $0");
asm volatile("MULT $ac2, $0, $0");
/* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
for (j=0;j<N;j++)
{
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
++j;
asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
}
asm volatile ("mflo %0, $ac1": "=r"(xy01));
asm volatile ("mflo %0, $ac2": "=r"(xy02));
*xy1 = xy01;
*xy2 = xy02;
}
static inline void xcorr_kernel_mips(const opus_val16 * x,
const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
opus_val16 y_0, y_1, y_2, y_3;
opus_int64 sum_0, sum_1, sum_2, sum_3;
sum_0 = (opus_int64)sum[0];
sum_1 = (opus_int64)sum[1];
sum_2 = (opus_int64)sum[2];
sum_3 = (opus_int64)sum[3];
y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
y_0=*y++;
y_1=*y++;
y_2=*y++;
for (j=0;j<len-3;j+=4)
{
opus_val16 tmp;
tmp = *x++;
y_3=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_0);
sum_1 = __builtin_mips_madd( sum_1, tmp, y_1);
sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
tmp=*x++;
y_0=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
tmp=*x++;
y_1=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
tmp=*x++;
y_2=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_1);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_2);
}
if (j++<len)
{
opus_val16 tmp = *x++;
y_3=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
}
if (j++<len)
{
opus_val16 tmp=*x++;
y_0=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
}
if (j<len)
{
opus_val16 tmp=*x++;
y_1=*y++;
sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
}
sum[0] = (opus_val32)sum_0;
sum[1] = (opus_val32)sum_1;
sum[2] = (opus_val32)sum_2;
sum[3] = (opus_val32)sum_3;
}
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch), xcorr_kernel_mips(x, y, sum, len))
#endif /* PITCH_MIPSR1_H */

View File

@ -0,0 +1,125 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2009 Xiph.Org Foundation
Written by Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __VQ_MIPSR1_H__
#define __VQ_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mathops.h"
#include "arch.h"
static unsigned extract_collapse_mask(int *iy, int N, int B);
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain);
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch);
#define OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
int i;
opus_val16 ms;
celt_norm *Xptr;
Xptr = X;
ms = NEG16(s);
for (i=0;i<len-stride;i++)
{
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
Xptr = &X[len-2*stride-1];
for (i=len-2*stride-1;i>=0;i--)
{
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
}
#define OVERRIDE_renormalise_vector
#define renormalise_vector(X, N, gain, arch) \
(renormalise_vector_mips(X, N, gain, arch))
void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch)
{
int i;
#ifdef FIXED_POINT
int k;
#endif
opus_val32 E = EPSILON;
opus_val16 g;
opus_val32 t;
celt_norm *xptr = X;
int X0, X1;
(void)arch;
asm volatile("mult $ac1, $0, $0");
asm volatile("MTLO %0, $ac1" : :"r" (E));
/*if(N %4)
printf("error");*/
for (i=0;i<N-2;i+=2)
{
X0 = (int)*xptr++;
asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
X1 = (int)*xptr++;
asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
}
for (;i<N;i++)
{
X0 = (int)*xptr++;
asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
}
asm volatile("MFLO %0, $ac1" : "=r" (E));
#ifdef FIXED_POINT
k = celt_ilog2(E)>>1;
#endif
t = VSHR32(E, 2*(k-7));
g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
xptr = X;
for (i=0;i<N;i++)
{
*xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
xptr++;
}
/*return celt_sqrt(E);*/
}
#endif /* __VQ_MIPSR1_H__ */

View File

@ -37,6 +37,7 @@
#include "os_support.h"
#include "stack_alloc.h"
#include "quant_bands.h"
#include "cpu_support.h"
static const opus_int16 eband5ms[] = {
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */
@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
opus_val16 *window;
opus_int16 *logN;
int LM;
int arch = opus_select_arch();
ALLOC_STACK;
#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
if (global_stack==NULL)
@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
compute_pulse_cache(mode, mode->maxLM);
if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
mode->maxLM) == 0)
mode->maxLM, arch) == 0)
goto failure;
if (error)
@ -408,6 +410,8 @@ failure:
#ifdef CUSTOM_MODES
void opus_custom_mode_destroy(CELTMode *mode)
{
int arch = opus_select_arch();
if (mode == NULL)
return;
#ifndef CUSTOM_MODES_ONLY
@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
opus_free((opus_int16*)mode->cache.index);
opus_free((unsigned char*)mode->cache.bits);
opus_free((unsigned char*)mode->cache.caps);
clt_mdct_clear(&mode->mdct);
clt_mdct_clear(&mode->mdct, arch);
opus_free((CELTMode *)mode);
}

View File

@ -39,14 +39,6 @@
#define MAX_PERIOD 1024
#ifndef OVERLAP
#define OVERLAP(mode) ((mode)->overlap)
#endif
#ifndef FRAMESIZE
#define FRAMESIZE(mode) ((mode)->mdctSize)
#endif
typedef struct {
int size;
const opus_int16 *index;

View File

@ -67,18 +67,18 @@ static OPUS_INLINE void opus_free (void *ptr)
}
#endif
/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking */
/** Copy n elements from src to dst. The 0* term provides compile-time type checking */
#ifndef OVERRIDE_OPUS_COPY
#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
#endif
/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term
/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
provides compile-time type checking */
#ifndef OVERRIDE_OPUS_MOVE
#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
#endif
/** Set n elements of dst to zero, starting at address s */
/** Set n elements of dst to zero */
#ifndef OVERRIDE_OPUS_CLEAR
#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
#endif

View File

@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
}
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
/* Pure C implementation. */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
#if defined(OVERRIDE_PITCH_XCORR)
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch)
#else
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
#endif
{
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
int i, j;
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
#if !defined(OVERRIDE_PITCH_XCORR)
(void)arch;
#endif
for (i=0;i<max_pitch;i++)
{
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, x[j],y[i+j]);
sum = MAC16_16(sum, _x[j], _y[i+j]);
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -241,30 +251,25 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
#ifdef FIXED_POINT
return maxcorr;
#endif
}
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
{
int i,j;
int i;
/*The EDSP version requires that max_pitch is at least 1, and that _x is
32-bit aligned.
Since it's hard to put asserts in assembly, put them here.*/
celt_assert(max_pitch>0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
celt_assert(max_pitch>0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
xcorr_kernel(_x, _y+i, sum, len);
#if defined(OVERRIDE_PITCH_XCORR)
xcorr_kernel_c(_x, _y+i, sum, len);
#else
xcorr_kernel(_x, _y+i, sum, len, arch);
#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
@ -279,9 +284,12 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
for (;i<max_pitch;i++)
{
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, _x[j],_y[i+j]);
opus_val32 sum;
#if defined(OVERRIDE_PITCH_XCORR)
sum = celt_inner_prod_c(_x, _y+i, len);
#else
sum = celt_inner_prod(_x, _y+i, len, arch);
#endif
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
#ifdef FIXED_POINT
return maxcorr;
#endif
#endif
}
#endif
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
int len, int max_pitch, int *pitch, int arch)
{
@ -361,12 +369,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
#endif
for (i=0;i<max_pitch>>1;i++)
{
opus_val32 sum=0;
opus_val32 sum;
xcorr[i] = 0;
if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
continue;
#ifdef FIXED_POINT
sum = 0;
for (j=0;j<len>>1;j++)
sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
#else
sum = celt_inner_prod_c(x_lp, y+i, len>>1);
#endif
xcorr[i] = MAX32(-1, sum);
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -399,9 +412,44 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
RESTORE_STACK;
}
#ifdef FIXED_POINT
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
opus_val32 x2y2;
int sx, sy, shift;
opus_val32 g;
opus_val16 den;
if (xy == 0 || xx == 0 || yy == 0)
return 0;
sx = celt_ilog2(xx)-14;
sy = celt_ilog2(yy)-14;
shift = sx + sy;
x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy));
if (shift & 1) {
if (x2y2 < 32768)
{
x2y2 <<= 1;
shift--;
} else {
x2y2 >>= 1;
shift++;
}
}
den = celt_rsqrt_norm(x2y2);
g = MULT16_32_Q15(den, xy);
g = VSHR32(g, (shift>>1)-1);
return EXTRACT16(MIN32(g, Q15ONE));
}
#else
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
return xy/celt_sqrt(1+xx*yy);
}
#endif
static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int N, int *T0_, int prev_period, opus_val16 prev_gain)
int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
{
int k, i, T, T0;
opus_val16 g, g0;
@ -426,7 +474,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
T = T0 = *T0_;
ALLOC(yy_lookup, maxperiod+1, opus_val32);
dual_inner_prod(x, x, x-T0, N, &xx, &xy);
dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
yy_lookup[0] = xx;
yy=xx;
for (i=1;i<=maxperiod;i++)
@ -437,18 +485,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
yy = yy_lookup[T0];
best_xy = xy;
best_yy = yy;
#ifdef FIXED_POINT
{
opus_val32 x2y2;
int sh, t;
x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
sh = celt_ilog2(x2y2)>>1;
t = VSHR32(x2y2, 2*(sh-7));
g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
}
#else
g = g0 = xy/celt_sqrt(1+xx*yy);
#endif
g = g0 = compute_pitch_gain(xy, xx, yy);
/* Look for any pitch at T/k */
for (k=2;k<=15;k++)
{
@ -456,7 +493,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
opus_val16 g1;
opus_val16 cont=0;
opus_val16 thresh;
T1 = (2*T0+k)/(2*k);
T1 = celt_udiv(2*T0+k, 2*k);
if (T1 < minperiod)
break;
/* Look for another strong correlation at T1b */
@ -468,27 +505,16 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
T1b = T0+T1;
} else
{
T1b = (2*second_check[k]*T0+k)/(2*k);
T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
}
dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
xy += xy2;
yy = yy_lookup[T1] + yy_lookup[T1b];
#ifdef FIXED_POINT
{
opus_val32 x2y2;
int sh, t;
x2y2 = 1+MULT32_32_Q31(xx,yy);
sh = celt_ilog2(x2y2)>>1;
t = VSHR32(x2y2, 2*(sh-7));
g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
}
#else
g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
#endif
dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
xy = HALF32(xy + xy2);
yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
g1 = compute_pitch_gain(xy, xx, yy);
if (abs(T1-prev_period)<=1)
cont = prev_gain;
else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
cont = HALF32(prev_gain);
cont = HALF16(prev_gain);
else
cont = 0;
thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
@ -513,13 +539,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
pg = SHR32(frac_div32(best_xy,best_yy+1),16);
for (k=0;k<3;k++)
{
int T1 = T+k-1;
xy = 0;
for (i=0;i<N;i++)
xy = MAC16_16(xy, x[i], x[i-T1]);
xcorr[k] = xy;
}
xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
offset = 1;
else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))

View File

@ -37,11 +37,17 @@
#include "modes.h"
#include "cpu_support.h"
#if defined(__SSE__) && !defined(FIXED_POINT)
#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \
|| ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT))
#include "x86/pitch_sse.h"
#endif
#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
#if defined(MIPSr1_ASM)
#include "mips/pitch_mipsr1.h"
#endif
#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
|| defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
# include "arm/pitch_arm.h"
#endif
@ -52,12 +58,12 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
int len, int max_pitch, int *pitch, int arch);
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int N, int *T0, int prev_period, opus_val16 prev_gain);
int N, int *T0, int prev_period, opus_val16 prev_gain, int arch);
/* OPT: This is the kernel you really want to optimize. It gets used a lot
by the prefilter and by the PLC. */
#ifndef OVERRIDE_XCORR_KERNEL
static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
opus_val16 y_0, y_1, y_2, y_3;
@ -122,10 +128,14 @@ static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y,
sum[3] = MAC16_16(sum[3],tmp,y_1);
}
}
#ifndef OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch),xcorr_kernel_c(x, y, sum, len))
#endif /* OVERRIDE_XCORR_KERNEL */
#ifndef OVERRIDE_DUAL_INNER_PROD
static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
@ -139,8 +149,35 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y
*xy1 = xy01;
*xy2 = xy02;
}
#ifndef OVERRIDE_DUAL_INNER_PROD
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2))
#endif
/*We make sure a C version is always available for cases where the overhead of
vectorization and passing around an arch flag aren't worth it.*/
static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x,
const opus_val16 *y, int N)
{
int i;
opus_val32 xy=0;
for (i=0;i<N;i++)
xy = MAC16_16(xy, x[i], y[i]);
return xy;
}
#if !defined(OVERRIDE_CELT_INNER_PROD)
# define celt_inner_prod(x, y, N, arch) \
((void)(arch),celt_inner_prod_c(x, y, N))
#endif
#ifdef NON_STATIC_COMB_FILTER_CONST_C
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12);
#endif
#ifdef FIXED_POINT
opus_val32
#else
@ -150,24 +187,14 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD)
extern
# if defined(FIXED_POINT)
#ifdef FIXED_POINT
opus_val32
# else
#else
void
# endif
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
#endif
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch);
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# else
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
# endif
#endif
#endif

View File

@ -292,7 +292,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
#endif
}
if (lfe)
max_decay=3;
max_decay = QCONST16(3.f,DB_SHIFT);
enc_start_state = *enc;
ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);

View File

@ -131,7 +131,7 @@ void compute_pulse_cache(CELTMode *m, int LM)
for (i=0;i<nbEntries;i++)
{
unsigned char *ptr = bits+entryI[i];
opus_int16 tmp[MAX_PULSES+1];
opus_int16 tmp[CELT_MAX_PULSES+1];
get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
for (j=1;j<=entryK[i];j++)
ptr[j] = tmp[get_pulses(j)]-1;
@ -296,7 +296,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
done = 0;
for (j=end;j-->start;)
{
int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS);
int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
if (tmp < thresh[j] && !done)
{
if (tmp >= alloc_floor)
@ -333,7 +333,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
/*Figure out how many left-over bits we would be adding to this band.
This can include bits we've stolen back from higher, skipped bands.*/
left = total-psum;
percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
band_width = m->eBands[codedBands]-m->eBands[j];
@ -414,7 +414,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
/* Allocate the remaining bits */
left = total-psum;
percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
for (j=start;j<codedBands;j++)
bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
@ -465,7 +465,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
offset += NClogN>>3;
/* Divide with rounding */
ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES));
ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))));
ebits[j] = celt_udiv(ebits[j], den)>>BITRES;
/* Make sure not to bust */
if (C*ebits[j] > (bits[j]>>BITRES))

View File

@ -32,7 +32,7 @@
#define MAX_PSEUDO 40
#define LOG_MAX_PSEUDO 6
#define MAX_PULSES 128
#define CELT_MAX_PULSES 128
#define MAX_FINE_BITS 8

View File

@ -116,9 +116,11 @@
#else
#ifdef CELT_C
char *scratch_ptr=0;
char *global_stack=0;
#else
extern char *global_stack;
extern char *scratch_ptr;
#endif /* CELT_C */
#ifdef ENABLE_VALGRIND
@ -140,8 +142,12 @@ extern char *global_stack_top;
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
#if 0 /* Set this to 1 to instrument pseudostack usage */
#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack)
#else
#define RESTORE_STACK (global_stack = _saved_stack)
#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
#endif
#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack;
#endif /* ENABLE_VALGRIND */

View File

@ -4,6 +4,11 @@
#include "modes.h"
#include "rate.h"
#ifdef HAVE_ARM_NE10
#define OVERRIDE_FFT 1
#include "static_modes_fixed_arm_ne10.h"
#endif
#ifndef DEF_WINDOW120
#define DEF_WINDOW120
static const opus_val16 window120[120] = {
@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
#ifndef FFT_BITREV480
#define FFT_BITREV480
static const opus_int16 fft_bitrev480[480] = {
0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
};
#endif
#ifndef FFT_BITREV240
#define FFT_BITREV240
static const opus_int16 fft_bitrev240[240] = {
0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
};
#endif
#ifndef FFT_BITREV120
#define FFT_BITREV120
static const opus_int16 fft_bitrev120[120] = {
0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
};
#endif
#ifndef FFT_BITREV60
#define FFT_BITREV60
static const opus_int16 fft_bitrev60[60] = {
0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
};
#endif
@ -426,10 +431,17 @@ static const opus_int16 fft_bitrev60[60] = {
#define FFT_STATE48000_960_0
static const kiss_fft_state fft_state48000_960_0 = {
480, /* nfft */
17476, /* scale */
8, /* scale_shift */
-1, /* shift */
{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev480, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_480,
#else
NULL,
#endif
};
#endif
@ -437,10 +449,17 @@ fft_twiddles48000_960, /* bitrev */
#define FFT_STATE48000_960_1
static const kiss_fft_state fft_state48000_960_1 = {
240, /* nfft */
17476, /* scale */
7, /* scale_shift */
1, /* shift */
{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev240, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_240,
#else
NULL,
#endif
};
#endif
@ -448,10 +467,17 @@ fft_twiddles48000_960, /* bitrev */
#define FFT_STATE48000_960_2
static const kiss_fft_state fft_state48000_960_2 = {
120, /* nfft */
17476, /* scale */
6, /* scale_shift */
2, /* shift */
{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev120, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_120,
#else
NULL,
#endif
};
#endif
@ -459,10 +485,17 @@ fft_twiddles48000_960, /* bitrev */
#define FFT_STATE48000_960_3
static const kiss_fft_state fft_state48000_960_3 = {
60, /* nfft */
17476, /* scale */
5, /* scale_shift */
3, /* shift */
{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev60, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_60,
#else
NULL,
#endif
};
#endif
@ -470,104 +503,368 @@ fft_twiddles48000_960, /* bitrev */
#ifndef MDCT_TWIDDLES960
#define MDCT_TWIDDLES960
static const opus_val16 mdct_twiddles960[481] = {
32767, 32767, 32767, 32767, 32766,
32763, 32762, 32759, 32757, 32753,
32751, 32747, 32743, 32738, 32733,
32729, 32724, 32717, 32711, 32705,
32698, 32690, 32683, 32676, 32667,
32658, 32650, 32640, 32631, 32620,
32610, 32599, 32588, 32577, 32566,
32554, 32541, 32528, 32515, 32502,
32487, 32474, 32459, 32444, 32429,
32413, 32397, 32381, 32364, 32348,
32331, 32313, 32294, 32277, 32257,
32239, 32219, 32200, 32180, 32159,
32138, 32118, 32096, 32074, 32051,
32029, 32006, 31984, 31960, 31936,
31912, 31888, 31863, 31837, 31812,
31786, 31760, 31734, 31707, 31679,
31652, 31624, 31596, 31567, 31539,
31508, 31479, 31450, 31419, 31388,
31357, 31326, 31294, 31262, 31230,
31198, 31164, 31131, 31097, 31063,
31030, 30994, 30959, 30924, 30889,
30853, 30816, 30779, 30743, 30705,
30668, 30629, 30592, 30553, 30515,
30475, 30435, 30396, 30356, 30315,
30274, 30233, 30191, 30149, 30107,
30065, 30022, 29979, 29936, 29891,
29847, 29803, 29758, 29713, 29668,
29622, 29577, 29529, 29483, 29436,
29390, 29341, 29293, 29246, 29197,
29148, 29098, 29050, 29000, 28949,
28899, 28848, 28797, 28746, 28694,
28642, 28590, 28537, 28485, 28432,
28378, 28324, 28271, 28217, 28162,
28106, 28051, 27995, 27940, 27884,
27827, 27770, 27713, 27657, 27598,
27540, 27481, 27423, 27365, 27305,
27246, 27187, 27126, 27066, 27006,
26945, 26883, 26822, 26760, 26698,
26636, 26574, 26510, 26448, 26383,
26320, 26257, 26191, 26127, 26062,
25997, 25931, 25866, 25800, 25734,
25667, 25601, 25533, 25466, 25398,
25330, 25262, 25194, 25125, 25056,
24987, 24917, 24848, 24778, 24707,
24636, 24566, 24495, 24424, 24352,
24280, 24208, 24135, 24063, 23990,
23917, 23842, 23769, 23695, 23622,
23546, 23472, 23398, 23322, 23246,
23171, 23095, 23018, 22942, 22866,
22788, 22711, 22634, 22557, 22478,
22400, 22322, 22244, 22165, 22085,
22006, 21927, 21846, 21766, 21687,
21606, 21524, 21443, 21363, 21282,
21199, 21118, 21035, 20954, 20870,
20788, 20705, 20621, 20538, 20455,
20371, 20286, 20202, 20118, 20034,
19947, 19863, 19777, 19692, 19606,
19520, 19434, 19347, 19260, 19174,
19088, 18999, 18911, 18825, 18737,
18648, 18560, 18472, 18384, 18294,
18205, 18116, 18025, 17936, 17846,
17757, 17666, 17576, 17485, 17395,
17303, 17212, 17122, 17030, 16937,
16846, 16755, 16662, 16569, 16477,
16385, 16291, 16198, 16105, 16012,
15917, 15824, 15730, 15636, 15541,
15447, 15352, 15257, 15162, 15067,
14973, 14875, 14781, 14685, 14589,
14493, 14396, 14300, 14204, 14107,
14010, 13914, 13815, 13718, 13621,
13524, 13425, 13328, 13230, 13133,
13033, 12935, 12836, 12738, 12638,
12540, 12441, 12341, 12241, 12142,
12044, 11943, 11843, 11744, 11643,
11542, 11442, 11342, 11241, 11139,
11039, 10939, 10836, 10736, 10635,
10534, 10431, 10330, 10228, 10127,
10024, 9921, 9820, 9718, 9614,
9512, 9410, 9306, 9204, 9101,
8998, 8895, 8791, 8689, 8585,
8481, 8377, 8274, 8171, 8067,
7962, 7858, 7753, 7650, 7545,
7441, 7336, 7231, 7129, 7023,
6917, 6813, 6709, 6604, 6498,
6393, 6288, 6182, 6077, 5973,
5867, 5760, 5656, 5549, 5445,
5339, 5232, 5127, 5022, 4914,
4809, 4703, 4596, 4490, 4384,
4278, 4171, 4065, 3958, 3852,
3745, 3640, 3532, 3426, 3318,
3212, 3106, 2998, 2891, 2786,
2679, 2570, 2465, 2358, 2251,
2143, 2037, 1929, 1823, 1715,
1609, 1501, 1393, 1287, 1180,
1073, 964, 858, 751, 644,
535, 429, 322, 214, 107,
0, };
static const opus_val16 mdct_twiddles960[1800] = {
32767, 32767, 32767, 32766, 32765,
32763, 32761, 32759, 32756, 32753,
32750, 32746, 32742, 32738, 32733,
32728, 32722, 32717, 32710, 32704,
32697, 32690, 32682, 32674, 32666,
32657, 32648, 32639, 32629, 32619,
32609, 32598, 32587, 32576, 32564,
32552, 32539, 32526, 32513, 32500,
32486, 32472, 32457, 32442, 32427,
32411, 32395, 32379, 32362, 32345,
32328, 32310, 32292, 32274, 32255,
32236, 32217, 32197, 32177, 32157,
32136, 32115, 32093, 32071, 32049,
32027, 32004, 31981, 31957, 31933,
31909, 31884, 31859, 31834, 31809,
31783, 31756, 31730, 31703, 31676,
31648, 31620, 31592, 31563, 31534,
31505, 31475, 31445, 31415, 31384,
31353, 31322, 31290, 31258, 31226,
31193, 31160, 31127, 31093, 31059,
31025, 30990, 30955, 30920, 30884,
30848, 30812, 30775, 30738, 30701,
30663, 30625, 30587, 30548, 30509,
30470, 30430, 30390, 30350, 30309,
30269, 30227, 30186, 30144, 30102,
30059, 30016, 29973, 29930, 29886,
29842, 29797, 29752, 29707, 29662,
29616, 29570, 29524, 29477, 29430,
29383, 29335, 29287, 29239, 29190,
29142, 29092, 29043, 28993, 28943,
28892, 28842, 28791, 28739, 28688,
28636, 28583, 28531, 28478, 28425,
28371, 28317, 28263, 28209, 28154,
28099, 28044, 27988, 27932, 27876,
27820, 27763, 27706, 27648, 27591,
27533, 27474, 27416, 27357, 27298,
27238, 27178, 27118, 27058, 26997,
26936, 26875, 26814, 26752, 26690,
26628, 26565, 26502, 26439, 26375,
26312, 26247, 26183, 26119, 26054,
25988, 25923, 25857, 25791, 25725,
25658, 25592, 25524, 25457, 25389,
25322, 25253, 25185, 25116, 25047,
24978, 24908, 24838, 24768, 24698,
24627, 24557, 24485, 24414, 24342,
24270, 24198, 24126, 24053, 23980,
23907, 23834, 23760, 23686, 23612,
23537, 23462, 23387, 23312, 23237,
23161, 23085, 23009, 22932, 22856,
22779, 22701, 22624, 22546, 22468,
22390, 22312, 22233, 22154, 22075,
21996, 21916, 21836, 21756, 21676,
21595, 21515, 21434, 21352, 21271,
21189, 21107, 21025, 20943, 20860,
20777, 20694, 20611, 20528, 20444,
20360, 20276, 20192, 20107, 20022,
19937, 19852, 19767, 19681, 19595,
19509, 19423, 19336, 19250, 19163,
19076, 18988, 18901, 18813, 18725,
18637, 18549, 18460, 18372, 18283,
18194, 18104, 18015, 17925, 17835,
17745, 17655, 17565, 17474, 17383,
17292, 17201, 17110, 17018, 16927,
16835, 16743, 16650, 16558, 16465,
16372, 16279, 16186, 16093, 15999,
15906, 15812, 15718, 15624, 15529,
15435, 15340, 15245, 15150, 15055,
14960, 14864, 14769, 14673, 14577,
14481, 14385, 14288, 14192, 14095,
13998, 13901, 13804, 13706, 13609,
13511, 13414, 13316, 13218, 13119,
13021, 12923, 12824, 12725, 12626,
12527, 12428, 12329, 12230, 12130,
12030, 11930, 11831, 11730, 11630,
11530, 11430, 11329, 11228, 11128,
11027, 10926, 10824, 10723, 10622,
10520, 10419, 10317, 10215, 10113,
10011, 9909, 9807, 9704, 9602,
9499, 9397, 9294, 9191, 9088,
8985, 8882, 8778, 8675, 8572,
8468, 8364, 8261, 8157, 8053,
7949, 7845, 7741, 7637, 7532,
7428, 7323, 7219, 7114, 7009,
6905, 6800, 6695, 6590, 6485,
6380, 6274, 6169, 6064, 5958,
5853, 5747, 5642, 5536, 5430,
5325, 5219, 5113, 5007, 4901,
4795, 4689, 4583, 4476, 4370,
4264, 4157, 4051, 3945, 3838,
3732, 3625, 3518, 3412, 3305,
3198, 3092, 2985, 2878, 2771,
2664, 2558, 2451, 2344, 2237,
2130, 2023, 1916, 1809, 1702,
1594, 1487, 1380, 1273, 1166,
1059, 952, 844, 737, 630,
523, 416, 308, 201, 94,
-13, -121, -228, -335, -442,
-550, -657, -764, -871, -978,
-1086, -1193, -1300, -1407, -1514,
-1621, -1728, -1835, -1942, -2049,
-2157, -2263, -2370, -2477, -2584,
-2691, -2798, -2905, -3012, -3118,
-3225, -3332, -3439, -3545, -3652,
-3758, -3865, -3971, -4078, -4184,
-4290, -4397, -4503, -4609, -4715,
-4821, -4927, -5033, -5139, -5245,
-5351, -5457, -5562, -5668, -5774,
-5879, -5985, -6090, -6195, -6301,
-6406, -6511, -6616, -6721, -6826,
-6931, -7036, -7140, -7245, -7349,
-7454, -7558, -7663, -7767, -7871,
-7975, -8079, -8183, -8287, -8390,
-8494, -8597, -8701, -8804, -8907,
-9011, -9114, -9217, -9319, -9422,
-9525, -9627, -9730, -9832, -9934,
-10037, -10139, -10241, -10342, -10444,
-10546, -10647, -10748, -10850, -10951,
-11052, -11153, -11253, -11354, -11455,
-11555, -11655, -11756, -11856, -11955,
-12055, -12155, -12254, -12354, -12453,
-12552, -12651, -12750, -12849, -12947,
-13046, -13144, -13242, -13340, -13438,
-13536, -13633, -13731, -13828, -13925,
-14022, -14119, -14216, -14312, -14409,
-14505, -14601, -14697, -14793, -14888,
-14984, -15079, -15174, -15269, -15364,
-15459, -15553, -15647, -15741, -15835,
-15929, -16023, -16116, -16210, -16303,
-16396, -16488, -16581, -16673, -16766,
-16858, -16949, -17041, -17133, -17224,
-17315, -17406, -17497, -17587, -17678,
-17768, -17858, -17948, -18037, -18127,
-18216, -18305, -18394, -18483, -18571,
-18659, -18747, -18835, -18923, -19010,
-19098, -19185, -19271, -19358, -19444,
-19531, -19617, -19702, -19788, -19873,
-19959, -20043, -20128, -20213, -20297,
-20381, -20465, -20549, -20632, -20715,
-20798, -20881, -20963, -21046, -21128,
-21210, -21291, -21373, -21454, -21535,
-21616, -21696, -21776, -21856, -21936,
-22016, -22095, -22174, -22253, -22331,
-22410, -22488, -22566, -22643, -22721,
-22798, -22875, -22951, -23028, -23104,
-23180, -23256, -23331, -23406, -23481,
-23556, -23630, -23704, -23778, -23852,
-23925, -23998, -24071, -24144, -24216,
-24288, -24360, -24432, -24503, -24574,
-24645, -24716, -24786, -24856, -24926,
-24995, -25064, -25133, -25202, -25270,
-25339, -25406, -25474, -25541, -25608,
-25675, -25742, -25808, -25874, -25939,
-26005, -26070, -26135, -26199, -26264,
-26327, -26391, -26455, -26518, -26581,
-26643, -26705, -26767, -26829, -26891,
-26952, -27013, -27073, -27133, -27193,
-27253, -27312, -27372, -27430, -27489,
-27547, -27605, -27663, -27720, -27777,
-27834, -27890, -27946, -28002, -28058,
-28113, -28168, -28223, -28277, -28331,
-28385, -28438, -28491, -28544, -28596,
-28649, -28701, -28752, -28803, -28854,
-28905, -28955, -29006, -29055, -29105,
-29154, -29203, -29251, -29299, -29347,
-29395, -29442, -29489, -29535, -29582,
-29628, -29673, -29719, -29764, -29808,
-29853, -29897, -29941, -29984, -30027,
-30070, -30112, -30154, -30196, -30238,
-30279, -30320, -30360, -30400, -30440,
-30480, -30519, -30558, -30596, -30635,
-30672, -30710, -30747, -30784, -30821,
-30857, -30893, -30929, -30964, -30999,
-31033, -31068, -31102, -31135, -31168,
-31201, -31234, -31266, -31298, -31330,
-31361, -31392, -31422, -31453, -31483,
-31512, -31541, -31570, -31599, -31627,
-31655, -31682, -31710, -31737, -31763,
-31789, -31815, -31841, -31866, -31891,
-31915, -31939, -31963, -31986, -32010,
-32032, -32055, -32077, -32099, -32120,
-32141, -32162, -32182, -32202, -32222,
-32241, -32260, -32279, -32297, -32315,
-32333, -32350, -32367, -32383, -32399,
-32415, -32431, -32446, -32461, -32475,
-32489, -32503, -32517, -32530, -32542,
-32555, -32567, -32579, -32590, -32601,
-32612, -32622, -32632, -32641, -32651,
-32659, -32668, -32676, -32684, -32692,
-32699, -32706, -32712, -32718, -32724,
-32729, -32734, -32739, -32743, -32747,
-32751, -32754, -32757, -32760, -32762,
-32764, -32765, -32767, -32767, -32767,
32767, 32767, 32765, 32761, 32756,
32750, 32742, 32732, 32722, 32710,
32696, 32681, 32665, 32647, 32628,
32608, 32586, 32562, 32538, 32512,
32484, 32455, 32425, 32393, 32360,
32326, 32290, 32253, 32214, 32174,
32133, 32090, 32046, 32001, 31954,
31906, 31856, 31805, 31753, 31700,
31645, 31588, 31530, 31471, 31411,
31349, 31286, 31222, 31156, 31089,
31020, 30951, 30880, 30807, 30733,
30658, 30582, 30504, 30425, 30345,
30263, 30181, 30096, 30011, 29924,
29836, 29747, 29656, 29564, 29471,
29377, 29281, 29184, 29086, 28987,
28886, 28784, 28681, 28577, 28471,
28365, 28257, 28147, 28037, 27925,
27812, 27698, 27583, 27467, 27349,
27231, 27111, 26990, 26868, 26744,
26620, 26494, 26367, 26239, 26110,
25980, 25849, 25717, 25583, 25449,
25313, 25176, 25038, 24900, 24760,
24619, 24477, 24333, 24189, 24044,
23898, 23751, 23602, 23453, 23303,
23152, 22999, 22846, 22692, 22537,
22380, 22223, 22065, 21906, 21746,
21585, 21423, 21261, 21097, 20933,
20767, 20601, 20434, 20265, 20096,
19927, 19756, 19584, 19412, 19239,
19065, 18890, 18714, 18538, 18361,
18183, 18004, 17824, 17644, 17463,
17281, 17098, 16915, 16731, 16546,
16361, 16175, 15988, 15800, 15612,
15423, 15234, 15043, 14852, 14661,
14469, 14276, 14083, 13889, 13694,
13499, 13303, 13107, 12910, 12713,
12515, 12317, 12118, 11918, 11718,
11517, 11316, 11115, 10913, 10710,
10508, 10304, 10100, 9896, 9691,
9486, 9281, 9075, 8869, 8662,
8455, 8248, 8040, 7832, 7623,
7415, 7206, 6996, 6787, 6577,
6366, 6156, 5945, 5734, 5523,
5311, 5100, 4888, 4675, 4463,
4251, 4038, 3825, 3612, 3399,
3185, 2972, 2758, 2544, 2330,
2116, 1902, 1688, 1474, 1260,
1045, 831, 617, 402, 188,
-27, -241, -456, -670, -885,
-1099, -1313, -1528, -1742, -1956,
-2170, -2384, -2598, -2811, -3025,
-3239, -3452, -3665, -3878, -4091,
-4304, -4516, -4728, -4941, -5153,
-5364, -5576, -5787, -5998, -6209,
-6419, -6629, -6839, -7049, -7258,
-7467, -7676, -7884, -8092, -8300,
-8507, -8714, -8920, -9127, -9332,
-9538, -9743, -9947, -10151, -10355,
-10558, -10761, -10963, -11165, -11367,
-11568, -11768, -11968, -12167, -12366,
-12565, -12762, -12960, -13156, -13352,
-13548, -13743, -13937, -14131, -14324,
-14517, -14709, -14900, -15091, -15281,
-15470, -15659, -15847, -16035, -16221,
-16407, -16593, -16777, -16961, -17144,
-17326, -17508, -17689, -17869, -18049,
-18227, -18405, -18582, -18758, -18934,
-19108, -19282, -19455, -19627, -19799,
-19969, -20139, -20308, -20475, -20642,
-20809, -20974, -21138, -21301, -21464,
-21626, -21786, -21946, -22105, -22263,
-22420, -22575, -22730, -22884, -23037,
-23189, -23340, -23490, -23640, -23788,
-23935, -24080, -24225, -24369, -24512,
-24654, -24795, -24934, -25073, -25211,
-25347, -25482, -25617, -25750, -25882,
-26013, -26143, -26272, -26399, -26526,
-26651, -26775, -26898, -27020, -27141,
-27260, -27379, -27496, -27612, -27727,
-27841, -27953, -28065, -28175, -28284,
-28391, -28498, -28603, -28707, -28810,
-28911, -29012, -29111, -29209, -29305,
-29401, -29495, -29587, -29679, -29769,
-29858, -29946, -30032, -30118, -30201,
-30284, -30365, -30445, -30524, -30601,
-30677, -30752, -30825, -30897, -30968,
-31038, -31106, -31172, -31238, -31302,
-31365, -31426, -31486, -31545, -31602,
-31658, -31713, -31766, -31818, -31869,
-31918, -31966, -32012, -32058, -32101,
-32144, -32185, -32224, -32262, -32299,
-32335, -32369, -32401, -32433, -32463,
-32491, -32518, -32544, -32568, -32591,
-32613, -32633, -32652, -32669, -32685,
-32700, -32713, -32724, -32735, -32744,
-32751, -32757, -32762, -32766, -32767,
32767, 32764, 32755, 32741, 32720,
32694, 32663, 32626, 32583, 32535,
32481, 32421, 32356, 32286, 32209,
32128, 32041, 31948, 31850, 31747,
31638, 31523, 31403, 31278, 31148,
31012, 30871, 30724, 30572, 30415,
30253, 30086, 29913, 29736, 29553,
29365, 29172, 28974, 28771, 28564,
28351, 28134, 27911, 27684, 27452,
27216, 26975, 26729, 26478, 26223,
25964, 25700, 25432, 25159, 24882,
24601, 24315, 24026, 23732, 23434,
23133, 22827, 22517, 22204, 21886,
21565, 21240, 20912, 20580, 20244,
19905, 19563, 19217, 18868, 18516,
18160, 17802, 17440, 17075, 16708,
16338, 15964, 15588, 15210, 14829,
14445, 14059, 13670, 13279, 12886,
12490, 12093, 11693, 11291, 10888,
10482, 10075, 9666, 9255, 8843,
8429, 8014, 7597, 7180, 6760,
6340, 5919, 5496, 5073, 4649,
4224, 3798, 3372, 2945, 2517,
2090, 1661, 1233, 804, 375,
-54, -483, -911, -1340, -1768,
-2197, -2624, -3052, -3479, -3905,
-4330, -4755, -5179, -5602, -6024,
-6445, -6865, -7284, -7702, -8118,
-8533, -8946, -9358, -9768, -10177,
-10584, -10989, -11392, -11793, -12192,
-12589, -12984, -13377, -13767, -14155,
-14541, -14924, -15305, -15683, -16058,
-16430, -16800, -17167, -17531, -17892,
-18249, -18604, -18956, -19304, -19649,
-19990, -20329, -20663, -20994, -21322,
-21646, -21966, -22282, -22595, -22904,
-23208, -23509, -23806, -24099, -24387,
-24672, -24952, -25228, -25499, -25766,
-26029, -26288, -26541, -26791, -27035,
-27275, -27511, -27741, -27967, -28188,
-28405, -28616, -28823, -29024, -29221,
-29412, -29599, -29780, -29957, -30128,
-30294, -30455, -30611, -30761, -30906,
-31046, -31181, -31310, -31434, -31552,
-31665, -31773, -31875, -31972, -32063,
-32149, -32229, -32304, -32373, -32437,
-32495, -32547, -32594, -32635, -32671,
-32701, -32726, -32745, -32758, -32766,
32767, 32754, 32717, 32658, 32577,
32473, 32348, 32200, 32029, 31837,
31624, 31388, 31131, 30853, 30553,
30232, 29891, 29530, 29148, 28746,
28324, 27883, 27423, 26944, 26447,
25931, 25398, 24847, 24279, 23695,
23095, 22478, 21846, 21199, 20538,
19863, 19174, 18472, 17757, 17030,
16291, 15541, 14781, 14010, 13230,
12441, 11643, 10837, 10024, 9204,
8377, 7545, 6708, 5866, 5020,
4171, 3319, 2464, 1608, 751,
-107, -965, -1822, -2678, -3532,
-4383, -5232, -6077, -6918, -7754,
-8585, -9409, -10228, -11039, -11843,
-12639, -13426, -14204, -14972, -15730,
-16477, -17213, -17937, -18648, -19347,
-20033, -20705, -21363, -22006, -22634,
-23246, -23843, -24423, -24986, -25533,
-26062, -26573, -27066, -27540, -27995,
-28431, -28848, -29245, -29622, -29979,
-30315, -30630, -30924, -31197, -31449,
-31679, -31887, -32074, -32239, -32381,
-32501, -32600, -32675, -32729, -32759,
};
#endif
static const CELTMode mode48000_960_120 = {

View File

@ -0,0 +1,388 @@
/* The contents of this file was automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960
static const ne10_int32_t ne10_factors_480[64] = {
4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_240[64] = {
3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_120[64] = {
3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_60[64] = {
2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570},
{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154},
{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172},
{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068},
{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822},
{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146},
{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682},
{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231},
{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791},
{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029},
{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313},
{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783},
{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661},
{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541},
{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450},
{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914},
{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194},
{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807},
{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067},
{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658},
{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677},
{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704},
{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277},
{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512},
{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510},
{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682},
{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424},
{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524},
{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195},
{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164},
{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771},
{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961},
{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705},
{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540},
{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994},
{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540},
{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646},
{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814},
{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593},
{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667},
{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584},
{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622},
{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955},
{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651},
{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703},
{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114},
{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330},
{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403},
{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078},
{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372},
{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738},
{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248},
{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400},
{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552},
{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141},
{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553},
{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377},
{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448},
{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240},
{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574},
{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630},
{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288},
{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960},
{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934},
{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097},
{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189},
{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282},
{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280},
{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875},
{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603},
{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339},
{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386},
{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674},
{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010},
{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564},
{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860},
{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118},
{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768},
{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839},
{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595},
{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001},
{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837},
{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918},
{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354},
{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119},
{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555},
{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188},
{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928},
{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484},
{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706},
{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252},
{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149},
{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287},
{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850},
{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059},
{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202},
{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458},
{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823},
{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542},
{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015},
};
static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496},
{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855},
{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883},
{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330},
{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268},
{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035},
{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910},
{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276},
{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785},
{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298},
{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465},
{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435},
{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248},
{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368},
{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912},
{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536},
{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647},
{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493},
{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827},
{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413},
{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096},
{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084},
{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406},
{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881},
{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895},
{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257},
{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506},
{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107},
{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239},
{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668},
};
static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
};
static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = {
{0,0}, {2147483647,0}, {2147483647,0},
{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = {
120,
(ne10_int32_t *)ne10_factors_480,
(ne10_fft_cpx_int32_t *)ne10_twiddles_480,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120],
};
static const arch_fft_state cfg_arch_480 = {
1,
(void *)&ne10_fft_state_int32_t_480,
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = {
60,
(ne10_int32_t *)ne10_factors_240,
(ne10_fft_cpx_int32_t *)ne10_twiddles_240,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60],
};
static const arch_fft_state cfg_arch_240 = {
1,
(void *)&ne10_fft_state_int32_t_240,
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = {
30,
(ne10_int32_t *)ne10_factors_120,
(ne10_fft_cpx_int32_t *)ne10_twiddles_120,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30],
};
static const arch_fft_state cfg_arch_120 = {
1,
(void *)&ne10_fft_state_int32_t_120,
};
static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = {
15,
(ne10_int32_t *)ne10_factors_60,
(ne10_fft_cpx_int32_t *)ne10_twiddles_60,
NULL,
(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15],
};
static const arch_fft_state cfg_arch_60 = {
1,
(void *)&ne10_fft_state_int32_t_60,
};
#endif /* end NE10_FFT_PARAMS48000_960 */

View File

@ -4,6 +4,11 @@
#include "modes.h"
#include "rate.h"
#ifdef HAVE_ARM_NE10
#define OVERRIDE_FFT 1
#include "static_modes_float_arm_ne10.h"
#endif
#ifndef DEF_WINDOW120
#define DEF_WINDOW120
static const opus_val16 window120[120] = {
@ -341,84 +346,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
#ifndef FFT_BITREV480
#define FFT_BITREV480
static const opus_int16 fft_bitrev480[480] = {
0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
};
#endif
#ifndef FFT_BITREV240
#define FFT_BITREV240
static const opus_int16 fft_bitrev240[240] = {
0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
};
#endif
#ifndef FFT_BITREV120
#define FFT_BITREV120
static const opus_int16 fft_bitrev120[120] = {
0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
};
#endif
#ifndef FFT_BITREV60
#define FFT_BITREV60
static const opus_int16 fft_bitrev60[60] = {
0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
};
#endif
@ -428,9 +433,14 @@ static const kiss_fft_state fft_state48000_960_0 = {
480, /* nfft */
0.002083333f, /* scale */
-1, /* shift */
{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev480, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_480,
#else
NULL,
#endif
};
#endif
@ -440,9 +450,14 @@ static const kiss_fft_state fft_state48000_960_1 = {
240, /* nfft */
0.004166667f, /* scale */
1, /* shift */
{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev240, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_240,
#else
NULL,
#endif
};
#endif
@ -452,9 +467,14 @@ static const kiss_fft_state fft_state48000_960_2 = {
120, /* nfft */
0.008333333f, /* scale */
2, /* shift */
{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev120, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_120,
#else
NULL,
#endif
};
#endif
@ -464,9 +484,14 @@ static const kiss_fft_state fft_state48000_960_3 = {
60, /* nfft */
0.016666667f, /* scale */
3, /* shift */
{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev60, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_60,
#else
NULL,
#endif
};
#endif
@ -474,104 +499,368 @@ fft_twiddles48000_960, /* bitrev */
#ifndef MDCT_TWIDDLES960
#define MDCT_TWIDDLES960
static const opus_val16 mdct_twiddles960[481] = {
1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f,
0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f,
0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f,
0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f,
0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f,
0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f,
0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f,
0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f,
0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f,
0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f,
0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f,
0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f,
0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f,
0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f,
0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f,
0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f,
0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f,
0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f,
0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f,
0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f,
0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f,
0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f,
0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f,
0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f,
0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f,
0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f,
0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f,
0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f,
0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f,
0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f,
0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f,
0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f,
0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f,
0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f,
0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f,
0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f,
0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f,
0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f,
0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f,
0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f,
0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f,
0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f,
0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f,
0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f,
0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f,
0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f,
0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f,
0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f,
0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f,
0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f,
0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f,
0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f,
0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f,
0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f,
0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f,
0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f,
0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f,
0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f,
0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f,
0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f,
0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f,
0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f,
0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f,
0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f,
0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f,
0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f,
0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f,
0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f,
0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f,
0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f,
0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f,
0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f,
0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f,
0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f,
0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f,
0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f,
0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f,
0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f,
0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f,
0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f,
0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f,
0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f,
0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f,
0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f,
0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f,
0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f,
0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f,
0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f,
0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f,
0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f,
0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f,
0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f,
0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f,
0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f,
0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f,
0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f,
-4.3711390e-08f, };
static const opus_val16 mdct_twiddles960[1800] = {
0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f,
0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f,
0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f,
0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f,
0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f,
0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f,
0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f,
0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f,
0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f,
0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f,
0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f,
0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f,
0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f,
0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f,
0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f,
0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f,
0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f,
0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f,
0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f,
0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f,
0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f,
0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f,
0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f,
0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f,
0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f,
0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f,
0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f,
0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f,
0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f,
0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f,
0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f,
0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f,
0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f,
0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f,
0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f,
0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f,
0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f,
0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f,
0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f,
0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f,
0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f,
0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f,
0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f,
0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f,
0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f,
0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f,
0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f,
0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f,
0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f,
0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f,
0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f,
0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f,
0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f,
0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f,
0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f,
0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f,
0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f,
0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f,
0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f,
0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f,
0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f,
0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f,
0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f,
0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f,
0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f,
0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f,
0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f,
0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f,
0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f,
0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f,
0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f,
0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f,
0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f,
0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f,
0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f,
0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f,
0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f,
0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f,
0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f,
0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f,
0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f,
0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f,
0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f,
0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f,
0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f,
0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f,
0.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f,
0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f,
0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f,
0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f,
0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f,
0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f,
0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f,
0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f,
0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f,
0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f,
-0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f,
-0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f,
-0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f,
-0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f,
-0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f,
-0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f,
-0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f,
-0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f,
-0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f,
-0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f,
-0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f,
-0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f,
-0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f,
-0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f,
-0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f,
-0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f,
-0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f,
-0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f,
-0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f,
-0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f,
-0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f,
-0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f,
-0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f,
-0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f,
-0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f,
-0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f,
-0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f,
-0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f,
-0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f,
-0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f,
-0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f,
-0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f,
-0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f,
-0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f,
-0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f,
-0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f,
-0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f,
-0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f,
-0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f,
-0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f,
-0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f,
-0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f,
-0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f,
-0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f,
-0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f,
-0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f,
-0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f,
-0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f,
-0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f,
-0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f,
-0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f,
-0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f,
-0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f,
-0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f,
-0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f,
-0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f,
-0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f,
-0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f,
-0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f,
-0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f,
-0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f,
-0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f,
-0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f,
-0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f,
-0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f,
-0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f,
-0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f,
-0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f,
-0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f,
-0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f,
-0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f,
-0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f,
-0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f,
-0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f,
-0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f,
-0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f,
-0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f,
-0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f,
-0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f,
-0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f,
-0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f,
-0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f,
-0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f,
-0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f,
-0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f,
-0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f,
-0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f,
-0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f,
-0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f,
-0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f,
-0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f,
-0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f,
-0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f,
-0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f,
-0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f,
-0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f,
0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f,
0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f,
0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f,
0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f,
0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f,
0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f,
0.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f,
0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f,
0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f,
0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f,
0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f,
0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f,
0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f,
0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f,
0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f,
0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f,
0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f,
0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f,
0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f,
0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f,
0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f,
0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f,
0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f,
0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f,
0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f,
0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f,
0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f,
0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f,
0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f,
0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f,
0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f,
0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f,
0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f,
0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f,
0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f,
0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f,
0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f,
0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f,
0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f,
0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f,
0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f,
0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f,
0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f,
0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f,
0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f,
0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f,
0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f,
0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f,
-0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f,
-0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f,
-0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f,
-0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f,
-0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f,
-0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f,
-0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f,
-0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f,
-0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f,
-0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f,
-0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f,
-0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f,
-0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f,
-0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f,
-0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f,
-0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f,
-0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f,
-0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f,
-0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f,
-0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f,
-0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f,
-0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f,
-0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f,
-0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f,
-0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f,
-0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f,
-0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f,
-0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f,
-0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f,
-0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f,
-0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f,
-0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f,
-0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f,
-0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f,
-0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f,
-0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f,
-0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f,
-0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f,
-0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f,
-0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f,
-0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f,
-0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f,
-0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f,
-0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f,
-0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f,
-0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f,
-0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f,
-0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f,
0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f,
0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f,
0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f,
0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f,
0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f,
0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f,
0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f,
0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f,
0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f,
0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f,
0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f,
0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f,
0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f,
0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f,
0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f,
0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f,
0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f,
0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f,
0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f,
0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f,
0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f,
0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f,
0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f,
0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f,
-0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f,
-0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f,
-0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f,
-0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f,
-0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f,
-0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f,
-0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f,
-0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f,
-0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f,
-0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f,
-0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f,
-0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f,
-0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f,
-0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f,
-0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f,
-0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f,
-0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f,
-0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f,
-0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f,
-0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f,
-0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f,
-0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f,
-0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f,
-0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f,
0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f,
0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f,
0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f,
0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f,
0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f,
0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f,
0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f,
0.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f,
0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f,
0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f,
0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f,
0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f,
-0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f,
-0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f,
-0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f,
-0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f,
-0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f,
-0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f,
-0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f,
-0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f,
-0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f,
-0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f,
-0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f,
-0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f,
};
#endif
static const CELTMode mode48000_960_120 = {

View File

@ -0,0 +1,404 @@
/* The contents of this file was automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960
static const ne10_int32_t ne10_factors_480[64] = {
4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_240[64] = {
3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_120[64] = {
3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_int32_t ne10_factors_60[64] = {
2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, };
static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
};
static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
};
static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
};
static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = {
120,
(ne10_int32_t *)ne10_factors_480,
(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_480 = {
1,
(void *)&ne10_fft_state_float32_t_480,
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = {
60,
(ne10_int32_t *)ne10_factors_240,
(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_240 = {
1,
(void *)&ne10_fft_state_float32_t_240,
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = {
30,
(ne10_int32_t *)ne10_factors_120,
(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_120 = {
1,
(void *)&ne10_fft_state_float32_t_120,
};
static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = {
15,
(ne10_int32_t *)ne10_factors_60,
(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
NULL,
(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
/* is_forward_scaled = true */
(ne10_int32_t) 1,
/* is_backward_scaled = false */
(ne10_int32_t) 0,
};
static const arch_fft_state cfg_arch_60 = {
1,
(void *)&ne10_fft_state_float32_t_60,
};
#endif /* end NE10_FFT_PARAMS48000_960 */

View File

@ -37,19 +37,23 @@
#include "os_support.h"
#include "bands.h"
#include "rate.h"
#include "pitch.h"
#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
int i;
opus_val16 ms;
celt_norm *Xptr;
Xptr = X;
ms = NEG16(s);
for (i=0;i<len-stride;i++)
{
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
*Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
Xptr = &X[len-2*stride-1];
for (i=len-2*stride-1;i>=0;i--)
@ -57,10 +61,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
*Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
*Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
}
#endif /* OVERRIDE_vq_exp_rotation1 */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
@ -91,7 +96,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
}
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
extract_collapse_mask().*/
len /= stride;
len = celt_udiv(len, stride);
for (i=0;i<stride;i++)
{
if (dir < 0)
@ -140,13 +145,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
return 1;
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
exp_rotation().*/
N0 = N/B;
N0 = celt_udiv(N, B);
collapse_mask = 0;
i=0; do {
int j;
unsigned tmp=0;
j=0; do {
collapse_mask |= (iy[i*N0+j]!=0)<<i;
tmp |= iy[i*N0+j];
} while (++j<N0);
collapse_mask |= (tmp!=0)<<i;
} while (++i<B);
return collapse_mask;
}
@ -264,7 +271,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
best_id = 0;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD32(yy, 1);
yy = ADD16(yy, 1);
j=0;
do {
opus_val16 Rxy, Ryy;
@ -322,7 +329,6 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
ec_dec *dec, opus_val16 gain)
{
int i;
opus_val32 Ryy;
unsigned collapse_mask;
VARDECL(int, iy);
@ -331,12 +337,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
celt_assert2(K>0, "alg_unquant() needs at least one pulse");
celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
ALLOC(iy, N, int);
decode_pulses(iy, N, K, dec);
Ryy = 0;
i=0;
do {
Ryy = MAC16_16(Ryy, iy[i], iy[i]);
} while (++i < N);
Ryy = decode_pulses(iy, N, K, dec);
normalise_residual(iy, X, N, Ryy, gain);
exp_rotation(X, N, -1, B, K, spread);
collapse_mask = extract_collapse_mask(iy, N, B);
@ -344,21 +345,18 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
return collapse_mask;
}
void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
#ifndef OVERRIDE_renormalise_vector
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
{
int i;
#ifdef FIXED_POINT
int k;
#endif
opus_val32 E = EPSILON;
opus_val32 E;
opus_val16 g;
opus_val32 t;
celt_norm *xptr = X;
for (i=0;i<N;i++)
{
E = MAC16_16(E, *xptr, *xptr);
xptr++;
}
celt_norm *xptr;
E = EPSILON + celt_inner_prod(X, X, N, arch);
#ifdef FIXED_POINT
k = celt_ilog2(E)>>1;
#endif
@ -373,8 +371,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
}
/*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */
int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
int i;
int itheta;
@ -393,14 +392,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
Eside = MAC16_16(Eside, s, s);
}
} else {
for (i=0;i<N;i++)
{
celt_norm m, s;
m = X[i];
s = Y[i];
Emid = MAC16_16(Emid, m, m);
Eside = MAC16_16(Eside, s, s);
}
Emid += celt_inner_prod(X, X, N, arch);
Eside += celt_inner_prod(Y, Y, N, arch);
}
mid = celt_sqrt(Emid);
side = celt_sqrt(Eside);

View File

@ -37,6 +37,11 @@
#include "entdec.h"
#include "modes.h"
#if defined(MIPSr1_ASM)
#include "mips/vq_mipsr1.h"
#endif
/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
* the pitch and a combination of pulses such that its norm is still equal
* to 1. This is the function that will typically require the most CPU.
@ -63,8 +68,8 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
ec_dec *dec, opus_val16 gain);
void renormalise_vector(celt_norm *X, int N, opus_val16 gain);
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch);
int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N);
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch);
#endif /* VQ_H */

View File

@ -0,0 +1,132 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#include "x86cpu.h"
#if defined(FIXED_POINT)
void celt_fir_sse4_1(const opus_val16 *_x,
const opus_val16 *num,
opus_val16 *_y,
int N,
int ord,
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
VARDECL(opus_val16, x);
__m128i vecNoA;
opus_int32 noA ;
SAVE_STACK;
ALLOC(rnum, ord, opus_val16);
ALLOC(x, N+ord, opus_val16);
for(i=0;i<ord;i++)
rnum[i] = num[ord-i-1];
for(i=0;i<ord;i++)
x[i] = mem[ord-i-1];
for (i=0;i<N-7;i+=8)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
x[i+ord+4]=_x[i+4];
x[i+ord+5]=_x[i+5];
x[i+ord+6]=_x[i+6];
x[i+ord+7]=_x[i+7];
}
for (;i<N-3;i+=4)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
}
for (;i<N;i++)
x[i+ord]=_x[i];
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
{
sum = MAC16_16(sum,rnum[j],x[i+j]);
}
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
}
#else
noA = EXTEND32(1) << SIG_SHIFT >> 1;
vecNoA = _mm_set_epi32(noA, noA, noA, noA);
for (i=0;i<N-3;i+=4)
{
opus_val32 sums[4] = {0};
__m128i vecSum, vecX;
xcorr_kernel(rnum, x+i, sums, ord, arch);
vecSum = _mm_loadu_si128((__m128i *)sums);
vecSum = _mm_add_epi32(vecSum, vecNoA);
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
vecX = OP_CVTEPI16_EPI32_M64(_x + i);
vecSum = _mm_add_epi32(vecSum, vecX);
vecSum = _mm_packs_epi32(vecSum, vecSum);
_mm_storel_epi64((__m128i *)(_y + i), vecSum);
}
for (;i<N;i++)
{
opus_val32 sum = 0;
for (j=0;j<ord;j++)
sum = MAC16_16(sum, rnum[j], x[i + j]);
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
}
#endif
RESTORE_STACK;
}
#endif

View File

@ -0,0 +1,68 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CELT_LPC_SSE_H
#define CELT_LPC_SSE_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
#define OVERRIDE_CELT_FIR
void celt_fir_sse4_1(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
#if defined(OPUS_X86_PRESUME_SSE4_1)
#define celt_fir(x, num, y, N, ord, mem, arch) \
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch))
#else
extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
# define celt_fir(x, num, y, N, ord, mem, arch) \
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch))
#endif
#endif
#endif

View File

@ -0,0 +1,185 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "macros.h"
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
#include <xmmintrin.h>
#include "arch.h"
void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
{
int j;
__m128 xsum1, xsum2;
xsum1 = _mm_loadu_ps(sum);
xsum2 = _mm_setzero_ps();
for (j = 0; j < len-3; j += 4)
{
__m128 x0 = _mm_loadu_ps(x+j);
__m128 yj = _mm_loadu_ps(y+j);
__m128 y3 = _mm_loadu_ps(y+j+3);
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
_mm_shuffle_ps(yj,y3,0x49)));
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
_mm_shuffle_ps(yj,y3,0x9e)));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
}
if (j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
}
}
}
_mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
}
void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
__m128 xsum1, xsum2;
xsum1 = _mm_setzero_ps();
xsum2 = _mm_setzero_ps();
for (i=0;i<N-3;i+=4)
{
__m128 xi = _mm_loadu_ps(x+i);
__m128 y1i = _mm_loadu_ps(y01+i);
__m128 y2i = _mm_loadu_ps(y02+i);
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
}
/* Horizontal sum */
xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
_mm_store_ss(xy1, xsum1);
xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
_mm_store_ss(xy2, xsum2);
for (;i<N;i++)
{
*xy1 = MAC16_16(*xy1, x[i], y01[i]);
*xy2 = MAC16_16(*xy2, x[i], y02[i]);
}
}
opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
int N)
{
int i;
float xy;
__m128 sum;
sum = _mm_setzero_ps();
/* FIXME: We should probably go 8-way and use 2 sums. */
for (i=0;i<N-3;i+=4)
{
__m128 xi = _mm_loadu_ps(x+i);
__m128 yi = _mm_loadu_ps(y+i);
sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));
}
/* Horizontal sum */
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
_mm_store_ss(&xy, sum);
for (;i<N;i++)
{
xy = MAC16_16(xy, x[i], y[i]);
}
return xy;
}
void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
int i;
__m128 x0v;
__m128 g10v, g11v, g12v;
g10v = _mm_load1_ps(&g10);
g11v = _mm_load1_ps(&g11);
g12v = _mm_load1_ps(&g12);
x0v = _mm_loadu_ps(&x[-T-2]);
for (i=0;i<N-3;i+=4)
{
__m128 yi, yi2, x1v, x2v, x3v, x4v;
const opus_val32 *xp = &x[i-T-2];
yi = _mm_loadu_ps(x+i);
x4v = _mm_loadu_ps(xp+4);
#if 0
/* Slower version with all loads */
x1v = _mm_loadu_ps(xp+1);
x2v = _mm_loadu_ps(xp+2);
x3v = _mm_loadu_ps(xp+3);
#else
x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
#endif
yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
#else
/* Use partial sums */
yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
_mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
yi = _mm_add_ps(yi, yi2);
#endif
x0v=x4v;
_mm_storeu_ps(y+i, yi);
}
#ifdef CUSTOM_MODES
for (;i<N;i++)
{
y[i] = x[i]
+ MULT16_32_Q15(g10,x[i-T])
+ MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+ MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
}
#endif
}
#endif

View File

@ -1,4 +1,5 @@
/* Copyright (c) 2013 Jean-Marc Valin and John Ridges */
/* Copyright (c) 2013 Jean-Marc Valin and John Ridges
Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/
/**
@file pitch_sse.h
@brief Pitch analysis
@ -32,125 +33,160 @@
#ifndef PITCH_SSE_H
#define PITCH_SSE_H
#include <xmmintrin.h>
#include "arch.h"
#if defined(HAVE_CONFIG_H)
#include "config.h"
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
void xcorr_kernel_sse4_1(
const opus_int16 *x,
const opus_int16 *y,
opus_val32 sum[4],
int len);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
void xcorr_kernel_sse(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
#endif
#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_sse4_1(x, y, sum, len))
#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_sse(x, y, sum, len))
#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
#define OVERRIDE_XCORR_KERNEL
static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
{
int j;
__m128 xsum1, xsum2;
xsum1 = _mm_loadu_ps(sum);
xsum2 = _mm_setzero_ps();
#define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
for (j = 0; j < len-3; j += 4)
{
__m128 x0 = _mm_loadu_ps(x+j);
__m128 yj = _mm_loadu_ps(y+j);
__m128 y3 = _mm_loadu_ps(y+j+3);
#endif
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
_mm_shuffle_ps(yj,y3,0x49)));
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
_mm_shuffle_ps(yj,y3,0x9e)));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
}
if (j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
if (++j < len)
{
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
}
}
}
_mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
}
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse4_1(
const opus_int16 *x,
const opus_int16 *y,
int N);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse2(
const opus_int16 *x,
const opus_int16 *y,
int N);
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse(
const opus_val16 *x,
const opus_val16 *y,
int N);
#endif
#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse4_1(x, y, N))
#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse2(x, y, N))
#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse(x, y, N))
#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
int N);
#define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \
((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_DUAL_INNER_PROD
static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
__m128 xsum1, xsum2;
xsum1 = _mm_setzero_ps();
xsum2 = _mm_setzero_ps();
for (i=0;i<N-3;i+=4)
{
__m128 xi = _mm_loadu_ps(x+i);
__m128 y1i = _mm_loadu_ps(y01+i);
__m128 y2i = _mm_loadu_ps(y02+i);
xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
}
/* Horizontal sum */
xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
_mm_store_ss(xy1, xsum1);
xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
_mm_store_ss(xy2, xsum2);
for (;i<N;i++)
{
*xy1 = MAC16_16(*xy1, x[i], y01[i]);
*xy2 = MAC16_16(*xy2, x[i], y02[i]);
}
}
#define OVERRIDE_COMB_FILTER_CONST
static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
int i;
__m128 x0v;
__m128 g10v, g11v, g12v;
g10v = _mm_load1_ps(&g10);
g11v = _mm_load1_ps(&g11);
g12v = _mm_load1_ps(&g12);
x0v = _mm_loadu_ps(&x[-T-2]);
for (i=0;i<N-3;i+=4)
{
__m128 yi, yi2, x1v, x2v, x3v, x4v;
const opus_val32 *xp = &x[i-T-2];
yi = _mm_loadu_ps(x+i);
x4v = _mm_loadu_ps(xp+4);
#if 0
/* Slower version with all loads */
x1v = _mm_loadu_ps(xp+1);
x2v = _mm_loadu_ps(xp+2);
x3v = _mm_loadu_ps(xp+3);
#else
x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
#endif
yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
#undef dual_inner_prod
#undef comb_filter_const
void dual_inner_prod_sse(const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
int N,
opus_val32 *xy1,
opus_val32 *xy2);
void comb_filter_const_sse(opus_val32 *y,
opus_val32 *x,
int T,
int N,
opus_val16 g10,
opus_val16 g11,
opus_val16 g12);
#if defined(OPUS_X86_PRESUME_SSE)
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
#else
/* Use partial sums */
yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
_mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
yi = _mm_add_ps(yi, yi2);
#endif
x0v=x4v;
_mm_storeu_ps(y+i, yi);
}
#ifdef CUSTOM_MODES
for (;i<N;i++)
{
y[i] = x[i]
+ MULT16_32_Q15(g10,x[i-T])
+ MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+ MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
}
#endif
}
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
int N,
opus_val32 *xy1,
opus_val32 *xy2);
#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
opus_val32 *y,
opus_val32 *x,
int T,
int N,
opus_val16 g10,
opus_val16 g11,
opus_val16 g12);
#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
#define NON_STATIC_COMB_FILTER_CONST_C
#endif
#endif
#endif

View File

@ -0,0 +1,95 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include "macros.h"
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y,
int N)
{
opus_int i, dataSize16;
opus_int32 sum;
__m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
__m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
sum = 0;
dataSize16 = N & ~15;
acc1 = _mm_setzero_si128();
acc2 = _mm_setzero_si128();
for (i=0;i<dataSize16;i+=16)
{
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
}
acc1 = _mm_add_epi32( acc1, acc2 );
if (N - i >= 8)
{
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
i += 8;
}
acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1));
acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E));
sum += _mm_cvtsi128_si32(acc1);
for (;i<N;i++) {
sum = silk_SMLABB(sum, x[i], y[i]);
}
return sum;
}
#endif

View File

@ -0,0 +1,195 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include "macros.h"
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
#include <smmintrin.h>
#include "x86cpu.h"
opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y,
int N)
{
opus_int i, dataSize16;
opus_int32 sum;
__m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
__m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
__m128i inVec1_3210, inVec2_3210;
sum = 0;
dataSize16 = N & ~15;
acc1 = _mm_setzero_si128();
acc2 = _mm_setzero_si128();
for (i=0;i<dataSize16;i+=16) {
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
}
acc1 = _mm_add_epi32(acc1, acc2);
if (N - i >= 8)
{
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
i += 8;
}
if (N - i >= 4)
{
inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]);
inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]);
inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210);
acc1 = _mm_add_epi32(acc1, inVec1_3210);
i += 4;
}
acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1));
acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E));
sum += _mm_cvtsi128_si32(acc1);
for (;i<N;i++)
{
sum = silk_SMLABB(sum, x[i], y[i]);
}
return sum;
}
void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len)
{
int j;
__m128i vecX, vecX0, vecX1, vecX2, vecX3;
__m128i vecY0, vecY1, vecY2, vecY3;
__m128i sum0, sum1, sum2, sum3, vecSum;
__m128i initSum;
celt_assert(len >= 3);
sum0 = _mm_setzero_si128();
sum1 = _mm_setzero_si128();
sum2 = _mm_setzero_si128();
sum3 = _mm_setzero_si128();
for (j=0;j<(len-7);j+=8)
{
vecX = _mm_loadu_si128((__m128i *)(&x[j + 0]));
vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0]));
vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1]));
vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2]));
vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3]));
sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0));
sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1));
sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2));
sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3));
}
sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0));
sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E));
sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1));
sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E));
sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2));
sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E));
sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3));
sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E));
vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1),
_mm_unpacklo_epi32(sum2, sum3));
for (;j<(len-3);j+=4)
{
vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
vecX0 = _mm_shuffle_epi32(vecX, 0x00);
vecX1 = _mm_shuffle_epi32(vecX, 0x55);
vecX2 = _mm_shuffle_epi32(vecX, 0xaa);
vecX3 = _mm_shuffle_epi32(vecX, 0xff);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]);
sum0 = _mm_mullo_epi32(vecX0, vecY0);
sum1 = _mm_mullo_epi32(vecX1, vecY1);
sum2 = _mm_mullo_epi32(vecX2, vecY2);
sum3 = _mm_mullo_epi32(vecX3, vecY3);
sum0 = _mm_add_epi32(sum0, sum1);
sum2 = _mm_add_epi32(sum2, sum3);
vecSum = _mm_add_epi32(vecSum, sum0);
vecSum = _mm_add_epi32(vecSum, sum2);
}
for (;j<len;j++)
{
vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
vecX0 = _mm_shuffle_epi32(vecX, 0x00);
vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
sum0 = _mm_mullo_epi32(vecX0, vecY0);
vecSum = _mm_add_epi32(vecSum, sum0);
}
initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
initSum = _mm_add_epi32(initSum, vecSum);
_mm_storeu_si128((__m128i *)sum, initSum);
}
#endif

View File

@ -0,0 +1,155 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(HAVE_CONFIG_H)
#include "config.h"
#endif
#include "x86/x86cpu.h"
#include "celt_lpc.h"
#include "pitch.h"
#include "pitch_sse.h"
#if defined(OPUS_HAVE_RTCD)
# if defined(FIXED_POINT)
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch
) = {
celt_fir_c, /* non-sse */
celt_fir_c,
celt_fir_c,
MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */
MAY_HAVE_SSE4_1(celt_fir) /* avx */
};
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* non-sse */
xcorr_kernel_c,
xcorr_kernel_c,
MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */
MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */
};
#endif
#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
int N
) = {
celt_inner_prod_c, /* non-sse */
celt_inner_prod_c,
MAY_HAVE_SSE2(celt_inner_prod),
MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1 */
MAY_HAVE_SSE4_1(celt_inner_prod) /* avx */
};
#endif
# else
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* non-sse */
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel)
};
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
int N
) = {
celt_inner_prod_c, /* non-sse */
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod)
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
int N,
opus_val32 *xy1,
opus_val32 *xy2
) = {
dual_inner_prod_c, /* non-sse */
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod)
};
void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
opus_val32 *y,
opus_val32 *x,
int T,
int N,
opus_val16 g10,
opus_val16 g11,
opus_val16 g12
) = {
comb_filter_const_c, /* non-sse */
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const)
};
#endif
#endif
#endif

View File

@ -0,0 +1,157 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "cpu_support.h"
#include "macros.h"
#include "main.h"
#include "pitch.h"
#include "x86cpu.h"
#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
#if defined(_MSC_VER)
#include <intrin.h>
static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{
__cpuid((int*)CPUInfo, InfoType);
}
#else
#if defined(CPU_INFO_BY_C)
#include <cpuid.h>
#endif
static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{
#if defined(CPU_INFO_BY_ASM)
#if defined(__i386__) && defined(__PIC__)
/* %ebx is PIC register in 32-bit, so mustn't clobber it. */
__asm__ __volatile__ (
"xchg %%ebx, %1\n"
"cpuid\n"
"xchg %%ebx, %1\n":
"=a" (CPUInfo[0]),
"=r" (CPUInfo[1]),
"=c" (CPUInfo[2]),
"=d" (CPUInfo[3]) :
"0" (InfoType)
);
#else
__asm__ __volatile__ (
"cpuid":
"=a" (CPUInfo[0]),
"=b" (CPUInfo[1]),
"=c" (CPUInfo[2]),
"=d" (CPUInfo[3]) :
"0" (InfoType)
);
#endif
#elif defined(CPU_INFO_BY_C)
__get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3]));
#endif
}
#endif
typedef struct CPU_Feature{
/* SIMD: 128-bit */
int HW_SSE;
int HW_SSE2;
int HW_SSE41;
/* SIMD: 256-bit */
int HW_AVX;
} CPU_Feature;
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
{
unsigned int info[4] = {0};
unsigned int nIds = 0;
cpuid(info, 0);
nIds = info[0];
if (nIds >= 1){
cpuid(info, 1);
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
}
else {
cpu_feature->HW_SSE = 0;
cpu_feature->HW_SSE2 = 0;
cpu_feature->HW_SSE41 = 0;
cpu_feature->HW_AVX = 0;
}
}
int opus_select_arch(void)
{
CPU_Feature cpu_feature;
int arch;
opus_cpu_feature_check(&cpu_feature);
arch = 0;
if (!cpu_feature.HW_SSE)
{
return arch;
}
arch++;
if (!cpu_feature.HW_SSE2)
{
return arch;
}
arch++;
if (!cpu_feature.HW_SSE41)
{
return arch;
}
arch++;
if (!cpu_feature.HW_AVX)
{
return arch;
}
arch++;
return arch;
}
#endif

View File

@ -0,0 +1,93 @@
/* Copyright (c) 2014, Cisco Systems, INC
Written by XiangMingZhu WeiZhou MinPeng YanWang
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(X86CPU_H)
# define X86CPU_H
# if defined(OPUS_X86_MAY_HAVE_SSE)
# define MAY_HAVE_SSE(name) name ## _sse
# else
# define MAY_HAVE_SSE(name) name ## _c
# endif
# if defined(OPUS_X86_MAY_HAVE_SSE2)
# define MAY_HAVE_SSE2(name) name ## _sse2
# else
# define MAY_HAVE_SSE2(name) name ## _c
# endif
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
# define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
# define MAY_HAVE_SSE4_1(name) name ## _c
# endif
# if defined(OPUS_X86_MAY_HAVE_AVX)
# define MAY_HAVE_AVX(name) name ## _avx
# else
# define MAY_HAVE_AVX(name) name ## _c
# endif
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
# endif
/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
reference, these require 16-byte alignment and load a full 16 bytes (instead
of 4 or 8), possibly reading out of bounds.
We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
_mm_loadl_epi64(), which should have the same semantics as an m32 or m64
reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
optimize this out when optimizations ARE enabled.
Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
(which is fair, since technically the compiler is always allowed to do the
dereference before invoking the function implementing the intrinsic).
However, it is smart enough to eliminate the extra MOVD instruction.
For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
the extra MOVQ if it's specified explicitly */
# if defined(__clang__) || !defined(__OPTIMIZE__)
# define OP_CVTEPI8_EPI32_M32(x) \
(_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
# else
# define OP_CVTEPI8_EPI32_M32(x) \
(_mm_cvtepi8_epi32(*(__m128i *)(x)))
#endif
# if !defined(__OPTIMIZE__)
# define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
# else
# define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(*(__m128i *)(x)))
# endif
#endif

View File

@ -142,7 +142,7 @@ extern "C" {
*
* opus_encode() and opus_encode_float() return the number of bytes actually written to the packet.
* The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value
* is 1 byte, then the packet does not need to be transmitted (DTX).
* is 2 bytes or less, then the packet does not need to be transmitted (DTX).
*
* Once the encoder state if no longer needed, it can be destroyed with
*
@ -616,7 +616,10 @@ OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, fl
* merged. Splitting valid Opus packets is always guaranteed to succeed,
* whereas merging valid packets only succeeds if all frames have the same
* mode, bandwidth, and frame size, and when the total duration of the merged
* packet is no more than 120 ms.
* packet is no more than 120 ms. The 120 ms limit comes from the
* specification and limits decoder memory requirements at a point where
* framing overhead becomes negligible.
*
* The repacketizer currently only operates on elementary Opus
* streams. It will not manipualte multistream packets successfully, except in
* the degenerate case where they consist of data from a single stream.

View File

@ -46,7 +46,7 @@ extern "C" {
#define OPUS_OK 0
/** One or more invalid/out of range arguments @hideinitializer*/
#define OPUS_BAD_ARG -1
/** The mode struct passed is invalid @hideinitializer*/
/** Not enough bytes allocated in the buffer @hideinitializer*/
#define OPUS_BUFFER_TOO_SMALL -2
/** An internal error was detected @hideinitializer*/
#define OPUS_INTERNAL_ERROR -3
@ -65,7 +65,7 @@ extern "C" {
#ifndef OPUS_EXPORT
# if defined(WIN32)
# ifdef OPUS_BUILD
# if defined(OPUS_BUILD) && defined(DLL_EXPORT)
# define OPUS_EXPORT __declspec(dllexport)
# else
# define OPUS_EXPORT
@ -274,7 +274,6 @@ extern "C" {
/** Enables or disables variable bitrate (VBR) in the encoder.
* The configured bitrate may not be met exactly because frames must
* be an integer number of bytes in length.
* @warning Only the MDCT mode of Opus can provide hard CBR behavior.
* @see OPUS_GET_VBR
* @see OPUS_SET_VBR_CONSTRAINT
* @param[in] x <tt>opus_int32</tt>: Allowed values:
@ -454,14 +453,6 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x)
/** Gets the sampling rate the encoder or decoder was initialized with.
* This simply returns the <code>Fs</code> value passed to opus_encoder_init()
* or opus_decoder_init().
* @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
* @hideinitializer
*/
#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
/** Gets the total samples of delay added by the entire codec.
* This can be queried by the encoder and then the provided number of samples can be
* skipped on from the start of the decoder's output to provide time aligned input
@ -498,9 +489,9 @@ extern "C" {
#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x)
/** Configures the encoder's expected packet loss percentage.
* Higher values with trigger progressively more loss resistant behavior in the encoder
* at the expense of quality at a given bitrate in the lossless case, but greater quality
* under loss.
* Higher values trigger progressively more loss resistant behavior in the encoder
* at the expense of quality at a given bitrate in the absence of packet loss, but
* greater quality under loss.
* @see OPUS_GET_PACKET_LOSS_PERC
* @param[in] x <tt>opus_int32</tt>: Loss percentage in the range 0-100, inclusive (default: 0).
* @hideinitializer */
@ -532,7 +523,19 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x)
/** Configures the depth of signal being encoded.
*
* This is a hint which helps the encoder identify silence and near-silence.
* It represents the number of significant bits of linear intensity below
* which the signal contains ignorable quantization or other noise.
*
* For example, OPUS_SET_LSB_DEPTH(14) would be an appropriate setting
* for G.711 u-law input. OPUS_SET_LSB_DEPTH(16) would be appropriate
* for 16-bit linear pcm input with opus_encode_float().
*
* When using opus_encode() instead of opus_encode_float(), or when libopus
* is compiled for fixed-point, the encoder uses the minimum of the value
* set here and the value 16.
*
* @see OPUS_GET_LSB_DEPTH
* @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24
* (default: 24).
@ -545,11 +548,6 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x)
/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
* @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
* @hideinitializer */
#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
/** Configures the encoder's use of variable duration frames.
* When variable duration is enabled, the encoder is free to use a shorter frame
* size than the one requested in the opus_encode*() call.
@ -558,12 +556,12 @@ extern "C" {
* packet. The part of the audio that was not encoded needs to be resent to the
* encoder for the next call. Do not use this option unless you <b>really</b>
* know what you are doing.
* @see OPUS_GET_EXPERT_VARIABLE_DURATION
* @see OPUS_GET_EXPERT_FRAME_DURATION
* @param[in] x <tt>opus_int32</tt>: Allowed values:
* <dl>
* <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
* <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
@ -573,12 +571,12 @@ extern "C" {
* @hideinitializer */
#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
/** Gets the encoder's configured use of variable duration frames.
* @see OPUS_SET_EXPERT_VARIABLE_DURATION
* @see OPUS_SET_EXPERT_FRAME_DURATION
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
* <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
@ -589,10 +587,22 @@ extern "C" {
#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
/** If set to 1, disables almost all use of prediction, making frames almost
completely independent. This reduces quality. (default : 0)
* completely independent. This reduces quality.
* @see OPUS_GET_PREDICTION_DISABLED
* @param[in] x <tt>opus_int32</tt>: Allowed values:
* <dl>
* <dt>0</dt><dd>Enable prediction (default).</dd>
* <dt>1</dt><dd>Disable prediction.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
/** Gets the encoder's configured prediction status.
* @see OPUS_SET_PREDICTION_DISABLED
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>0</dt><dd>Prediction enabled (default).</dd>
* <dt>1</dt><dd>Prediction disabled.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
@ -649,18 +659,6 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x)
/** Gets the pitch of the last decoded frame, if available.
* This can be used for any post-processing algorithm requiring the use of pitch,
* e.g. time stretching/shortening. If the last frame was not voiced, or if the
* pitch was not coded in the frame, then zero is returned.
*
* This CTL is only implemented for decoder instances.
*
* @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
*
* @hideinitializer */
#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
/** Gets the encoder's configured bandpass or the decoder's last bandpass.
* @see OPUS_SET_BANDWIDTH
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
@ -675,6 +673,14 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
/** Gets the sampling rate the encoder or decoder was initialized with.
* This simply returns the <code>Fs</code> value passed to opus_encoder_init()
* or opus_decoder_init().
* @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
* @hideinitializer
*/
#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
/**@}*/
/** @defgroup opus_decoderctls Decoder related CTLs
@ -699,6 +705,23 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x)
/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
* @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
* @hideinitializer */
#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
/** Gets the pitch of the last decoded frame, if available.
* This can be used for any post-processing algorithm requiring the use of pitch,
* e.g. time stretching/shortening. If the last frame was not voiced, or if the
* pitch was not coded in the frame, then zero is returned.
*
* This CTL is only implemented for decoder instances.
*
* @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
*
* @hideinitializer */
#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
/**@}*/
/** @defgroup opus_libinfo Opus library information functions
@ -713,6 +736,10 @@ extern "C" {
OPUS_EXPORT const char *opus_strerror(int error);
/** Gets the libopus version string.
*
* Applications may look for the substring "-fixed" in the version string to
* determine whether they have a fixed-point or floating-point build at
* runtime.
*
* @returns Version string
*/

View File

@ -110,10 +110,10 @@ extern "C" {
* packets produced by the encoder. Some basic information, such as packet
* duration, can be computed without any special negotiation.
*
* The format for multistream Opus packets is defined in the
* <a href="http://tools.ietf.org/html/draft-terriberry-oggopus">Ogg
* encapsulation specification</a> and is based on the self-delimited Opus
* framing described in Appendix B of <a href="http://tools.ietf.org/html/rfc6716">RFC 6716</a>.
* The format for multistream Opus packets is defined in
* <a href="https://tools.ietf.org/html/rfc7845">RFC 7845</a>
* and is based on the self-delimited Opus framing described in Appendix B of
* <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>.
* Normal Opus packets are just a degenerate case of multistream Opus packets,
* and can be encoded or decoded with the multistream API by setting
* <code>streams</code> to <code>1</code> when initializing the encoder or
@ -140,7 +140,7 @@ extern "C" {
*
* The output channels specified by the encoder
* should use the
* <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
* <a href="https://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810004.3.9">Vorbis
* channel ordering</a>. A decoder may wish to apply an additional permutation
* to the mapping the encoder used to achieve a different output channel
* order (e.g. for outputing in WAV order).

View File

@ -71,8 +71,23 @@ static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial ev
y32 = p[ dd ]; /* Q16 */
x_Q16 = silk_LSHIFT( x, 4 );
for( n = dd - 1; n >= 0; n-- ) {
y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */
if ( opus_likely( 8 == dd ) )
{
y32 = silk_SMLAWW( p[ 7 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 6 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 5 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 4 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 3 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 2 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 1 ], y32, x_Q16 );
y32 = silk_SMLAWW( p[ 0 ], y32, x_Q16 );
}
else
{
for( n = dd - 1; n >= 0; n-- ) {
y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */
}
}
return y32;
}

View File

@ -111,7 +111,8 @@ opus_int silk_Decode( /* O Returns error co
opus_int newPacketFlag, /* I Indicates first decoder call for this packet */
ec_dec *psRangeDec, /* I/O Compressor data structure */
opus_int16 *samplesOut, /* O Decoded output speech vector */
opus_int32 *nSamplesOut /* O Number of samples decoded */
opus_int32 *nSamplesOut, /* O Number of samples decoded */
int arch /* I Run-time architecture */
);
#if 0

View File

@ -34,9 +34,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* Generates excitation for CNG LPC synthesis */
static OPUS_INLINE void silk_CNG_exc(
opus_int32 residual_Q10[], /* O CNG residual signal Q10 */
opus_int32 exc_Q14[], /* O CNG excitation signal Q10 */
opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */
opus_int32 Gain_Q16, /* I Gain to apply */
opus_int length, /* I Length */
opus_int32 *rand_seed /* I/O Seed to random index generator */
)
@ -55,7 +54,7 @@ static OPUS_INLINE void silk_CNG_exc(
idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
silk_assert( idx >= 0 );
silk_assert( idx <= CNG_BUF_MASK_MAX );
residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
exc_Q14[ i ] = exc_buf_Q14[ idx ];
}
*rand_seed = seed;
}
@ -85,7 +84,7 @@ void silk_CNG(
)
{
opus_int i, subfr;
opus_int32 sum_Q6, max_Gain_Q16;
opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10;
opus_int16 A_Q12[ MAX_LPC_ORDER ];
silk_CNG_struct *psCNG = &psDec->sCNG;
SAVE_STACK;
@ -124,47 +123,60 @@ void silk_CNG(
/* Add CNG when packet is lost or during DTX */
if( psDec->lossCnt ) {
VARDECL( opus_int32, CNG_sig_Q10 );
ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
VARDECL( opus_int32, CNG_sig_Q14 );
ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 );
/* Generate CNG excitation */
silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
} else {
gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
}
gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
/* Convert CNG NLSF to filter representation */
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
/* Generate CNG signal, by synthesis filtering */
silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
for( i = 0; i < length; i++ ) {
silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
if( psDec->LPC_order == 16 ) {
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
}
/* Update states */
CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
/* Scale with Gain and add to input signal */
frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
}
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
} else {
silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) );
}

View File

@ -44,7 +44,8 @@ void silk_LPC_analysis_filter(
const opus_int16 *in, /* I Input signal */
const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
const opus_int32 len, /* I Signal length */
const opus_int32 d /* I Filter order */
const opus_int32 d, /* I Filter order */
int arch /* I Run-time architecture */
)
{
opus_int j;
@ -69,11 +70,12 @@ void silk_LPC_analysis_filter(
for (j=0;j<d;j++) {
mem[ j ] = in[ d - j - 1 ];
}
celt_fir( in + d, num, out + d, len - d, d, mem );
celt_fir( in + d, num, out + d, len - d, d, mem, arch );
for ( j = 0; j < d; j++ ) {
out[ j ] = 0;
}
#else
(void)arch;
for( ix = d; ix < len; ix++ ) {
in_ptr = &in[ ix - 1 ];

View File

@ -46,8 +46,9 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
)
{
opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
opus_int pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
opus_int pred_Q10, diff_Q10, rate0_Q5, rate1_Q5;
opus_int16 out0_Q10, out1_Q10;
opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25;
opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ];
opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
opus_int16 prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
@ -56,6 +57,28 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
opus_int32 RD_max_Q25[ NLSF_QUANT_DEL_DEC_STATES ];
const opus_uint8 *rates_Q5;
opus_int out0_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT];
opus_int out1_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT];
for (i = -NLSF_QUANT_MAX_AMPLITUDE_EXT; i <= NLSF_QUANT_MAX_AMPLITUDE_EXT-1; i++)
{
out0_Q10 = silk_LSHIFT( i, 10 );
out1_Q10 = silk_ADD16( out0_Q10, 1024 );
if( i > 0 ) {
out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( i == 0 ) {
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( i == -1 ) {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
}
out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 );
out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 );
}
silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */
nStates = 1;
@ -63,31 +86,18 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
prev_out_Q10[ 0 ] = 0;
for( i = order - 1; ; i-- ) {
rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
in_Q10 = x_Q10[ i ];
for( j = 0; j < nStates; j++ ) {
pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 );
res_Q10 = silk_SUB16( in_Q10, pred_Q10 );
ind_tmp = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
ind_tmp = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 );
ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
ind[ j ][ i ] = (opus_int8)ind_tmp;
/* compute outputs for ind_tmp and ind_tmp + 1 */
out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
out1_Q10 = silk_ADD16( out0_Q10, 1024 );
if( ind_tmp > 0 ) {
out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( ind_tmp == 0 ) {
out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else if( ind_tmp == -1 ) {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} else {
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
}
out0_Q10 = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
out1_Q10 = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ];
out0_Q10 = silk_ADD16( out0_Q10, pred_Q10 );
out1_Q10 = silk_ADD16( out1_Q10, pred_Q10 );
prev_out_Q10[ j ] = out0_Q10;

View File

@ -46,7 +46,7 @@ opus_int32 silk_NLSF_encode( /* O Returns
)
{
opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7;
opus_int32 W_tmp_Q9;
opus_int32 W_tmp_Q9, ret;
VARDECL( opus_int32, err_Q26 );
VARDECL( opus_int32, RD_Q25 );
VARDECL( opus_int, tempIndices1 );
@ -131,6 +131,7 @@ opus_int32 silk_NLSF_encode( /* O Returns
/* Decode */
silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
ret = RD_Q25[ 0 ];
RESTORE_STACK;
return RD_Q25[ 0 ];
return ret;
}

View File

@ -130,7 +130,7 @@ void silk_NLSF_stabilize(
/* Keep delta_min distance between the NLSFs */
for( i = 1; i < L; i++ )
NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], silk_ADD_SAT16( NLSF_Q15[i-1], NDeltaMin_Q15[i] ) );
/* Last NLSF should be no higher than 1 - NDeltaMin[L] */
NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] );

View File

@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
#include "stack_alloc.h"
#include "NSQ.h"
static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
@ -46,6 +48,7 @@ static OPUS_INLINE void silk_nsq_scale_states(
const opus_int signal_type /* I Signal type */
);
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
static OPUS_INLINE void silk_noise_shape_quantizer(
silk_nsq_state *NSQ, /* I/O NSQ state */
opus_int signalType, /* I Signal type */
@ -65,10 +68,13 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
opus_int offset_Q10, /* I */
opus_int length, /* I Input length */
opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
opus_int predictLPCOrder /* I Prediction filter order */
opus_int predictLPCOrder, /* I Prediction filter order */
int arch /* I Architecture */
);
#endif
void silk_NSQ(
void silk_NSQ_c
(
const silk_encoder_state *psEncC, /* I/O Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
@ -141,7 +147,7 @@ void silk_NSQ(
silk_assert( start_idx > 0 );
silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
NSQ->rewhite_flag = 1;
NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
@ -152,7 +158,7 @@ void silk_NSQ(
silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
x_Q3 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
@ -172,7 +178,11 @@ void silk_NSQ(
/***********************************/
/* silk_noise_shape_quantizer */
/***********************************/
static OPUS_INLINE void silk_noise_shape_quantizer(
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
static OPUS_INLINE
#endif
void silk_noise_shape_quantizer(
silk_nsq_state *NSQ, /* I/O NSQ state */
opus_int signalType, /* I Signal type */
const opus_int32 x_sc_Q10[], /* I */
@ -191,15 +201,19 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
opus_int offset_Q10, /* I */
opus_int length, /* I Input length */
opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
opus_int predictLPCOrder /* I Prediction filter order */
opus_int predictLPCOrder, /* I Prediction filter order */
int arch /* I Architecture */
)
{
opus_int i, j;
opus_int i;
opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
opus_int32 n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
#ifdef silk_short_prediction_create_arch_coef
opus_int32 a_Q12_arch[MAX_LPC_ORDER];
#endif
shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
@ -208,32 +222,16 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
/* Set up short term AR state */
psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
#ifdef silk_short_prediction_create_arch_coef
silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
#endif
for( i = 0; i < length; i++ ) {
/* Generate dither */
NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
/* Short-term prediction */
silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
if( predictLPCOrder == 16 ) {
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
}
LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
/* Long-term prediction */
if( signalType == TYPE_VOICED ) {
@ -252,23 +250,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
tmp2 = psLPC_Q14[ 0 ];
tmp1 = NSQ->sAR2_Q14[ 0 ];
NSQ->sAR2_Q14[ 0 ] = tmp2;
n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
for( j = 2; j < shapingLPCOrder; j += 2 ) {
tmp2 = NSQ->sAR2_Q14[ j - 1 ];
NSQ->sAR2_Q14[ j - 1 ] = tmp1;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
tmp1 = NSQ->sAR2_Q14[ j + 0 ];
NSQ->sAR2_Q14[ j + 0 ] = tmp2;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
}
NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );

101
code/opus-1.1/silk/NSQ.h Normal file
View File

@ -0,0 +1,101 @@
/***********************************************************************
Copyright (c) 2014 Vidyo.
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_NSQ_H
#define SILK_NSQ_H
#include "SigProc_FIX.h"
#undef silk_short_prediction_create_arch_coef
static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_c(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order)
{
opus_int32 out;
silk_assert( order == 10 || order == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
out = silk_RSHIFT( order, 1 );
out = silk_SMLAWB( out, buf32[ 0 ], coef16[ 0 ] );
out = silk_SMLAWB( out, buf32[ -1 ], coef16[ 1 ] );
out = silk_SMLAWB( out, buf32[ -2 ], coef16[ 2 ] );
out = silk_SMLAWB( out, buf32[ -3 ], coef16[ 3 ] );
out = silk_SMLAWB( out, buf32[ -4 ], coef16[ 4 ] );
out = silk_SMLAWB( out, buf32[ -5 ], coef16[ 5 ] );
out = silk_SMLAWB( out, buf32[ -6 ], coef16[ 6 ] );
out = silk_SMLAWB( out, buf32[ -7 ], coef16[ 7 ] );
out = silk_SMLAWB( out, buf32[ -8 ], coef16[ 8 ] );
out = silk_SMLAWB( out, buf32[ -9 ], coef16[ 9 ] );
if( order == 16 )
{
out = silk_SMLAWB( out, buf32[ -10 ], coef16[ 10 ] );
out = silk_SMLAWB( out, buf32[ -11 ], coef16[ 11 ] );
out = silk_SMLAWB( out, buf32[ -12 ], coef16[ 12 ] );
out = silk_SMLAWB( out, buf32[ -13 ], coef16[ 13 ] );
out = silk_SMLAWB( out, buf32[ -14 ], coef16[ 14 ] );
out = silk_SMLAWB( out, buf32[ -15 ], coef16[ 15 ] );
}
return out;
}
#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) ((void)arch,silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_c(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
{
opus_int32 out;
opus_int32 tmp1, tmp2;
opus_int j;
tmp2 = data0[0];
tmp1 = data1[0];
data1[0] = tmp2;
out = silk_RSHIFT(order, 1);
out = silk_SMLAWB(out, tmp2, coef[0]);
for (j = 2; j < order; j += 2) {
tmp2 = data1[j - 1];
data1[j - 1] = tmp1;
out = silk_SMLAWB(out, tmp1, coef[j - 1]);
tmp1 = data1[j + 0];
data1[j + 0] = tmp2;
out = silk_SMLAWB(out, tmp2, coef[j]);
}
data1[order - 1] = tmp1;
out = silk_SMLAWB(out, tmp1, coef[order - 1]);
/* Q11 -> Q12 */
out = silk_LSHIFT32( out, 1 );
return out;
}
#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order))
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#include "arm/NSQ_neon.h"
#endif
#endif /* SILK_NSQ_H */

View File

@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
#include "stack_alloc.h"
#include "NSQ.h"
typedef struct {
opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
@ -57,6 +59,9 @@ typedef struct {
typedef NSQ_sample_struct NSQ_sample_pair[ 2 ];
#if defined(MIPSr1_ASM)
#include "mips/NSQ_del_dec_mipsr1.h"
#endif
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@ -103,10 +108,11 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */
opus_int decisionDelay, /* I */
int arch /* I */
);
void silk_NSQ_del_dec(
void silk_NSQ_del_dec_c(
const silk_encoder_state *psEncC, /* I/O Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
@ -244,7 +250,7 @@ void silk_NSQ_del_dec(
silk_assert( start_idx > 0 );
silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
NSQ->rewhite_flag = 1;
@ -257,7 +263,7 @@ void silk_NSQ_del_dec(
silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
x_Q3 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
@ -303,6 +309,7 @@ void silk_NSQ_del_dec(
/******************************************/
/* Noise shape quantizer for one subframe */
/******************************************/
#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec
static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
@ -329,7 +336,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */
opus_int decisionDelay, /* I */
int arch /* I */
)
{
opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
@ -339,6 +347,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
#ifdef silk_short_prediction_create_arch_coef
opus_int32 a_Q12_arch[MAX_LPC_ORDER];
#endif
VARDECL( NSQ_sample_pair, psSampleState );
NSQ_del_dec_struct *psDD;
NSQ_sample_struct *psSS;
@ -351,6 +363,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
#ifdef silk_short_prediction_create_arch_coef
silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
#endif
for( i = 0; i < length; i++ ) {
/* Perform common calculations used in all states */
@ -394,27 +410,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Pointer used in short term prediction and shaping */
psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
/* Short-term prediction */
silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
if( predictLPCOrder == 16 ) {
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
}
LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
/* Noise shape feedback */
@ -629,6 +625,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
}
RESTORE_STACK;
}
#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */

View File

@ -46,7 +46,8 @@ static OPUS_INLINE void silk_PLC_update(
static OPUS_INLINE void silk_PLC_conceal(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[] /* O LPC residual signal */
opus_int16 frame[], /* O LPC residual signal */
int arch /* I Run-time architecture */
);
@ -65,7 +66,8 @@ void silk_PLC(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* I/O signal */
opus_int lost /* I Loss flag */
opus_int lost, /* I Loss flag */
int arch /* I Run-time architecture */
)
{
/* PLC control function */
@ -78,7 +80,7 @@ void silk_PLC(
/****************************/
/* Generate Signal */
/****************************/
silk_PLC_conceal( psDec, psDecCtrl, frame );
silk_PLC_conceal( psDec, psDecCtrl, frame, arch );
psDec->lossCnt++;
} else {
@ -165,10 +167,35 @@ static OPUS_INLINE void silk_PLC_update(
psPLC->nb_subfr = psDec->nb_subfr;
}
static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2,
const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr)
{
int i, k;
VARDECL( opus_int16, exc_buf );
opus_int16 *exc_buf_ptr;
SAVE_STACK;
ALLOC( exc_buf, 2*subfr_length, opus_int16 );
/* Find random noise component */
/* Scale previous excitation signal */
exc_buf_ptr = exc_buf;
for( k = 0; k < 2; k++ ) {
for( i = 0; i < subfr_length; i++ ) {
exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) );
}
exc_buf_ptr += subfr_length;
}
/* Find the subframe with lowest energy of the last two and use that as random noise generator */
silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length );
silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length );
RESTORE_STACK;
}
static OPUS_INLINE void silk_PLC_conceal(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[] /* O LPC residual signal */
opus_int16 frame[], /* O LPC residual signal */
int arch /* I Run-time architecture */
)
{
opus_int i, j, k;
@ -177,19 +204,26 @@ static OPUS_INLINE void silk_PLC_conceal(
opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
opus_int32 LPC_pred_Q10, LTP_pred_Q12;
opus_int16 rand_scale_Q14;
opus_int16 *B_Q14, *exc_buf_ptr;
opus_int16 *B_Q14;
opus_int32 *sLPC_Q14_ptr;
VARDECL( opus_int16, exc_buf );
opus_int16 A_Q12[ MAX_LPC_ORDER ];
#ifdef SMALL_FOOTPRINT
opus_int16 *sLTP;
#else
VARDECL( opus_int16, sLTP );
#endif
VARDECL( opus_int32, sLTP_Q14 );
silk_PLC_struct *psPLC = &psDec->sPLC;
opus_int32 prevGain_Q10[2];
SAVE_STACK;
ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
#ifdef SMALL_FOOTPRINT
/* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */
sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length;
#else
ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
#endif
prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@ -198,19 +232,7 @@ static OPUS_INLINE void silk_PLC_conceal(
silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
}
/* Find random noise component */
/* Scale previous excitation signal */
exc_buf_ptr = exc_buf;
for( k = 0; k < 2; k++ ) {
for( i = 0; i < psPLC->subfr_length; i++ ) {
exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
}
exc_buf_ptr += psPLC->subfr_length;
}
/* Find the subframe with lowest energy of the last two and use that as random noise generator */
silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length );
silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr);
if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
/* First sub-frame has lowest energy */
@ -270,7 +292,7 @@ static OPUS_INLINE void silk_PLC_conceal(
/* Rewhiten LTP state */
idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
silk_assert( idx > 0 );
silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order );
silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch );
/* Scale LTP state */
inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 );
@ -343,7 +365,8 @@ static OPUS_INLINE void silk_PLC_conceal(
}
/* Add prediction to LPC excitation */
sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ],
silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ));
/* Scale with Gain */
frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );

View File

@ -48,7 +48,8 @@ void silk_PLC(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* I/O signal */
opus_int lost /* I Loss flag */
opus_int lost, /* I Loss flag */
int arch /* I Run-time architecture */
);
void silk_PLC_glue_frames(

View File

@ -41,7 +41,11 @@ extern "C"
#include "typedef.h"
#include "resampler_structs.h"
#include "macros.h"
#include "cpu_support.h"
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#include "x86/SigProc_FIX_sse.h"
#endif
/********************************************************************/
/* SIGNAL PROCESSING FUNCTIONS */
@ -108,7 +112,8 @@ void silk_LPC_analysis_filter(
const opus_int16 *in, /* I Input signal */
const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
const opus_int32 len, /* I Signal length */
const opus_int32 d /* I Filter order */
const opus_int32 d, /* I Filter order */
int arch /* I Run-time architecture */
);
/* Chirp (bandwidth expand) LP AR filter */
@ -303,7 +308,7 @@ void silk_NLSF_VQ_weights_laroia(
);
/* Compute reflection coefficients from input signal */
void silk_burg_modified(
void silk_burg_modified_c(
opus_int32 *res_nrg, /* O Residual energy */
opus_int *res_nrg_Q, /* O Residual energy Q value */
opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
@ -335,12 +340,15 @@ void silk_scale_vector32_Q26_lshift_18(
/********************************************************************/
/* return sum( inVec1[i] * inVec2[i] ) */
opus_int32 silk_inner_prod_aligned(
const opus_int16 *const inVec1, /* I input vector 1 */
const opus_int16 *const inVec2, /* I input vector 2 */
const opus_int len /* I vector lengths */
const opus_int len, /* I vector lengths */
int arch /* I Run-time architecture */
);
opus_int32 silk_inner_prod_aligned_scale(
const opus_int16 *const inVec1, /* I input vector 1 */
const opus_int16 *const inVec2, /* I input vector 2 */
@ -348,7 +356,7 @@ opus_int32 silk_inner_prod_aligned_scale(
const opus_int len /* I vector lengths */
);
opus_int64 silk_inner_prod16_aligned_64(
opus_int64 silk_inner_prod16_aligned_64_c(
const opus_int16 *inVec1, /* I input vector 1 */
const opus_int16 *inVec2, /* I input vector 2 */
const opus_int len /* I vector lengths */
@ -575,6 +583,14 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* the following seems faster on x86 */
#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len))
#endif
#include "Inlines.h"
#include "MacroCount.h"
#include "MacroDebug.h"
@ -587,6 +603,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
#include "arm/SigProc_FIX_armv5e.h"
#endif
#if defined(MIPSr1_ASM)
#include "mips/sigproc_fix_mipsr1.h"
#endif
#ifdef __cplusplus
}
#endif

View File

@ -33,10 +33,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
/* Silk VAD noise level estimation */
# if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
static OPUS_INLINE void silk_VAD_GetNoiseLevels(
const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */
silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
);
#endif
/**********************************/
/* Initialization of the Silk VAD */
@ -77,7 +79,7 @@ static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -120
/***************************************/
/* Get the speech activity level in Q8 */
/***************************************/
opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */
opus_int silk_VAD_GetSA_Q8_c( /* O Return value, 0 if success */
silk_encoder_state *psEncC, /* I/O Encoder state */
const opus_int16 pIn[] /* I PCM input */
)
@ -296,7 +298,10 @@ opus_int silk_VAD_GetSA_Q8( /* O Return v
/**************************/
/* Noise level estimation */
/**************************/
static OPUS_INLINE void silk_VAD_GetNoiseLevels(
# if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
static OPUS_INLINE
#endif
void silk_VAD_GetNoiseLevels(
const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */
silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
)

View File

@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
void silk_VQ_WMat_EC(
void silk_VQ_WMat_EC_c(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */
opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
@ -55,7 +55,7 @@ void silk_VQ_WMat_EC(
*rate_dist_Q14 = silk_int32_MAX;
cb_row_Q7 = cb_Q7;
for( k = 0; k < L; k++ ) {
gain_tmp_Q7 = cb_gain_Q7[k];
gain_tmp_Q7 = cb_gain_Q7[k];
diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
@ -66,8 +66,8 @@ void silk_VQ_WMat_EC(
/* Weighted rate */
sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
/* Penalty for too large gain */
sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
/* Penalty for too large gain */
sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
silk_assert( sum1_Q14 >= 0 );
@ -111,7 +111,7 @@ void silk_VQ_WMat_EC(
if( sum1_Q14 < *rate_dist_Q14 ) {
*rate_dist_Q14 = sum1_Q14;
*ind = (opus_int8)k;
*gain_Q7 = gain_tmp_Q7;
*gain_Q7 = gain_tmp_Q7;
}
/* Go to next cbk vector */

View File

@ -0,0 +1,112 @@
/***********************************************************************
Copyright (C) 2014 Vidyo
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "main.h"
#include "stack_alloc.h"
#include "NSQ.h"
#include "celt/cpu_support.h"
#include "celt/arm/armcpu.h"
opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order)
{
int32x4_t coef0 = vld1q_s32(coef32);
int32x4_t coef1 = vld1q_s32(coef32 + 4);
int32x4_t coef2 = vld1q_s32(coef32 + 8);
int32x4_t coef3 = vld1q_s32(coef32 + 12);
int32x4_t a0 = vld1q_s32(buf32 - 15);
int32x4_t a1 = vld1q_s32(buf32 - 11);
int32x4_t a2 = vld1q_s32(buf32 - 7);
int32x4_t a3 = vld1q_s32(buf32 - 3);
int32x4_t b0 = vqdmulhq_s32(coef0, a0);
int32x4_t b1 = vqdmulhq_s32(coef1, a1);
int32x4_t b2 = vqdmulhq_s32(coef2, a2);
int32x4_t b3 = vqdmulhq_s32(coef3, a3);
int32x4_t c0 = vaddq_s32(b0, b1);
int32x4_t c1 = vaddq_s32(b2, b3);
int32x4_t d = vaddq_s32(c0, c1);
int64x2_t e = vpaddlq_s32(d);
int64x1_t f = vadd_s64(vget_low_s64(e), vget_high_s64(e));
opus_int32 out = vget_lane_s32(vreinterpret_s32_s64(f), 0);
out += silk_RSHIFT( order, 1 );
return out;
}
opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
{
opus_int32 out;
if (order == 8)
{
int32x4_t a00 = vdupq_n_s32(data0[0]);
int32x4_t a01 = vld1q_s32(data1); /* data1[0] ... [3] */
int32x4_t a0 = vextq_s32 (a00, a01, 3); /* data0[0] data1[0] ...[2] */
int32x4_t a1 = vld1q_s32(data1 + 3); /* data1[3] ... [6] */
/*TODO: Convert these once in advance instead of once per sample, like
silk_noise_shape_quantizer_short_prediction_neon() does.*/
int16x8_t coef16 = vld1q_s16(coef);
int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16));
int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16));
/*This is not bit-exact with the C version, since we do not drop the
lower 16 bits of each multiply, but wait until the end to truncate
precision. This is an encoder-specific calculation (and unlike
silk_noise_shape_quantizer_short_prediction_neon(), is not meant to
simulate what the decoder will do). We still could use vqdmulhq_s32()
like silk_noise_shape_quantizer_short_prediction_neon() and save
half the multiplies, but the speed difference is not large, since we
then need two extra adds.*/
int64x2_t b0 = vmull_s32(vget_low_s32(a0), vget_low_s32(coef0));
int64x2_t b1 = vmlal_s32(b0, vget_high_s32(a0), vget_high_s32(coef0));
int64x2_t b2 = vmlal_s32(b1, vget_low_s32(a1), vget_low_s32(coef1));
int64x2_t b3 = vmlal_s32(b2, vget_high_s32(a1), vget_high_s32(coef1));
int64x1_t c = vadd_s64(vget_low_s64(b3), vget_high_s64(b3));
int64x1_t cS = vrshr_n_s64(c, 15);
int32x2_t d = vreinterpret_s32_s64(cS);
out = vget_lane_s32(d, 0);
vst1q_s32(data1, a0);
vst1q_s32(data1 + 4, a1);
return out;
}
return silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order);
}

View File

@ -0,0 +1,113 @@
/***********************************************************************
Copyright (C) 2014 Vidyo
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_NSQ_NEON_H
#define SILK_NSQ_NEON_H
#include "cpu_support.h"
#undef silk_short_prediction_create_arch_coef
/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. */
static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order)
{
out[15] = in[0] << 15;
out[14] = in[1] << 15;
out[13] = in[2] << 15;
out[12] = in[3] << 15;
out[11] = in[4] << 15;
out[10] = in[5] << 15;
out[9] = in[6] << 15;
out[8] = in[7] << 15;
out[7] = in[8] << 15;
out[6] = in[9] << 15;
if (order == 16)
{
out[5] = in[10] << 15;
out[4] = in[11] << 15;
out[3] = in[12] << 15;
out[2] = in[13] << 15;
out[1] = in[14] << 15;
out[0] = in[15] << 15;
}
else
{
out[5] = 0;
out[4] = 0;
out[3] = 0;
out[2] = 0;
out[1] = 0;
out[0] = 0;
}
}
#if defined(OPUS_ARM_PRESUME_NEON_INTR)
#define silk_short_prediction_create_arch_coef(out, in, order) \
(silk_short_prediction_create_arch_coef_neon(out, in, order))
#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#define silk_short_prediction_create_arch_coef(out, in, order) \
do { if (arch == OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0)
#endif
opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order);
opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order);
#if defined(OPUS_ARM_PRESUME_NEON_INTR)
#undef silk_noise_shape_quantizer_short_prediction
#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \
((void)arch,silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order))
#undef silk_NSQ_noise_shape_feedback_loop
#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_neon(data0, data1, coef, order))
#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* silk_noise_shape_quantizer_short_prediction implementations take different parameters based on arch
(coef vs. coefRev) so can't use the usual IMPL table implementation */
#undef silk_noise_shape_quantizer_short_prediction
#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \
(arch == OPUS_ARCH_ARM_NEON ? \
silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order) : \
silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
extern opus_int32
(*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
opus_int order);
#undef silk_NSQ_noise_shape_feedback_loop
#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) \
(SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[(arch)&OPUS_ARCHMASK](data0, data1, \
coef, order))
#endif
#endif /* SILK_NSQ_NEON_H */

View File

@ -0,0 +1,55 @@
/***********************************************************************
Copyright (C) 2014 Vidyo
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "NSQ.h"
#if defined(OPUS_HAVE_RTCD)
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
NEON version takes different parameters than the C version.
Instead RTCD is done via if statements at the call sites.
See NSQ_neon.h for details.*/
opus_int32
(*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
opus_int order) = {
silk_NSQ_noise_shape_feedback_loop_c, /* ARMv4 */
silk_NSQ_noise_shape_feedback_loop_c, /* EDSP */
silk_NSQ_noise_shape_feedback_loop_c, /* Media */
silk_NSQ_noise_shape_feedback_loop_neon, /* NEON */
};
# endif
#endif /* OPUS_HAVE_RTCD */

View File

@ -0,0 +1,39 @@
/***********************************************************************
Copyright (C) 2015 Vidyo
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_MACROS_ARM64_H
#define SILK_MACROS_ARM64_H
#include <arm_neon.h>
#undef silk_ADD_SAT32
#define silk_ADD_SAT32(a, b) (vqadds_s32((a), (b)))
#undef silk_SUB_SAT32
#define silk_SUB_SAT32(a, b) (vqsubs_s32((a), (b)))
#endif /* SILK_MACROS_ARM64_H */

View File

@ -74,7 +74,7 @@ void silk_encode_signs(
/* Decodes signs of excitation */
void silk_decode_signs(
ec_dec *psRangeDec, /* I/O Compressor data structure */
opus_int pulses[], /* I/O pulse signal */
opus_int16 pulses[], /* I/O pulse signal */
opus_int length, /* I length of input */
const opus_int signalType, /* I Signal type */
const opus_int quantOffsetType, /* I Quantization offset type */
@ -83,7 +83,7 @@ void silk_decode_signs(
{
opus_int i, j, p;
opus_uint8 icdf[ 2 ];
opus_int *q_ptr;
opus_int16 *q_ptr;
const opus_uint8 *icdf_ptr;
icdf[ 1 ] = 0;

View File

@ -70,11 +70,6 @@ opus_int silk_control_SNR(
break;
}
}
/* Reduce coding quality whenever LBRR is enabled, to free up some bits */
if( psEncC->LBRR_enabled ) {
psEncC->SNR_dB_Q7 = silk_SMLABB( psEncC->SNR_dB_Q7, 12 - psEncC->LBRR_GainIncreases, SILK_FIX_CONST( -0.25, 7 ) );
}
}
return ret;

View File

@ -397,9 +397,10 @@ static OPUS_INLINE opus_int silk_setup_LBRR(
const opus_int32 TargetRate_bps /* I */
)
{
opus_int ret = SILK_NO_ERROR;
opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR;
opus_int32 LBRR_rate_thres_bps;
LBRR_in_previous_packet = psEncC->LBRR_enabled;
psEncC->LBRR_enabled = 0;
if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
if( psEncC->fs_kHz == 8 ) {
@ -413,8 +414,13 @@ static OPUS_INLINE opus_int silk_setup_LBRR(
if( TargetRate_bps > LBRR_rate_thres_bps ) {
/* Set gain increase for coding LBRR excitation */
if( LBRR_in_previous_packet == 0 ) {
/* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */
psEncC->LBRR_GainIncreases = 7;
} else {
psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
}
psEncC->LBRR_enabled = 1;
psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
}
}

View File

@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "API.h"
#include "main.h"
#include "stack_alloc.h"
#include "os_support.h"
/************************/
/* Decoder Super Struct */
@ -84,13 +85,15 @@ opus_int silk_Decode( /* O Returns error co
opus_int newPacketFlag, /* I Indicates first decoder call for this packet */
ec_dec *psRangeDec, /* I/O Compressor data structure */
opus_int16 *samplesOut, /* O Decoded output speech vector */
opus_int32 *nSamplesOut /* O Number of samples decoded */
opus_int32 *nSamplesOut, /* O Number of samples decoded */
int arch /* I Run-time architecture */
)
{
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 *samplesOut1_tmp[ 2 ];
VARDECL( opus_int16, samplesOut1_tmp_storage );
VARDECL( opus_int16, samplesOut1_tmp_storage1 );
VARDECL( opus_int16, samplesOut1_tmp_storage2 );
VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
@ -98,6 +101,7 @@ opus_int silk_Decode( /* O Returns error co
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
int delay_stack_alloc;
SAVE_STACK;
silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@ -196,7 +200,7 @@ opus_int silk_Decode( /* O Returns error co
for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
for( n = 0; n < decControl->nChannelsInternal; n++ ) {
if( channel_state[ n ].LBRR_flags[ i ] ) {
opus_int pulses[ MAX_FRAME_LENGTH ];
opus_int16 pulses[ MAX_FRAME_LENGTH ];
opus_int condCoding;
if( decControl->nChannelsInternal == 2 && n == 0 ) {
@ -251,13 +255,22 @@ opus_int silk_Decode( /* O Returns error co
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
ALLOC( samplesOut1_tmp_storage,
decControl->nChannelsInternal*(
channel_state[ 0 ].frame_length + 2 ),
/* Check if the temp buffer fits into the output PCM buffer. If it fits,
we can delay allocating the temp buffer until after the SILK peak stack
usage. We need to use a < and not a <= because of the two extra samples. */
delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
< decControl->API_sampleRate*decControl->nChannelsAPI;
ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
: decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
opus_int16 );
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+ channel_state[ 0 ].frame_length + 2;
if ( delay_stack_alloc )
{
samplesOut1_tmp[ 0 ] = samplesOut;
samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
} else {
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
}
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
@ -284,7 +297,7 @@ opus_int silk_Decode( /* O Returns error co
} else {
condCoding = CODE_CONDITIONALLY;
}
ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch);
} else {
silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
}
@ -312,6 +325,15 @@ opus_int silk_Decode( /* O Returns error co
resample_out_ptr = samplesOut;
}
ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
: ALLOC_NONE,
opus_int16 );
if ( delay_stack_alloc ) {
OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
}
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
/* Resample decoded signal to API_sampleRate */

View File

@ -39,7 +39,8 @@ void silk_decode_core(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I Decoder control */
opus_int16 xq[], /* O Decoded speech */
const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */
const opus_int16 pulses[ MAX_FRAME_LENGTH ], /* I Pulse signal */
int arch /* I Run-time architecture */
)
{
opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
@ -147,7 +148,7 @@ void silk_decode_core(
}
silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ],
A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order );
A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order, arch );
/* After rewhitening the LTP state is unscaled */
if( k == 0 ) {
@ -218,7 +219,7 @@ void silk_decode_core(
}
/* Add prediction to LPC excitation */
sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 );
sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
/* Scale with gain */
pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );

View File

@ -42,18 +42,16 @@ opus_int silk_decode_frame(
opus_int16 pOut[], /* O Pointer to output speech frame */
opus_int32 *pN, /* O Pointer to size of output frame */
opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */
opus_int condCoding /* I The type of conditional coding to use */
opus_int condCoding, /* I The type of conditional coding to use */
int arch /* I Run-time architecture */
)
{
VARDECL( silk_decoder_control, psDecCtrl );
opus_int L, mv_len, ret = 0;
VARDECL( opus_int, pulses );
SAVE_STACK;
L = psDec->frame_length;
ALLOC( psDecCtrl, 1, silk_decoder_control );
ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
psDecCtrl->LTP_scale_Q14 = 0;
/* Safety checks */
@ -62,6 +60,9 @@ opus_int silk_decode_frame(
if( lostFlag == FLAG_DECODE_NORMAL ||
( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
{
VARDECL( opus_int16, pulses );
ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 );
/*********************************************/
/* Decode quantization indices of side info */
/*********************************************/
@ -81,12 +82,12 @@ opus_int silk_decode_frame(
/********************************************************/
/* Run inverse NSQ */
/********************************************************/
silk_decode_core( psDec, psDecCtrl, pOut, pulses );
silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );
/********************************************************/
/* Update PLC state */
/********************************************************/
silk_PLC( psDec, psDecCtrl, pOut, 0 );
silk_PLC( psDec, psDecCtrl, pOut, 0, arch );
psDec->lossCnt = 0;
psDec->prevSignalType = psDec->indices.signalType;
@ -96,7 +97,7 @@ opus_int silk_decode_frame(
psDec->first_frame_after_reset = 0;
} else {
/* Handle packet loss by extrapolation */
silk_PLC( psDec, psDecCtrl, pOut, 1 );
silk_PLC( psDec, psDecCtrl, pOut, 1, arch );
}
/*************************/
@ -107,16 +108,16 @@ opus_int silk_decode_frame(
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
/****************************************************************/
/* Ensure smooth connection of extrapolated and good frames */
/****************************************************************/
silk_PLC_glue_frames( psDec, pOut, L );
/************************************************/
/* Comfort noise generation / estimation */
/************************************************/
silk_CNG( psDec, psDecCtrl, pOut, L );
/****************************************************************/
/* Ensure smooth connection of extrapolated and good frames */
/****************************************************************/
silk_PLC_glue_frames( psDec, pOut, L );
/* Update some decoder state variables */
psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];

View File

@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
/*********************************************/
void silk_decode_pulses(
ec_dec *psRangeDec, /* I/O Compressor data structure */
opus_int pulses[], /* O Excitation signal */
opus_int16 pulses[], /* O Excitation signal */
const opus_int signalType, /* I Sigtype */
const opus_int quantOffsetType, /* I quantOffsetType */
const opus_int frame_length /* I Frame length */
@ -44,7 +44,7 @@ void silk_decode_pulses(
{
opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex;
opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
opus_int *pulses_ptr;
opus_int16 *pulses_ptr;
const opus_uint8 *cdf_ptr;
/*********************/
@ -69,9 +69,9 @@ void silk_decode_pulses(
sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 );
/* LSB indication */
while( sum_pulses[ i ] == MAX_PULSES + 1 ) {
while( sum_pulses[ i ] == SILK_MAX_PULSES + 1 ) {
nLshifts[ i ]++;
/* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */
/* When we've already got 10 LSBs, we shift the table to not allow (SILK_MAX_PULSES + 1) */
sum_pulses[ i ] = ec_dec_icdf( psRangeDec,
silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 );
}
@ -84,7 +84,7 @@ void silk_decode_pulses(
if( sum_pulses[ i ] > 0 ) {
silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
} else {
silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) );
}
}

Some files were not shown because too many files have changed in this diff Show More