Update opus from 1.1.4 to 1.2.1

Zack Middleton 2018-03-16 13:03:37 -05:00
parent c38c823a2a
commit cb24c59567
155 changed files with 6263 additions and 3968 deletions

View file

@ -2005,6 +2005,7 @@ Q3OBJ += \
$(B)/client/opus/lin2log.o \
$(B)/client/opus/log2lin.o \
$(B)/client/opus/LPC_analysis_filter.o \
$(B)/client/opus/LPC_fit.o \
$(B)/client/opus/LPC_inv_pred_gain.o \
$(B)/client/opus/table_LSF_cos.o \
$(B)/client/opus/NLSF2A.o \
@ -2038,11 +2039,9 @@ Q3OBJ += \
$(B)/client/opus/LTP_analysis_filter_FLP.o \
$(B)/client/opus/LTP_scale_ctrl_FLP.o \
$(B)/client/opus/noise_shape_analysis_FLP.o \
$(B)/client/opus/prefilter_FLP.o \
$(B)/client/opus/process_gains_FLP.o \
$(B)/client/opus/regularize_correlations_FLP.o \
$(B)/client/opus/residual_energy_FLP.o \
$(B)/client/opus/solve_LS_FLP.o \
$(B)/client/opus/warped_autocorrelation_FLP.o \
$(B)/client/opus/wrappers_FLP.o \
$(B)/client/opus/autocorrelation_FLP.o \
@ -2051,7 +2050,6 @@ Q3OBJ += \
$(B)/client/opus/energy_FLP.o \
$(B)/client/opus/inner_product_FLP.o \
$(B)/client/opus/k2a_FLP.o \
$(B)/client/opus/levinsondurbin_FLP.o \
$(B)/client/opus/LPC_inv_pred_gain_FLP.o \
$(B)/client/opus/pitch_analysis_core_FLP.o \
$(B)/client/opus/scale_copy_vector_FLP.o \

View file

@ -58,12 +58,12 @@
# define S_MUL(a,b) MULT16_32_Q15(b, a)
# define C_MUL(m,a,b) \
do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
# define C_MULC(m,a,b) \
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
# define C_MULBYSCALAR( c, s ) \
do{ (c).r = S_MUL( (c).r , s ) ;\
@ -77,17 +77,17 @@
DIVSCALAR( (c).i , div); }while (0)
#define C_ADD( res, a,b)\
do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \
do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \
}while(0)
#define C_SUB( res, a,b)\
do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \
do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \
}while(0)
#define C_ADDTO( res , a)\
do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\
do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\
}while(0)
#define C_SUBFROM( res , a)\
do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \
do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \
}while(0)
#if defined(OPUS_ARM_INLINE_ASM)
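The *_ovflw macros introduced here make the FFT's intentional wraparound explicit, so sanitizer builds can keep the arithmetic well-defined. In the float build they reduce to plain + and - (see the celt/arch.h hunk below); a plausible fixed-point definition, shown as a sketch rather than quoted from the tree, routes the operations through unsigned types:

#include <stdint.h>
typedef int32_t  opus_val32;
typedef uint32_t opus_uint32;
/* Wraparound add/sub: unsigned overflow is well-defined in C, and the cast
   back to signed is implementation-defined rather than undefined. */
#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a) + (opus_uint32)(b)))
#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a) - (opus_uint32)(b)))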

View file

@ -46,6 +46,14 @@
# endif
# endif
#if OPUS_GNUC_PREREQ(3, 0)
#define opus_likely(x) (__builtin_expect(!!(x), 1))
#define opus_unlikely(x) (__builtin_expect(!!(x), 0))
#else
#define opus_likely(x) (!!(x))
#define opus_unlikely(x) (!!(x))
#endif
#define CELT_SIG_SCALE 32768.f
#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
@ -93,6 +101,7 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
typedef opus_int16 opus_val16;
typedef opus_int32 opus_val32;
typedef opus_int64 opus_val64;
typedef opus_val32 celt_sig;
typedef opus_val16 celt_norm;
@ -101,6 +110,9 @@ typedef opus_val32 celt_ener;
#define Q15ONE 32767
#define SIG_SHIFT 12
/* Safe saturation value for 32-bit signals. Should be less than
2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
#define SIG_SAT (300000000)
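/* For scale: 2^31*(1-0.85) = 2147483648*0.15 ~= 322122547, so the chosen
   300000000 sits roughly 7% below that bound. */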
#define NORM_SCALING 16384
@ -147,6 +159,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
typedef float opus_val16;
typedef float opus_val32;
typedef float opus_val64;
typedef float celt_sig;
typedef float celt_norm;
@ -186,6 +199,7 @@ static OPUS_INLINE int celt_isnan(float x)
#define NEG16(x) (-(x))
#define NEG32(x) (-(x))
#define NEG32_ovflw(x) (-(x))
#define EXTRACT16(x) (x)
#define EXTEND32(x) (x)
#define SHR16(a,shift) (a)
@ -202,6 +216,7 @@ static OPUS_INLINE int celt_isnan(float x)
#define SATURATE16(x) (x)
#define ROUND16(a,shift) (a)
#define SROUND16(a,shift) (a)
#define HALF16(x) (.5f*(x))
#define HALF32(x) (.5f*(x))
@ -209,6 +224,8 @@ static OPUS_INLINE int celt_isnan(float x)
#define SUB16(a,b) ((a)-(b))
#define ADD32(a,b) ((a)+(b))
#define SUB32(a,b) ((a)-(b))
#define ADD32_ovflw(a,b) ((a)+(b))
#define SUB32_ovflw(a,b) ((a)-(b))
#define MULT16_16_16(a,b) ((a)*(b))
#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
@ -243,9 +260,9 @@ static OPUS_INLINE int celt_isnan(float x)
#ifndef GLOBAL_STACK_SIZE
#ifdef FIXED_POINT
#define GLOBAL_STACK_SIZE 100000
#define GLOBAL_STACK_SIZE 120000
#else
#define GLOBAL_STACK_SIZE 100000
#define GLOBAL_STACK_SIZE 120000
#endif
#endif

View file

@ -164,11 +164,11 @@ while (<>) {
$prefix = "";
if ($proc)
{
$prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc) unless ($apple);
$prefix = $prefix.sprintf("\t.type\t%s, %%function", $proc) unless ($apple);
# Make sure $prefix isn't empty here (for the $apple case).
# We handle mangling the label here; make sure it doesn't match
# the label handling below (if $prefix would be empty).
$prefix = "; ";
$prefix = $prefix."; ";
push(@proc_stack, $proc);
s/^[A-Za-z_\.]\w+/$symprefix$&:/;
}

View file

@ -35,12 +35,29 @@
#if defined(OPUS_HAVE_RTCD)
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N) = {
celt_inner_prod_c, /* ARMv4 */
celt_inner_prod_c, /* EDSP */
celt_inner_prod_c, /* Media */
celt_inner_prod_neon /* NEON */
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2) = {
dual_inner_prod_c, /* ARMv4 */
dual_inner_prod_c, /* EDSP */
dual_inner_prod_c, /* Media */
dual_inner_prod_neon /* NEON */
};
# endif
# if defined(FIXED_POINT)
# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int , int) = {
const opus_val16 *, opus_val32 *, int, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
@ -51,7 +68,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
# else /* !FIXED_POINT */
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
const opus_val16 *, opus_val32 *, int, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
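These function-pointer tables are what an OPUS_HAVE_RTCD build dispatches through at run time, one entry per ARM feature level. A minimal sketch of a call site, consistent with the celt_inner_prod macro added in celt/arm/pitch_arm.h further down (the wrapper name here is illustrative):

#include "cpu_support.h"   /* opus_select_arch() */
#include "pitch.h"         /* CELT_INNER_PROD_IMPL, celt_inner_prod_c/neon */

static opus_val32 inner_prod_rtcd(const opus_val16 *x, const opus_val16 *y, int N)
{
   int arch = opus_select_arch();  /* probe once: ARMv4, EDSP, Media or NEON */
   return (*CELT_INNER_PROD_IMPL[arch & OPUS_ARCHMASK])(x, y, N);
}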

View file

@ -36,7 +36,6 @@
#endif
#endif
#include <NE10_init.h>
#include <NE10_dsp.h>
#include "os_support.h"
#include "kiss_fft.h"

View file

@ -191,107 +191,10 @@ static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
vst1q_f32(sum, SUMM);
}
/*
* Function: xcorr_kernel_neon_float_process1
* ---------------------------------
* Computes single correlation values and stores in *sum
*/
static void xcorr_kernel_neon_float_process1(const float32_t *x,
const float32_t *y, float32_t *sum, int len) {
float32x4_t XX[4];
float32x4_t YY[4];
float32x2_t XX_2;
float32x2_t YY_2;
float32x4_t SUMM;
float32x2_t SUMM_2[2];
const float32_t *xi = x;
const float32_t *yi = y;
SUMM = vdupq_n_f32(0);
/* Work on 16 values per iteration */
while (len >= 16) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
XX[2] = vld1q_f32(xi);
xi += 4;
XX[3] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
YY[2] = vld1q_f32(yi);
yi += 4;
YY[3] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
SUMM = vmlaq_f32(SUMM, YY[2], XX[2]);
SUMM = vmlaq_f32(SUMM, YY[3], XX[3]);
len -= 16;
}
/* Work on 8 values */
if (len >= 8) {
XX[0] = vld1q_f32(xi);
xi += 4;
XX[1] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
YY[1] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
SUMM = vmlaq_f32(SUMM, YY[1], XX[1]);
len -= 8;
}
/* Work on 4 values */
if (len >= 4) {
XX[0] = vld1q_f32(xi);
xi += 4;
YY[0] = vld1q_f32(yi);
yi += 4;
SUMM = vmlaq_f32(SUMM, YY[0], XX[0]);
len -= 4;
}
/* Start accumulating results */
SUMM_2[0] = vget_low_f32(SUMM);
if (len >= 2) {
/* While at it, consume 2 more values if available */
XX_2 = vld1_f32(xi);
xi += 2;
YY_2 = vld1_f32(yi);
yi += 2;
SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2);
len -= 2;
}
SUMM_2[1] = vget_high_f32(SUMM);
SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]);
SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]);
/* Ok, now we have result accumulated in SUMM_2[0].0 */
if (len > 0) {
/* Case when you have one value left */
XX_2 = vld1_dup_f32(xi);
YY_2 = vld1_dup_f32(yi);
SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2);
}
vst1_lane_f32(sum, SUMM_2[0], 0);
}
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch) {
opus_val32 *xcorr, int len, int max_pitch, int arch) {
int i;
(void)arch;
celt_assert(max_pitch > 0);
celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
@ -300,12 +203,9 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
(float32_t *)xcorr+i, len);
}
/* In case max_pitch isn't multiple of 4
* compute single correlation value per iteration
*/
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
for (; i < max_pitch; i++) {
xcorr_kernel_neon_float_process1((const float32_t *)_x,
(const float32_t *)_y+i, (float32_t *)xcorr+i, len);
xcorr[i] = celt_inner_prod_neon(_x, _y+i, len);
}
}
#endif

View file

@ -44,7 +44,7 @@
.if OPUS_ARM_MAY_HAVE_NEON
@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
; xcorr_kernel_neon: @ PROC
.type xcorr_kernel_neon, %function; xcorr_kernel_neon: @ PROC
xcorr_kernel_neon_start:
@ input:
@ r3 = int len
@ -156,8 +156,8 @@ xcorr_kernel_neon_process1:
.size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP
@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
@ opus_val32 *xcorr, int len, int max_pitch)
; celt_pitch_xcorr_neon: @ PROC
@ opus_val32 *xcorr, int len, int max_pitch, int arch)
.type celt_pitch_xcorr_neon, %function; celt_pitch_xcorr_neon: @ PROC
@ input:
@ r0 = opus_val16 *_x
@ r1 = opus_val16 *_y
@ -171,6 +171,8 @@ xcorr_kernel_neon_process1:
@ r6 = int max_pitch
@ r12 = int j
@ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
@ ignored:
@ int arch
STMFD sp!, {r4-r6, lr}
LDR r6, [sp, #16]
VMOV.S32 q15, #1
@ -260,7 +262,7 @@ celt_pitch_xcorr_neon_done:
@ This will get used on ARMv7 devices without NEON, so it has been optimized
@ to take advantage of dual-issuing where possible.
; xcorr_kernel_edsp: @ PROC
.type xcorr_kernel_edsp, %function; xcorr_kernel_edsp: @ PROC
xcorr_kernel_edsp_start:
@ input:
@ r3 = int len
@ -344,7 +346,7 @@ xcorr_kernel_edsp_done:
LDMFD sp!, {r2,r4,r5,pc}
.size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP
; celt_pitch_xcorr_edsp: @ PROC
.type celt_pitch_xcorr_edsp, %function; celt_pitch_xcorr_edsp: @ PROC
@ input:
@ r0 = opus_val16 *_x (must be 32-bit aligned)
@ r1 = opus_val16 *_y (only needs to be 16-bit aligned)
@ -361,6 +363,8 @@ xcorr_kernel_edsp_done:
@ r9 = opus_val32 sum3
@ r1 = int max_pitch
@ r12 = int j
@ ignored:
@ int arch
STMFD sp!, {r4-r11, lr}
MOV r5, r1
LDR r1, [sp, #36]

View file

@ -153,7 +153,7 @@ xcorr_kernel_neon_process1
ENDP
; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
; opus_val32 *xcorr, int len, int max_pitch)
; opus_val32 *xcorr, int len, int max_pitch, int arch)
celt_pitch_xcorr_neon PROC
; input:
; r0 = opus_val16 *_x
@ -168,6 +168,8 @@ celt_pitch_xcorr_neon PROC
; r6 = int max_pitch
; r12 = int j
; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
; ignored:
; int arch
STMFD sp!, {r4-r6, lr}
LDR r6, [sp, #16]
VMOV.S32 q15, #1
@ -358,6 +360,8 @@ celt_pitch_xcorr_edsp PROC
; r9 = opus_val32 sum3
; r1 = int max_pitch
; r12 = int j
; ignored:
; int arch
STMFD sp!, {r4-r11, lr}
MOV r5, r1
LDR r1, [sp, #36]

View file

@ -34,7 +34,6 @@
#if !defined(FFT_ARM_H)
#define FFT_ARM_H
#include "config.h"
#include "kiss_fft.h"
#if defined(HAVE_ARM_NE10)

View file

@ -37,7 +37,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
"#MULT16_32_Q16\n\t"
"smull %0, %1, %2, %3\n\t"
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b),"r"(a<<16)
: "%r"(b),"r"(SHL32(a,16))
);
return rd_hi;
}
@ -54,10 +54,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
"#MULT16_32_Q15\n\t"
"smull %0, %1, %2, %3\n\t"
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b), "r"(a<<16)
: "%r"(b), "r"(SHL32(a,16))
);
/*We intentionally don't OR in the high bit of rd_lo for speed.*/
return rd_hi<<1;
return SHL32(rd_hi,1);
}
#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
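The a<<16 to SHL32(a,16) changes here are not cosmetic: left-shifting a negative signed value is undefined behaviour in C. In the generic fixed-point header the macro is defined along these lines (paraphrased from celt/fixed_generic.h), going through an unsigned intermediate so the shift is well-defined:

#define SHL32(a,shift) ((opus_val32)((opus_uint32)(a)<<(shift)))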

View file

@ -59,7 +59,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
: "=r"(res)
: "r"(b), "r"(a)
);
return res<<1;
return SHL32(res,1);
}
#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
@ -76,7 +76,7 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
"#MAC16_32_Q15\n\t"
"smlawb %0, %1, %2, %3;\n"
: "=r"(res)
: "r"(b<<1), "r"(a), "r"(c)
: "r"(SHL32(b,1)), "r"(a), "r"(c)
);
return res;
}

View file

@ -33,7 +33,6 @@
#if !defined(MDCT_ARM_H)
#define MDCT_ARM_H
#include "config.h"
#include "mdct.h"
#if defined(HAVE_ARM_NE10)

View file

@ -30,11 +30,47 @@
# include "armcpu.h"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N);
void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01,
const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_CELT_INNER_PROD (1)
# define OVERRIDE_DUAL_INNER_PROD (1)
# define celt_inner_prod(x, y, N, arch) ((void)(arch), PRESUME_NEON(celt_inner_prod)(x, y, N))
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), PRESUME_NEON(dual_inner_prod)(x, y01, y02, N, xy1, xy2))
# endif
# endif
# if !defined(OVERRIDE_CELT_INNER_PROD)
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N);
# define OVERRIDE_CELT_INNER_PROD (1)
# define celt_inner_prod(x, y, N, arch) ((*CELT_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y, N))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_CELT_INNER_PROD (1)
# define celt_inner_prod(x, y, N, arch) ((void)(arch), celt_inner_prod_neon(x, y, N))
# endif
# endif
# if !defined(OVERRIDE_DUAL_INNER_PROD)
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x,
const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
# define OVERRIDE_DUAL_INNER_PROD (1)
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((*DUAL_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_DUAL_INNER_PROD (1)
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), dual_inner_prod_neon(x, y01, y02, N, xy1, xy2))
# endif
# endif
# if defined(FIXED_POINT)
# if defined(OPUS_ARM_MAY_HAVE_NEON)
opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
opus_val32 *xcorr, int len, int max_pitch, int arch);
# endif
# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
@ -43,7 +79,7 @@ opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
opus_val32 *xcorr, int len, int max_pitch, int arch);
# endif
# if defined(OPUS_HAVE_RTCD) && \
@ -52,18 +88,15 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
extern opus_val32
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
const opus_val16 *, opus_val32 *, int, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# define celt_pitch_xcorr (*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])
# elif defined(OPUS_ARM_PRESUME_EDSP) || \
defined(OPUS_ARM_PRESUME_MEDIA) || \
defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
# define celt_pitch_xcorr (PRESUME_NEON(celt_pitch_xcorr))
# endif
@ -99,25 +132,22 @@ extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
/* Float case */
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
opus_val32 *xcorr, int len, int max_pitch, int arch);
#endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
const opus_val16 *, opus_val32 *, int, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# define celt_pitch_xcorr (*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
# define celt_pitch_xcorr celt_pitch_xcorr_float_neon
# endif

View file

@ -0,0 +1,290 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "pitch.h"
#ifdef FIXED_POINT
opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
{
int i;
opus_val32 xy;
int16x8_t x_s16x8, y_s16x8;
int32x4_t xy_s32x4 = vdupq_n_s32(0);
int64x2_t xy_s64x2;
int64x1_t xy_s64x1;
for (i = 0; i < N - 7; i += 8) {
x_s16x8 = vld1q_s16(&x[i]);
y_s16x8 = vld1q_s16(&y[i]);
xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8));
xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8));
}
if (N - i >= 4) {
const int16x4_t x_s16x4 = vld1_s16(&x[i]);
const int16x4_t y_s16x4 = vld1_s16(&y[i]);
xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4);
i += 4;
}
xy_s64x2 = vpaddlq_s32(xy_s32x4);
xy_s64x1 = vadd_s64(vget_low_s64(xy_s64x2), vget_high_s64(xy_s64x2));
xy = vget_lane_s32(vreinterpret_s32_s64(xy_s64x1), 0);
for (; i < N; i++) {
xy = MAC16_16(xy, x[i], y[i]);
}
#ifdef OPUS_CHECK_ASM
celt_assert(celt_inner_prod_c(x, y, N) == xy);
#endif
return xy;
}
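Because the OPUS_CHECK_ASM assert above compares against celt_inner_prod_c(), the NEON path must stay bit-exact with the plain scalar reduction. For reference, that reduction is just the following (a sketch equivalent to celt_inner_prod_c in the fixed-point build):

static opus_val32 inner_prod_ref(const opus_val16 *x, const opus_val16 *y, int N)
{
   int i;
   opus_val32 xy = 0;
   for (i = 0; i < N; i++)
      xy += (opus_val32)x[i] * y[i];   /* same as MAC16_16(xy, x[i], y[i]) */
   return xy;
}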
void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
opus_val32 xy01, xy02;
int16x8_t x_s16x8, y01_s16x8, y02_s16x8;
int32x4_t xy01_s32x4 = vdupq_n_s32(0);
int32x4_t xy02_s32x4 = vdupq_n_s32(0);
int64x2_t xy01_s64x2, xy02_s64x2;
int64x1_t xy01_s64x1, xy02_s64x1;
for (i = 0; i < N - 7; i += 8) {
x_s16x8 = vld1q_s16(&x[i]);
y01_s16x8 = vld1q_s16(&y01[i]);
y02_s16x8 = vld1q_s16(&y02[i]);
xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8));
xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8));
xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8));
xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8));
}
if (N - i >= 4) {
const int16x4_t x_s16x4 = vld1_s16(&x[i]);
const int16x4_t y01_s16x4 = vld1_s16(&y01[i]);
const int16x4_t y02_s16x4 = vld1_s16(&y02[i]);
xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4);
xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4);
i += 4;
}
xy01_s64x2 = vpaddlq_s32(xy01_s32x4);
xy02_s64x2 = vpaddlq_s32(xy02_s32x4);
xy01_s64x1 = vadd_s64(vget_low_s64(xy01_s64x2), vget_high_s64(xy01_s64x2));
xy02_s64x1 = vadd_s64(vget_low_s64(xy02_s64x2), vget_high_s64(xy02_s64x2));
xy01 = vget_lane_s32(vreinterpret_s32_s64(xy01_s64x1), 0);
xy02 = vget_lane_s32(vreinterpret_s32_s64(xy02_s64x1), 0);
for (; i < N; i++) {
xy01 = MAC16_16(xy01, x[i], y01[i]);
xy02 = MAC16_16(xy02, x[i], y02[i]);
}
*xy1 = xy01;
*xy2 = xy02;
#ifdef OPUS_CHECK_ASM
{
opus_val32 xy1_c, xy2_c;
dual_inner_prod_c(x, y01, y02, N, &xy1_c, &xy2_c);
celt_assert(xy1_c == *xy1);
celt_assert(xy2_c == *xy2);
}
#endif
}
#else /* !FIXED_POINT */
/* ========================================================================== */
#ifdef OPUS_CHECK_ASM
/* This part of the code simulates floating-point NEON operations. */
/* celt_inner_prod_neon_float_c_simulation() simulates the floating-point */
/* operations of celt_inner_prod_neon(), and both functions should have */
/* bit-exact output. */
static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, int N)
{
int i;
opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0;
for (i = 0; i < N - 3; i += 4) {
xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]);
xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]);
xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]);
xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]);
}
xy0 += xy2;
xy1 += xy3;
xy = xy0 + xy1;
for (; i < N; i++) {
xy = MAC16_16(xy, x[i], y[i]);
}
return xy;
}
/* dual_inner_prod_neon_float_c_simulation() simulates the floating-point */
/* operations of dual_inner_prod_neon(), and both functions should have bit */
/* exact output. */
static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
opus_val32 xy01, xy02, xy01_0 = 0, xy01_1 = 0, xy01_2 = 0, xy01_3 = 0, xy02_0 = 0, xy02_1 = 0, xy02_2 = 0, xy02_3 = 0;
for (i = 0; i < N - 3; i += 4) {
xy01_0 = MAC16_16(xy01_0, x[i + 0], y01[i + 0]);
xy01_1 = MAC16_16(xy01_1, x[i + 1], y01[i + 1]);
xy01_2 = MAC16_16(xy01_2, x[i + 2], y01[i + 2]);
xy01_3 = MAC16_16(xy01_3, x[i + 3], y01[i + 3]);
xy02_0 = MAC16_16(xy02_0, x[i + 0], y02[i + 0]);
xy02_1 = MAC16_16(xy02_1, x[i + 1], y02[i + 1]);
xy02_2 = MAC16_16(xy02_2, x[i + 2], y02[i + 2]);
xy02_3 = MAC16_16(xy02_3, x[i + 3], y02[i + 3]);
}
xy01_0 += xy01_2;
xy02_0 += xy02_2;
xy01_1 += xy01_3;
xy02_1 += xy02_3;
xy01 = xy01_0 + xy01_1;
xy02 = xy02_0 + xy02_1;
for (; i < N; i++) {
xy01 = MAC16_16(xy01, x[i], y01[i]);
xy02 = MAC16_16(xy02, x[i], y02[i]);
}
*xy1 = xy01;
*xy2 = xy02;
}
#endif /* OPUS_CHECK_ASM */
/* ========================================================================== */
opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
{
int i;
opus_val32 xy;
float32x4_t xy_f32x4 = vdupq_n_f32(0);
float32x2_t xy_f32x2;
for (i = 0; i < N - 7; i += 8) {
float32x4_t x_f32x4, y_f32x4;
x_f32x4 = vld1q_f32(&x[i]);
y_f32x4 = vld1q_f32(&y[i]);
xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
x_f32x4 = vld1q_f32(&x[i + 4]);
y_f32x4 = vld1q_f32(&y[i + 4]);
xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
}
if (N - i >= 4) {
const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
const float32x4_t y_f32x4 = vld1q_f32(&y[i]);
xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
i += 4;
}
xy_f32x2 = vadd_f32(vget_low_f32(xy_f32x4), vget_high_f32(xy_f32x4));
xy_f32x2 = vpadd_f32(xy_f32x2, xy_f32x2);
xy = vget_lane_f32(xy_f32x2, 0);
for (; i < N; i++) {
xy = MAC16_16(xy, x[i], y[i]);
}
#ifdef OPUS_CHECK_ASM
celt_assert(ABS32(celt_inner_prod_neon_float_c_simulation(x, y, N) - xy) <= VERY_SMALL);
#endif
return xy;
}
void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
int N, opus_val32 *xy1, opus_val32 *xy2)
{
int i;
opus_val32 xy01, xy02;
float32x4_t xy01_f32x4 = vdupq_n_f32(0);
float32x4_t xy02_f32x4 = vdupq_n_f32(0);
float32x2_t xy01_f32x2, xy02_f32x2;
for (i = 0; i < N - 7; i += 8) {
float32x4_t x_f32x4, y01_f32x4, y02_f32x4;
x_f32x4 = vld1q_f32(&x[i]);
y01_f32x4 = vld1q_f32(&y01[i]);
y02_f32x4 = vld1q_f32(&y02[i]);
xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
x_f32x4 = vld1q_f32(&x[i + 4]);
y01_f32x4 = vld1q_f32(&y01[i + 4]);
y02_f32x4 = vld1q_f32(&y02[i + 4]);
xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
}
if (N - i >= 4) {
const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
const float32x4_t y01_f32x4 = vld1q_f32(&y01[i]);
const float32x4_t y02_f32x4 = vld1q_f32(&y02[i]);
xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
i += 4;
}
xy01_f32x2 = vadd_f32(vget_low_f32(xy01_f32x4), vget_high_f32(xy01_f32x4));
xy02_f32x2 = vadd_f32(vget_low_f32(xy02_f32x4), vget_high_f32(xy02_f32x4));
xy01_f32x2 = vpadd_f32(xy01_f32x2, xy01_f32x2);
xy02_f32x2 = vpadd_f32(xy02_f32x2, xy02_f32x2);
xy01 = vget_lane_f32(xy01_f32x2, 0);
xy02 = vget_lane_f32(xy02_f32x2, 0);
for (; i < N; i++) {
xy01 = MAC16_16(xy01, x[i], y01[i]);
xy02 = MAC16_16(xy02, x[i], y02[i]);
}
*xy1 = xy01;
*xy2 = xy02;
#ifdef OPUS_CHECK_ASM
{
opus_val32 xy1_c, xy2_c;
dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c);
celt_assert(ABS32(xy1_c - *xy1) <= VERY_SMALL);
celt_assert(ABS32(xy2_c - *xy2) <= VERY_SMALL);
}
#endif
}
#endif /* FIXED_POINT */

View file

@ -65,7 +65,7 @@ opus_uint32 celt_lcg_rand(opus_uint32 seed)
/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
with this approximation is important because it has an impact on the bit allocation */
static opus_int16 bitexact_cos(opus_int16 x)
opus_int16 bitexact_cos(opus_int16 x)
{
opus_int32 tmp;
opus_int16 x2;
@ -77,7 +77,7 @@ static opus_int16 bitexact_cos(opus_int16 x)
return 1+x2;
}
static int bitexact_log2tan(int isin,int icos)
int bitexact_log2tan(int isin,int icos)
{
int lc;
int ls;
@ -92,10 +92,11 @@ static int bitexact_log2tan(int isin,int icos)
#ifdef FIXED_POINT
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
(void)arch;
N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
@ -155,7 +156,7 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
#else /* FIXED_POINT */
/* Compute the amplitude (sqrt energy) in each of the bands */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
@ -164,7 +165,7 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
for (i=0;i<end;i++)
{
opus_val32 sum;
sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
bandE[i+c*m->nbEBands] = celt_sqrt(sum);
/*printf ("%f ", bandE[i+c*m->nbEBands]);*/
}
@ -224,9 +225,9 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
#endif
j=M*eBands[i];
band_end = M*eBands[i+1];
lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
#ifndef FIXED_POINT
g = celt_exp2(lg);
g = celt_exp2(MIN32(32.f, lg));
#else
/* Handle the integer part of the log energy */
shift = 16-(lg>>DB_SHIFT);
@ -241,12 +242,12 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
/* Handle extreme gains with negative shift. */
if (shift<0)
{
/* For shift < -2 we'd be likely to overflow, so we're capping
the gain here. This shouldn't happen unless the bitstream is
already corrupted. */
if (shift < -2)
/* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
capping the gain here, which is equivalent to a cap of 18 on lg.
This shouldn't trigger unless the bitstream is already corrupted. */
if (shift <= -2)
{
g = 32767;
g = 16384;
shift = -2;
}
do {
@ -360,6 +361,30 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
}
}
/* Compute the weights to use for optimizing normalized distortion across
channels. We use the amplitude to weight square distortion, which means
that we use the square root of the value we would have been using if we
wanted to minimize the MSE in the non-normalized domain. This roughly
corresponds to some quick-and-dirty perceptual experiments I ran to
measure inter-aural masking (there doesn't seem to be any published data
on the topic). */
static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
{
celt_ener minE;
#if FIXED_POINT
int shift;
#endif
minE = MIN32(Ex, Ey);
/* Adjustment to make the weights a bit more conservative. */
Ex = ADD32(Ex, minE/3);
Ey = ADD32(Ey, minE/3);
#if FIXED_POINT
shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
#endif
w[0] = VSHR32(Ex, shift);
w[1] = VSHR32(Ey, shift);
}
static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
{
int i = bandID;
@ -647,6 +672,7 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
struct band_ctx {
int encode;
int resynth;
const CELTMode *m;
int i;
int intensity;
@ -657,6 +683,9 @@ struct band_ctx {
const celt_ener *bandE;
opus_uint32 seed;
int arch;
int theta_round;
int disable_inv;
int avoid_split_noise;
};
struct split_ctx {
@ -714,8 +743,35 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
if (qn!=1)
{
if (encode)
itheta = (itheta*(opus_int32)qn+8192)>>14;
{
if (!stereo || ctx->theta_round == 0)
{
itheta = (itheta*(opus_int32)qn+8192)>>14;
if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
{
/* Check if the selected value of theta will cause the bit allocation
to inject noise on one side. If so, make sure the energy of that side
is zero. */
int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
imid = bitexact_cos((opus_int16)unquantized);
iside = bitexact_cos((opus_int16)(16384-unquantized));
delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
if (delta > *b)
itheta = qn;
else if (delta < -*b)
itheta = 0;
}
} else {
int down;
/* Bias quantization towards itheta=0 and itheta=16384. */
int bias = itheta > 8192 ? 32767/qn : -32767/qn;
down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
if (ctx->theta_round < 0)
itheta = down;
else
itheta = down+1;
}
}
/* Entropy coding of the angle. We use a uniform pdf for the
time split, a step for stereo, and a triangular one for the rest. */
if (stereo && N>2)
@ -793,7 +849,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
} else if (stereo) {
if (encode)
{
inv = itheta > 8192;
inv = itheta > 8192 && !ctx->disable_inv;
if (inv)
{
int j;
@ -810,6 +866,9 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
inv = ec_dec_bit_logp(ec, 2);
} else
inv = 0;
/* inv flag override to avoid problems with downmixing. */
if (ctx->disable_inv)
inv = 0;
itheta = 0;
}
qalloc = ec_tell_frac(ec) - tell;
@ -845,11 +904,6 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
celt_norm *lowband_out)
{
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int c;
int stereo;
celt_norm *x = X;
@ -874,7 +928,7 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
ctx->remaining_bits -= 1<<BITRES;
b-=1<<BITRES;
}
if (resynth)
if (ctx->resynth)
x[0] = sign ? -NORM_SCALING : NORM_SCALING;
x = Y;
} while (++c<1+stereo);
@ -899,11 +953,6 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
int B0=B;
opus_val16 mid=0, side=0;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
celt_norm *Y=NULL;
int encode;
const CELTMode *m;
@ -935,8 +984,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
fill = (fill&1)|(fill<<1);
B = (B+1)>>1;
compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
LM, 0, &fill);
compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
imid = sctx.imid;
iside = sctx.iside;
delta = sctx.delta;
@ -970,24 +1018,20 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
rebalance = ctx->remaining_bits;
if (mbits >= sbits)
{
cm = quant_partition(ctx, X, N, mbits, B,
lowband, LM,
cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
MULT16_16_P15(gain,mid), fill);
rebalance = mbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=0)
sbits += rebalance - (3<<BITRES);
cm |= quant_partition(ctx, Y, N, sbits, B,
next_lowband2, LM,
cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
} else {
cm = quant_partition(ctx, Y, N, sbits, B,
next_lowband2, LM,
cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
rebalance = sbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=16384)
mbits += rebalance - (3<<BITRES);
cm |= quant_partition(ctx, X, N, mbits, B,
lowband, LM,
cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
MULT16_16_P15(gain,mid), fill);
}
} else {
@ -1012,18 +1056,14 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
/* Finally do the actual quantization */
if (encode)
{
cm = alg_quant(X, N, K, spread, B, ec
#ifdef RESYNTH
, gain
#endif
);
cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
} else {
cm = alg_unquant(X, N, K, spread, B, ec, gain);
}
} else {
/* If there's no pulse, fill the band anyway */
int j;
if (resynth)
if (ctx->resynth)
{
unsigned cm_mask;
/* B can be as large as 16, so this shift might overflow an int on a
@ -1080,11 +1120,6 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
int recombine=0;
int longBlocks;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int k;
int encode;
int tf_change;
@ -1151,11 +1186,10 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
}
cm = quant_partition(ctx, X, N, b, B, lowband,
LM, gain, fill);
cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
/* This code is used by the decoder and by the resynthesis-enabled encoder */
if (resynth)
if (ctx->resynth)
{
/* Undo the sample reorganization going from time order to frequency order */
if (B0>1)
@ -1208,11 +1242,6 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
int inv = 0;
opus_val16 mid=0, side=0;
unsigned cm=0;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !ctx->encode;
#endif
int mbits, sbits, delta;
int itheta;
int qalloc;
@ -1232,8 +1261,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
orig_fill = fill;
compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
LM, 1, &fill);
compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
inv = sctx.inv;
imid = sctx.imid;
iside = sctx.iside;
@ -1281,13 +1309,13 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
sign = 1-2*sign;
/* We use orig_fill here because we want to fold the side, but if
itheta==16384, we'll have cleared the low bits of fill. */
cm = quant_band(ctx, x2, N, mbits, B, lowband,
LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, orig_fill);
/* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
and there's no need to worry about mixing with the other channel. */
y2[0] = -sign*x2[1];
y2[1] = sign*x2[0];
if (resynth)
if (ctx->resynth)
{
celt_norm tmp;
X[0] = MULT16_16_Q15(mid, X[0]);
@ -1314,38 +1342,32 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
{
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
mid for folding later. */
cm = quant_band(ctx, X, N, mbits, B,
lowband, LM, lowband_out,
Q15ONE, lowband_scratch, fill);
cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, fill);
rebalance = mbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=0)
sbits += rebalance - (3<<BITRES);
/* For a stereo split, the high bits of fill are always zero, so no
folding will be done to the side. */
cm |= quant_band(ctx, Y, N, sbits, B,
NULL, LM, NULL,
side, NULL, fill>>B);
cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
} else {
/* For a stereo split, the high bits of fill are always zero, so no
folding will be done to the side. */
cm = quant_band(ctx, Y, N, sbits, B,
NULL, LM, NULL,
side, NULL, fill>>B);
cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
rebalance = sbits - (rebalance-ctx->remaining_bits);
if (rebalance > 3<<BITRES && itheta!=16384)
mbits += rebalance - (3<<BITRES);
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
mid for folding later. */
cm |= quant_band(ctx, X, N, mbits, B,
lowband, LM, lowband_out,
Q15ONE, lowband_scratch, fill);
cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
lowband_scratch, fill);
}
}
/* This code is used by the decoder and by the resynthesis-enabled encoder */
if (resynth)
if (ctx->resynth)
{
if (N!=2)
stereo_merge(X, Y, mid, N, ctx->arch);
@ -1359,19 +1381,38 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
return cm;
}
static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
{
int n1, n2;
const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
n1 = M*(eBands[start+1]-eBands[start]);
n2 = M*(eBands[start+2]-eBands[start+1]);
/* Duplicate enough of the first band folding data to be able to fold the second band.
Copies no data for CELT-only mode. */
OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
if (dual_stereo)
OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
}
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
opus_uint32 *seed, int arch)
opus_uint32 *seed, int complexity, int arch, int disable_inv)
{
int i;
opus_int32 remaining_bits;
const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
VARDECL(celt_norm, _norm);
VARDECL(celt_norm, _lowband_scratch);
VARDECL(celt_norm, X_save);
VARDECL(celt_norm, Y_save);
VARDECL(celt_norm, X_save2);
VARDECL(celt_norm, Y_save2);
VARDECL(celt_norm, norm_save2);
int resynth_alloc;
celt_norm *lowband_scratch;
int B;
int M;
@ -1379,10 +1420,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
int update_lowband = 1;
int C = Y_ != NULL ? 2 : 1;
int norm_offset;
int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !encode;
int resynth = !encode || theta_rdo;
#endif
struct band_ctx ctx;
SAVE_STACK;
@ -1395,9 +1437,24 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
norm = _norm;
norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
/* We can use the last band as scratch space because we don't need that
scratch space for the last band. */
lowband_scratch = X_+M*eBands[m->nbEBands-1];
/* For decoding, we can use the last band as scratch space because we don't need that
scratch space for the last band and we don't care about the data there until we're
decoding the last band. */
if (encode && resynth)
resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
else
resynth_alloc = ALLOC_NONE;
ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
if (encode && resynth)
lowband_scratch = _lowband_scratch;
else
lowband_scratch = X_+M*eBands[m->nbEBands-1];
ALLOC(X_save, resynth_alloc, celt_norm);
ALLOC(Y_save, resynth_alloc, celt_norm);
ALLOC(X_save2, resynth_alloc, celt_norm);
ALLOC(Y_save2, resynth_alloc, celt_norm);
ALLOC(norm_save2, resynth_alloc, celt_norm);
lowband_offset = 0;
ctx.bandE = bandE;
@ -1408,6 +1465,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
ctx.seed = *seed;
ctx.spread = spread;
ctx.arch = arch;
ctx.disable_inv = disable_inv;
ctx.resynth = resynth;
ctx.theta_round = 0;
/* Avoid injecting noise in the first band on transients. */
ctx.avoid_split_noise = B > 1;
for (i=start;i<end;i++)
{
opus_int32 tell;
@ -1445,8 +1507,15 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
b = 0;
}
#ifdef ENABLE_UPDATE_DRAFT
if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
lowband_offset = i;
if (i == start+1)
special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
#else
if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
lowband_offset = i;
#endif
tf_change = tf_res[i];
ctx.tf_change = tf_change;
@ -1457,7 +1526,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
Y = norm;
lowband_scratch = NULL;
}
if (i==end-1)
if (last && !theta_rdo)
lowband_scratch = NULL;
/* Get a conservative estimate of the collapse_mask's for the bands we're
@ -1472,7 +1541,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
fold_start = lowband_offset;
while(M*eBands[--fold_start] > effective_lowband+norm_offset);
fold_end = lowband_offset-1;
#ifdef ENABLE_UPDATE_DRAFT
while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
#else
while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
#endif
x_cm = y_cm = 0;
fold_i = fold_start; do {
x_cm |= collapse_masks[fold_i*C+0];
@ -1505,13 +1578,77 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
} else {
if (Y!=NULL)
{
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
if (theta_rdo && i < intensity)
{
ec_ctx ec_save, ec_save2;
struct band_ctx ctx_save, ctx_save2;
opus_val32 dist0, dist1;
unsigned cm, cm2;
int nstart_bytes, nend_bytes, save_bytes;
unsigned char *bytes_buf;
unsigned char bytes_save[1275];
opus_val16 w[2];
compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
/* Make a copy. */
cm = x_cm|y_cm;
ec_save = *ec;
ctx_save = ctx;
OPUS_COPY(X_save, X, N);
OPUS_COPY(Y_save, Y, N);
/* Encode and round down. */
ctx.theta_round = -1;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
/* Save first result. */
cm2 = x_cm;
ec_save2 = *ec;
ctx_save2 = ctx;
OPUS_COPY(X_save2, X, N);
OPUS_COPY(Y_save2, Y, N);
if (!last)
OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
nstart_bytes = ec_save.offs;
nend_bytes = ec_save.storage;
bytes_buf = ec_save.buf+nstart_bytes;
save_bytes = nend_bytes-nstart_bytes;
OPUS_COPY(bytes_save, bytes_buf, save_bytes);
/* Restore */
*ec = ec_save;
ctx = ctx_save;
OPUS_COPY(X, X_save, N);
OPUS_COPY(Y, Y_save, N);
if (i == start+1)
special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
/* Encode and round up. */
ctx.theta_round = 1;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
if (dist0 >= dist1) {
x_cm = cm2;
*ec = ec_save2;
ctx = ctx_save2;
OPUS_COPY(X, X_save2, N);
OPUS_COPY(Y, Y_save2, N);
if (!last)
OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
OPUS_COPY(bytes_buf, bytes_save, save_bytes);
}
} else {
ctx.theta_round = 0;
x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
}
} else {
x_cm = quant_band(&ctx, X, N, b, B,
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
}
y_cm = x_cm;
}
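This block is the new theta RDO for stereo bands below the intensity threshold: the band is encoded twice, once with theta rounded down and once rounded up, and the pass whose decoded output correlates best with the unquantized input (weighted per channel by compute_channel_weights) is kept. In outline, using the names from the patch:

/* save ec, ctx, X, Y
   ctx.theta_round = -1; encode; dist0 = w[0]*<X_save,X> + w[1]*<Y_save,Y>
   stash this result; restore the saved state
   ctx.theta_round = +1; encode; dist1 = w[0]*<X_save,X> + w[1]*<Y_save,Y>
   if (dist0 >= dist1) reinstate the round-down result
   (dist0/dist1 are correlations, so larger means a closer match) */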
@ -1521,6 +1658,9 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
/* Update the folding position only as long as we have 1 bit/sample depth. */
update_lowband = b>(N<<BITRES);
/* We only need to avoid noise on a split for the first band. After that, we
have folding. */
ctx.avoid_split_noise = 0;
}
*seed = ctx.seed;

View file

@ -36,12 +36,15 @@
#include "entdec.h"
#include "rate.h"
opus_int16 bitexact_cos(opus_int16 x);
int bitexact_log2tan(int isin,int icos);
/** Compute the amplitude (sqrt energy) in each of the bands
* @param m Mode data
* @param X Spectrum
* @param bandE Square root of the energy for each band (returned)
*/
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch);
/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
@ -105,7 +108,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
int arch);
int complexity, int arch, int disable_inv);
void anti_collapse(const CELTMode *m, celt_norm *X_,
unsigned char *collapse_masks, int LM, int C, int size, int start,

View file

@ -111,26 +111,31 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=SHL32(x[i-T+3],1);
t = MAC16_32_Q16(x[i+1], g10, x1);
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
t = SATURATE(t, SIG_SAT);
y[i+1] = t;
x3=SHL32(x[i-T+4],1);
t = MAC16_32_Q16(x[i+2], g10, x0);
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
t = SATURATE(t, SIG_SAT);
y[i+2] = t;
x2=SHL32(x[i-T+5],1);
t = MAC16_32_Q16(x[i+3], g10, x4);
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
t = SATURATE(t, SIG_SAT);
y[i+3] = t;
x1=SHL32(x[i-T+6],1);
t = MAC16_32_Q16(x[i+4], g10, x3);
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
t = SATURATE(t, SIG_SAT);
y[i+4] = t;
}
#ifdef CUSTOM_MODES
@ -141,6 +146,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
t = MAC16_32_Q16(x[i], g10, x2);
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
t = SATURATE(t, SIG_SAT);
y[i] = t;
x4=x3;
x3=x2;
@ -169,6 +175,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+ MULT16_32_Q15(g10,x2)
+ MULT16_32_Q15(g11,ADD32(x1,x3))
+ MULT16_32_Q15(g12,ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
x2=x1;
@ -200,6 +207,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
OPUS_MOVE(y, x, N);
return;
}
/* When the gain is zero, T0 and/or T1 is set to zero. We need them
to be at least 2 to avoid processing garbage data. */
T0 = IMAX(T0, COMBFILTER_MINPERIOD);
T1 = IMAX(T1, COMBFILTER_MINPERIOD);
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
@ -225,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+ MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+ MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+ MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
y[i] = SATURATE(y[i], SIG_SAT);
x4=x3;
x3=x2;
x2=x1;
@ -244,11 +256,16 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
}
#endif /* OVERRIDE_comb_filter */
/* TF change table. Positive values mean better frequency resolution (longer
effective window), whereas negative values mean better time resolution
(shorter effective window). The second index is computed as:
4*isTransient + 2*tf_select + per_band_flag */
const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1},
{0, -1, 0, -2, 1, 0, 1,-1},
{0, -2, 0, -3, 2, 0, 1,-1},
{0, -2, 0, -3, 3, 0, 1,-1},
/*isTransient=0 isTransient=1 */
{0, -1, 0, -1, 0,-1, 0,-1}, /* 2.5 ms */
{0, -1, 0, -2, 1, 0, 1,-1}, /* 5 ms */
{0, -2, 0, -3, 2, 0, 1,-1}, /* 10 ms */
{0, -2, 0, -3, 3, 0, 1,-1}, /* 20 ms */
};
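/* Index example: isTransient=1, tf_select=1, per_band_flag=0 selects column
   4*1 + 2*1 + 0 = 6, i.e. tf_change 0, 1, 1, 1 for the four rows above. */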

View file

@ -50,6 +50,8 @@ extern "C" {
#define CELTDecoder OpusCustomDecoder
#define CELTMode OpusCustomMode
#define LEAK_BANDS 19
typedef struct {
int valid;
float tonality;
@ -57,17 +59,25 @@ typedef struct {
float noisiness;
float activity;
float music_prob;
int bandwidth;
}AnalysisInfo;
float vad_prob;
int bandwidth;
float activity_probability;
/* Store as Q6 char to save space. */
unsigned char leak_boost[LEAK_BANDS];
} AnalysisInfo;
typedef struct {
int signalType;
int offset;
} SILKInfo;
#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
/* Encoder/decoder Requests */
#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
/* Expose this option again when variable framesize actually works */
#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */
/* Encoder/decoder Requests */
#define CELT_SET_PREDICTION_REQUEST 10002
@ -116,6 +126,9 @@ typedef struct {
#define OPUS_SET_ENERGY_MASK_REQUEST 10026
#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
#define CELT_SET_SILK_INFO_REQUEST 10028
#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
/* Encoder stuff */
int celt_encoder_get_size(int channels);
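CELT_SET_SILK_INFO follows the same request/checked-pointer pattern as the other ctl macros in this header. A hypothetical hybrid-mode call site (the handle name and field values are illustrative, assuming the usual celt_encoder_ctl entry point):

SILKInfo info;
info.signalType = 2;   /* assumption: SILK's TYPE_VOICED */
info.offset = 100;
celt_encoder_ctl(celt_enc, CELT_SET_SILK_INFO(&info));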

View file

@ -73,6 +73,7 @@ struct OpusCustomDecoder {
int downsample;
int start, end;
int signalling;
int disable_inv;
int arch;
/* Everything beyond this point gets cleared on a reset */
@ -163,6 +164,11 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
st->start = 0;
st->end = st->mode->effEBands;
st->signalling = 1;
#ifdef ENABLE_UPDATE_DRAFT
st->disable_inv = channels == 1;
#else
st->disable_inv = 0;
#endif
st->arch = opus_select_arch();
opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
@ -177,6 +183,36 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
}
#endif /* CUSTOM_MODES */
#ifndef CUSTOM_MODES
/* Special case for stereo with no downsampling and no accumulation. This is
quite common and we can make it faster by processing both channels in the
same loop, reducing overhead due to the dependency loop in the IIR filter. */
static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
celt_sig *mem)
{
celt_sig * OPUS_RESTRICT x0;
celt_sig * OPUS_RESTRICT x1;
celt_sig m0, m1;
int j;
x0=in[0];
x1=in[1];
m0 = mem[0];
m1 = mem[1];
for (j=0;j<N;j++)
{
celt_sig tmp0, tmp1;
/* Add VERY_SMALL to x[] first to reduce dependency chain. */
tmp0 = x0[j] + VERY_SMALL + m0;
tmp1 = x1[j] + VERY_SMALL + m1;
m0 = MULT16_32_Q15(coef0, tmp0);
m1 = MULT16_32_Q15(coef0, tmp1);
pcm[2*j ] = SCALEOUT(SIG2WORD16(tmp0));
pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
}
mem[0] = m0;
mem[1] = m1;
}
#endif
#ifndef RESYNTH
static
@ -190,6 +226,14 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
opus_val16 coef0;
VARDECL(celt_sig, scratch);
SAVE_STACK;
#ifndef CUSTOM_MODES
/* Short version for common case. */
if (downsample == 1 && C == 2 && !accum)
{
deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
return;
}
#endif
#ifndef FIXED_POINT
(void)accum;
celt_assert(accum==0);
@ -225,7 +269,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
/* Shortcut for the standard (non-custom modes) case */
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
scratch[j] = tmp;
}
@ -246,7 +290,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
{
for (j=0;j<N;j++)
{
celt_sig tmp = x[j] + m + VERY_SMALL;
celt_sig tmp = x[j] + VERY_SMALL + m;
m = MULT16_32_Q15(coef0, tmp);
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
}
@ -333,7 +377,7 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
downsample, silence);
for (i=0;i<N;i++)
freq[i] = HALF32(ADD32(freq[i],freq2[i]));
freq[i] = ADD32(HALF32(freq[i]), HALF32(freq2[i]));
for (b=0;b<B;b++)
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
} else {
@ -345,6 +389,12 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
} while (++c<CC);
}
/* Saturate IMDCT output so that we can't overflow in the pitch postfilter
or in the de-emphasis filter. */
c=0; do {
for (i=0;i<N;i++)
out_syn[c][i] = SATURATE(out_syn[c][i], SIG_SAT);
} while (++c<CC);
RESTORE_STACK;
}
@ -506,10 +556,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
} else {
/* Pitch-based PLC */
const opus_val16 *window;
opus_val16 *exc;
opus_val16 fade = Q15ONE;
int pitch_index;
VARDECL(opus_val32, etmp);
VARDECL(opus_val16, exc);
VARDECL(opus_val16, _exc);
if (loss_count == 0)
{
@ -520,7 +571,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
}
ALLOC(etmp, overlap, opus_val32);
ALLOC(exc, MAX_PERIOD, opus_val16);
ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
exc = _exc+LPC_ORDER;
window = mode->window;
c=0; do {
opus_val16 decay;
@ -561,6 +613,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
#endif
}
_celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
#ifdef FIXED_POINT
/* For fixed-point, apply bandwidth expansion until we can guarantee that
no overflow can happen in the IIR filter. This means:
32768*sum(abs(filter)) < 2^31 */
while (1) {
opus_val16 tmp=Q15ONE;
opus_val32 sum=QCONST16(1., SIG_SHIFT);
for (i=0;i<LPC_ORDER;i++)
sum += ABS16(lpc[c*LPC_ORDER+i]);
if (sum < 65535) break;
for (i=0;i<LPC_ORDER;i++)
{
tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
}
}
#endif
}
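The expansion loop is a standard LPC "chirp": coefficient i is scaled by 0.99^(i+1), which pulls the filter's poles toward the origin until sum(|a_i|) fits the stated headroom. A float sketch of one pass (repeat until the sum test passes):

/* One bandwidth-expansion pass with chirp factor g, e.g. g = 0.99f
   (float sketch, not the fixed-point loop above). */
static void chirp_lpc(float *a, int order, float g)
{
    float gk = g;                  /* g^(i+1) for coefficient i */
    for (int i = 0; i < order; i++) {
        a[i] *= gk;
        gk *= g;
    }
}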
/* We want the excitation for 2 pitch periods in order to look for a
decaying signal, but we can't get more than MAX_PERIOD. */
@ -568,15 +637,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
/* Initialize the LPC history with the samples just before the start
of the region for which we're computing the excitation. */
{
opus_val16 lpc_mem[LPC_ORDER];
for (i=0;i<LPC_ORDER;i++)
{
lpc_mem[i] =
ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
exc[MAX_PERIOD-exc_length-LPC_ORDER+i] =
ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-LPC_ORDER+i], SIG_SHIFT);
}
/* Compute the excitation for exc_length samples before the loss. */
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch);
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, st->arch);
}
/* Check if the waveform is decaying, and if so how fast.
@ -630,9 +698,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
tmp = ROUND16(
buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
SIG_SHIFT);
S1 += SHR32(MULT16_16(tmp, tmp), 8);
S1 += SHR32(MULT16_16(tmp, tmp), 10);
}
{
opus_val16 lpc_mem[LPC_ORDER];
/* Copy the last decoded samples (prior to the overlap region) to
@ -644,6 +711,10 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
lpc_mem, st->arch);
#ifdef FIXED_POINT
for (i=0; i < extrapolation_len; i++)
buf[DECODE_BUFFER_SIZE-N+i] = SATURATE(buf[DECODE_BUFFER_SIZE-N+i], SIG_SAT);
#endif
}
/* Check if the synthesis energy is higher than expected, which can
@ -654,7 +725,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
for (i=0;i<extrapolation_len;i++)
{
opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
S2 += SHR32(MULT16_16(tmp, tmp), 8);
S2 += SHR32(MULT16_16(tmp, tmp), 10);
}
/* This checks for an "explosion" in the synthesis. */
#ifdef FIXED_POINT
@ -979,7 +1050,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, st->arch);
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, 0,
st->arch, st->disable_inv);
if (anti_collapse_rsv > 0)
{
@ -1234,6 +1306,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
*value=st->rng;
}
break;
case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
if(value<0 || value>1)
{
goto bad_arg;
}
st->disable_inv = value;
}
break;
case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 *value = va_arg(ap, opus_int32*);
if (!value)
{
goto bad_arg;
}
*value = st->disable_inv;
}
break;
default:
goto bad_request;
}
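From the public API these requests are normally issued through the varargs ctl interface; a hedged usage sketch, assuming the OPUS_SET/GET_PHASE_INVERSION_DISABLED() helper macros from opus_defines.h:

#include <opus_custom.h>
#include <opus_defines.h>

static void disable_phase_inversion(OpusCustomDecoder *st)
{
    opus_int32 disabled = 0;
    opus_custom_decoder_ctl(st, OPUS_SET_PHASE_INVERSION_DISABLED(1));
    opus_custom_decoder_ctl(st, OPUS_GET_PHASE_INVERSION_DISABLED(&disabled));
    /* disabled == 1 here; mono downmixes of the decoded stereo signal no
       longer risk cancellation from inverted-phase coding. */
}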

View file

@ -73,8 +73,8 @@ struct OpusCustomEncoder {
int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */
int loss_rate;
int lsb_depth;
int variable_duration;
int lfe;
int disable_inv;
int arch;
/* Everything beyond this point gets cleared on a reset */
@ -98,6 +98,7 @@ struct OpusCustomEncoder {
#endif
int consec_transient;
AnalysisInfo analysis;
SILKInfo silk_info;
opus_val32 preemph_memE[2];
opus_val32 preemph_memD[2];
@ -123,6 +124,7 @@ struct OpusCustomEncoder {
/* opus_val16 oldBandE[], Size = channels*mode->nbEBands */
/* opus_val16 oldLogE[], Size = channels*mode->nbEBands */
/* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */
/* opus_val16 energyError[], Size = channels*mode->nbEBands */
};
int celt_encoder_get_size(int channels)
@ -136,9 +138,10 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int
int size = sizeof(struct CELTEncoder)
+ (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */
+ channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
+ 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */
+ 4*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */
/* opus_val16 oldLogE[channels*mode->nbEBands]; */
/* opus_val16 oldLogE2[channels*mode->nbEBands]; */
/* opus_val16 energyError[channels*mode->nbEBands]; */
return size;
}
@ -178,7 +181,6 @@ static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
st->start = 0;
st->end = st->mode->effEBands;
st->signalling = 1;
st->arch = arch;
st->constrained_vbr = 1;
@ -223,7 +225,8 @@ void opus_custom_encoder_destroy(CELTEncoder *st)
static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
opus_val16 *tf_estimate, int *tf_chan)
opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients,
int *weak_transient)
{
int i;
VARDECL(opus_val16, tmp);
@ -233,6 +236,12 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
int c;
opus_val16 tf_max;
int len2;
/* Forward masking: 6.7 dB/ms. */
#ifdef FIXED_POINT
int forward_shift = 4;
#else
opus_val16 forward_decay = QCONST16(.0625f,15);
#endif
/* Table of 6*64/x, trained on real data to minimize the average error */
static const unsigned char inv_table[128] = {
255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
@ -247,6 +256,19 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
SAVE_STACK;
ALLOC(tmp, len, opus_val16);
*weak_transient = 0;
/* For lower bitrates, let's be more conservative and have a forward masking
decay of 3.3 dB/ms. This avoids having to code transients at very low
bitrate (mostly for hybrid), which can result in unstable energy and/or
partial collapse. */
if (allow_weak_transients)
{
#ifdef FIXED_POINT
forward_shift = 5;
#else
forward_decay = QCONST16(.03125f,15);
#endif
}
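These constants plug into the one-pole envelope follower in the forward pass below, tmp[i] = mem + decay*(x2 - mem), with decay = 2^-4 for the normal 6.7 dB/ms case and 2^-5 for the weak-transient 3.3 dB/ms case. A float sketch of that follower:

/* One-pole forward-masking follower (float sketch). A smaller decay
   tracks the energy envelope more slowly, i.e. stronger masking. */
static void forward_mask(float *e, int n, float decay)
{
    float mem = e[0];
    for (int i = 0; i < n; i++) {
        mem = mem + decay * (e[i] - mem);
        e[i] = mem;
    }
}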
len2=len/2;
for (c=0;c<C;c++)
{
@ -269,7 +291,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
mem0 = mem1 + y - 2*x;
mem1 = x - .5f*y;
#endif
tmp[i] = EXTRACT16(SHR32(y,2));
tmp[i] = SROUND16(y, 2);
/*printf("%f ", tmp[i]);*/
}
/*printf("\n");*/
@ -280,7 +302,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* Normalize tmp to max range */
{
int shift=0;
shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len));
shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len)));
if (shift!=0)
{
for (i=0;i<len;i++)
@ -299,9 +321,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
mean += x2;
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(x2-mem0,4);
tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift);
#else
tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0);
tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0);
#endif
mem0 = tmp[i];
}
@ -311,6 +333,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* Backward pass to compute the pre-echo threshold */
for (i=len2-1;i>=0;i--)
{
/* Backward masking: 13.9 dB/ms. */
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
@ -359,7 +382,12 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
}
}
is_transient = mask_metric>200;
/* For low bitrates, define "weak transients" that need to be
handled differently to avoid partial collapse. */
if (allow_weak_transients && is_transient && mask_metric<600) {
is_transient = 0;
*weak_transient = 1;
}
/* Arbitrary metric for VBR boost */
tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
/* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
@ -549,7 +577,7 @@ static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias
static int tf_analysis(const CELTMode *m, int len, int isTransient,
int *tf_res, int lambda, celt_norm *X, int N0, int LM,
int *tf_sum, opus_val16 tf_estimate, int tf_chan)
opus_val16 tf_estimate, int tf_chan)
{
int i;
VARDECL(int, metric);
@ -574,7 +602,6 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
ALLOC(path0, len, int);
ALLOC(path1, len, int);
*tf_sum = 0;
for (i=0;i<len;i++)
{
int k, N;
@ -629,7 +656,6 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
metric[i] = 2*best_level;
else
metric[i] = -2*best_level;
*tf_sum += (isTransient ? LM : 0) - metric[i]/2;
/* For bands that can't be split to -1, set the metric to the half-way point to avoid
biasing the decision */
if (narrow && (metric[i]==0 || metric[i]==-2*LM))
@ -754,7 +780,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
const opus_val16 *bandLogE, int end, int LM, int C, int N0,
AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
int intensity, opus_val16 surround_trim, int arch)
int intensity, opus_val16 surround_trim, opus_int32 equiv_rate, int arch)
{
int i;
opus_val32 diff=0;
@ -762,6 +788,14 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
int trim_index;
opus_val16 trim = QCONST16(5.f, 8);
opus_val16 logXC, logXC2;
/* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less
clear what's best, so we're keeping it as it was before, at least for now. */
if (equiv_rate < 64000) {
trim = QCONST16(4.f, 8);
} else if (equiv_rate < 80000) {
opus_int32 frac = (equiv_rate-64000) >> 10;
trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac;
}
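Concretely, frac steps from 0 to 15 as equiv_rate rises from 64000 to 80000 (one step per 1024 b/s), interpolating trim linearly from 4 to just under 5 in Q8. A worked example at 72 kb/s:

#include <stdio.h>

int main(void)
{
    long equiv_rate = 72000;                 /* example rate */
    long frac = (equiv_rate - 64000) >> 10;  /* = 7 */
    double trim = 4.0 + frac * (1.0/16);     /* = 4.4375, i.e. 1136 in Q8 */
    printf("frac=%ld trim=%.4f\n", frac, trim);
    return 0;
}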
if (C==2)
{
opus_val16 sum = 0; /* Q10 */
@ -809,7 +843,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
} while (++c<C);
diff /= C*(end-1);
/*printf("%f\n", diff);*/
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
trim -= SHR16(surround_trim, DB_SHIFT-8);
trim -= 2*SHR16(tf_estimate, 14-8);
#ifndef DISABLE_FLOAT_API
@ -930,7 +964,7 @@ static opus_val16 median_of_3(const opus_val16 *x)
static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc, AnalysisInfo *analysis)
{
int i, c;
opus_int32 tot_boost=0;
@ -1020,14 +1054,26 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
}
for (i=start;i<end;i++)
{
int width;
int boost;
int boost_bits;
if (i<8)
follower[i] *= 2;
if (i>=12)
follower[i] = HALF16(follower[i]);
}
#ifdef DISABLE_FLOAT_API
(void)analysis;
#else
if (analysis->valid)
{
for (i=start;i<IMIN(LEAK_BANDS, end);i++)
follower[i] = follower[i] + QCONST16(1.f/64.f, DB_SHIFT)*analysis->leak_boost[i];
}
#endif
for (i=start;i<end;i++)
{
int width;
int boost;
int boost_bits;
follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
width = C*(eBands[i+1]-eBands[i])<<LM;
@ -1042,11 +1088,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
boost_bits = boost*6<<BITRES;
}
/* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
/* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */
if ((!vbr || (constrained_vbr&&!isTransient))
&& (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
&& (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3)
{
opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3);
opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3);
offsets[i] = cap-tot_boost;
tot_boost = cap;
break;
@ -1193,7 +1239,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking,
int lfe, int has_surround_mask, opus_val16 surround_masking,
opus_val16 temporal_vbr)
{
/* The target rate in 8th bits per frame */
@ -1235,10 +1281,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
}
/* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
target += tot_boost-(16<<LM);
target += tot_boost-(19<<LM);
/* Apply transient boost, compensating for average boost. */
tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ?
QCONST16(0.02f,14) : QCONST16(0.04f,14);
tf_calibration = QCONST16(0.044f,14);
target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
#ifndef DISABLE_FLOAT_API
@ -1249,7 +1294,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
float tonal;
/* Tonality boost (compensating for the average). */
tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f;
tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f;
tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
if (pitch_change)
tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f);
@ -1279,21 +1324,11 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
/*printf("%f %d\n", maxDepth, floor_depth);*/
}
if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
/* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate
for long. Needs tuning. */
if ((!has_surround_mask||lfe) && constrained_vbr)
{
opus_val16 rate_factor = Q15ONE;
if (bitrate < 64000)
{
#ifdef FIXED_POINT
rate_factor = MAX16(0,(bitrate-32000));
#else
rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
#endif
}
if (constrained_vbr)
rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);
target = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), target-base_target);
}
if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
@ -1331,7 +1366,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
VARDECL(int, tf_res);
VARDECL(unsigned char, collapse_masks);
celt_sig *prefilter_mem;
opus_val16 *oldBandE, *oldLogE, *oldLogE2;
opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError;
int shortBlocks=0;
int isTransient=0;
const int CC = st->channels;
@ -1343,7 +1378,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
int end;
int effEnd;
int codedBands;
int tf_sum;
int alloc_trim;
int pitch_index=COMBFILTER_MINPERIOD;
opus_val16 gain1 = 0;
@ -1355,6 +1389,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
opus_int32 total_boost;
opus_int32 balance;
opus_int32 tell;
opus_int32 tell0_frac;
int prefilter_tapset=0;
int pf_on;
int anti_collapse_rsv;
@ -1376,7 +1411,9 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
opus_val16 surround_masking=0;
opus_val16 temporal_vbr=0;
opus_val16 surround_trim = 0;
opus_int32 equiv_rate = 510000;
opus_int32 equiv_rate;
int hybrid;
int weak_transient = 0;
VARDECL(opus_val16, surround_dynalloc);
ALLOC_STACK;
@ -1386,6 +1423,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
eBands = mode->eBands;
start = st->start;
end = st->end;
hybrid = start != 0;
tf_estimate = 0;
if (nbCompressedBytes<2 || pcm==NULL)
{
@ -1409,12 +1447,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD));
oldLogE = oldBandE + CC*nbEBands;
oldLogE2 = oldLogE + CC*nbEBands;
energyError = oldLogE2 + CC*nbEBands;
if (enc==NULL)
{
tell=1;
tell0_frac=tell=1;
nbFilledBytes=0;
} else {
tell0_frac=tell=ec_tell_frac(enc);
tell=ec_tell(enc);
nbFilledBytes=(tell+4)>>3;
}
@ -1467,10 +1507,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (st->bitrate!=OPUS_BITRATE_MAX)
nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
(tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
effectiveBytes = nbCompressedBytes;
effectiveBytes = nbCompressedBytes - nbFilledBytes;
}
equiv_rate = ((opus_int32)nbCompressedBytes*8*50 >> (3-LM)) - (40*C+20)*((400>>LM) - 50);
if (st->bitrate != OPUS_BITRATE_MAX)
equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50);
equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50));
if (enc==NULL)
{
@ -1558,8 +1599,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
int enabled;
int qg;
enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf
&& st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf
&& st->complexity >= 5;
prefilter_tapset = st->tapset_decision;
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
@ -1568,7 +1609,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
pitch_change = 1;
if (pf_on==0)
{
if(start==0 && tell+16<=total_bits)
if(!hybrid && tell+16<=total_bits)
ec_enc_bit_logp(enc, 0, 1);
} else {
/*This block is not gated by a total bits check only because
@ -1589,8 +1630,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
shortBlocks = 0;
if (st->complexity >= 1 && !st->lfe)
{
/* Reduces the likelihood of energy instability on fricatives at low bitrate
in hybrid mode. It seems like we still want to have real transients on vowels
though (small SILK quantization offset value). */
int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.offset >= 100;
isTransient = transient_analysis(in, N+overlap, CC,
&tf_estimate, &tf_chan);
&tf_estimate, &tf_chan, allow_weak_transients, &weak_transient);
}
if (LM>0 && ec_tell(enc)+3<=total_bits)
{
@ -1610,7 +1655,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (secondMdct)
{
compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
for (i=0;i<C*nbEBands;i++)
bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
@ -1619,7 +1664,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
if (CC==2&&C==1)
tf_chan = 0;
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
if (st->lfe)
{
@ -1634,7 +1679,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
OPUS_CLEAR(surround_dynalloc, end);
/* This computes how much masking takes place between surround channels */
if (start==0&&st->energy_mask&&!st->lfe)
if (!hybrid&&st->energy_mask&&!st->lfe)
{
int mask_end;
int midband;
@ -1736,14 +1781,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/* Last chance to catch any transient we might have missed in the
time-domain analysis */
if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe)
if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe && !hybrid)
{
if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C))
{
isTransient = 1;
shortBlocks = M;
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);
compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
/* Compensate for the scaling of short vs long mdcts */
for (i=0;i<C*nbEBands;i++)
@ -1762,29 +1807,47 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(tf_res, nbEBands, int);
/* Disable variable tf resolution for hybrid and at very low bitrate */
if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe)
if (effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe)
{
int lambda;
if (effectiveBytes<40)
lambda = 12;
else if (effectiveBytes<60)
lambda = 6;
else if (effectiveBytes<100)
lambda = 4;
else
lambda = 3;
lambda*=2;
tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
lambda = IMAX(5, 1280/effectiveBytes + 2);
tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan);
for (i=effEnd;i<end;i++)
tf_res[i] = tf_res[effEnd-1];
} else if (hybrid && weak_transient)
{
/* For weak transients, we rely on the fact that improving time resolution using
TF on a long window is imperfect and will not result in an energy collapse at
low bitrate. */
for (i=0;i<end;i++)
tf_res[i] = 1;
tf_select=0;
} else if (hybrid && effectiveBytes<15)
{
/* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */
for (i=0;i<end;i++)
tf_res[i] = 0;
tf_select=isTransient;
} else {
tf_sum = 0;
for (i=0;i<end;i++)
tf_res[i] = isTransient;
tf_select=0;
}
ALLOC(error, C*nbEBands, opus_val16);
c=0;
do {
for (i=start;i<end;i++)
{
/* When the energy is stable, slightly bias energy quantization towards
the previous error to make the gain more stable (a constant offset is
better than fluctuations). */
if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < QCONST16(2.f, DB_SHIFT))
{
bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15));
}
}
} while (++c < C);
quant_coarse_energy(mode, start, end, effEnd, bandLogE,
oldBandE, total_bits, error, enc,
C, LM, nbAvailableBytes, st->force_intra,
@ -1798,7 +1861,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
st->tapset_decision = 0;
st->spread_decision = SPREAD_NORMAL;
} else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0)
} else if (hybrid)
{
if (st->complexity == 0)
st->spread_decision = SPREAD_NONE;
else if (isTransient)
st->spread_decision = SPREAD_NORMAL;
else
st->spread_decision = SPREAD_AGGRESSIVE;
} else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
{
if (st->complexity == 0)
st->spread_decision = SPREAD_NONE;
@ -1834,7 +1905,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis);
/* For LFE, everything interesting is in the first band */
if (st->lfe)
offsets[0] = IMIN(8, effectiveBytes/3);
@ -1896,12 +1967,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
alloc_trim = 5;
if (tell+(6<<BITRES) <= total_bits - total_boost)
{
if (st->lfe)
if (start > 0 || st->lfe)
{
st->stereo_saving = 0;
alloc_trim = 5;
else
} else {
alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate,
st->intensity, surround_trim, st->arch);
st->intensity, surround_trim, equiv_rate, st->arch);
}
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
tell = ec_tell_frac(enc);
}
@ -1919,17 +1993,36 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
The CELT allocator will just not be able to use more than that anyway. */
nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
base_target = vbr_rate - ((40*C+20)<<BITRES);
if (!hybrid)
{
base_target = vbr_rate - ((40*C+20)<<BITRES);
} else {
base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES));
}
if (st->constrained_vbr)
base_target += (st->vbr_offset>>lm_diff);
target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
if (!hybrid)
{
target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
st->lastCodedBands, C, st->intensity, st->constrained_vbr,
st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking,
st->lfe, st->energy_mask!=NULL, surround_masking,
temporal_vbr);
} else {
target = base_target;
/* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */
if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM);
if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM);
/* Boosting bitrate on transients and vowels with significant temporal
spikes. */
target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES));
/* If we have a strong transient, let's make sure it has enough bits to code
the first two bands, so that it can use folding rather than noise. */
if (tf_estimate > QCONST16(.7f,14))
target = IMAX(target, 50<<BITRES);
}
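With BITRES = 3 these offsets are in eighth-bits, so for a 5 ms frame (LM = 1, an assumption for the example) the tonal boost works out to 24 eighth-bits (3 bits) and the noisy cut to 36 eighth-bits (4.5 bits):

#include <stdio.h>
#define BITRES 3

int main(void)
{
    int LM = 1;                              /* 5 ms frame, for the example */
    int tonal = 12 << BITRES >> (3 - LM);    /* 24 eighth-bits = 3 bits   */
    int noisy = 18 << BITRES >> (3 - LM);    /* 36 eighth-bits = 4.5 bits */
    printf("+%d -%d eighth-bits\n", tonal, noisy);
    return 0;
}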
/* The current offset is removed from the target and the space used
so far is added. */
target=target+tell;
@ -1937,11 +2030,16 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
result in the encoder running out of bits.
The margin of 2 bytes ensures that none of the bust-prevention logic
in the decoder will have triggered so far. */
min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2;
/* Take into account the 37 bits we need to have left in the packet to
signal a redundant frame in hybrid mode. Creating a shorter packet would
create an entropy coder desync. */
if (hybrid)
min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3));
nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
/* By how much did we "miss" the target on that frame */
delta = target - vbr_rate;
@ -1988,7 +2086,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
st->vbr_reservoir = 0;
/*printf ("+%d\n", adjust);*/
}
nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
/*printf("%d\n", nbCompressedBytes*50*8);*/
/* This moves the raw bits to take into account the new compressed size */
ec_enc_shrink(enc, nbCompressedBytes);
@ -2038,7 +2136,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
bandE, pulses, shortBlocks, st->spread_decision,
dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv,
balance, enc, LM, codedBands, &st->rng, st->arch);
balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv);
if (anti_collapse_rsv > 0)
{
@ -2049,6 +2147,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ec_enc_bits(enc, anti_collapse_on, 1);
}
quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
OPUS_CLEAR(energyError, nbEBands*CC);
c=0;
do {
for (i=start;i<end;i++)
{
energyError[i+c*nbEBands] = MAX16(-QCONST16(0.5f, 15), MIN16(QCONST16(0.5f, 15), error[i+c*nbEBands]));
}
} while (++c < C);
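Paired with the pre-quantization bias earlier in this function, this closes a per-band feedback loop: the clamped coarse-energy error from this frame nudges the next frame's target by a quarter of the error (only when the band's energy is stable), so a constant quantizer offset replaces frame-to-frame gain wobble. A simplified float sketch of one band of that loop (q() stands in for coarse quantization; the 2 dB stability gate is omitted):

static float clampf(float v, float lo, float hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

/* Encode one band's energy with error feedback from the previous frame. */
static float encode_band_energy(float bandLogE, float prev_err,
                                float *new_err, float (*q)(float))
{
    bandLogE -= 0.25f * prev_err;                 /* bias toward last error */
    float quantized = q(bandLogE);
    *new_err = clampf(bandLogE - quantized, -0.5f, 0.5f);
    return quantized;
}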
if (silence)
{
@ -2321,10 +2427,24 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
*value=st->lsb_depth;
}
break;
case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
st->variable_duration = value;
if(value<0 || value>1)
{
goto bad_arg;
}
st->disable_inv = value;
}
break;
case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
{
opus_int32 *value = va_arg(ap, opus_int32*);
if (!value)
{
goto bad_arg;
}
*value = st->disable_inv;
}
break;
case OPUS_RESET_STATE:
@ -2368,6 +2488,13 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
OPUS_COPY(&st->analysis, info, 1);
}
break;
case CELT_SET_SILK_INFO_REQUEST:
{
SILKInfo *info = va_arg(ap, SILKInfo *);
if (info)
OPUS_COPY(&st->silk_info, info, 1);
}
break;
case CELT_GET_MODE_REQUEST:
{
const CELTMode ** value = va_arg(ap, const CELTMode**);

View file

@ -89,58 +89,40 @@ int p
void celt_fir_c(
const opus_val16 *_x,
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *_y,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
VARDECL(opus_val16, x);
SAVE_STACK;
ALLOC(rnum, ord, opus_val16);
ALLOC(x, N+ord, opus_val16);
for(i=0;i<ord;i++)
rnum[i] = num[ord-i-1];
for(i=0;i<ord;i++)
x[i] = mem[ord-i-1];
for (i=0;i<N;i++)
x[i+ord]=_x[i];
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
(void)arch;
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
{
sum = MAC16_16(sum,rnum[j],x[i+j]);
}
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
}
#else
for (i=0;i<N-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
xcorr_kernel(rnum, x+i, sum, ord, arch);
_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
_y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
opus_val32 sum[4];
sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT);
sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
y[i ] = ROUND16(sum[0], SIG_SHIFT);
y[i+1] = ROUND16(sum[1], SIG_SHIFT);
y[i+2] = ROUND16(sum[2], SIG_SHIFT);
y[i+3] = ROUND16(sum[3], SIG_SHIFT);
}
for (;i<N;i++)
{
opus_val32 sum = 0;
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
sum = MAC16_16(sum,rnum[j],x[i+j]);
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
y[i] = ROUND16(sum, SIG_SHIFT);
}
#endif
RESTORE_STACK;
}
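The rewrite also changes the calling convention: celt_fir() no longer carries mem[] state, and the caller must provide ord samples of history directly before x[0] in the same buffer, which is exactly why the PLC code above allocates _exc with LPC_ORDER extra samples and passes exc = _exc + LPC_ORDER. A hedged caller-side sketch:

#include <string.h>
#include "arch.h"
#include "celt_lpc.h"   /* for celt_fir() */

/* Sketch of the new convention: ord history samples sit in the same
   buffer, immediately before the n samples being filtered. */
static void fir_with_history(const opus_val16 *hist /* ord samples */,
                             const opus_val16 *x    /* n samples   */,
                             const opus_val16 *num, opus_val16 *y,
                             int n, int ord, int arch)
{
    opus_val16 buf[24 + 960];                 /* assumes ord<=24, n<=960 */
    memcpy(buf, hist, ord*sizeof(*buf));      /* history first */
    memcpy(buf + ord, x, n*sizeof(*buf));     /* then the input frame */
    celt_fir(buf + ord, num, y, n, ord, arch);
}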
@ -166,7 +148,7 @@ void celt_iir(const opus_val32 *_x,
{
mem[j]=mem[j-1];
}
mem[0] = ROUND16(sum,SIG_SHIFT);
mem[0] = SROUND16(sum, SIG_SHIFT);
_y[i] = sum;
}
#else
@ -195,20 +177,20 @@ void celt_iir(const opus_val32 *_x,
xcorr_kernel(rden, y+i, sum, ord, arch);
/* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
_y[i ] = sum[0];
sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
_y[i+1] = sum[1];
sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
_y[i+2] = sum[2];
sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
_y[i+3] = sum[3];
}
for (;i<N;i++)
@ -216,7 +198,7 @@ void celt_iir(const opus_val32 *_x,
opus_val32 sum = _x[i];
for (j=0;j<ord;j++)
sum -= MULT16_16(rden[j],y[i+j]);
y[i+ord] = ROUND16(sum,SIG_SHIFT);
y[i+ord] = SROUND16(sum,SIG_SHIFT);
_y[i] = sum;
}
for(i=0;i<ord;i++)

View file

@ -45,12 +45,11 @@ void celt_fir_c(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
#if !defined(OVERRIDE_CELT_FIR)
#define celt_fir(x, num, y, N, ord, mem, arch) \
(celt_fir_c(x, num, y, N, ord, mem, arch))
#define celt_fir(x, num, y, N, ord, arch) \
(celt_fir_c(x, num, y, N, ord, arch))
#endif
void celt_iir(const opus_val32 *x,

View file

@ -59,6 +59,14 @@ extern opus_int64 celt_mips;
#define SHR(a,b) SHR32(a,b)
#define PSHR(a,b) PSHR32(a,b)
/** Add two 32-bit values, ignore any overflows */
#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
/** Subtract two 32-bit values, ignore any overflows */
#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
/** Negate 32-bit value, ignore any overflows */
#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))
static OPUS_INLINE short NEG16(int x)
{
int res;
@ -227,12 +235,11 @@ static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
#define SROUND16(x,a) (celt_mips--,EXTRACT16(SATURATE(PSHR32(x,a), 32767)))
#define HALF16(x) (SHR16(x,1))
#define HALF32(x) (SHR32(x,1))
//#define SHR(a,shift) ((a) >> (shift))
//#define SHL(a,shift) ((a) << (shift))
#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
{

View file

@ -104,6 +104,9 @@
/** Shift by a and round-to-nearest 32-bit value. Result is a 16-bit value */
#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
/** Shift by a and round-to-nearest 32-bit value. Result is a saturated 16-bit value */
#define SROUND16(x,a) EXTRACT16(SATURATE(PSHR32(x,a), 32767))
/** Divide by two */
#define HALF16(x) (SHR16(x,1))
#define HALF32(x) (SHR32(x,1))
@ -117,6 +120,14 @@
/** Subtract two 32-bit values */
#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
/** Add two 32-bit values, ignore any overflows */
#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
/** Subtract two 32-bit values, ignore any overflows */
#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
/** Negate 32-bit value, ignore any overflows */
#define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))
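The detour through opus_uint32 matters because signed overflow is undefined behaviour in C, while unsigned arithmetic wraps modulo 2^32 by definition; converting the wrapped value back to opus_val32 then gives the expected result on two's-complement targets. A minimal demonstration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int32_t a = INT32_MAX, b = 1;
    /* a + b directly would be signed overflow (undefined behaviour);
       doing the sum in uint32_t wraps by definition. */
    int32_t wrapped = (int32_t)((uint32_t)a + (uint32_t)b);
    printf("%d\n", wrapped);   /* -2147483648 on two's-complement targets */
    return 0;
}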
/** 16x16 multiplication where the result fits in 16 bits */
#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))

View file

@ -61,7 +61,13 @@
** the config.h file.
*/
#if (HAVE_LRINTF)
/* With GCC, when SSE is available, the fastest conversion is cvtss2si. */
#if defined(__GNUC__) && defined(__SSE__)
#include <xmmintrin.h>
static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));}
#elif defined(HAVE_LRINTF)
/* These defines enable functionality introduced with the 1999 ISO C
** standard. They must be defined before the inclusion of math.h to

View file

@ -82,8 +82,8 @@ static void kf_bfly2(
C_SUB( Fout2[0] , Fout[0] , t );
C_ADDTO( Fout[0] , t );
t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
C_SUB( Fout2[1] , Fout[1] , t );
C_ADDTO( Fout[1] , t );
@ -92,8 +92,8 @@ static void kf_bfly2(
C_SUB( Fout2[2] , Fout[2] , t );
C_ADDTO( Fout[2] , t );
t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
C_SUB( Fout2[3] , Fout[3] , t );
C_ADDTO( Fout[3] , t );
Fout += 8;
@ -126,10 +126,10 @@ static void kf_bfly4(
C_ADDTO( *Fout , scratch1 );
C_SUB( scratch1 , Fout[1] , Fout[3] );
Fout[1].r = scratch0.r + scratch1.i;
Fout[1].i = scratch0.i - scratch1.r;
Fout[3].r = scratch0.r - scratch1.i;
Fout[3].i = scratch0.i + scratch1.r;
Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
Fout+=4;
}
} else {
@ -160,10 +160,10 @@ static void kf_bfly4(
tw3 += fstride*3;
C_ADDTO( *Fout , scratch[3] );
Fout[m].r = scratch[5].r + scratch[4].i;
Fout[m].i = scratch[5].i - scratch[4].r;
Fout[m3].r = scratch[5].r - scratch[4].i;
Fout[m3].i = scratch[5].i + scratch[4].r;
Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
++Fout;
}
}
@ -212,18 +212,18 @@ static void kf_bfly3(
tw1 += fstride;
tw2 += fstride*2;
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
C_MULBYSCALAR( scratch[0] , epi3.i );
C_ADDTO(*Fout,scratch[3]);
Fout[m2].r = Fout[m].r + scratch[0].i;
Fout[m2].i = Fout[m].i - scratch[0].r;
Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
Fout[m].r -= scratch[0].i;
Fout[m].i += scratch[0].r;
Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
++Fout;
} while(--k);
@ -282,22 +282,22 @@ static void kf_bfly5(
C_ADD( scratch[8],scratch[2],scratch[3]);
C_SUB( scratch[9],scratch[2],scratch[3]);
Fout0->r += scratch[7].r + scratch[8].r;
Fout0->i += scratch[7].i + scratch[8].i;
Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
C_SUB(*Fout1,scratch[5],scratch[6]);
C_ADD(*Fout4,scratch[5],scratch[6]);
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
C_ADD(*Fout2,scratch[11],scratch[12]);
C_SUB(*Fout3,scratch[11],scratch[12]);

View file

@ -38,11 +38,44 @@
#include "entcode.h"
#include "os_support.h"
#define PI 3.141592653f
/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
unsigned isqrt32(opus_uint32 _val);
/* CELT doesn't need it for fixed-point, but analysis.c does. */
#if !defined(FIXED_POINT) || defined(ANALYSIS_C)
#define cA 0.43157974f
#define cB 0.67848403f
#define cC 0.08595542f
#define cE ((float)PI/2)
static OPUS_INLINE float fast_atan2f(float y, float x) {
float x2, y2;
x2 = x*x;
y2 = y*y;
/* For very small values, we don't care about the answer, so
we can just return 0. */
if (x2 + y2 < 1e-18f)
{
return 0;
}
if(x2<y2){
float den = (y2 + cB*x2) * (y2 + cC*x2);
return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
}else{
float den = (x2 + cB*y2) * (x2 + cC*y2);
return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
}
}
#undef cA
#undef cB
#undef cC
#undef cE
#endif
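fast_atan2f() is a fitted rational approximation of atan2 (cA..cC are the fitted constants, cE = pi/2 supplies the quadrant offset), letting the float analysis path skip a libm call. A hedged usage sketch mirroring the float branch of stereo_itheta() further down:

#include <math.h>
#include "mathops.h"   /* for fast_atan2f() */

/* Q14 stereo angle from side/mid magnitudes; 0.63662f ~= 2/pi. */
static int itheta_sketch(float side, float mid)
{
    return (int)floor(.5f + 16384*0.63662f*fast_atan2f(side, mid));
}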
#ifndef OVERRIDE_CELT_MAXABS16
static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
{
@ -80,7 +113,6 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
#ifndef FIXED_POINT
#define PI 3.141592653f
#define celt_sqrt(x) ((float)sqrt(x))
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
#define celt_rsqrt_norm(x) (celt_rsqrt(x))

View file

@ -270,8 +270,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
int rev;
kiss_fft_scalar yr, yi;
rev = *bitrev++;
yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
/* We swap real and imag because we use an FFT instead of an IFFT. */
yp[2*rev+1] = yr;
yp[2*rev] = yi;
@ -301,8 +301,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
t0 = t[i];
t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
/* We swap real and imag because we're using an FFT instead of an IFFT. */
re = yp1[1];
im = yp1[0];
@ -312,8 +312,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
t0 = t[(N4-i-1)];
t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t0) + S_MUL(im,t1);
yi = S_MUL(re,t1) - S_MUL(im,t0);
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
yp1[0] = yr;
yp0[1] = yi;
yp0 += 2;
@ -333,8 +333,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
kiss_fft_scalar x1, x2;
x1 = *xp1;
x2 = *yp1;
*yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
*xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
*yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
*xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
wp1++;
wp2--;
}

View file

@ -36,9 +36,6 @@
#include "mathops.h"
#include "arch.h"
static unsigned extract_collapse_mask(int *iy, int N, int B);
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain);
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch);
#define OVERRIDE_vq_exp_rotation1

View file

@ -427,7 +427,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
}
#endif /* CUSTOM_MODES_ONLY */
opus_free((opus_int16*)mode->eBands);
opus_free((opus_int16*)mode->allocVectors);
opus_free((unsigned char*)mode->allocVectors);
opus_free((opus_val16*)mode->window);
opus_free((opus_int16*)mode->logN);

View file

@ -220,13 +220,8 @@ opus_val32
#else
void
#endif
#if defined(OVERRIDE_PITCH_XCORR)
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch)
#else
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch)
#endif
{
#if 0 /* This is a simple version of the pitch correlation that should work
@ -265,11 +260,7 @@ celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
#if defined(OVERRIDE_PITCH_XCORR)
xcorr_kernel_c(_x, _y+i, sum, len);
#else
xcorr_kernel(_x, _y+i, sum, len, arch);
#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
@ -285,11 +276,7 @@ celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
for (;i<max_pitch;i++)
{
opus_val32 sum;
#if defined(OVERRIDE_PITCH_XCORR)
sum = celt_inner_prod_c(_x, _y+i, len);
#else
sum = celt_inner_prod(_x, _y+i, len, arch);
#endif
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@ -378,7 +365,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
for (j=0;j<len>>1;j++)
sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
#else
sum = celt_inner_prod_c(x_lp, y+i, len>>1);
sum = celt_inner_prod(x_lp, y+i, len>>1, arch);
#endif
xcorr[i] = MAX32(-1, sum);
#ifdef FIXED_POINT
@ -424,7 +411,7 @@ static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy
sx = celt_ilog2(xx)-14;
sy = celt_ilog2(yy)-14;
shift = sx + sy;
x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy));
x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
if (shift & 1) {
if (x2y2 < 32768)
{

View file

@ -46,8 +46,7 @@
#include "mips/pitch_mipsr1.h"
#endif
#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
|| defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
# include "arm/pitch_arm.h"
#endif
@ -184,17 +183,10 @@ opus_val32
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR)
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch);
#ifndef OVERRIDE_PITCH_XCORR
# define celt_pitch_xcorr celt_pitch_xcorr_c
#endif
#endif

View file

@ -418,6 +418,7 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *ol
offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
#endif
oldEBands[i+c*m->nbEBands] += offset;
error[i+c*m->nbEBands] -= offset;
bits_left--;
} while (++c < C);
}
@ -547,9 +548,15 @@ void amp2Log2(const CELTMode *m, int effEnd, int end,
c=0;
do {
for (i=0;i<effEnd;i++)
{
bandLogE[i+c*m->nbEBands] =
celt_log2(SHL32(bandE[i+c*m->nbEBands],2))
celt_log2(bandE[i+c*m->nbEBands])
- SHL16((opus_val16)eMeans[i],6);
#ifdef FIXED_POINT
/* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */
bandLogE[i+c*m->nbEBands] += QCONST16(2.f, DB_SHIFT);
#endif
}
for (i=effEnd;i<end;i++)
bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
} while (++c < C);

View file

@ -348,12 +348,17 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
/*This if() block is the only part of the allocation function that
is not a mandatory part of the bitstream: any bands we choose to
skip here must be explicitly signaled.*/
/*Choose a threshold with some hysteresis to keep bands from
fluctuating in and out.*/
int depth_threshold;
/*We choose a threshold with some hysteresis to keep bands from
fluctuating in and out, but we try not to fold below a certain point. */
if (codedBands > 17)
depth_threshold = j<prev ? 7 : 9;
else
depth_threshold = 0;
#ifdef FUZZING
if ((rand()&0x1) == 0)
#else
if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
if (codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
#endif
{
ec_enc_bit_logp(ec, 1, 1);

View file

@ -1,7 +1,7 @@
/* The contents of this file were automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#include <NE10_types.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960

View file

@ -1,7 +1,7 @@
/* The contents of this file were automatically generated by
* dump_mode_arm_ne10.c with arguments: 48000 960
* It contains static definitions for some pre-defined modes. */
#include <NE10_init.h>
#include <NE10_types.h>
#ifndef NE10_FFT_PARAMS48000_960
#define NE10_FFT_PARAMS48000_960

View file

@ -67,7 +67,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
}
#endif /* OVERRIDE_vq_exp_rotation1 */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
static const int SPREAD_FACTOR[3]={15,10,5};
int i;
@ -158,42 +158,27 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
return collapse_mask;
}
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
, opus_val16 gain
#endif
)
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
VARDECL(celt_norm, y);
VARDECL(int, iy);
VARDECL(opus_val16, signx);
VARDECL(int, signx);
int i, j;
opus_val16 s;
int pulsesLeft;
opus_val32 sum;
opus_val32 xy;
opus_val16 yy;
unsigned collapse_mask;
SAVE_STACK;
celt_assert2(K>0, "alg_quant() needs at least one pulse");
celt_assert2(N>1, "alg_quant() needs at least two dimensions");
(void)arch;
ALLOC(y, N, celt_norm);
ALLOC(iy, N, int);
ALLOC(signx, N, opus_val16);
exp_rotation(X, N, 1, B, K, spread);
ALLOC(signx, N, int);
/* Get rid of the sign */
sum = 0;
j=0; do {
if (X[j]>0)
signx[j]=1;
else {
signx[j]=-1;
X[j]=-X[j];
}
signx[j] = X[j]<0;
/* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
X[j] = ABS16(X[j]);
iy[j] = 0;
y[j] = 0;
} while (++j<N);
@ -225,7 +210,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
while (++j<N);
sum = QCONST16(1.f,14);
}
rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
#ifdef FIXED_POINT
rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
/* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
#endif
j=0; do {
#ifdef FIXED_POINT
/* It's really important to round *towards zero* here */
@ -240,7 +230,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
pulsesLeft -= iy[j];
} while (++j<N);
}
celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");
/* This should never happen, but just in case it does (e.g. on silence)
we fill the first bin with pulses. */
@ -256,12 +246,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
pulsesLeft=0;
}
s = 1;
for (i=0;i<pulsesLeft;i++)
{
opus_val16 Rxy, Ryy;
int best_id;
opus_val32 best_num = -VERY_LARGE16;
opus_val16 best_den = 0;
opus_val32 best_num;
opus_val16 best_den;
#ifdef FIXED_POINT
int rshift;
#endif
@ -272,9 +262,22 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD16(yy, 1);
j=0;
/* Calculations for position 0 are out of the loop, in part to reduce
mispredicted branches (since the if condition is usually false)
in the loop. */
/* Temporary sums of the new pulse(s) */
Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
Ryy = ADD16(yy, y[0]);
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
Rxy is positive because the sign is pre-computed) */
Rxy = MULT16_16_Q15(Rxy,Rxy);
best_den = Ryy;
best_num = Rxy;
j=1;
do {
opus_val16 Rxy, Ryy;
/* Temporary sums of the new pulse(s) */
Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
@ -285,8 +288,11 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
Rxy = MULT16_16_Q15(Rxy,Rxy);
/* The idea is to check for num/den >= best_num/best_den, but that way
we can do it without any division */
/* OPT: Make sure to use conditional moves here */
if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
/* OPT: It's not clear whether a cmov is faster than a branch here
since the condition is more often false than true and using
a cmov introduces data dependencies across iterations. The optimal
choice may be architecture-dependent. */
if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
{
best_den = Ryy;
best_num = Rxy;
@ -301,23 +307,47 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
/* Only now that we've made the final choice, update y/iy */
/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
y[best_id] += 2*s;
y[best_id] += 2;
iy[best_id]++;
}
/* Put the original sign back */
j=0;
do {
X[j] = MULT16_16(signx[j],X[j]);
if (signx[j] < 0)
iy[j] = -iy[j];
/*iy[j] = signx[j] ? -iy[j] : iy[j];*/
/* OPT: This is more likely to be compiled without a branch than the code above
but has the same performance otherwise. */
iy[j] = (iy[j]^-signx[j]) + signx[j];
} while (++j<N);
RESTORE_STACK;
return yy;
}
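op_pvq_search() is a greedy L1-constrained quantizer: after the projection-based quick pass, each remaining pulse goes to the position maximizing Rxy^2/Ryy (compared cross-multiplied so no division is needed), and the closing (iy^-s)+s is branchless two's-complement conditional negation: s = 1 flips all bits and adds one, i.e. negates, while s = 0 is the identity. A self-contained float sketch of the greedy core (simplified: no quick pass, and y[] holds plain pulse counts rather than the doubled values above):

/* Greedy PVQ pulse placement (float sketch, not the library code).
   x[] must already be the absolute values of the input vector. */
static void pvq_greedy(const float *x, int *iy, float *y, int n, int k)
{
    float xy = 0.f, yy = 0.f;                 /* running <x,y> and ||y||^2 */
    for (int i = 0; i < n; i++) { iy[i] = 0; y[i] = 0.f; }
    while (k-- > 0) {
        int best = 0;
        float bn = -1.f, bd = 1.f;            /* best num/den so far */
        for (int j = 0; j < n; j++) {
            float rxy = xy + x[j];            /* <x,y> with a pulse at j */
            float ryy = yy + 2.f*y[j] + 1.f;  /* ||y||^2 with that pulse */
            /* maximise rxy^2/ryy without dividing */
            if (rxy*rxy*bd > bn*ryy) { bn = rxy*rxy; bd = ryy; best = j; }
        }
        xy += x[best];
        yy += 2.f*y[best] + 1.f;
        y[best] += 1.f;
        iy[best]++;
    }
}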
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
opus_val16 gain, int resynth, int arch)
{
VARDECL(int, iy);
opus_val16 yy;
unsigned collapse_mask;
SAVE_STACK;
celt_assert2(K>0, "alg_quant() needs at least one pulse");
celt_assert2(N>1, "alg_quant() needs at least two dimensions");
/* Covers vectorization by up to 4. */
ALLOC(iy, N+3, int);
exp_rotation(X, N, 1, B, K, spread);
yy = op_pvq_search(X, iy, K, N, arch);
encode_pulses(iy, N, K, enc);
#ifdef RESYNTH
normalise_residual(iy, X, N, yy, gain);
exp_rotation(X, N, -1, B, K, spread);
#endif
if (resynth)
{
normalise_residual(iy, X, N, yy, gain);
exp_rotation(X, N, -1, B, K, spread);
}
collapse_mask = extract_collapse_mask(iy, N, B);
RESTORE_STACK;
@ -401,7 +431,7 @@ int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int
/* 0.63662 = 2/pi */
itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
#else
itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid));
#endif
return itheta;

View file

@ -37,10 +37,22 @@
#include "entdec.h"
#include "modes.h"
#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT))
#include "x86/vq_sse.h"
#endif
#if defined(MIPSr1_ASM)
#include "mips/vq_mipsr1.h"
#endif
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch);
#if !defined(OVERRIDE_OP_PVQ_SEARCH)
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_c(x, iy, K, N, arch))
#endif
/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
* the pitch and a combination of pulses such that its norm is still equal
@ -51,12 +63,8 @@
* @param enc Entropy encoder state
* @ret A mask indicating which blocks in the band received pulses
*/
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
ec_enc *enc
#ifdef RESYNTH
, opus_val16 gain
#endif
);
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
opus_val16 gain, int resynth, int arch);
/** Algebraic pulse decoder
* @param X Decoded normalised spectrum (returned)

View file

@ -40,65 +40,23 @@
#if defined(FIXED_POINT)
void celt_fir_sse4_1(const opus_val16 *_x,
void celt_fir_sse4_1(const opus_val16 *x,
const opus_val16 *num,
opus_val16 *_y,
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch)
{
int i,j;
VARDECL(opus_val16, rnum);
VARDECL(opus_val16, x);
__m128i vecNoA;
opus_int32 noA ;
SAVE_STACK;
ALLOC(rnum, ord, opus_val16);
ALLOC(x, N+ord, opus_val16);
for(i=0;i<ord;i++)
rnum[i] = num[ord-i-1];
for(i=0;i<ord;i++)
x[i] = mem[ord-i-1];
for (i=0;i<N-7;i+=8)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
x[i+ord+4]=_x[i+4];
x[i+ord+5]=_x[i+5];
x[i+ord+6]=_x[i+6];
x[i+ord+7]=_x[i+7];
}
for (;i<N-3;i+=4)
{
x[i+ord ]=_x[i ];
x[i+ord+1]=_x[i+1];
x[i+ord+2]=_x[i+2];
x[i+ord+3]=_x[i+3];
}
for (;i<N;i++)
x[i+ord]=_x[i];
for(i=0;i<ord;i++)
mem[i] = _x[N-i-1];
#ifdef SMALL_FOOTPRINT
for (i=0;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
{
sum = MAC16_16(sum,rnum[j],x[i+j]);
}
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
}
#else
noA = EXTEND32(1) << SIG_SHIFT >> 1;
vecNoA = _mm_set_epi32(noA, noA, noA, noA);
@ -107,25 +65,24 @@ void celt_fir_sse4_1(const opus_val16 *_x,
opus_val32 sums[4] = {0};
__m128i vecSum, vecX;
xcorr_kernel(rnum, x+i, sums, ord, arch);
xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
vecSum = _mm_loadu_si128((__m128i *)sums);
vecSum = _mm_add_epi32(vecSum, vecNoA);
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
vecX = OP_CVTEPI16_EPI32_M64(_x + i);
vecX = OP_CVTEPI16_EPI32_M64(x + i);
vecSum = _mm_add_epi32(vecSum, vecX);
vecSum = _mm_packs_epi32(vecSum, vecSum);
_mm_storel_epi64((__m128i *)(_y + i), vecSum);
_mm_storel_epi64((__m128i *)(y + i), vecSum);
}
for (;i<N;i++)
{
opus_val32 sum = 0;
for (j=0;j<ord;j++)
sum = MAC16_16(sum, rnum[j], x[i + j]);
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
sum = MAC16_16(sum, rnum[j], x[i+j-ord]);
y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
}
#endif
RESTORE_STACK;
}
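
The rewrite drops the internal history copy: the caller now guarantees ord samples of history before x[0], which is why the kernel can index x[i+j-ord] directly. A plain-C reference for one output sample under that assumption (a sketch with SIG_SHIFT = 12, not the libopus code):

/* One FIR output: the direct path plus the rounded, down-shifted taps
   over the ord preceding samples, saturated to 16 bits. */
static short fir_sample(const short *x, const short *num, int i, int ord)
{
    int j, v, sum = 0;
    for (j = 0; j < ord; j++)
        sum += num[j] * x[i - j - 1];
    v = x[i] + ((sum + (1 << 11)) >> 12);
    if (v >  32767) v =  32767;
    if (v < -32768) v = -32768;
    return (short)v;
}
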

View file

@ -41,12 +41,11 @@ void celt_fir_sse4_1(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
#if defined(OPUS_X86_PRESUME_SSE4_1)
#define celt_fir(x, num, y, N, ord, mem, arch) \
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch))
#define celt_fir(x, num, y, N, ord, arch) \
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch))
#else
@ -56,11 +55,10 @@ extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch);
# define celt_fir(x, num, y, N, ord, mem, arch) \
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch))
# define celt_fir(x, num, y, N, ord, arch) \
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch))
#endif
#endif

View file

@ -0,0 +1,50 @@
/* Copyright (c) 2016 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef VQ_SSE_H
#define VQ_SSE_H
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
#define OVERRIDE_OP_PVQ_SEARCH
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
#if defined(OPUS_X86_PRESUME_SSE2)
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_sse2(x, iy, K, N, arch))
#else
extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch);
# define op_pvq_search(X, iy, K, N, arch) \
((*OP_PVQ_SEARCH_IMPL[(arch) & OPUS_ARCHMASK])(X, iy, K, N, arch))
#endif
#endif
#endif

View file

@ -0,0 +1,217 @@
/* Copyright (c) 2007-2008 CSIRO
Copyright (c) 2007-2009 Xiph.Org Foundation
Copyright (c) 2007-2016 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <xmmintrin.h>
#include <emmintrin.h>
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "vq.h"
#include "x86cpu.h"
#ifndef FIXED_POINT
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
{
int i, j;
int pulsesLeft;
float xy, yy;
VARDECL(celt_norm, y);
VARDECL(celt_norm, X);
VARDECL(float, signy);
__m128 signmask;
__m128 sums;
__m128i fours;
SAVE_STACK;
(void)arch;
/* All bits set to zero, except for the sign bit. */
signmask = _mm_set_ps1(-0.f);
fours = _mm_set_epi32(4, 4, 4, 4);
ALLOC(y, N+3, celt_norm);
ALLOC(X, N+3, celt_norm);
ALLOC(signy, N+3, float);
OPUS_COPY(X, _X, N);
X[N] = X[N+1] = X[N+2] = 0;
sums = _mm_setzero_ps();
for (j=0;j<N;j+=4)
{
__m128 x4, s4;
x4 = _mm_loadu_ps(&X[j]);
s4 = _mm_cmplt_ps(x4, _mm_setzero_ps());
/* Get rid of the sign */
x4 = _mm_andnot_ps(signmask, x4);
sums = _mm_add_ps(sums, x4);
/* Clear y and iy in case we don't do the projection. */
_mm_storeu_ps(&y[j], _mm_setzero_ps());
_mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
_mm_storeu_ps(&X[j], x4);
_mm_storeu_ps(&signy[j], s4);
}
sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2)));
sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(2, 3, 0, 1)));
xy = yy = 0;
pulsesLeft = K;
/* Do a pre-search by projecting on the pyramid */
if (K > (N>>1))
{
__m128i pulses_sum;
__m128 yy4, xy4;
__m128 rcp4;
opus_val32 sum = _mm_cvtss_f32(sums);
/* If X is too small, just replace it with a pulse at 0 */
/* Prevents infinities and NaNs from causing too many pulses
to be allocated. 64 is an approximation of infinity here. */
if (!(sum > EPSILON && sum < 64))
{
X[0] = QCONST16(1.f,14);
j=1; do
X[j]=0;
while (++j<N);
sums = _mm_set_ps1(1.f);
}
/* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
rcp4 = _mm_mul_ps(_mm_set_ps1((float)(K+.8)), _mm_rcp_ps(sums));
xy4 = yy4 = _mm_setzero_ps();
pulses_sum = _mm_setzero_si128();
for (j=0;j<N;j+=4)
{
__m128 rx4, x4, y4;
__m128i iy4;
x4 = _mm_loadu_ps(&X[j]);
rx4 = _mm_mul_ps(x4, rcp4);
iy4 = _mm_cvttps_epi32(rx4);
pulses_sum = _mm_add_epi32(pulses_sum, iy4);
_mm_storeu_si128((__m128i*)&iy[j], iy4);
y4 = _mm_cvtepi32_ps(iy4);
xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
/* double the y[] vector so we don't have to do it in the search loop. */
_mm_storeu_ps(&y[j], _mm_add_ps(y4, y4));
}
pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(1, 0, 3, 2)));
pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(2, 3, 0, 1)));
pulsesLeft -= _mm_cvtsi128_si32(pulses_sum);
xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(1, 0, 3, 2)));
xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(2, 3, 0, 1)));
xy = _mm_cvtss_f32(xy4);
yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(1, 0, 3, 2)));
yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(2, 3, 0, 1)));
yy = _mm_cvtss_f32(yy4);
}
X[N] = X[N+1] = X[N+2] = -100;
y[N] = y[N+1] = y[N+2] = 100;
celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");
/* This should never happen, but just in case it does (e.g. on silence)
we fill the first bin with pulses. */
if (pulsesLeft > N+3)
{
opus_val16 tmp = (opus_val16)pulsesLeft;
yy = MAC16_16(yy, tmp, tmp);
yy = MAC16_16(yy, tmp, y[0]);
iy[0] += pulsesLeft;
pulsesLeft=0;
}
for (i=0;i<pulsesLeft;i++)
{
int best_id;
__m128 xy4, yy4;
__m128 max, max2;
__m128i count;
__m128i pos;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD16(yy, 1);
xy4 = _mm_load1_ps(&xy);
yy4 = _mm_load1_ps(&yy);
max = _mm_setzero_ps();
pos = _mm_setzero_si128();
count = _mm_set_epi32(3, 2, 1, 0);
for (j=0;j<N;j+=4)
{
__m128 x4, y4, r4;
x4 = _mm_loadu_ps(&X[j]);
y4 = _mm_loadu_ps(&y[j]);
x4 = _mm_add_ps(x4, xy4);
y4 = _mm_add_ps(y4, yy4);
y4 = _mm_rsqrt_ps(y4);
r4 = _mm_mul_ps(x4, y4);
/* Update the index of the max. */
pos = _mm_max_epi16(pos, _mm_and_si128(count, _mm_castps_si128(_mm_cmpgt_ps(r4, max))));
/* Update the max. */
max = _mm_max_ps(max, r4);
/* Update the indices (+4) */
count = _mm_add_epi32(count, fours);
}
/* Horizontal max */
max2 = _mm_max_ps(max, _mm_shuffle_ps(max, max, _MM_SHUFFLE(1, 0, 3, 2)));
max2 = _mm_max_ps(max2, _mm_shuffle_ps(max2, max2, _MM_SHUFFLE(2, 3, 0, 1)));
/* Now that max2 contains the max at all positions, look at which value(s) of the
partial max are equal to the global max. */
pos = _mm_and_si128(pos, _mm_castps_si128(_mm_cmpeq_ps(max, max2)));
pos = _mm_max_epi16(pos, _mm_unpackhi_epi64(pos, pos));
pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2)));
best_id = _mm_cvtsi128_si32(pos);
/* Updating the sums of the new pulse(s) */
xy = ADD32(xy, EXTEND32(X[best_id]));
/* We're multiplying y[j] by two so we don't have to do it here */
yy = ADD16(yy, y[best_id]);
/* Only now that we've made the final choice, update y/iy */
/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
y[best_id] += 2;
iy[best_id]++;
}
/* Put the original sign back */
for (j=0;j<N;j+=4)
{
__m128i y4;
__m128i s4;
y4 = _mm_loadu_si128((__m128i*)&iy[j]);
s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
_mm_storeu_si128((__m128i*)&iy[j], y4);
}
RESTORE_STACK;
return yy;
}
#endif
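
The epilogue restores signs with a mask-based variant of the same trick: s4 is 0 for non-negative entries and all ones (-1) for negative ones. A scalar sketch of the identity, not part of the patch:

/* (y + s) ^ s == y when s == 0, and ~(y - 1) == -y when s == -1. */
static int cond_negate_mask(int y, int s)
{
    return (y + s) ^ s;
}
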

View file

@ -33,6 +33,7 @@
#include "celt_lpc.h"
#include "pitch.h"
#include "pitch_sse.h"
#include "vq.h"
#if defined(OPUS_HAVE_RTCD)
@ -46,7 +47,6 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
opus_val16 *y,
int N,
int ord,
opus_val16 *mem,
int arch
) = {
celt_fir_c, /* non-sse */
@ -151,5 +151,17 @@ void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch
) = {
op_pvq_search_c, /* non-sse */
op_pvq_search_c,
MAY_HAVE_SSE2(op_pvq_search),
MAY_HAVE_SSE2(op_pvq_search),
MAY_HAVE_SSE2(op_pvq_search)
};
#endif
#endif
#endif

View file

@ -165,8 +165,9 @@ extern "C" {
#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
#define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046
#define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047
/* Macros to trigger compilation errors when the wrong types are provided to a CTL */
#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
@ -208,6 +209,9 @@ extern "C" {
#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */
#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */
#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */
#define OPUS_FRAMESIZE_80_MS 5007 /**< Use 80 ms frames */
#define OPUS_FRAMESIZE_100_MS 5008 /**< Use 100 ms frames */
#define OPUS_FRAMESIZE_120_MS 5009 /**< Use 120 ms frames */
/**@}*/
@ -566,7 +570,9 @@ extern "C" {
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
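
A hedged usage sketch, assuming an OpusEncoder *enc already created elsewhere: the new long frame sizes are requested through the same CTL as before.

opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(OPUS_FRAMESIZE_120_MS));
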
@ -581,7 +587,9 @@ extern "C" {
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
* <dt>OPUS_FRAMESIZE_80_MS</dt><dd>Use 80 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_100_MS</dt><dd>Use 100 ms frames.</dd>
* <dt>OPUS_FRAMESIZE_120_MS</dt><dd>Use 120 ms frames.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
@ -681,6 +689,30 @@ extern "C" {
*/
#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
/** If set to 1, disables the use of phase inversion for intensity stereo,
* improving the quality of mono downmixes, but slightly reducing normal
* stereo quality. Disabling phase inversion in the decoder does not comply
* with RFC 6716, although it does not cause any interoperability issue and
* is expected to become part of the Opus standard once RFC 6716 is updated
* by draft-ietf-codec-opus-update.
* @see OPUS_GET_PHASE_INVERSION_DISABLED
* @param[in] x <tt>opus_int32</tt>: Allowed values:
* <dl>
* <dt>0</dt><dd>Enable phase inversion (default).</dd>
* <dt>1</dt><dd>Disable phase inversion.</dd>
* </dl>
* @hideinitializer */
#define OPUS_SET_PHASE_INVERSION_DISABLED(x) OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int(x)
/** Gets the encoder's configured phase inversion status.
* @see OPUS_SET_PHASE_INVERSION_DISABLED
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
* <dl>
* <dt>0</dt><dd>Stereo phase inversion enabled (default).</dd>
* <dt>1</dt><dd>Stereo phase inversion disabled.</dd>
* </dl>
* @hideinitializer */
#define OPUS_GET_PHASE_INVERSION_DISABLED(x) OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST, __opus_check_int_ptr(x)
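
Usage sketch for the new CTL pair, again assuming a valid OpusEncoder *enc:

opus_int32 phase_inv_disabled;
opus_encoder_ctl(enc, OPUS_SET_PHASE_INVERSION_DISABLED(1));
opus_encoder_ctl(enc, OPUS_GET_PHASE_INVERSION_DISABLED(&phase_inv_disabled));
/* phase_inv_disabled now reads back as 1 */
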
/**@}*/
/** @defgroup opus_decoderctls Decoder related CTLs

View file

@ -273,7 +273,7 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_enc
unsigned char *mapping,
int application,
int *error
) OPUS_ARG_NONNULL(5);
) OPUS_ARG_NONNULL(4) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6);
/** Initialize a previously allocated multistream encoder state.
* The memory pointed to by \a st must be at least the size returned by
@ -342,7 +342,7 @@ OPUS_EXPORT int opus_multistream_surround_encoder_init(
int *coupled_streams,
unsigned char *mapping,
int application
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(5) OPUS_ARG_NONNULL(6) OPUS_ARG_NONNULL(7);
/** Encodes a multistream Opus frame.
* @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.

View file

@ -34,7 +34,7 @@
#define OPUS_TYPES_H
/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
#if (defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
#include <stdint.h>
typedef int16_t opus_int16;

View file

@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* Number of binary divisions, when not in low complexity mode */
#define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
#define MAX_ITERATIONS_A2NLSF_FIX 30
#define MAX_ITERATIONS_A2NLSF_FIX 16
/* Helper function for A2NLSF(..) */
/* Transforms polynomials from cos(n*f) to cos(f)^n */
@ -130,7 +130,7 @@ void silk_A2NLSF(
const opus_int d /* I Filter order (must be even) */
)
{
opus_int i, k, m, dd, root_ix, ffrac;
opus_int i, k, m, dd, root_ix, ffrac;
opus_int32 xlo, xhi, xmid;
opus_int32 ylo, yhi, ymid, thr;
opus_int32 nom, den;
@ -239,13 +239,13 @@ void silk_A2NLSF(
/* Set NLSFs to white spectrum and exit */
NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 );
for( k = 1; k < d; k++ ) {
NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] );
NLSF[ k ] = (opus_int16)silk_ADD16( NLSF[ k-1 ], NLSF[ 0 ] );
}
return;
}
/* Error: Apply progressively more bandwidth expansion and run again */
silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/
silk_bwexpander_32( a_Q16, d, 65536 - silk_LSHIFT( 1, i ) );
silk_A2NLSF_init( a_Q16, P, Q, dd );
p = P; /* Pointer to polynomial */

View file

@ -138,11 +138,11 @@ void silk_CNG(
gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
}
gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
/* Convert CNG NLSF to filter representation */
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order, psDec->arch );
/* Generate CNG signal, by synthesis filtering */
silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
@ -170,11 +170,11 @@ void silk_CNG(
}
/* Update states */
CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( CNG_sig_Q14[ MAX_LPC_ORDER + i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
/* Scale with Gain and add to input signal */
frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
}
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
} else {

View file

@ -39,6 +39,13 @@ POSSIBILITY OF SUCH DAMAGE.
/* first d output samples are set to zero */
/*******************************************/
/* OPT: Using celt_fir() for this function should be faster, but it may cause
integer overflows in intermediate values (not final results), which the
current implementation silences by casting to unsigned. Enabling
this should be safe in pretty much all cases, even though it is not technically
C89-compliant. */
#define USE_CELT_FIR 0
void silk_LPC_analysis_filter(
opus_int16 *out, /* O Output signal */
const opus_int16 *in, /* I Input signal */
@ -49,8 +56,7 @@ void silk_LPC_analysis_filter(
)
{
opus_int j;
#ifdef FIXED_POINT
opus_int16 mem[SILK_MAX_ORDER_LPC];
#if defined(FIXED_POINT) && USE_CELT_FIR
opus_int16 num[SILK_MAX_ORDER_LPC];
#else
int ix;
@ -62,15 +68,12 @@ void silk_LPC_analysis_filter(
silk_assert( (d & 1) == 0 );
silk_assert( d <= len );
#ifdef FIXED_POINT
#if defined(FIXED_POINT) && USE_CELT_FIR
silk_assert( d <= SILK_MAX_ORDER_LPC );
for ( j = 0; j < d; j++ ) {
num[ j ] = -B[ j ];
}
for (j=0;j<d;j++) {
mem[ j ] = in[ d - j - 1 ];
}
celt_fir( in + d, num, out + d, len - d, d, mem, arch );
celt_fir( in + d, num, out + d, len - d, d, arch );
for ( j = 0; j < d; j++ ) {
out[ j ] = 0;
}

View file

@ -0,0 +1,81 @@
/***********************************************************************
Copyright (c) 2013, Koen Vos. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "SigProc_FIX.h"
/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
void silk_LPC_fit(
opus_int16 *a_QOUT, /* O Output coefficients */
opus_int32 *a_QIN, /* I/O Input coefficients */
const opus_int QOUT, /* I Output Q domain */
const opus_int QIN, /* I Input Q domain */
const opus_int d /* I Filter order */
)
{
opus_int i, k, idx = 0;
opus_int32 maxabs, absval, chirp_Q16;
/* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
for( i = 0; i < 10; i++ ) {
/* Find maximum absolute value and its index */
maxabs = 0;
for( k = 0; k < d; k++ ) {
absval = silk_abs( a_QIN[k] );
if( absval > maxabs ) {
maxabs = absval;
idx = k;
}
}
maxabs = silk_RSHIFT_ROUND( maxabs, QIN - QOUT );
if( maxabs > silk_int16_MAX ) {
/* Reduce magnitude of prediction coefficients */
maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
chirp_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
silk_bwexpander_32( a_QIN, d, chirp_Q16 );
} else {
break;
}
}
if( i == 10 ) {
/* Reached the last iteration, clip the coefficients */
for( k = 0; k < d; k++ ) {
a_QOUT[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT ) );
a_QIN[ k ] = silk_LSHIFT( (opus_int32)a_QOUT[ k ], QIN - QOUT );
}
} else {
for( k = 0; k < d; k++ ) {
a_QOUT[ k ] = (opus_int16)silk_RSHIFT_ROUND( a_QIN[ k ], QIN - QOUT );
}
}
}
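
In real-number terms the Q16 arithmetic above computes, approximately,

$$\mathrm{chirp} \;\approx\; 0.999 \;-\; \frac{\mathrm{maxabs} - 32767}{\mathrm{maxabs}\,(\mathrm{idx}+1)},$$

so an overflow found early in the filter (small idx) produces a stronger bandwidth expansion, and up to ten passes are allowed before the coefficients are hard-clipped. (This is a reading of the code, not patch text.)
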

View file

@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "SigProc_FIX.h"
#include "define.h"
#define QA 24
#define A_LIMIT SILK_FIX_CONST( 0.99975, QA )
@ -38,117 +39,103 @@ POSSIBILITY OF SUCH DAMAGE.
/* Compute inverse of LPC prediction gain, and */
/* test if LPC coefficients are stable (all poles within unit circle) */
static opus_int32 LPC_inverse_pred_gain_QA( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 A_QA[ 2 ][ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
static opus_int32 LPC_inverse_pred_gain_QA_c( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
const opus_int order /* I Prediction order */
)
{
opus_int k, n, mult2Q;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA;
opus_int32 *Aold_QA, *Anew_QA;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;
Anew_QA = A_QA[ order & 1 ];
invGain_Q30 = (opus_int32)1 << 30;
invGain_Q30 = SILK_FIX_CONST( 1, 30 );
for( k = order - 1; k > 0; k-- ) {
/* Check for stability */
if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA );
rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );
/* rc_mult1_Q30 range: [ 1 : 2^30 ] */
rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */
silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update inverse gain */
/* invGain_Q30 range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
/* Swap pointers */
Aold_QA = Anew_QA;
Anew_QA = A_QA[ k & 1 ];
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update AR coefficient */
for( n = 0; n < k; n++ ) {
tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 );
Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q );
for( n = 0; n < (k + 1) >> 1; n++ ) {
opus_int64 tmp64;
tmp1 = A_QA[ n ];
tmp2 = A_QA[ k - n - 1 ];
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1,
MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ n ] = ( opus_int32 )tmp64;
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2,
MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
}
}
/* Check for stability */
if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA );
rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );
/* Range: [ 1 : 2^30 ] */
rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
/* Update inverse gain */
/* Range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= 1<<30 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
return invGain_Q30;
}
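
In exact arithmetic the rewritten loop is the Levinson step-down recursion: writing $r_k = -a^{(k)}_k$ for the reflection coefficient peeled off at order $k$,

$$a^{(k-1)}_n = \frac{a^{(k)}_n - r_k\,a^{(k)}_{k-1-n}}{1 - r_k^2}, \qquad \mathrm{invGain} = \prod_k \left(1 - r_k^2\right),$$

and the function reports instability (returns 0) as soon as some $|a^{(k)}_k|$ exceeds A_LIMIT, a pairwise update overflows 32 bits, or the running inverse gain falls below $1/\mathrm{MAX\_PREDICTION\_POWER\_GAIN}$. Updating $a_n$ and $a_{k-1-n}$ as a pair is what lets the rewrite work in place on a single array instead of the old two-row ping-pong buffer. (Again a reading of the code, not patch text.)
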
/* For input in Q12 domain */
opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
)
{
opus_int k;
opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
opus_int32 *Anew_QA;
opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
opus_int32 DC_resp = 0;
Anew_QA = Atmp_QA[ order & 1 ];
/* Increase Q domain of the AR coefficients */
for( k = 0; k < order; k++ ) {
DC_resp += (opus_int32)A_Q12[ k ];
Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
Atmp_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
}
/* If the DC is unstable, we don't even need to do the full calculations */
if( DC_resp >= 4096 ) {
return 0;
}
return LPC_inverse_pred_gain_QA( Atmp_QA, order );
return LPC_inverse_pred_gain_QA_c( Atmp_QA, order );
}
#ifdef FIXED_POINT
/* For input in Q24 domain */
opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int32 *A_Q24, /* I Prediction coefficients [order] */
const opus_int order /* I Prediction order */
)
{
opus_int k;
opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
opus_int32 *Anew_QA;
Anew_QA = Atmp_QA[ order & 1 ];
/* Increase Q domain of the AR coefficients */
for( k = 0; k < order; k++ ) {
Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA );
}
return LPC_inverse_pred_gain_QA( Atmp_QA, order );
}
#endif

View file

@ -130,6 +130,6 @@ void silk_LP_variable_cutoff(
/* ARMA low-pass filtering */
silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 );
silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1);
silk_biquad_alt_stride1( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length);
}
}

View file

@ -319,14 +319,6 @@ static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
return(tmp);
}
#undef silk_ADD_POS_SAT64
static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
opus_int64 tmp;
ops_count += 1;
tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)));
return(tmp);
}
#undef silk_LSHIFT8
static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
opus_int8 ret;
@ -699,7 +691,7 @@ return(ret);
#undef silk_LIMIT_32
static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
static OPUS_INLINE opus_int32 silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
{
opus_int32 ret;
ops_count += 6;

View file

@ -539,8 +539,7 @@ static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, cha
no checking needed for silk_POS_SAT32
no checking needed for silk_ADD_POS_SAT8
no checking needed for silk_ADD_POS_SAT16
no checking needed for silk_ADD_POS_SAT32
no checking needed for silk_ADD_POS_SAT64 */
no checking needed for silk_ADD_POS_SAT32 */
#undef silk_LSHIFT8
#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)

View file

@ -66,7 +66,8 @@ static OPUS_INLINE void silk_NLSF2A_find_poly(
void silk_NLSF2A(
opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
const opus_int d /* I filter order (should be even) */
const opus_int d, /* I filter order (should be even) */
int arch /* I Run-time architecture */
)
{
/* This ordering was found to maximize quality. It improves numerical accuracy of
@ -83,15 +84,14 @@ void silk_NLSF2A(
opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta;
opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ];
opus_int32 maxabs, absval, idx=0, sc_Q16;
silk_assert( LSF_COS_TAB_SZ_FIX == 128 );
silk_assert( d==10||d==16 );
silk_assert( d==10 || d==16 );
/* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */
ordering = d == 16 ? ordering16 : ordering10;
for( k = 0; k < d; k++ ) {
silk_assert(NLSF[k] >= 0 );
silk_assert( NLSF[k] >= 0 );
/* f_int on a scale 0-127 (rounded down) */
f_int = silk_RSHIFT( NLSF[k], 15 - 7 );
@ -126,52 +126,15 @@ void silk_NLSF2A(
a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */
}
/* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
for( i = 0; i < 10; i++ ) {
/* Find maximum absolute value and its index */
maxabs = 0;
for( k = 0; k < d; k++ ) {
absval = silk_abs( a32_QA1[k] );
if( absval > maxabs ) {
maxabs = absval;
idx = k;
}
}
maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */
/* Convert int32 coefficients to Q12 int16 coefs */
silk_LPC_fit( a_Q12, a32_QA1, 12, QA + 1, d );
if( maxabs > silk_int16_MAX ) {
/* Reduce magnitude of prediction coefficients */
maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
silk_bwexpander_32( a32_QA1, d, sc_Q16 );
} else {
break;
}
}
if( i == 10 ) {
/* Reached the last iteration, clip the coefficients */
for( i = 0; silk_LPC_inverse_pred_gain( a_Q12, d, arch ) == 0 && i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
/* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */
/* on the unscaled coefficients, convert to Q12 and measure again */
silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */
a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 );
}
} else {
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
}
for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
/* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */
/* on the unscaled coefficients, convert to Q12 and measure again */
silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
for( k = 0; k < d; k++ ) {
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
} else {
break;
a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */
}
}
}
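
The stabilization loop above contracts the poles geometrically: iteration $i$ uses a chirp factor $c_i = 1 - 2^{\,i+1}/2^{16}$, and silk_bwexpander_32 scales coefficient $a_k$ by $c_i^{\,k+1}$, pulling every pole toward the origin by the factor $c_i$ (a reading of the code, not patch text).
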

View file

@ -33,36 +33,44 @@ POSSIBILITY OF SUCH DAMAGE.
/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
void silk_NLSF_VQ(
opus_int32 err_Q26[], /* O Quantization errors [K] */
opus_int32 err_Q24[], /* O Quantization errors [K] */
const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */
const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */
const opus_int16 pWght_Q9[], /* I Codebook weights [K*LPC_order] */
const opus_int K, /* I Number of codebook vectors */
const opus_int LPC_order /* I Number of LPCs */
)
{
opus_int i, m;
opus_int32 diff_Q15, sum_error_Q30, sum_error_Q26;
opus_int i, m;
opus_int32 diff_Q15, diffw_Q24, sum_error_Q24, pred_Q24;
const opus_int16 *w_Q9_ptr;
const opus_uint8 *cb_Q8_ptr;
silk_assert( LPC_order <= 16 );
silk_assert( ( LPC_order & 1 ) == 0 );
/* Loop over codebook */
cb_Q8_ptr = pCB_Q8;
w_Q9_ptr = pWght_Q9;
for( i = 0; i < K; i++ ) {
sum_error_Q26 = 0;
for( m = 0; m < LPC_order; m += 2 ) {
/* Compute weighted squared quantization error for index m */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 );
sum_error_Q24 = 0;
pred_Q24 = 0;
for( m = LPC_order-2; m >= 0; m -= 2 ) {
/* Compute weighted absolute predictive quantization error for index m + 1 */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m + 1 ], (opus_int32)cb_Q8_ptr[ m + 1 ], 7 ); /* range: [ -32767 : 32767 ]*/
diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m + 1 ] );
sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) );
pred_Q24 = diffw_Q24;
/* Compute weighted squared quantization error for index m + 1 */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 );
/* Compute weighted absolute predictive quantization error for index m */
diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)cb_Q8_ptr[ m ], 7 ); /* range: [ -32767 : 32767 ]*/
diffw_Q24 = silk_SMULBB( diff_Q15, w_Q9_ptr[ m ] );
sum_error_Q24 = silk_ADD32( sum_error_Q24, silk_abs( silk_SUB_RSHIFT32( diffw_Q24, pred_Q24, 1 ) ) );
pred_Q24 = diffw_Q24;
sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 );
silk_assert( sum_error_Q26 >= 0 );
silk_assert( sum_error_Q30 >= 0 );
silk_assert( sum_error_Q24 >= 0 );
}
err_Q26[ i ] = sum_error_Q26;
err_Q24[ i ] = sum_error_Q24;
cb_Q8_ptr += LPC_order;
w_Q9_ptr += LPC_order;
}
}
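
With $\Delta_m$ the Q15 difference between the input and codebook entry $m$, and $w_m$ the Q9 weight, the rewritten loop accumulates, from index $d-1$ down to $0$ and up to fixed-point rounding,

$$E \;=\; \bigl|w_{d-1}\Delta_{d-1}\bigr| \;+\; \sum_{m=0}^{d-2}\Bigl|\,w_m\Delta_m - \tfrac{1}{2}\,w_{m+1}\Delta_{m+1}\Bigr|,$$

a weighted absolute error with first-order prediction between neighbouring coefficients, in place of the old unweighted squared error in Q26.
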

View file

@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
/* Predictive dequantizer for NLSF residuals */
static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */
static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */
opus_int16 x_Q10[], /* O Output [ order ] */
const opus_int8 indices[], /* I Quantization indices [ order ] */
const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */
@ -70,15 +70,9 @@ void silk_NLSF_decode(
opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
opus_int16 ec_ix[ MAX_LPC_ORDER ];
opus_int16 res_Q10[ MAX_LPC_ORDER ];
opus_int16 W_tmp_QW[ MAX_LPC_ORDER ];
opus_int32 W_tmp_Q9, NLSF_Q15_tmp;
opus_int32 NLSF_Q15_tmp;
const opus_uint8 *pCB_element;
/* Decode first stage */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 );
}
const opus_int16 *pCB_Wght_Q9;
/* Unpack entropy table indices and predictor for current CB1 index */
silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );
@ -86,13 +80,11 @@ void silk_NLSF_decode(
/* Predictive residual dequantizer */
silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order );
/* Weights from codebook vector */
silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order );
/* Apply inverse square-rooted weights and add to output */
/* Apply inverse square-rooted weights to first stage and add to output */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) );
NLSF_Q15_tmp = silk_ADD_LSHIFT32( silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), pCB_Wght_Q9[ i ] ), (opus_int16)pCB_element[ i ], 7 );
pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 );
}

View file

@ -84,7 +84,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
nStates = 1;
RD_Q25[ 0 ] = 0;
prev_out_Q10[ 0 ] = 0;
for( i = order - 1; ; i-- ) {
for( i = order - 1; i >= 0; i-- ) {
rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
in_Q10 = x_Q10[ i ];
for( j = 0; j < nStates; j++ ) {
@ -131,7 +131,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
}
if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
if( nStates <= NLSF_QUANT_DEL_DEC_STATES/2 ) {
/* double number of states and copy */
for( j = 0; j < nStates; j++ ) {
ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
@ -140,7 +140,7 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
ind[ j ][ i ] = ind[ j - nStates ][ i ];
}
} else if( i > 0 ) {
} else {
/* sort lower and upper half of RD_Q25, pairwise */
for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
@ -191,8 +191,6 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
}
} else { /* i == 0 */
break;
}
}

View file

@ -37,9 +37,9 @@ POSSIBILITY OF SUCH DAMAGE.
/***********************/
opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */
opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */
opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */
opus_int16 *pNLSF_Q15, /* I/O (Un)quantized NLSF vector [ LPC_ORDER ] */
const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */
const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */
const opus_int16 *pW_Q2, /* I NLSF weight vector [ LPC_ORDER ] */
const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */
const opus_int nSurvivors, /* I Max survivors after first stage */
const opus_int signalType /* I Signal type: 0/1/2 */
@ -47,21 +47,19 @@ opus_int32 silk_NLSF_encode( /* O Returns
{
opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7;
opus_int32 W_tmp_Q9, ret;
VARDECL( opus_int32, err_Q26 );
VARDECL( opus_int32, err_Q24 );
VARDECL( opus_int32, RD_Q25 );
VARDECL( opus_int, tempIndices1 );
VARDECL( opus_int8, tempIndices2 );
opus_int16 res_Q15[ MAX_LPC_ORDER ];
opus_int16 res_Q10[ MAX_LPC_ORDER ];
opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ];
opus_int16 W_tmp_QW[ MAX_LPC_ORDER ];
opus_int16 W_adj_Q5[ MAX_LPC_ORDER ];
opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
opus_int16 ec_ix[ MAX_LPC_ORDER ];
const opus_uint8 *pCB_element, *iCDF_ptr;
const opus_int16 *pCB_Wght_Q9;
SAVE_STACK;
silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
silk_assert( signalType >= 0 && signalType <= 2 );
silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
@ -69,12 +67,12 @@ opus_int32 silk_NLSF_encode( /* O Returns
silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
/* First stage: VQ */
ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
ALLOC( err_Q24, psNLSF_CB->nVectors, opus_int32 );
silk_NLSF_VQ( err_Q24, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->CB1_Wght_Q9, psNLSF_CB->nVectors, psNLSF_CB->order );
/* Sort the quantization errors */
ALLOC( tempIndices1, nSurvivors, opus_int );
silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
silk_insertion_sort_increasing( err_Q24, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
ALLOC( RD_Q25, nSurvivors, opus_int32 );
ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
@ -85,23 +83,12 @@ opus_int32 silk_NLSF_encode( /* O Returns
/* Residual after first stage */
pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ];
pCB_Wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ ind1 * psNLSF_CB->order ];
for( i = 0; i < psNLSF_CB->order; i++ ) {
NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 );
res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ];
}
/* Weights from codebook vector */
silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order );
/* Apply square-rooted weights */
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 );
}
/* Modify input weights accordingly */
for( i = 0; i < psNLSF_CB->order; i++ ) {
W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] );
W_tmp_Q9 = pCB_Wght_Q9[ i ];
res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ], W_tmp_Q9 ), 14 );
W_adj_Q5[ i ] = silk_DIV32_varQ( (opus_int32)pW_Q2[ i ], silk_SMULBB( W_tmp_Q9, W_tmp_Q9 ), 21 );
}
/* Unpack entropy table indices and predictor for current CB1 index */

View file

@ -37,7 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
const opus_int32 x_Q3[], /* I input in Q3 */
const opus_int16 x16[], /* I input */
opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -75,14 +75,14 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
void silk_NSQ_c
(
const silk_encoder_state *psEncC, /* I/O Encoder State */
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
@ -117,8 +117,7 @@ void silk_NSQ_c
LSF_interpolation_flag = 1;
}
ALLOC( sLTP_Q15,
psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
/* Set up pointers to start of sub frame */
@ -128,7 +127,7 @@ void silk_NSQ_c
for( k = 0; k < psEncC->nb_subfr; k++ ) {
A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */
silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
@ -154,13 +153,13 @@ void silk_NSQ_c
}
}
silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
silk_nsq_scale_states( psEncC, NSQ, x16, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
x_Q3 += psEncC->subfr_length;
x16 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
pxq += psEncC->subfr_length;
}
@ -169,7 +168,6 @@ void silk_NSQ_c
NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech and noise shaping signals */
/* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
RESTORE_STACK;
@ -250,7 +248,7 @@ void silk_noise_shape_quantizer(
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(&NSQ->sDiff_shp_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
@ -279,14 +277,27 @@ void silk_noise_shape_quantizer(
r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */
/* Flip sign depending on dither */
if ( NSQ->rand_seed < 0 ) {
r_Q10 = -r_Q10;
if( NSQ->rand_seed < 0 ) {
r_Q10 = -r_Q10;
}
r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
/* Find two quantization level candidates and measure their rate-distortion */
q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
if (Lambda_Q10 > 2048) {
/* For aggressive RDO, the bias becomes more than one pulse. */
int rdo_offset = Lambda_Q10/2 - 512;
if (q1_Q10 > rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 );
} else if (q1_Q10 < -rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 );
} else if (q1_Q10 < 0) {
q1_Q0 = -1;
} else {
q1_Q0 = 0;
}
}
if( q1_Q0 > 0 ) {
q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
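
A standalone sketch of the widened dead zone introduced above (an illustration, not the patch itself): once Lambda_Q10 exceeds 2048 the rate bias is worth more than one pulse, so residuals within +/- rdo_offset of zero collapse to level 0 or -1 rather than rounding normally.

/* Pick the lower of the two candidate quantization levels in Q0.
   Plain >> stands in for silk_RSHIFT, an arithmetic shift. */
static int quant_level_Q0(int q1_Q10, int Lambda_Q10)
{
    if (Lambda_Q10 > 2048) {
        int rdo_offset = Lambda_Q10/2 - 512;
        if (q1_Q10 >  rdo_offset) return (q1_Q10 - rdo_offset) >> 10;
        if (q1_Q10 < -rdo_offset) return (q1_Q10 + rdo_offset) >> 10;
        return q1_Q10 < 0 ? -1 : 0;
    }
    return q1_Q10 >> 10;
}
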
@ -337,7 +348,8 @@ void silk_noise_shape_quantizer(
/* Update states */
psLPC_Q14++;
*psLPC_Q14 = xq_Q14;
sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
NSQ->sDiff_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_sc_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( NSQ->sDiff_shp_Q14, n_AR_Q12, 2 );
NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
@ -356,7 +368,7 @@ void silk_noise_shape_quantizer(
static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
const opus_int32 x_Q3[], /* I input in Q3 */
const opus_int16 x16[], /* I input */
opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */
const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -368,28 +380,18 @@ static OPUS_INLINE void silk_nsq_scale_states(
)
{
opus_int i, lag;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26;
lag = pitchL[ subfr ];
inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
silk_assert( inv_gain_Q31 != 0 );
/* Calculate gain adjustment factor */
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
} else {
gain_adj_Q16 = (opus_int32)1 << 16;
}
/* Scale input */
inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
for( i = 0; i < psEncC->subfr_length; i++ ) {
x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
if( NSQ->rewhite_flag ) {
if( subfr == 0 ) {
@ -403,7 +405,9 @@ static OPUS_INLINE void silk_nsq_scale_states(
}
/* Adjust for changing gain */
if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
/* Scale long-term shaping state */
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
@ -417,6 +421,7 @@ static OPUS_INLINE void silk_nsq_scale_states(
}
NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
NSQ->sDiff_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sDiff_shp_Q14 );
/* Scale short-term prediction and shaping states */
for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
@ -425,5 +430,8 @@ static OPUS_INLINE void silk_nsq_scale_states(
for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
}
}
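
A quick Q-format check on the rewritten scaling: silk_SMULWW(a, b) computes $(a \cdot b) \gg 16$, so multiplying the plain Q0 PCM input by the Q26 inverse gain lands in Q10 ($0 + 26 - 16 = 10$), the domain the quantizer expects; the old path reached the same Q10 from Q3 input and a Q23 gain ($3 + 23 - 16 = 10$).
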

View file

@ -43,6 +43,7 @@ typedef struct {
opus_int32 Shape_Q14[ DECISION_DELAY ];
opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
opus_int32 LF_AR_Q14;
opus_int32 Diff_Q14;
opus_int32 Seed;
opus_int32 SeedInit;
opus_int32 RD_Q10;
@ -53,6 +54,7 @@ typedef struct {
opus_int32 RD_Q10;
opus_int32 xq_Q14;
opus_int32 LF_AR_Q14;
opus_int32 Diff_Q14;
opus_int32 sLTP_shp_Q14;
opus_int32 LPC_exc_Q14;
} NSQ_sample_struct;
@ -66,7 +68,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
const opus_int32 x_Q3[], /* I Input in Q3 */
const opus_int16 x16[], /* I Input */
opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -107,20 +109,20 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int predictLPCOrder, /* I Prediction filter order */
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
opus_int decisionDelay, /* I */
int arch /* I */
);
void silk_NSQ_del_dec_c(
const silk_encoder_state *psEncC, /* I/O Encoder State */
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int32 x_Q3[], /* I Prefiltered input signal */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
@ -159,6 +161,7 @@ void silk_NSQ_del_dec_c(
psDD->SeedInit = psDD->Seed;
psDD->RD_Q10 = 0;
psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14;
psDD->Diff_Q14 = NSQ->sDiff_shp_Q14;
psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
@ -186,8 +189,7 @@ void silk_NSQ_del_dec_c(
LSF_interpolation_flag = 1;
}
ALLOC( sLTP_Q15,
psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
@ -199,7 +201,7 @@ void silk_NSQ_del_dec_c(
for( k = 0; k < psEncC->nb_subfr; k++ ) {
A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
B_Q14 = &LTPCoef_Q14[ k * LTP_ORDER ];
AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
AR_shp_Q13 = &AR_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Noise shape parameters */
silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
@ -235,7 +237,8 @@ void silk_NSQ_del_dec_c(
psDD = &psDelDec[ Winner_ind ];
last_smple_idx = smpl_buf_idx + decisionDelay;
for( i = 0; i < decisionDelay; i++ ) {
last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
@ -257,7 +260,7 @@ void silk_NSQ_del_dec_c(
}
}
silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k,
psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
@ -265,7 +268,7 @@ void silk_NSQ_del_dec_c(
Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
x_Q3 += psEncC->subfr_length;
x16 += psEncC->subfr_length;
pulses += psEncC->subfr_length;
pxq += psEncC->subfr_length;
}
@ -286,7 +289,9 @@ void silk_NSQ_del_dec_c(
last_smple_idx = smpl_buf_idx + decisionDelay;
Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
for( i = 0; i < decisionDelay; i++ ) {
last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
last_smple_idx = ( last_smple_idx - 1 ) % DECISION_DELAY;
if( last_smple_idx < 0 ) last_smple_idx += DECISION_DELAY;
pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
@ -297,10 +302,10 @@ void silk_NSQ_del_dec_c(
/* Update states */
NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
NSQ->sDiff_shp_Q14 = psDD->Diff_Q14;
NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
/* Save quantized speech signal */
/* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
RESTORE_STACK;
@ -335,7 +340,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int predictLPCOrder, /* I Prediction filter order */
opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
opus_int decisionDelay, /* I */
int arch /* I */
)
@ -416,7 +421,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
/* Output of lowpass section */
tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
tmp2 = silk_SMLAWB( psDD->Diff_Q14, psDD->sAR2_Q14[ 0 ], warping_Q16 );
/* Output of allpass section */
tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
psDD->sAR2_Q14[ 0 ] = tmp2;
@ -462,6 +467,19 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Find two quantization level candidates and measure their rate-distortion */
q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
if (Lambda_Q10 > 2048) {
/* For aggressive RDO, the bias becomes more than one pulse. */
int rdo_offset = Lambda_Q10/2 - 512;
if (q1_Q10 > rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 - rdo_offset, 10 );
} else if (q1_Q10 < -rdo_offset) {
q1_Q0 = silk_RSHIFT( q1_Q10 + rdo_offset, 10 );
} else if (q1_Q10 < 0) {
q1_Q0 = -1;
} else {
q1_Q0 = 0;
}
}
if( q1_Q0 > 0 ) {
q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
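The branch above widens the quantizer's deadzone when the rate weight is large: for Lambda_Q10 > 2048 (lambda > 2 in Q10), the rate term biases the RD decision by more than half a pulse, so the candidate level is pulled toward zero by rdo_offset before rounding. A minimal stand-alone sketch of the same deadzone rounding (the function name is illustrative, not from the source):

/* Sketch: deadzone rounding used for aggressive RDO, Q10 input to Q0 level. */
static opus_int32 rdo_deadzone_round( opus_int32 q1_Q10, opus_int32 Lambda_Q10 )
{
    if( Lambda_Q10 > 2048 ) {
        opus_int32 rdo_offset = Lambda_Q10 / 2 - 512;
        if( q1_Q10 > rdo_offset ) {
            return silk_RSHIFT( q1_Q10 - rdo_offset, 10 );  /* shrink positive values */
        } else if( q1_Q10 < -rdo_offset ) {
            return silk_RSHIFT( q1_Q10 + rdo_offset, 10 );  /* shrink negative values */
        }
        return q1_Q10 < 0 ? -1 : 0;                         /* collapse the deadzone  */
    }
    return silk_RSHIFT( q1_Q10, 10 );
}

With Lambda_Q10 = 3072, for instance, rdo_offset = 1024: anything within one full pulse of zero collapses to 0 or -1.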
@ -515,7 +533,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 0 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB32( psSS[ 0 ].Diff_Q14, n_AR_Q14 );
psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
@ -529,21 +548,22 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
exc_Q14 = -exc_Q14;
}
/* Add predictions */
LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
/* Update states */
sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
psSS[ 1 ].Diff_Q14 = silk_SUB_LSHIFT32( xq_Q14, x_Q10[ i ], 4 );
sLF_AR_shp_Q14 = silk_SUB32( psSS[ 1 ].Diff_Q14, n_AR_Q14 );
psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
psSS[ 1 ].xq_Q14 = xq_Q14;
}
*smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */
*smpl_buf_idx = ( *smpl_buf_idx - 1 ) % DECISION_DELAY;
if( *smpl_buf_idx < 0 ) *smpl_buf_idx += DECISION_DELAY;
last_smple_idx = ( *smpl_buf_idx + decisionDelay ) % DECISION_DELAY;
/* Find winner */
RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
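The index updates a few lines above replace the old `& DECISION_DELAY_MASK` wrap with an explicit `% DECISION_DELAY` plus a sign fix-up: C's `%` can return a negative remainder, and unlike the mask trick the modulo form no longer requires DECISION_DELAY to be a power of two. A sketch of the wrap helper this pattern amounts to:

/* Sketch: wrap an index into [0, DECISION_DELAY), tolerating a negative step. */
static opus_int wrap_idx( opus_int idx )
{
    idx %= DECISION_DELAY;        /* may be negative in C when idx is negative */
    if( idx < 0 ) {
        idx += DECISION_DELAY;    /* fold back into the valid range */
    }
    return idx;
}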
@ -607,6 +627,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
psDD = &psDelDec[ k ];
psSS = &psSampleState[ k ][ 0 ];
psDD->LF_AR_Q14 = psSS->LF_AR_Q14;
psDD->Diff_Q14 = psSS->Diff_Q14;
psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14;
psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10;
@ -631,7 +652,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
const opus_int32 x_Q3[], /* I Input in Q3 */
const opus_int16 x16[], /* I Input */
opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */
const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
@ -645,29 +666,19 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
)
{
opus_int i, k, lag;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26;
NSQ_del_dec_struct *psDD;
lag = pitchL[ subfr ];
inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
silk_assert( inv_gain_Q31 != 0 );
/* Calculate gain adjustment factor */
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
} else {
gain_adj_Q16 = (opus_int32)1 << 16;
}
/* Scale input */
inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
inv_gain_Q26 = silk_RSHIFT_ROUND( inv_gain_Q31, 5 );
for( i = 0; i < psEncC->subfr_length; i++ ) {
x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
x_sc_Q10[ i ] = silk_SMULWW( x16[ i ], inv_gain_Q26 );
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
/* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
if( NSQ->rewhite_flag ) {
if( subfr == 0 ) {
@ -681,7 +692,9 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
}
/* Adjust for changing gain */
if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
/* Scale long-term shaping state */
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
@ -699,6 +712,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
/* Scale scalar states */
psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
psDD->Diff_Q14 = silk_SMULWW( gain_adj_Q16, psDD->Diff_Q14 );
/* Scale short-term prediction and shaping states */
for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
@ -712,5 +726,8 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states(
psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
}
}
/* Save inverse gain */
NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
}
}
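When the subframe gain changes, every stored state is rescaled by gain_adj_Q16 = prev_gain/gain in Q16 so the already-quantized history stays consistent with the new step size; note the function now saves prev_gain_Q16 at the end, after the comparison that decides whether rescaling is needed. Roughly (a sketch, not the exact source loop):

/* Sketch: rescale one state buffer when the quantizer gain changes. */
static void rescale_states( opus_int32 *buf, opus_int len,
                            opus_int32 prev_gain_Q16, opus_int32 gain_Q16 )
{
    opus_int i;
    /* gain_adj_Q16 ~= prev_gain / gain, computed as in the code above */
    opus_int32 gain_adj_Q16 = silk_DIV32_varQ( prev_gain_Q16, gain_Q16, 16 );
    for( i = 0; i < len; i++ ) {
        buf[ i ] = silk_SMULWW( gain_adj_Q16, buf[ i ] );
    }
}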

View file

@ -275,7 +275,7 @@ static OPUS_INLINE void silk_PLC_conceal(
/* Reduce random noise for unvoiced frames with high LPC gain */
opus_int32 invGain_Q30, down_scale_Q30;
invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order );
invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order, arch );
down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 );
down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 );
@ -328,8 +328,10 @@ static OPUS_INLINE void silk_PLC_conceal(
for( j = 0; j < LTP_ORDER; j++ ) {
B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 );
}
/* Gradually reduce excitation gain */
rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
if ( psDec->indices.signalType != TYPE_NO_VOICE_ACTIVITY ) {
/* Gradually reduce excitation gain */
rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
}
/* Slowly increase pitch lag */
psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 );

View file

@ -35,7 +35,7 @@ extern "C"
/*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */
#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */
#define SILK_MAX_ORDER_LPC 24 /* max order of the LPC analysis in schur() and k2a() */
#include <string.h> /* for memset(), memcpy(), memmove() */
#include "typedef.h"
@ -47,6 +47,11 @@ extern "C"
#include "x86/SigProc_FIX_sse.h"
#endif
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/biquad_alt_arm.h"
#include "arm/LPC_inv_pred_gain_arm.h"
#endif
/********************************************************************/
/* SIGNAL PROCESSING FUNCTIONS */
/********************************************************************/
@ -96,14 +101,22 @@ void silk_resampler_down2_3(
* slower than biquad() but uses more precise coefficients
* can handle (slowly) varying coefficients
*/
void silk_biquad_alt(
void silk_biquad_alt_stride1(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [2] */
opus_int16 *out, /* O output signal */
const opus_int32 len, /* I signal length (must be even) */
opus_int stride /* I Operate on interleaved signal if > 1 */
const opus_int32 len /* I signal length (must be even) */
);
void silk_biquad_alt_stride2_c(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
);
/* Variable order MA prediction error filter. */
@ -132,17 +145,11 @@ void silk_bwexpander_32(
/* Compute inverse of LPC prediction gain, and */
/* test if LPC coefficients are stable (all poles within unit circle) */
opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 silk_LPC_inverse_pred_gain_c( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
);
/* For input in Q24 domain */
opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int32 *A_Q24, /* I Prediction coefficients [order] */
const opus_int order /* I Prediction order */
);
/* Split signal in two decimated bands using first-order allpass filters */
void silk_ana_filt_bank_1(
const opus_int16 *in, /* I Input signal [N] */
@ -152,6 +159,14 @@ void silk_ana_filt_bank_1(
const opus_int32 N /* I Number of input samples */
);
#if !defined(OVERRIDE_silk_biquad_alt_stride2)
#define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_c(in, B_Q28, A_Q28, S, out, len))
#endif
#if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
#define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_c(A_Q12, order))
#endif
/********************************************************************/
/* SCALAR FUNCTIONS */
/********************************************************************/
@ -271,7 +286,17 @@ void silk_A2NLSF(
void silk_NLSF2A(
opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */
const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */
const opus_int d /* I filter order (should be even) */
const opus_int d, /* I filter order (should be even) */
int arch /* I Run-time architecture */
);
/* Convert int32 coefficients to int16 coefs and make sure there's no wrap-around */
void silk_LPC_fit(
opus_int16 *a_QOUT, /* O Output signal */
opus_int32 *a_QIN, /* I/O Input signal */
const opus_int QOUT, /* I Output Q domain */
const opus_int QIN, /* I Input Q domain */
const opus_int d /* I Filter order */
);
void silk_insertion_sort_increasing(
@ -471,8 +496,7 @@ static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
/* Add with saturation for positive input values */
#define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)))
#define silk_ADD_POS_SAT32(a, b) ((((opus_uint32)(a)+(opus_uint32)(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)))
#define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */
#define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */
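The silk_ADD_POS_SAT32 change above casts both operands to opus_uint32 before adding: signed overflow is undefined behaviour in C, while unsigned wraparound is well defined, so the sign-bit test on the sum becomes safe. (The unused 64-bit variant is dropped rather than fixed.) An equivalent function form, as a sketch:

/* Sketch: saturating add for non-negative inputs, without signed-overflow UB. */
static opus_int32 add_pos_sat32( opus_int32 a, opus_int32 b )
{
    opus_uint32 sum = (opus_uint32)a + (opus_uint32)b;   /* well-defined wrap */
    return ( sum & 0x80000000u ) ? silk_int32_MAX : (opus_int32)sum;
}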
@ -572,7 +596,9 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* Make sure to store the result as the seed for the next call (also in between */
/* frames), otherwise result won't be random at all. When only using some of the */
/* bits, take the most significant bits by right-shifting. */
#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165))
#define RAND_MULTIPLIER 196314165
#define RAND_INCREMENT 907633515
#define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
/* Add some multiplication functions that can be easily mapped to ARM. */
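silk_RAND is a plain linear congruential generator, now with its constants named; silk_MLA_ovflw makes the intentional 32-bit wraparound explicit. The update, and the recommended use of the most significant bits, look like this (sketch):

/* Sketch: the LCG behind silk_RAND; keep 'seed' across calls. */
static opus_int32 lcg_next( opus_int32 seed )
{
    /* seed = 907633515 + seed * 196314165, wrapping mod 2^32 */
    return (opus_int32)( (opus_uint32)RAND_INCREMENT +
                         (opus_uint32)seed * (opus_uint32)RAND_MULTIPLIER );
}
/* Take e.g. a 5-bit random value from the high bits:          */
/*   bits5 = (opus_uint32)lcg_next( seed ) >> 27;              */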

View file

@ -34,84 +34,95 @@ POSSIBILITY OF SUCH DAMAGE.
/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
void silk_VQ_WMat_EC_c(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */
opus_int32 *res_nrg_Q15, /* O best residual energy */
opus_int32 *rate_dist_Q8, /* O best total bitrate */
opus_int *gain_Q7, /* O sum of absolute LTP coefficients */
const opus_int16 *in_Q14, /* I input vector to be quantized */
const opus_int32 *W_Q18, /* I weighting matrix */
const opus_int32 *XX_Q17, /* I correlation matrix */
const opus_int32 *xX_Q17, /* I correlation vector */
const opus_int8 *cb_Q7, /* I codebook */
const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */
const opus_uint8 *cl_Q5, /* I code length for each codebook vector */
const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */
const opus_int subfr_len, /* I number of samples per subframe */
const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
opus_int L /* I number of vectors in codebook */
const opus_int L /* I number of vectors in codebook */
)
{
opus_int k, gain_tmp_Q7;
const opus_int8 *cb_row_Q7;
opus_int16 diff_Q14[ 5 ];
opus_int32 sum1_Q14, sum2_Q16;
opus_int32 neg_xX_Q24[ 5 ];
opus_int32 sum1_Q15, sum2_Q24;
opus_int32 bits_res_Q8, bits_tot_Q8;
/* Negate and convert to new Q domain */
neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 );
neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 );
neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 );
neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 );
neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 );
/* Loop over codebook */
*rate_dist_Q14 = silk_int32_MAX;
*rate_dist_Q8 = silk_int32_MAX;
*res_nrg_Q15 = silk_int32_MAX;
cb_row_Q7 = cb_Q7;
/* If things go really bad, at least *ind is set to something safe. */
*ind = 0;
for( k = 0; k < L; k++ ) {
opus_int32 penalty;
gain_tmp_Q7 = cb_gain_Q7[k];
diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );
diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );
/* Weighted rate */
sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
/* Quantization error: 1 - 2 * xX * cb + cb' * XX * cb */
sum1_Q15 = SILK_FIX_CONST( 1.001, 15 );
/* Penalty for too large gain */
sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
penalty = silk_LSHIFT32( silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 11 );
silk_assert( sum1_Q14 >= 0 );
/* first row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 0 ], XX_Q17[ 1 ], cb_row_Q7[ 1 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 2 ], cb_row_Q7[ 2 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 3 ], cb_row_Q7[ 3 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 4 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 0 ], cb_row_Q7[ 0 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 0 ] );
/* first row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] );
/* second row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[ 7 ], cb_row_Q7[ 2 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 8 ], cb_row_Q7[ 3 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 9 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 6 ], cb_row_Q7[ 1 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 1 ] );
/* second row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] );
/* third row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 14 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 12 ], cb_row_Q7[ 2 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 2 ] );
/* third row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] );
/* fourth row of XX_Q17 */
sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] );
sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 18 ], cb_row_Q7[ 3 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 3 ] );
/* fourth row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] );
sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] );
/* last row of W_Q18 */
sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );
sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );
silk_assert( sum1_Q14 >= 0 );
/* last row of XX_Q17 */
sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 );
sum2_Q24 = silk_MLA( sum2_Q24, XX_Q17[ 24 ], cb_row_Q7[ 4 ] );
sum1_Q15 = silk_SMLAWB( sum1_Q15, sum2_Q24, cb_row_Q7[ 4 ] );
/* find best */
if( sum1_Q14 < *rate_dist_Q14 ) {
*rate_dist_Q14 = sum1_Q14;
*ind = (opus_int8)k;
*gain_Q7 = gain_tmp_Q7;
if( sum1_Q15 >= 0 ) {
/* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */
bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) );
/* In the following line we reduce the codelength component by half ("-1"); this seems to slightly improve quality */
bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 );
if( bits_tot_Q8 <= *rate_dist_Q8 ) {
*rate_dist_Q8 = bits_tot_Q8;
*res_nrg_Q15 = sum1_Q15 + penalty;
*ind = (opus_int8)k;
*gain_Q7 = gain_tmp_Q7;
}
}
/* Go to next cbk vector */
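The rate translation above follows from the high-rate assumption (6 dB, i.e. a factor of 4 in energy, per bit per sample): bits = 0.5 * log2(residual energy) per sample. silk_lin2log() returns 128*log2(x), subtracting 15 << 7 removes the Q15 scaling, and the leftover factor 128 = 256 * 0.5 is exactly the half bit per log2 of energy in Q8, so silk_SMULBB( subfr_len, ... ) lands directly in Q8 bits. A sketch of just that conversion (illustrative helper, assuming Q15 energy in and Q8 bits out):

/* Sketch: residual energy (Q15) -> rate in Q8 bits, 6 dB per bit/sample. */
static opus_int32 res_nrg_to_bits_Q8( opus_int32 nrg_Q15, opus_int subfr_len )
{
    /* silk_lin2log() is 128*log2(x); (15 << 7) cancels the Q15 scaling. */
    return silk_SMULBB( subfr_len, silk_lin2log( nrg_Q15 ) - ( 15 << 7 ) );
}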

View file

@ -0,0 +1,57 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_LPC_INV_PRED_GAIN_ARM_H
# define SILK_LPC_INV_PRED_GAIN_ARM_H
# include "celt/arm/armcpu.h"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
);
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), PRESUME_NEON(silk_LPC_inverse_pred_gain)(A_Q12, order))
# endif
# endif
# if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK+1])(const opus_int16 *A_Q12, const opus_int order);
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((*SILK_LPC_INVERSE_PRED_GAIN_IMPL[(arch)&OPUS_ARCHMASK])(A_Q12, order))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_neon(A_Q12, order))
# endif
# endif
#endif /* end SILK_LPC_INV_PRED_GAIN_ARM_H */
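The pattern above is the standard Opus RTCD scheme: when NEON can be presumed at build time, the macro binds directly to the NEON function; when it must be detected at run time, the macro indexes a per-architecture function-pointer table by arch & OPUS_ARCHMASK. Call sites therefore stay uniform (sketch; the wrapper name is illustrative):

/* Sketch: the call site is identical no matter how RTCD resolves. */
opus_int32 check_filter( const opus_int16 *A_Q12, opus_int order, int arch )
{
    /* Expands to the C version, the NEON version, or a table lookup, */
    /* depending on OPUS_HAVE_RTCD / OPUS_ARM_PRESUME_NEON*.          */
    return silk_LPC_inverse_pred_gain( A_Q12, order, arch );
}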

View file

@ -0,0 +1,280 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "SigProc_FIX.h"
#include "define.h"
#define QA 24
#define A_LIMIT SILK_FIX_CONST( 0.99975, QA )
#define MUL32_FRAC_Q(a32, b32, Q) ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))
/* The difficulty is how to judge whether a 64-bit signed integer tmp64 has overflowed 32 bits,
 * since NEON has no 64-bit min, max or comparison instructions.
 * A failed idea is to compare the results of vmovn(tmp64) and vqmovn(tmp64) for equality.
 * However, that idea fails when tmp64 is something like 0xFFFFFFF980000000.
 * Here we know that mult2Q >= 1, so the highest bit (bit 63, the sign bit) of tmp64 must equal bit 62.
 * Shifting tmp64 left by 1 gives tmp64'. If high_half(tmp64') != 0 and high_half(tmp64') != -1,
 * then bits 31 to 63 of tmp64 cannot all be copies of the sign bit, and therefore tmp64 has overflowed 32 bits.
 * That is, we judge whether tmp64' > 0x00000000FFFFFFFF, or tmp64' <= 0xFFFFFFFF00000000.
 * We use a narrowing right shift by 31 bits to get tmp32', which saves data bandwidth and instructions.
 * That is, we judge whether tmp32' > 0 or tmp32' < -1.
 */
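For reference, the scalar leftover loop below performs the same check directly against silk_int32_MAX/MIN. An equivalent scalar formulation of "tmp64 fits in 32 bits", mirroring the sign-bit reasoning above (a sketch, not from the source):

/* Sketch: a 64-bit value fits in int32 iff bits 31..63 all copy the sign bit. */
static int fits_in_int32( opus_int64 tmp64 )
{
    /* (tmp64 >> 31) is 0 or -1 exactly when no 32-bit overflow occurred. */
    return (opus_int32)( tmp64 >> 31 ) == (opus_int32)( tmp64 >> 63 );
}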
/* Compute inverse of LPC prediction gain, and */
/* test if LPC coefficients are stable (all poles within unit circle) */
static OPUS_INLINE opus_int32 LPC_inverse_pred_gain_QA_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
const opus_int order /* I Prediction order */
)
{
opus_int k, n, mult2Q;
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;
opus_int32 max, min;
int32x4_t max_s32x4, min_s32x4;
int32x2_t max_s32x2, min_s32x2;
max_s32x4 = vdupq_n_s32( silk_int32_MIN );
min_s32x4 = vdupq_n_s32( silk_int32_MAX );
invGain_Q30 = SILK_FIX_CONST( 1, 30 );
for( k = order - 1; k > 0; k-- ) {
int32x2_t rc_Q31_s32x2, rc_mult2_s32x2;
int64x2_t mult2Q_s64x2;
/* Check for stability */
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );
/* rc_mult1_Q30 range: [ 1 : 2^30 ] */
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */
silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
/* Update inverse gain */
/* invGain_Q30 range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
/* Update AR coefficient */
rc_Q31_s32x2 = vdup_n_s32( rc_Q31 );
mult2Q_s64x2 = vdupq_n_s64( -mult2Q );
rc_mult2_s32x2 = vdup_n_s32( rc_mult2 );
for( n = 0; n < ( ( k + 1 ) >> 1 ) - 3; n += 4 ) {
/* We always calculate extra elements of the A_QA buffer when ( k % 4 ) != 0, to take advantage of SIMD parallelization. */
int32x4_t tmp1_s32x4, tmp2_s32x4, t0_s32x4, t1_s32x4, s0_s32x4, s1_s32x4, t_QA0_s32x4, t_QA1_s32x4;
int64x2_t t0_s64x2, t1_s64x2, t2_s64x2, t3_s64x2;
tmp1_s32x4 = vld1q_s32( A_QA + n );
tmp2_s32x4 = vld1q_s32( A_QA + k - n - 4 );
tmp2_s32x4 = vrev64q_s32( tmp2_s32x4 );
tmp2_s32x4 = vcombine_s32( vget_high_s32( tmp2_s32x4 ), vget_low_s32( tmp2_s32x4 ) );
t0_s32x4 = vqrdmulhq_lane_s32( tmp2_s32x4, rc_Q31_s32x2, 0 );
t1_s32x4 = vqrdmulhq_lane_s32( tmp1_s32x4, rc_Q31_s32x2, 0 );
t_QA0_s32x4 = vqsubq_s32( tmp1_s32x4, t0_s32x4 );
t_QA1_s32x4 = vqsubq_s32( tmp2_s32x4, t1_s32x4 );
t0_s64x2 = vmull_s32( vget_low_s32 ( t_QA0_s32x4 ), rc_mult2_s32x2 );
t1_s64x2 = vmull_s32( vget_high_s32( t_QA0_s32x4 ), rc_mult2_s32x2 );
t2_s64x2 = vmull_s32( vget_low_s32 ( t_QA1_s32x4 ), rc_mult2_s32x2 );
t3_s64x2 = vmull_s32( vget_high_s32( t_QA1_s32x4 ), rc_mult2_s32x2 );
t0_s64x2 = vrshlq_s64( t0_s64x2, mult2Q_s64x2 );
t1_s64x2 = vrshlq_s64( t1_s64x2, mult2Q_s64x2 );
t2_s64x2 = vrshlq_s64( t2_s64x2, mult2Q_s64x2 );
t3_s64x2 = vrshlq_s64( t3_s64x2, mult2Q_s64x2 );
t0_s32x4 = vcombine_s32( vmovn_s64( t0_s64x2 ), vmovn_s64( t1_s64x2 ) );
t1_s32x4 = vcombine_s32( vmovn_s64( t2_s64x2 ), vmovn_s64( t3_s64x2 ) );
s0_s32x4 = vcombine_s32( vshrn_n_s64( t0_s64x2, 31 ), vshrn_n_s64( t1_s64x2, 31 ) );
s1_s32x4 = vcombine_s32( vshrn_n_s64( t2_s64x2, 31 ), vshrn_n_s64( t3_s64x2, 31 ) );
max_s32x4 = vmaxq_s32( max_s32x4, s0_s32x4 );
min_s32x4 = vminq_s32( min_s32x4, s0_s32x4 );
max_s32x4 = vmaxq_s32( max_s32x4, s1_s32x4 );
min_s32x4 = vminq_s32( min_s32x4, s1_s32x4 );
t1_s32x4 = vrev64q_s32( t1_s32x4 );
t1_s32x4 = vcombine_s32( vget_high_s32( t1_s32x4 ), vget_low_s32( t1_s32x4 ) );
vst1q_s32( A_QA + n, t0_s32x4 );
vst1q_s32( A_QA + k - n - 4, t1_s32x4 );
}
for( ; n < (k + 1) >> 1; n++ ) {
opus_int64 tmp64;
tmp1 = A_QA[ n ];
tmp2 = A_QA[ k - n - 1 ];
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1,
MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ n ] = ( opus_int32 )tmp64;
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2,
MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q);
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
return 0;
}
A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
}
}
/* Check for stability */
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
return 0;
}
max_s32x2 = vmax_s32( vget_low_s32( max_s32x4 ), vget_high_s32( max_s32x4 ) );
min_s32x2 = vmin_s32( vget_low_s32( min_s32x4 ), vget_high_s32( min_s32x4 ) );
max_s32x2 = vmax_s32( max_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( max_s32x2 ), 32 ) ) );
min_s32x2 = vmin_s32( min_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( min_s32x2 ), 32 ) ) );
max = vget_lane_s32( max_s32x2, 0 );
min = vget_lane_s32( min_s32x2, 0 );
if( ( max > 0 ) || ( min < -1 ) ) {
return 0;
}
/* Set RC equal to negated AR coef */
rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );
/* Range: [ 1 : 2^30 ] */
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
/* Update inverse gain */
/* Range: [ 0 : 2^30 ] */
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
silk_assert( invGain_Q30 >= 0 );
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
return 0;
}
return invGain_Q30;
}
/* For input in Q12 domain */
opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
)
{
#ifdef OPUS_CHECK_ASM
const opus_int32 invGain_Q30_c = silk_LPC_inverse_pred_gain_c( A_Q12, order );
#endif
opus_int32 invGain_Q30;
if( ( SILK_MAX_ORDER_LPC != 24 ) || ( order & 1 )) {
invGain_Q30 = silk_LPC_inverse_pred_gain_c( A_Q12, order );
}
else {
opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
opus_int32 DC_resp;
int16x8_t t0_s16x8, t1_s16x8, t2_s16x8;
int32x4_t t0_s32x4;
const opus_int leftover = order & 7;
/* Increase Q domain of the AR coefficients */
t0_s16x8 = vld1q_s16( A_Q12 + 0 );
t1_s16x8 = vld1q_s16( A_Q12 + 8 );
t2_s16x8 = vld1q_s16( A_Q12 + 16 );
t0_s32x4 = vpaddlq_s16( t0_s16x8 );
switch( order - leftover )
{
case 24:
t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 );
/* Intentionally fall through */
case 16:
t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 );
vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) );
vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) );
/* Intentionally fall through */
case 8:
{
const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) );
const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 );
DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 );
vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) );
vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) );
}
break;
default:
DC_resp = 0;
break;
}
A_Q12 += order - leftover;
switch( leftover )
{
case 6:
DC_resp += (opus_int32)A_Q12[ 5 ];
DC_resp += (opus_int32)A_Q12[ 4 ];
/* Intentionally fall through */
case 4:
DC_resp += (opus_int32)A_Q12[ 3 ];
DC_resp += (opus_int32)A_Q12[ 2 ];
/* Intentionally fall through */
case 2:
DC_resp += (opus_int32)A_Q12[ 1 ];
DC_resp += (opus_int32)A_Q12[ 0 ];
/* Intentionally fall through */
default:
break;
}
/* If the DC is unstable, we don't even need to do the full calculations */
if( DC_resp >= 4096 ) {
invGain_Q30 = 0;
} else {
vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) );
vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) );
invGain_Q30 = LPC_inverse_pred_gain_QA_neon( Atmp_QA, order );
}
}
#ifdef OPUS_CHECK_ASM
silk_assert( invGain_Q30_c == invGain_Q30 );
#endif
return invGain_Q30;
}

View file

@ -0,0 +1,100 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_NSQ_DEL_DEC_ARM_H
#define SILK_NSQ_DEL_DEC_ARM_H
#include "celt/arm/armcpu.h"
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void silk_NSQ_del_dec_neon(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
PRESUME_NEON(silk_NSQ_del_dec)( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#endif
#endif
#if !defined(OVERRIDE_silk_NSQ_del_dec)
/*Is run-time CPU detection enabled on this platform?*/
#if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch)&OPUS_ARCHMASK])( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#elif defined(OPUS_ARM_PRESUME_NEON_INTR)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
silk_NSQ_del_dec_neon(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14))
#endif
#endif
#endif /* end SILK_NSQ_DEL_DEC_ARM_H */

File diff suppressed because it is too large

View file

@ -28,30 +28,31 @@ POSSIBILITY OF SUCH DAMAGE.
#define SILK_NSQ_NEON_H
#include "cpu_support.h"
#include "SigProc_FIX.h"
#undef silk_short_prediction_create_arch_coef
/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. */
static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order)
{
out[15] = in[0] << 15;
out[14] = in[1] << 15;
out[13] = in[2] << 15;
out[12] = in[3] << 15;
out[11] = in[4] << 15;
out[10] = in[5] << 15;
out[9] = in[6] << 15;
out[8] = in[7] << 15;
out[7] = in[8] << 15;
out[6] = in[9] << 15;
out[15] = silk_LSHIFT32(in[0], 15);
out[14] = silk_LSHIFT32(in[1], 15);
out[13] = silk_LSHIFT32(in[2], 15);
out[12] = silk_LSHIFT32(in[3], 15);
out[11] = silk_LSHIFT32(in[4], 15);
out[10] = silk_LSHIFT32(in[5], 15);
out[9] = silk_LSHIFT32(in[6], 15);
out[8] = silk_LSHIFT32(in[7], 15);
out[7] = silk_LSHIFT32(in[8], 15);
out[6] = silk_LSHIFT32(in[9], 15);
if (order == 16)
{
out[5] = in[10] << 15;
out[4] = in[11] << 15;
out[3] = in[12] << 15;
out[2] = in[13] << 15;
out[1] = in[14] << 15;
out[0] = in[15] << 15;
out[5] = silk_LSHIFT32(in[10], 15);
out[4] = silk_LSHIFT32(in[11], 15);
out[3] = silk_LSHIFT32(in[12], 15);
out[2] = silk_LSHIFT32(in[13], 15);
out[1] = silk_LSHIFT32(in[14], 15);
out[0] = silk_LSHIFT32(in[15], 15);
}
else
{

View file

@ -28,13 +28,62 @@ POSSIBILITY OF SUCH DAMAGE.
# include "config.h"
#endif
#include "main_FIX.h"
#include "NSQ.h"
#include "SigProc_FIX.h"
#if defined(OPUS_HAVE_RTCD)
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
) = {
silk_biquad_alt_stride2_c, /* ARMv4 */
silk_biquad_alt_stride2_c, /* EDSP */
silk_biquad_alt_stride2_c, /* Media */
silk_biquad_alt_stride2_neon, /* Neon */
};
opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK + 1])( /* O Returns inverse prediction gain in energy domain, Q30 */
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
const opus_int order /* I Prediction order */
) = {
silk_LPC_inverse_pred_gain_c, /* ARMv4 */
silk_LPC_inverse_pred_gain_c, /* EDSP */
silk_LPC_inverse_pred_gain_c, /* Media */
silk_LPC_inverse_pred_gain_neon, /* Neon */
};
void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
) = {
silk_NSQ_del_dec_c, /* ARMv4 */
silk_NSQ_del_dec_c, /* EDSP */
silk_NSQ_del_dec_c, /* Media */
silk_NSQ_del_dec_neon, /* Neon */
};
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
NEON version takes different parameters from the C version.
Instead, RTCD is done via if statements at the call sites.
@ -52,4 +101,23 @@ opus_int32
# endif
# if defined(FIXED_POINT) && \
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const SILK_WARPED_AUTOCORRELATION_FIX_IMPL[OPUS_ARCHMASK + 1])(
opus_int32 *corr, /* O Result [order + 1] */
opus_int *scale, /* O Scaling of the correlation vector */
const opus_int16 *input, /* I Input data to correlate */
const opus_int warping_Q16, /* I Warping coefficient */
const opus_int length, /* I Length of input */
const opus_int order /* I Correlation order (even) */
) = {
silk_warped_autocorrelation_FIX_c, /* ARMv4 */
silk_warped_autocorrelation_FIX_c, /* EDSP */
silk_warped_autocorrelation_FIX_c, /* Media */
silk_warped_autocorrelation_FIX_neon, /* Neon */
};
# endif
#endif /* OPUS_HAVE_RTCD */

View file

@ -0,0 +1,68 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_BIQUAD_ALT_ARM_H
# define SILK_BIQUAD_ALT_ARM_H
# include "celt/arm/armcpu.h"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void silk_biquad_alt_stride2_neon(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
);
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_silk_biquad_alt_stride2 (1)
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), PRESUME_NEON(silk_biquad_alt_stride2)(in, B_Q28, A_Q28, S, out, len))
# endif
# endif
# if !defined(OVERRIDE_silk_biquad_alt_stride2)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK+1])(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
);
# define OVERRIDE_silk_biquad_alt_stride2 (1)
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((*SILK_BIQUAD_ALT_STRIDE2_IMPL[(arch)&OPUS_ARCHMASK])(in, B_Q28, A_Q28, S, out, len))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_silk_biquad_alt_stride2 (1)
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_neon(in, B_Q28, A_Q28, S, out, len))
# endif
# endif
#endif /* end SILK_BIQUAD_ALT_ARM_H */

View file

@ -0,0 +1,156 @@
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#ifdef OPUS_CHECK_ASM
# include <string.h>
# include "stack_alloc.h"
#endif
#include "SigProc_FIX.h"
static inline void silk_biquad_alt_stride2_kernel( const int32x4_t A_L_s32x4, const int32x4_t A_U_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const int32x4_t in_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2 )
{
int32x4_t t_s32x4, out32_Q14_s32x4;
*out32_Q14_s32x2 = vadd_s32( vget_low_s32( *S_s32x4 ), t_s32x2 ); /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ) */
*S_s32x4 = vcombine_s32( vget_high_s32( *S_s32x4 ), vdup_n_s32( 0 ) ); /* S{0,1} = S{2,3}; S{2,3} = 0; */
*out32_Q14_s32x2 = vshl_n_s32( *out32_Q14_s32x2, 2 ); /* out32_Q14_{0,1} = silk_LSHIFT( silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ), 2 ); */
out32_Q14_s32x4 = vcombine_s32( *out32_Q14_s32x2, *out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} */
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_L_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_L_Q28 ) */
*S_s32x4 = vrsraq_n_s32( *S_s32x4, t_s32x4, 14 ); /* S{0,1} = S{2,3} + silk_RSHIFT_ROUND(); S{2,3} = silk_RSHIFT_ROUND(); */
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_U_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ) */
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S{0,1,2,3}, out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ); */
t_s32x4 = vqdmulhq_s32( in_s32x4, B_Q28_s32x4 ); /* silk_SMULWB( B_Q28[ {1,1,2,2} ], in{0,1,0,1} ) */
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S0, B_Q28[ {1,1,2,2} ], in{0,1,0,1} ); */
}
void silk_biquad_alt_stride2_neon(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
)
{
/* DIRECT FORM II TRANSPOSED (uses 2 element state vector per channel) */
opus_int k = 0;
const int32x2_t offset_s32x2 = vdup_n_s32( (1<<14) - 1 );
const int32x4_t offset_s32x4 = vcombine_s32( offset_s32x2, offset_s32x2 );
int16x4_t in_s16x4 = vdup_n_s16( 0 );
int16x4_t out_s16x4;
int32x2_t A_Q28_s32x2, A_L_s32x2, A_U_s32x2, B_Q28_s32x2, t_s32x2;
int32x4_t A_L_s32x4, A_U_s32x4, B_Q28_s32x4, S_s32x4, out32_Q14_s32x4;
int32x2x2_t t0_s32x2x2, t1_s32x2x2, t2_s32x2x2, S_s32x2x2;
#ifdef OPUS_CHECK_ASM
opus_int32 S_c[ 4 ];
VARDECL( opus_int16, out_c );
SAVE_STACK;
ALLOC( out_c, 2 * len, opus_int16 );
silk_memcpy( &S_c, S, sizeof( S_c ) );
silk_biquad_alt_stride2_c( in, B_Q28, A_Q28, S_c, out_c, len );
#endif
/* Negate A_Q28 values and split in two parts */
A_Q28_s32x2 = vld1_s32( A_Q28 );
A_Q28_s32x2 = vneg_s32( A_Q28_s32x2 );
A_L_s32x2 = vshl_n_s32( A_Q28_s32x2, 18 ); /* ( -A_Q28[] & 0x00003FFF ) << 18 */
A_L_s32x2 = vreinterpret_s32_u32( vshr_n_u32( vreinterpret_u32_s32( A_L_s32x2 ), 3 ) ); /* ( -A_Q28[] & 0x00003FFF ) << 15 */
A_U_s32x2 = vshr_n_s32( A_Q28_s32x2, 14 ); /* silk_RSHIFT( -A_Q28[], 14 ) */
A_U_s32x2 = vshl_n_s32( A_U_s32x2, 16 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 16 (Clip two leading bits to conform to C function.) */
A_U_s32x2 = vshr_n_s32( A_U_s32x2, 1 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 15 */
B_Q28_s32x2 = vld1_s32( B_Q28 );
t_s32x2 = vld1_s32( B_Q28 + 1 );
t0_s32x2x2 = vzip_s32( A_L_s32x2, A_L_s32x2 );
t1_s32x2x2 = vzip_s32( A_U_s32x2, A_U_s32x2 );
t2_s32x2x2 = vzip_s32( t_s32x2, t_s32x2 );
A_L_s32x4 = vcombine_s32( t0_s32x2x2.val[ 0 ], t0_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_L_Q28 */
A_U_s32x4 = vcombine_s32( t1_s32x2x2.val[ 0 ], t1_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_U_Q28 */
B_Q28_s32x4 = vcombine_s32( t2_s32x2x2.val[ 0 ], t2_s32x2x2.val[ 1 ] ); /* B_Q28[ {1,1,2,2} ] */
S_s32x4 = vld1q_s32( S ); /* S0 = S[ 0 ]; S3 = S[ 3 ]; */
S_s32x2x2 = vtrn_s32( vget_low_s32( S_s32x4 ), vget_high_s32( S_s32x4 ) ); /* S2 = S[ 1 ]; S1 = S[ 2 ]; */
S_s32x4 = vcombine_s32( S_s32x2x2.val[ 0 ], S_s32x2x2.val[ 1 ] );
for( ; k < len - 1; k += 2 ) {
int32x4_t in_s32x4[ 2 ], t_s32x4;
int32x2_t out32_Q14_s32x2[ 2 ];
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ], S[ 2 * i + 2 ], S[ 2 * i + 3 ]: Q12 */
in_s16x4 = vld1_s16( &in[ 2 * k ] ); /* in{0,1,2,3} = in[ 2 * k + {0,1,2,3} ]; */
in_s32x4[ 0 ] = vshll_n_s16( in_s16x4, 15 ); /* in{0,1,2,3} << 15 */
t_s32x4 = vqdmulhq_lane_s32( in_s32x4[ 0 ], B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1,2,3} ) */
in_s32x4[ 1 ] = vcombine_s32( vget_high_s32( in_s32x4[ 0 ] ), vget_high_s32( in_s32x4[ 0 ] ) ); /* in{2,3,2,3} << 15 */
in_s32x4[ 0 ] = vcombine_s32( vget_low_s32 ( in_s32x4[ 0 ] ), vget_low_s32 ( in_s32x4[ 0 ] ) ); /* in{0,1,0,1} << 15 */
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_low_s32 ( t_s32x4 ), in_s32x4[ 0 ], &S_s32x4, &out32_Q14_s32x2[ 0 ] );
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_high_s32( t_s32x4 ), in_s32x4[ 1 ], &S_s32x4, &out32_Q14_s32x2[ 1 ] );
/* Scale back to Q0 and saturate */
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2[ 0 ], out32_Q14_s32x2[ 1 ] ); /* out32_Q14_{0,1,2,3} */
out32_Q14_s32x4 = vaddq_s32( out32_Q14_s32x4, offset_s32x4 ); /* out32_Q14_{0,1,2,3} + (1<<14) - 1 */
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ) */
vst1_s16( &out[ 2 * k ], out_s16x4 ); /* out[ 2 * k + {0,1,2,3} ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ); */
}
/* Process leftover. */
if( k < len ) {
int32x4_t in_s32x4;
int32x2_t out32_Q14_s32x2;
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ]: Q12 */
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 0 ], in_s16x4, 0 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 1 ], in_s16x4, 1 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
in_s32x4 = vshll_n_s16( in_s16x4, 15 ); /* in{0,1} << 15 */
t_s32x2 = vqdmulh_lane_s32( vget_low_s32( in_s32x4 ), B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1} ) */
in_s32x4 = vcombine_s32( vget_low_s32( in_s32x4 ), vget_low_s32( in_s32x4 ) ); /* in{0,1,0,1} << 15 */
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, t_s32x2, in_s32x4, &S_s32x4, &out32_Q14_s32x2 );
/* Scale back to Q0 and saturate */
out32_Q14_s32x2 = vadd_s32( out32_Q14_s32x2, offset_s32x2 ); /* out32_Q14_{0,1} + (1<<14) - 1 */
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2, out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} + (1<<14) - 1 */
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,0,1} + (1<<14) - 1, 14 ) ) */
vst1_lane_s16( &out[ 2 * k + 0 ], out_s16x4, 0 ); /* out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_0 + (1<<14) - 1, 14 ) ); */
vst1_lane_s16( &out[ 2 * k + 1 ], out_s16x4, 1 ); /* out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_1 + (1<<14) - 1, 14 ) ); */
}
vst1q_lane_s32( &S[ 0 ], S_s32x4, 0 ); /* S[ 0 ] = S0; */
vst1q_lane_s32( &S[ 1 ], S_s32x4, 2 ); /* S[ 1 ] = S2; */
vst1q_lane_s32( &S[ 2 ], S_s32x4, 1 ); /* S[ 2 ] = S1; */
vst1q_lane_s32( &S[ 3 ], S_s32x4, 3 ); /* S[ 3 ] = S3; */
#ifdef OPUS_CHECK_ASM
silk_assert( !memcmp( S_c, S, sizeof( S_c ) ) );
silk_assert( !memcmp( out_c, out, 2 * len * sizeof( opus_int16 ) ) );
RESTORE_STACK;
#endif
}

View file

@ -28,6 +28,11 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef SILK_MACROS_ARMv4_H
#define SILK_MACROS_ARMv4_H
/* This macro only avoids the undefined behaviour from a left shift of
a negative value. It should only be used in macros that can't include
SigProc_FIX.h. In other cases, use silk_LSHIFT32(). */
#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b)))
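SAFE_SHL routes the shift through opus_uint32 because left-shifting a negative signed value is undefined behaviour in C; the unsigned shift plus the cast back yields the usual two's-complement result. For example (sketch):

/* Sketch: SAFE_SHL(-1, 16) == (opus_int32)0xFFFF0000 == -65536, with no UB, */
/* whereas the plain expression (-1) << 16 is undefined behaviour.           */
opus_int32 x = SAFE_SHL( -1, 16 );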
/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
#undef silk_SMULWB
static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
@ -38,7 +43,7 @@ static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
"#silk_SMULWB\n\t"
"smull %0, %1, %2, %3\n\t"
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b<<16)
: "%r"(a), "r"(SAFE_SHL(b,16))
);
return rd_hi;
}
@ -80,7 +85,7 @@ static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(a), "r"(b)
);
return (rd_hi<<16)+(rd_lo>>16);
return SAFE_SHL(rd_hi,16)+(rd_lo>>16);
}
#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
@ -96,8 +101,10 @@ static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
: "=&r"(rd_lo), "=&r"(rd_hi)
: "%r"(b), "r"(c)
);
return a+(rd_hi<<16)+(rd_lo>>16);
return a+SAFE_SHL(rd_hi,16)+(rd_lo>>16);
}
#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
#undef SAFE_SHL
#endif /* SILK_MACROS_ARMv4_H */

View file

@ -29,6 +29,11 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef SILK_MACROS_ARMv5E_H
#define SILK_MACROS_ARMv5E_H
/* This macro only avoids the undefined behaviour from a left shift of
a negative value. It should only be used in macros that can't include
SigProc_FIX.h. In other cases, use silk_LSHIFT32(). */
#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b)))
/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
#undef silk_SMULWB
static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
@@ -190,7 +195,7 @@ static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
"#silk_CLZ16\n\t"
"clz %0, %1;\n"
: "=r"(res)
: "r"(in16<<16|0x8000)
: "r"(SAFE_SHL(in16,16)|0x8000)
);
return res;
}
@@ -210,4 +215,6 @@ static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
}
#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
#undef SAFE_SHL
#endif /* SILK_MACROS_ARMv5E_H */

View file

@@ -39,14 +39,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include "SigProc_FIX.h"
/* Second order ARMA filter, alternative implementation */
void silk_biquad_alt(
void silk_biquad_alt_stride1(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [2] */
opus_int16 *out, /* O output signal */
const opus_int32 len, /* I signal length (must be even) */
opus_int stride /* I Operate on interleaved signal if > 1 */
const opus_int32 len /* I signal length (must be even) */
)
{
/* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
@@ -61,7 +60,7 @@ void silk_biquad_alt(
for( k = 0; k < len; k++ ) {
/* S[ 0 ], S[ 1 ]: Q12 */
inval = in[ k * stride ];
inval = in[ k ];
out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 );
S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 );
@@ -73,6 +72,50 @@ void silk_biquad_alt(
S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval );
/* Scale back to Q0 and saturate */
out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) );
out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) );
}
}
void silk_biquad_alt_stride2_c(
const opus_int16 *in, /* I input signal */
const opus_int32 *B_Q28, /* I MA coefficients [3] */
const opus_int32 *A_Q28, /* I AR coefficients [2] */
opus_int32 *S, /* I/O State vector [4] */
opus_int16 *out, /* O output signal */
const opus_int32 len /* I signal length (must be even) */
)
{
/* DIRECT FORM II TRANSPOSED (uses a 2 element state vector per channel) */
opus_int k;
opus_int32 A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14[ 2 ];
/* Negate A_Q28 values and split in two parts */
A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF; /* lower part */
A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 ); /* upper part */
A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF; /* lower part */
A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 ); /* upper part */
for( k = 0; k < len; k++ ) {
/* S[ 0 ], S[ 1 ], S[ 2 ], S[ 3 ]: Q12 */
out32_Q14[ 0 ] = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], in[ 2 * k + 0 ] ), 2 );
out32_Q14[ 1 ] = silk_LSHIFT( silk_SMLAWB( S[ 2 ], B_Q28[ 0 ], in[ 2 * k + 1 ] ), 2 );
S[ 0 ] = S[ 1 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A0_L_Q28 ), 14 );
S[ 2 ] = S[ 3 ] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A0_L_Q28 ), 14 );
S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14[ 0 ], A0_U_Q28 );
S[ 2 ] = silk_SMLAWB( S[ 2 ], out32_Q14[ 1 ], A0_U_Q28 );
S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], in[ 2 * k + 0 ] );
S[ 2 ] = silk_SMLAWB( S[ 2 ], B_Q28[ 1 ], in[ 2 * k + 1 ] );
S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 0 ], A1_L_Q28 ), 14 );
S[ 3 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14[ 1 ], A1_L_Q28 ), 14 );
S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14[ 0 ], A1_U_Q28 );
S[ 3 ] = silk_SMLAWB( S[ 3 ], out32_Q14[ 1 ], A1_U_Q28 );
S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], in[ 2 * k + 0 ] );
S[ 3 ] = silk_SMLAWB( S[ 3 ], B_Q28[ 2 ], in[ 2 * k + 1 ] );
/* Scale back to Q0 and saturate */
out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 0 ] + (1<<14) - 1, 14 ) );
out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14[ 1 ] + (1<<14) - 1, 14 ) );
}
}
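
silk_biquad_alt_stride2_c runs two interleaved channels through the same direct-form-II-transposed recurrence, with two state elements per channel. A floating-point sketch of that recurrence for one channel (illustrative names, not part of the patch):

    /* DF2T biquad: y[n] = b0*x[n] + s0;
       s0 = b1*x[n] - a1*y[n] + s1;  s1 = b2*x[n] - a2*y[n] */
    static void biquad_df2t(const float *x, float *y, int len,
                            const float b[3], const float a[2], float s[2])
    {
        int n;
        for (n = 0; n < len; n++) {
            float yn = b[0] * x[n] + s[0];
            s[0] = b[1] * x[n] - a[0] * yn + s[1];
            s[1] = b[2] * x[n] - a[1] * yn;
            y[n] = yn;
        }
    }

The fixed-point version negates the A_Q28 coefficients and splits them into 14-bit halves (A0_L/A0_U, A1_L/A1_U) so the Q28 multiplies stay within 32 bits.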

View file

@@ -45,7 +45,7 @@ void silk_bwexpander(
/* Bias in silk_SMULWB can lead to unstable filters */
for( i = 0; i < d - 1; i++ ) {
ar[ i ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ] ), 16 );
chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
}
ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 );
}
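
silk_bwexpander scales coefficient i by chirp^(i+1), pulling the AR poles toward the origin; the chirp_minus_one update just maintains the running power of the chirp factor. A float sketch (illustrative):

    /* Bandwidth expansion: ar[i] *= chirp^(i+1). */
    static void bwexpand(float *ar, int d, float chirp)
    {
        int i;
        float c = chirp;
        for (i = 0; i < d; i++) {
            ar[i] *= c;
            c *= chirp;
        }
    }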

View file

@@ -77,6 +77,9 @@ typedef struct {
/* I: Flag to enable in-band Forward Error Correction (FEC); 0/1 */
opus_int useInBandFEC;
/* I: Flag to actually code in-band Forward Error Correction (FEC) in the current packet; 0/1 */
opus_int LBRR_coded;
/* I: Flag to enable discontinuous transmission (DTX); 0/1 */
opus_int useDTX;
@@ -110,6 +113,11 @@ typedef struct {
/* O: Tells the Opus encoder we're ready to switch */
opus_int switchReady;
/* O: SILK Signal type */
opus_int signalType;
/* O: SILK offset (dithering) */
opus_int offset;
} silk_EncControlStruct;
/**************************************************************************/

View file

@@ -64,8 +64,7 @@ opus_int silk_control_SNR(
/* Find bitrate interval in table and interpolate */
for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) {
if( TargetRate_bps <= rateTable[ k ] ) {
frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ),
rateTable[ k ] - rateTable[ k - 1 ] );
frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ), rateTable[ k ] - rateTable[ k - 1 ] );
psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] );
break;
}
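
The rewrapped line is a plain fixed-point linear interpolation: frac_Q6 = 64*(rate - r[k-1]) / (r[k] - r[k-1]), after which the Q1 table value at k-1 is shifted to Q7 and the Q6 fraction times the Q1 table step supplies the remainder. A minimal sketch with hypothetical arguments (assumes r1 != r0, as the table guarantees):

    /* Interpolate a Q1 table value at 'rate' between (r0, s0) and (r1, s1); result in Q7. */
    static int snr_interp_q7(int rate, int r0, int r1, int s0_q1, int s1_q1)
    {
        int frac_q6 = ((rate - r0) << 6) / (r1 - r0);   /* Q6 fraction       */
        return (s0_q1 << 6) + frac_q6 * (s1_q1 - s0_q1); /* Q1*Q6 -> Q7 total */
    }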

View file

@@ -57,7 +57,7 @@ static opus_int silk_setup_complexity(
static OPUS_INLINE opus_int silk_setup_LBRR(
silk_encoder_state *psEncC, /* I/O */
const opus_int32 TargetRate_bps /* I */
const silk_EncControlStruct *encControl /* I */
);
@@ -65,7 +65,6 @@ static OPUS_INLINE opus_int silk_setup_LBRR(
opus_int silk_control_encoder(
silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */
silk_EncControlStruct *encControl, /* I Control structure */
const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */
const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */
const opus_int channelNb, /* I Channel number */
const opus_int force_fs_kHz
@@ -125,7 +124,7 @@ opus_int silk_control_encoder(
/********************************************/
/* Set LBRR usage */
/********************************************/
ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps );
ret += silk_setup_LBRR( &psEnc->sCmn, encControl );
psEnc->sCmn.controlled_since_last_payload = 1;
@@ -244,7 +243,6 @@ static opus_int silk_setup_fs(
if( psEnc->sCmn.fs_kHz != fs_kHz ) {
/* reset part of the state */
silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) );
silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) );
silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) );
silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
@@ -255,7 +253,6 @@ static opus_int silk_setup_fs(
/* Initialize non-zero parameters */
psEnc->sCmn.prevLag = 100;
psEnc->sCmn.first_frame_after_reset = 1;
psEnc->sPrefilt.lagPrev = 100;
psEnc->sShape.LastGainIndex = 10;
psEnc->sCmn.sNSQ.lagPrev = 100;
psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536;
@@ -293,13 +290,10 @@ static opus_int silk_setup_fs(
psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
}
if( psEnc->sCmn.fs_kHz == 16 ) {
psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );
psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
} else if( psEnc->sCmn.fs_kHz == 12 ) {
psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );
psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
} else {
psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );
psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
}
}
@@ -319,60 +313,75 @@ static opus_int silk_setup_complexity(
/* Set encoding complexity */
silk_assert( Complexity >= 0 && Complexity <= 10 );
if( Complexity < 2 ) {
if( Complexity < 1 ) {
psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX;
psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 );
psEncC->pitchEstimationLPCOrder = 6;
psEncC->shapingLPCOrder = 8;
psEncC->shapingLPCOrder = 12;
psEncC->la_shape = 3 * psEncC->fs_kHz;
psEncC->nStatesDelayedDecision = 1;
psEncC->useInterpolatedNLSFs = 0;
psEncC->LTPQuantLowComplexity = 1;
psEncC->NLSF_MSVQ_Survivors = 2;
psEncC->warping_Q16 = 0;
} else if( Complexity < 2 ) {
psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 );
psEncC->pitchEstimationLPCOrder = 8;
psEncC->shapingLPCOrder = 14;
psEncC->la_shape = 5 * psEncC->fs_kHz;
psEncC->nStatesDelayedDecision = 1;
psEncC->useInterpolatedNLSFs = 0;
psEncC->NLSF_MSVQ_Survivors = 3;
psEncC->warping_Q16 = 0;
} else if( Complexity < 3 ) {
psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX;
psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 );
psEncC->pitchEstimationLPCOrder = 6;
psEncC->shapingLPCOrder = 12;
psEncC->la_shape = 3 * psEncC->fs_kHz;
psEncC->nStatesDelayedDecision = 2;
psEncC->useInterpolatedNLSFs = 0;
psEncC->NLSF_MSVQ_Survivors = 2;
psEncC->warping_Q16 = 0;
} else if( Complexity < 4 ) {
psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 );
psEncC->pitchEstimationLPCOrder = 8;
psEncC->shapingLPCOrder = 10;
psEncC->shapingLPCOrder = 14;
psEncC->la_shape = 5 * psEncC->fs_kHz;
psEncC->nStatesDelayedDecision = 1;
psEncC->nStatesDelayedDecision = 2;
psEncC->useInterpolatedNLSFs = 0;
psEncC->LTPQuantLowComplexity = 0;
psEncC->NLSF_MSVQ_Survivors = 4;
psEncC->warping_Q16 = 0;
} else if( Complexity < 6 ) {
psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 );
psEncC->pitchEstimationLPCOrder = 10;
psEncC->shapingLPCOrder = 12;
psEncC->shapingLPCOrder = 16;
psEncC->la_shape = 5 * psEncC->fs_kHz;
psEncC->nStatesDelayedDecision = 2;
psEncC->useInterpolatedNLSFs = 1;
psEncC->LTPQuantLowComplexity = 0;
psEncC->NLSF_MSVQ_Survivors = 8;
psEncC->NLSF_MSVQ_Survivors = 6;
psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
} else if( Complexity < 8 ) {
psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 );
psEncC->pitchEstimationLPCOrder = 12;
psEncC->shapingLPCOrder = 14;
psEncC->shapingLPCOrder = 20;
psEncC->la_shape = 5 * psEncC->fs_kHz;
psEncC->nStatesDelayedDecision = 3;
psEncC->useInterpolatedNLSFs = 1;
psEncC->LTPQuantLowComplexity = 0;
psEncC->NLSF_MSVQ_Survivors = 16;
psEncC->NLSF_MSVQ_Survivors = 8;
psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
} else {
psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX;
psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 );
psEncC->pitchEstimationLPCOrder = 16;
psEncC->shapingLPCOrder = 16;
psEncC->shapingLPCOrder = 24;
psEncC->la_shape = 5 * psEncC->fs_kHz;
psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES;
psEncC->useInterpolatedNLSFs = 1;
psEncC->LTPQuantLowComplexity = 0;
psEncC->NLSF_MSVQ_Survivors = 32;
psEncC->NLSF_MSVQ_Survivors = 16;
psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
}
@@ -387,40 +396,26 @@ static opus_int silk_setup_complexity(
silk_assert( psEncC->warping_Q16 <= 32767 );
silk_assert( psEncC->la_shape <= LA_SHAPE_MAX );
silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX );
silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS );
return ret;
}
static OPUS_INLINE opus_int silk_setup_LBRR(
silk_encoder_state *psEncC, /* I/O */
const opus_int32 TargetRate_bps /* I */
const silk_EncControlStruct *encControl /* I */
)
{
opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR;
opus_int32 LBRR_rate_thres_bps;
LBRR_in_previous_packet = psEncC->LBRR_enabled;
psEncC->LBRR_enabled = 0;
if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
if( psEncC->fs_kHz == 8 ) {
LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS;
} else if( psEncC->fs_kHz == 12 ) {
LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS;
psEncC->LBRR_enabled = encControl->LBRR_coded;
if( psEncC->LBRR_enabled ) {
/* Set gain increase for coding LBRR excitation */
if( LBRR_in_previous_packet == 0 ) {
/* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */
psEncC->LBRR_GainIncreases = 7;
} else {
LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS;
}
LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) );
if( TargetRate_bps > LBRR_rate_thres_bps ) {
/* Set gain increase for coding LBRR excitation */
if( LBRR_in_previous_packet == 0 ) {
/* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */
psEncC->LBRR_GainIncreases = 7;
} else {
psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
}
psEncC->LBRR_enabled = 1;
psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
}
}
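
With this change the per-bandwidth rate-threshold tables are gone: the Opus layer decides whether FEC is actually coded (encControl->LBRR_coded) and silk_setup_LBRR only chooses how much to lower the LBRR excitation gain. A sketch of the resulting mapping, using SILK_FIX_CONST(0.4, 16) = 26214 (illustrative, not the patch itself):

    /* Gain increase steps for the LBRR copy: 7 after a non-LBRR packet,
       otherwise roughly 7 - 0.4 * PacketLoss_perc, never below 2. */
    static int lbrr_gain_increases(int had_lbrr, int loss_perc)
    {
        int dec;
        if (!had_lbrr)
            return 7;
        dec = (loss_perc * 26214) >> 16;   /* ~0.4 * loss_perc */
        return 7 - dec < 2 ? 2 : 7 - dec;
    }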

View file

@@ -39,23 +39,10 @@ extern "C"
unsigned long GetHighResolutionTime(void); /* O time in usec*/
/* make SILK_DEBUG dependent on compiler's _DEBUG */
#if defined _WIN32
#ifdef _DEBUG
#define SILK_DEBUG 1
#else
#define SILK_DEBUG 0
#endif
/* overrule the above */
#if 0
/* #define NO_ASSERTS*/
#undef SILK_DEBUG
#define SILK_DEBUG 1
#endif
#else
#define SILK_DEBUG 0
#endif
/* Set to 1 to enable DEBUG_STORE_DATA() macros for dumping
* intermediate signals from the codec.
*/
#define SILK_DEBUG 0
/* Flag for using timers */
#define SILK_TIC_TOC 0

View file

@@ -225,8 +225,6 @@ void silk_decode_core(
pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
}
/* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */
/* Update LPC filter state */
silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
pexc_Q14 += psDec->subfr_length;

View file

@@ -97,6 +97,7 @@ opus_int silk_decode_frame(
psDec->first_frame_after_reset = 0;
} else {
/* Handle packet loss by extrapolation */
psDec->indices.signalType = psDec->prevSignalType;
silk_PLC( psDec, psDecCtrl, pOut, 1, arch );
}

View file

@@ -52,7 +52,7 @@ void silk_decode_parameters(
silk_NLSF_decode( pNLSF_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB );
/* Convert NLSF parameters to AR prediction filter coefficients */
silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order );
silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order, psDec->arch );
/* If just reset, e.g., because internal Fs changed, do not allow interpolation */
/* improves the case of packet loss in the first frame after a switch */
@@ -69,7 +69,7 @@ void silk_decode_parameters(
}
/* Convert NLSF parameters to AR prediction filter coefficients */
silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order );
silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order, psDec->arch );
} else {
/* Copy LPC coefficients for first half from second half */
silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) );

View file

@@ -56,6 +56,7 @@ extern "C"
/* DTX settings */
#define NB_SPEECH_FRAMES_BEFORE_DTX 10 /* eq 200 ms */
#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */
#define DTX_ACTIVITY_THRESHOLD 0.1f
/* Maximum sampling frequency */
#define MAX_FS_KHZ 16
@@ -147,7 +148,7 @@ extern "C"
#define USE_HARM_SHAPING 1
/* Max LPC order of noise shaping filters */
#define MAX_SHAPE_LPC_ORDER 16
#define MAX_SHAPE_LPC_ORDER 24
#define HARM_SHAPE_FIR_TAPS 3
@@ -157,8 +158,7 @@ extern "C"
#define LTP_BUF_LENGTH 512
#define LTP_MASK ( LTP_BUF_LENGTH - 1 )
#define DECISION_DELAY 32
#define DECISION_DELAY_MASK ( DECISION_DELAY - 1 )
#define DECISION_DELAY 40
/* Number of subframes for excitation entropy coding */
#define SHELL_CODEC_FRAME_LENGTH 16
@@ -173,11 +173,7 @@ extern "C"
#define MAX_MATRIX_SIZE MAX_LPC_ORDER /* Max of LPC Order and LTP order */
#if( MAX_LPC_ORDER > DECISION_DELAY )
# define NSQ_LPC_BUF_LENGTH MAX_LPC_ORDER
#else
# define NSQ_LPC_BUF_LENGTH DECISION_DELAY
#endif
/***************************/
/* Voice activity detector */
@@ -205,7 +201,6 @@ extern "C"
/******************/
#define NLSF_W_Q 2
#define NLSF_VQ_MAX_VECTORS 32
#define NLSF_VQ_MAX_SURVIVORS 32
#define NLSF_QUANT_MAX_AMPLITUDE 4
#define NLSF_QUANT_MAX_AMPLITUDE_EXT 10
#define NLSF_QUANT_LEVEL_ADJ 0.1

View file

@@ -233,11 +233,10 @@ opus_int silk_Encode( /* O Returns error co
}
}
TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 );
for( n = 0; n < encControl->nChannelsInternal; n++ ) {
/* Force the side channel to the same rate as the mid */
opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
silk_assert( 0 );
RESTORE_STACK;
return ret;
@@ -416,7 +415,6 @@ opus_int silk_Encode( /* O Returns error co
/* Reset side channel encoder memory for first frame with side coding */
if( psEnc->prev_decode_only_middle == 1 ) {
silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
@@ -557,6 +555,10 @@ opus_int silk_Encode( /* O Returns error co
}
}
encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType;
encControl->offset = silk_Quantization_Offsets_Q10
[ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ]
[ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ];
RESTORE_STACK;
return ret;
}

View file

@@ -45,7 +45,7 @@ void silk_LTP_analysis_filter_FIX(
const opus_int16 *x_ptr, *x_lag_ptr;
opus_int16 Btmp_Q14[ LTP_ORDER ];
opus_int16 *LTP_res_ptr;
opus_int k, i, j;
opus_int k, i;
opus_int32 LTP_est;
x_ptr = x;
@@ -53,9 +53,12 @@
for( k = 0; k < nb_subfr; k++ ) {
x_lag_ptr = x_ptr - pitchL[ k ];
for( i = 0; i < LTP_ORDER; i++ ) {
Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ];
}
Btmp_Q14[ 0 ] = LTPCoef_Q14[ k * LTP_ORDER ];
Btmp_Q14[ 1 ] = LTPCoef_Q14[ k * LTP_ORDER + 1 ];
Btmp_Q14[ 2 ] = LTPCoef_Q14[ k * LTP_ORDER + 2 ];
Btmp_Q14[ 3 ] = LTPCoef_Q14[ k * LTP_ORDER + 3 ];
Btmp_Q14[ 4 ] = LTPCoef_Q14[ k * LTP_ORDER + 4 ];
/* LTP analysis FIR filter */
for( i = 0; i < subfr_length + pre_length; i++ ) {
@@ -63,9 +66,11 @@
/* Long-term prediction */
LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
for( j = 1; j < LTP_ORDER; j++ ) {
LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
}
LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 1 ], Btmp_Q14[ 1 ] );
LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 0 ], Btmp_Q14[ 2 ] );
LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -1 ], Btmp_Q14[ 3 ] );
LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -2 ], Btmp_Q14[ 4 ] );
LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/
/* Subtract long-term prediction */
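
The unrolled taps make the access pattern explicit: the 5-tap LTP predictor is centred LTP_ORDER/2 samples past the lagged position, reading x_lag_ptr[2] down to x_lag_ptr[-2]. The generic form of the kernel (illustrative; the caller guarantees the required history around the pointer):

    /* 5-tap long-term prediction with Q14 taps, rounded back to Q0. */
    static int ltp_predict(const short *x_lag_ptr, const short b_q14[5])
    {
        int j, est = 0;
        for (j = 0; j < 5; j++)
            est += (int)x_lag_ptr[2 - j] * b_q14[j];  /* Q14 accumulator */
        return (est + (1 << 13)) >> 14;               /* round to Q0     */
    }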

View file

@@ -37,12 +37,12 @@ POSSIBILITY OF SUCH DAMAGE.
#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
#define QA 25
#define N_BITS_HEAD_ROOM 2
#define N_BITS_HEAD_ROOM 3
#define MIN_RSHIFTS -16
#define MAX_RSHIFTS (32 - QA)
/* Compute reflection coefficients from input signal */
void silk_burg_modified(
void silk_burg_modified_c(
opus_int32 *res_nrg, /* O Residual energy */
opus_int *res_nrg_Q, /* O Residual energy Q value */
opus_int32 A_Q16[], /* O Prediction coefficients (length order) */
@@ -54,7 +54,7 @@ void silk_burg_modified(
int arch /* I Run-time architecture */
)
{
opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
opus_int k, n, s, lz, rshifts, reached_max_gain;
opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
const opus_int16 *x_ptr;
opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ];
@@ -63,27 +63,23 @@
opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ];
opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ];
opus_int32 xcorr[ SILK_MAX_ORDER_LPC ];
opus_int64 C0_64;
silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
/* Compute autocorrelations, added over subframes */
silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
if( rshifts > MAX_RSHIFTS ) {
C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
silk_assert( C0 > 0 );
rshifts = MAX_RSHIFTS;
C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch );
lz = silk_CLZ64(C0_64);
rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
if (rshifts > 0) {
C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts );
} else {
lz = silk_CLZ32( C0 ) - 1;
rshifts_extra = N_BITS_HEAD_ROOM - lz;
if( rshifts_extra > 0 ) {
rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
C0 = silk_RSHIFT32( C0, rshifts_extra );
} else {
rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
C0 = silk_LSHIFT32( C0, -rshifts_extra );
}
rshifts += rshifts_extra;
C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts );
}
CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */
silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
if( rshifts > 0 ) {
@@ -91,7 +87,7 @@ void silk_burg_modified(
x_ptr = x + s * subfr_length;
for( n = 1; n < D + 1; n++ ) {
C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
}
}
} else {
@@ -154,8 +150,11 @@
C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */
tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */
tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */
/* We sometimes get overflows in the multiplications (even beyond +/- 2^32),
but they cancel each other and the real result seems to always fit in a 32-bit
signed integer. This was determined experimentally, not theoretically (unfortunately). */
tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */
tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */
}
tmp1 = -tmp1; /* Q17 */
tmp2 = -tmp2; /* Q17 */
@@ -204,12 +203,14 @@
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */
rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */
/* Newton-Raphson iteration */
rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */
rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */
if( num < 0 ) {
/* Ensure the adjusted reflection coefficient has the original sign */
rc_Q31 = -rc_Q31;
if( rc_Q31 > 0 ) {
/* Newton-Raphson iteration */
rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */
rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */
if( num < 0 ) {
/* Ensure the adjusted reflection coefficient has the original sign */
rc_Q31 = -rc_Q31;
}
}
invGain_Q30 = minInvGain_Q30;
reached_max_gain = 1;
@@ -252,12 +253,12 @@
if( rshifts > 0 ) {
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
}
} else {
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts);
}
}
/* Approximate residual energy */
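
Instead of silk_sum_sqr_shift plus a separate extra-shift fixup, the energy is now accumulated in 64 bits and the shift is read straight off its magnitude: with lz = silk_CLZ64(C0_64), rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz leaves the reduced 32-bit C0 with the desired headroom. A sketch of that computation (illustrative helpers; the real code clamps the result to [MIN_RSHIFTS, MAX_RSHIFTS]):

    #include <stdint.h>

    static int count_leading_zeros64(uint64_t v)
    {
        int n = 0;
        while (n < 64 && !(v >> 63)) { v <<= 1; n++; }
        return n;
    }

    /* Right shift that reduces a 64-bit energy to 32 bits with 'head' spare bits. */
    static int energy_rshift(int64_t c0_64, int head)
    {
        return 32 + 1 + head - count_leading_zeros64((uint64_t)c0_64);
    }

The Newton-Raphson touch-up a few hunks up refines silk_SQRT_APPROX via rc = (rc + tmp2/rc)/2, and is now guarded by rc_Q31 > 0 so the refinement cannot divide by zero.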

View file

@@ -42,7 +42,8 @@ void silk_corrVector_FIX(
const opus_int L, /* I Length of vectors */
const opus_int order, /* I Max lag for correlation */
opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */
const opus_int rshifts /* I Right shifts of correlations */
const opus_int rshifts, /* I Right shifts of correlations */
int arch /* I Run-time architecture */
)
{
opus_int lag, i;
@@ -57,7 +58,7 @@
for( lag = 0; lag < order; lag++ ) {
inner_prod = 0;
for( i = 0; i < L; i++ ) {
inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts );
inner_prod = silk_ADD_RSHIFT32( inner_prod, silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts );
}
Xt[ lag ] = inner_prod; /* X[:,lag]'*t */
ptr1--; /* Go to next column of X */
@@ -65,7 +66,7 @@
} else {
silk_assert( rshifts == 0 );
for( lag = 0; lag < order; lag++ ) {
Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */
Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); /* X[:,lag]'*t */
ptr1--; /* Go to next column of X */
}
}
@@ -76,60 +77,54 @@
const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */
const opus_int L, /* I Length of vectors */
const opus_int order, /* I Max lag for correlation */
const opus_int head_room, /* I Desired headroom */
opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */
opus_int *rshifts /* I/O Right shifts of correlations */
opus_int32 *nrg, /* O Energy of x vector */
opus_int *rshifts, /* O Right shifts of correlations and energy */
int arch /* I Run-time architecture */
)
{
opus_int i, j, lag, rshifts_local, head_room_rshifts;
opus_int i, j, lag;
opus_int32 energy;
const opus_int16 *ptr1, *ptr2;
/* Calculate energy to find shift used to fit in 32 bits */
silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 );
/* Add shifts to get the desired head room */
head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 );
energy = silk_RSHIFT32( energy, head_room_rshifts );
rshifts_local += head_room_rshifts;
silk_sum_sqr_shift( nrg, rshifts, x, L + order - 1 );
energy = *nrg;
/* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */
/* Remove contribution of first order - 1 samples */
for( i = 0; i < order - 1; i++ ) {
energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local );
}
if( rshifts_local < *rshifts ) {
/* Adjust energy */
energy = silk_RSHIFT32( energy, *rshifts - rshifts_local );
rshifts_local = *rshifts;
energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), *rshifts );
}
/* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */
/* Fill out the diagonal of the correlation matrix */
matrix_ptr( XX, 0, 0, order ) = energy;
silk_assert( energy >= 0 );
ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */
for( j = 1; j < order; j++ ) {
energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) );
energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) );
energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), *rshifts ) );
energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), *rshifts ) );
matrix_ptr( XX, j, j, order ) = energy;
silk_assert( energy >= 0 );
}
ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */
/* Calculate the remaining elements of the correlation matrix */
if( rshifts_local > 0 ) {
if( *rshifts > 0 ) {
/* Right shifting used */
for( lag = 1; lag < order; lag++ ) {
/* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
energy = 0;
for( i = 0; i < L; i++ ) {
energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local );
energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), *rshifts );
}
/* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
matrix_ptr( XX, lag, 0, order ) = energy;
matrix_ptr( XX, 0, lag, order ) = energy;
for( j = 1; j < ( order - lag ); j++ ) {
energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) );
energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) );
energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), *rshifts ) );
energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), *rshifts ) );
matrix_ptr( XX, lag + j, j, order ) = energy;
matrix_ptr( XX, j, lag + j, order ) = energy;
}
@@ -138,7 +133,7 @@ void silk_corrMatrix_FIX(
} else {
for( lag = 1; lag < order; lag++ ) {
/* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
energy = silk_inner_prod_aligned( ptr1, ptr2, L );
energy = silk_inner_prod_aligned( ptr1, ptr2, L, arch );
matrix_ptr( XX, lag, 0, order ) = energy;
matrix_ptr( XX, 0, lag, order ) = energy;
/* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
@@ -151,6 +146,5 @@ void silk_corrMatrix_FIX(
ptr2--;/* Update pointer to first sample of next column (lag) in X */
}
}
*rshifts = rshifts_local;
}
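
silk_corrMatrix_FIX now returns the signal energy and a single shift instead of re-balancing headroom internally. The diagonal walk is a sliding-window update: each column energy differs from its neighbour by one sample entering and one leaving the window, so it costs O(1) per column rather than a full inner product. Float sketch (illustrative; x points at L + order - 1 samples, as in the fixed-point code):

    /* Energies of the 'order' columns of the data matrix X built from x. */
    static void column_energies(const float *x, int L, int order, float *diag)
    {
        int j;
        const float *p = &x[order - 1];   /* first sample of column 0 */
        float e = 0;
        for (j = 0; j < L; j++)
            e += p[j] * p[j];
        diag[0] = e;
        for (j = 1; j < order; j++) {
            e += p[-j] * p[-j] - p[L - j] * p[L - j];
            diag[j] = e;
        }
    }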

View file

@@ -29,6 +29,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
#include <stdlib.h>
#include "main_FIX.h"
#include "stack_alloc.h"
#include "tuning_parameters.h"
@@ -37,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
static OPUS_INLINE void silk_LBRR_encode_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */
const opus_int32 xfw_Q3[], /* I Input signal */
const opus_int16 x16[], /* I Input signal */
opus_int condCoding /* I The type of conditional coding used so far for this frame */
);
@@ -48,7 +49,7 @@ void silk_encode_do_VAD_FIX(
/****************************/
/* Voice Activity Detection */
/****************************/
silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
/**************************************************/
/* Convert speech activity into VAD and DTX flags */
@@ -94,6 +95,9 @@ opus_int silk_encode_frame_FIX(
opus_int16 ec_prevLagIndex_copy;
opus_int ec_prevSignalType_copy;
opus_int8 LastGainIndex_copy2;
opus_int gain_lock[ MAX_NB_SUBFR ] = {0};
opus_int16 best_gain_mult[ MAX_NB_SUBFR ];
opus_int best_sum[ MAX_NB_SUBFR ];
SAVE_STACK;
/* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
@@ -118,7 +122,6 @@
silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) );
if( !psEnc->sCmn.prefillFlag ) {
VARDECL( opus_int32, xfw_Q3 );
VARDECL( opus_int16, res_pitch );
VARDECL( opus_uint8, ec_buf_copy );
opus_int16 *res_pitch_frame;
@@ -132,7 +135,7 @@
/*****************************************/
/* Find pitch lags, initial LPC analysis */
/*****************************************/
silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame - psEnc->sCmn.ltp_mem_length, psEnc->sCmn.arch );
/************************/
/* Noise shape analysis */
@@ -142,23 +145,17 @@
/***************************************************/
/* Find linear prediction coefficients (LPC + LTP) */
/***************************************************/
silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, condCoding );
/****************************************/
/* Process gains */
/****************************************/
silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding );
/*****************************************/
/* Prefiltering for noise shaper */
/*****************************************/
ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 );
silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );
/****************************************/
/* Low Bitrate Redundant Encoding */
/****************************************/
silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding );
silk_LBRR_encode_FIX( psEnc, &sEncCtrl, x_frame, condCoding );
/* Loop over quantizer and entropy coding to control bitrate */
maxIter = 6;
@@ -194,13 +191,19 @@
/* Noise shaping quantization */
/*****************************************/
if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses,
sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14,
sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
psEnc->sCmn.arch );
} else {
silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, x_frame, psEnc->sCmn.pulses,
sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR_Q13, sEncCtrl.HarmShapeGain_Q14,
sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14,
psEnc->sCmn.arch);
}
if ( iter == maxIter && !found_lower ) {
silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
}
/****************************************/
@@ -216,6 +219,33 @@
nBits = ec_tell( psRangeEnc );
/* If we still bust after the last iteration, do some damage control. */
if ( iter == maxIter && !found_lower && nBits > maxBits ) {
silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
/* Keep gains the same as the last frame. */
psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
psEnc->sCmn.indices.GainsIndices[ i ] = 4;
}
if (condCoding != CODE_CONDITIONALLY) {
psEnc->sCmn.indices.GainsIndices[ 0 ] = sEncCtrl.lastGainIndexPrev;
}
psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
/* Clear all pulses. */
for ( i = 0; i < psEnc->sCmn.frame_length; i++ ) {
psEnc->sCmn.pulses[ i ] = 0;
}
silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
nBits = ec_tell( psRangeEnc );
}
if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
break;
}
@@ -263,15 +293,35 @@
break;
}
if ( !found_lower && nBits > maxBits ) {
int j;
for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
int sum=0;
for ( j = i*psEnc->sCmn.subfr_length; j < (i+1)*psEnc->sCmn.subfr_length; j++ ) {
sum += abs( psEnc->sCmn.pulses[j] );
}
if ( iter == 0 || (sum < best_sum[i] && !gain_lock[i]) ) {
best_sum[i] = sum;
best_gain_mult[i] = gainMult_Q8;
} else {
gain_lock[i] = 1;
}
}
}
if( ( found_lower & found_upper ) == 0 ) {
/* Adjust gain according to high-rate rate/distortion curve */
opus_int32 gain_factor_Q16;
gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
if( nBits > maxBits ) {
gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
if (gainMult_Q8 < 16384) {
gainMult_Q8 *= 2;
} else {
gainMult_Q8 = 32767;
}
} else {
opus_int32 gain_factor_Q16;
gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
}
gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
} else {
/* Adjust gain by interpolating */
gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
@@ -285,9 +335,15 @@
}
for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
opus_int16 tmp;
if ( gain_lock[i] ) {
tmp = best_gain_mult[i];
} else {
tmp = gainMult_Q8;
}
sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], tmp ), 8 );
}
/* Quantize gains */
psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
@@ -329,7 +385,7 @@
static OPUS_INLINE void silk_LBRR_encode_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */
const opus_int32 xfw_Q3[], /* I Input signal */
const opus_int16 x16[], /* I Input signal */
opus_int condCoding /* I The type of conditional coding used so far for this frame */
)
{
@@ -368,15 +424,15 @@
/* Noise shaping quantization */
/*****************************************/
if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16,
psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
} else {
silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, x16,
psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
psEncCtrl->AR_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch );
}
/* Restore original gains */
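
The enlarged rate-control loop adds two safety nets: a per-subframe gain lock that freezes the best multiplier once raising it stops reducing the pulse count, and a damage-control path that, on the final iteration, falls back to index-4 gains and an all-zero pulse vector so the frame is guaranteed to fit. While no lower bound has been found, an over-budget frame now doubles the Q8 gain multiplier instead of following the rate/distortion curve. That step in isolation (illustrative):

    /* Saturating doubling of the Q8 gain multiplier while still over budget. */
    static int double_gain_mult_q8(int gain_mult_q8)
    {
        return gain_mult_q8 < 16384 ? 2 * gain_mult_q8 : 32767;
    }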

View file

@@ -92,10 +92,10 @@ void silk_find_LPC_FIX(
silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder );
/* Convert to LPC for residual energy evaluation */
silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder );
silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder, psEncC->arch );
/* Calculate residual energy with NLSF interpolation */
silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder );
silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch );
silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder );
silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder );

View file

@@ -32,213 +32,68 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main_FIX.h"
#include "tuning_parameters.h"
/* Head room for correlations */
#define LTP_CORRS_HEAD_ROOM 2
void silk_fit_LTP(
opus_int32 LTP_coefs_Q16[ LTP_ORDER ],
opus_int16 LTP_coefs_Q14[ LTP_ORDER ]
);
void silk_find_LTP_FIX(
opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */
opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */
opus_int *LTPredCodGain_Q7, /* O LTP coding gain */
const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */
opus_int32 XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Correlation matrix */
opus_int32 xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ], /* O Correlation vector */
const opus_int16 r_ptr[], /* I Residual signal after LPC */
const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */
const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */
const opus_int subfr_length, /* I subframe length */
const opus_int nb_subfr, /* I number of subframes */
const opus_int mem_offset, /* I number of samples in LTP memory */
opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */
const opus_int subfr_length, /* I Subframe length */
const opus_int nb_subfr, /* I Number of subframes */
int arch /* I Run-time architecture */
)
{
opus_int i, k, lshift;
const opus_int16 *r_ptr, *lag_ptr;
opus_int16 *b_Q14_ptr;
opus_int i, k, extra_shifts;
opus_int xx_shifts, xX_shifts, XX_shifts;
const opus_int16 *lag_ptr;
opus_int32 *XXLTP_Q17_ptr, *xXLTP_Q17_ptr;
opus_int32 xx, nrg, temp;
opus_int32 regu;
opus_int32 *WLTP_ptr;
opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26;
opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits;
opus_int32 temp32, denom32;
opus_int extra_shifts;
opus_int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs;
opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16;
opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];
opus_int32 wd, m_Q12;
b_Q14_ptr = b_Q14;
WLTP_ptr = WLTP;
r_ptr = &r_lpc[ mem_offset ];
xXLTP_Q17_ptr = xXLTP_Q17;
XXLTP_Q17_ptr = XXLTP_Q17;
for( k = 0; k < nb_subfr; k++ ) {
lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );
silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */
/* Assure headroom */
LZs = silk_CLZ32( rr[k] );
if( LZs < LTP_CORRS_HEAD_ROOM ) {
rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs );
rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );
}
corr_rshifts[ k ] = rr_shifts;
silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
/* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */
silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */
if( corr_rshifts[ k ] > rr_shifts ) {
rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */
}
silk_assert( rr[ k ] >= 0 );
regu = 1;
regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER );
silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */
/* Limit and store in Q14 */
silk_fit_LTP( b_Q16, b_Q14_ptr );
/* Calculate residual energy */
nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */
/* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */
extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM );
denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */
silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */
denom32 = silk_max( denom32, 1 );
silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */
temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */
temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */
/* Limit temp such that the below scaling never wraps around */
WLTP_max = 0;
for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max );
}
lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */
silk_assert( 26 - 18 + lshift >= 0 );
if( 26 - 18 + lshift < 31 ) {
temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) );
}
silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */
w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */
silk_assert( w[k] >= 0 );
r_ptr += subfr_length;
b_Q14_ptr += LTP_ORDER;
WLTP_ptr += LTP_ORDER * LTP_ORDER;
}
maxRshifts = 0;
for( k = 0; k < nb_subfr; k++ ) {
maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts );
}
/* Compute LTP coding gain */
if( LTPredCodGain_Q7 != NULL ) {
LPC_LTP_res_nrg = 0;
LPC_res_nrg = 0;
silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */
for( k = 0; k < nb_subfr; k++ ) {
LPC_res_nrg = silk_ADD32( LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
}
LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */
div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 );
*LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) );
silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) );
}
/* smoothing */
/* d = sum( B, 1 ); */
b_Q14_ptr = b_Q14;
for( k = 0; k < nb_subfr; k++ ) {
d_Q14[ k ] = 0;
for( i = 0; i < LTP_ORDER; i++ ) {
d_Q14[ k ] += b_Q14_ptr[ i ];
}
b_Q14_ptr += LTP_ORDER;
}
/* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */
/* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */
max_abs_d_Q14 = 0;
max_w_bits = 0;
for( k = 0; k < nb_subfr; k++ ) {
max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) );
/* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */
/* Find bits needed in Q( 18 - maxRshifts ) */
max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts );
}
/* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */
silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) );
/* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */
extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14;
/* Subtract what we got available; bits in output var plus maxRshifts */
extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */
extra_shifts = silk_max_int( extra_shifts, 0 );
maxRshifts_wxtra = maxRshifts + extra_shifts;
temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */
wd = 0;
for( k = 0; k < nb_subfr; k++ ) {
/* w has at least 2 bits of headroom so no overflow should happen */
temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */
wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */
}
m_Q12 = silk_DIV32_varQ( wd, temp32, 12 );
b_Q14_ptr = b_Q14;
for( k = 0; k < nb_subfr; k++ ) {
/* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */
if( 2 - corr_rshifts[k] > 0 ) {
temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] );
silk_sum_sqr_shift( &xx, &xx_shifts, r_ptr, subfr_length + LTP_ORDER ); /* xx in Q( -xx_shifts ) */
silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, XXLTP_Q17_ptr, &nrg, &XX_shifts, arch ); /* XXLTP_Q17_ptr and nrg in Q( -XX_shifts ) */
extra_shifts = xx_shifts - XX_shifts;
if( extra_shifts > 0 ) {
/* Shift XX */
xX_shifts = xx_shifts;
for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
XXLTP_Q17_ptr[ i ] = silk_RSHIFT32( XXLTP_Q17_ptr[ i ], extra_shifts ); /* Q( -xX_shifts ) */
}
nrg = silk_RSHIFT32( nrg, extra_shifts ); /* Q( -xX_shifts ) */
} else if( extra_shifts < 0 ) {
/* Shift xx */
xX_shifts = XX_shifts;
xx = silk_RSHIFT32( xx, -extra_shifts ); /* Q( -xX_shifts ) */
} else {
temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 );
xX_shifts = xx_shifts;
}
silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xXLTP_Q17_ptr, xX_shifts, arch ); /* xXLTP_Q17_ptr in Q( -xX_shifts ) */
g_Q26 = silk_MUL(
silk_DIV32(
SILK_FIX_CONST( LTP_SMOOTHING, 26 ),
silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */
silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */
temp32 = 0;
for( i = 0; i < LTP_ORDER; i++ ) {
delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */
temp32 += delta_b_Q14[ i ]; /* Q14 */
/* At this point all correlations are in Q(-xX_shifts) */
temp = silk_SMLAWB( 1, nrg, SILK_FIX_CONST( LTP_CORR_INV_MAX, 16 ) );
temp = silk_max( temp, xx );
TIC(div)
#if 0
for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
XXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( XXLTP_Q17_ptr[ i ], temp, 17 );
}
temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */
for( i = 0; i < LTP_ORDER; i++ ) {
b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 );
xXLTP_Q17_ptr[ i ] = silk_DIV32_varQ( xXLTP_Q17_ptr[ i ], temp, 17 );
}
b_Q14_ptr += LTP_ORDER;
}
}
void silk_fit_LTP(
opus_int32 LTP_coefs_Q16[ LTP_ORDER ],
opus_int16 LTP_coefs_Q14[ LTP_ORDER ]
)
{
opus_int i;
for( i = 0; i < LTP_ORDER; i++ ) {
LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) );
#else
for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
XXLTP_Q17_ptr[ i ] = (opus_int32)( silk_LSHIFT64( (opus_int64)XXLTP_Q17_ptr[ i ], 17 ) / temp );
}
for( i = 0; i < LTP_ORDER; i++ ) {
xXLTP_Q17_ptr[ i ] = (opus_int32)( silk_LSHIFT64( (opus_int64)xXLTP_Q17_ptr[ i ], 17 ) / temp );
}
#endif
TOC(div)
r_ptr += subfr_length;
XXLTP_Q17_ptr += LTP_ORDER * LTP_ORDER;
xXLTP_Q17_ptr += LTP_ORDER;
}
}
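
All correlations end up at a common shift (xX_shifts) and are then normalized by temp = max(1 + nrg*LTP_CORR_INV_MAX, xx) so the Q17 outputs stay bounded. The enabled #else branch is a 64-bit shift-and-divide; as a standalone helper (illustrative):

    #include <stdint.h>

    /* Normalize correlation c by t (same Q domain), producing a Q17 result. */
    static int32_t normalize_q17(int32_t c, int32_t t)
    {
        return (int32_t)(((int64_t)c << 17) / t);
    }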

View file

@@ -44,7 +44,7 @@ void silk_find_pitch_lags_FIX(
{
opus_int buf_len, i, scale;
opus_int32 thrhld_Q13, res_nrg;
const opus_int16 *x_buf, *x_buf_ptr;
const opus_int16 *x_ptr;
VARDECL( opus_int16, Wsig );
opus_int16 *Wsig_ptr;
opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
@@ -61,8 +61,6 @@
/* Safety check */
silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
x_buf = x - psEnc->sCmn.ltp_mem_length;
/*************************************/
/* Estimate LPC AR coefficients */
/*************************************/
@@ -72,19 +70,19 @@
ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 );
/* First LA_LTP samples */
x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
x_ptr = x + buf_len - psEnc->sCmn.pitch_LPC_win_length;
Wsig_ptr = Wsig;
silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch );
silk_apply_sine_window( Wsig_ptr, x_ptr, 1, psEnc->sCmn.la_pitch );
/* Middle un - windowed samples */
Wsig_ptr += psEnc->sCmn.la_pitch;
x_buf_ptr += psEnc->sCmn.la_pitch;
silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) );
x_ptr += psEnc->sCmn.la_pitch;
silk_memcpy( Wsig_ptr, x_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) );
/* Last LA_LTP samples */
Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
x_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
silk_apply_sine_window( Wsig_ptr, x_ptr, 2, psEnc->sCmn.la_pitch );
/* Calculate autocorrelation sequence */
silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );
@@ -112,7 +110,7 @@
/*****************************************/
/* LPC analysis filtering */
/*****************************************/
silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
silk_LPC_analysis_filter( res, x, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch );
if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
/* Threshold for pitch estimator */
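
Only the first and last la_pitch samples of the pitch-analysis buffer are tapered; the middle passes through unweighted, which is what the three silk_apply_sine_window calls implement. A rough float sketch of that layout (illustrative; the exact SILK window shape differs):

    #include <math.h>

    /* Taper the edges of win_len samples with sine fades of length la. */
    static void taper_edges(float *w, const float *x, int win_len, int la)
    {
        int i;
        for (i = 0; i < la; i++)                  /* fade-in           */
            w[i] = x[i] * (float)sin(1.5707963 * (i + 1) / la);
        for (i = la; i < win_len - la; i++)       /* middle, unweighted */
            w[i] = x[i];
        for (i = win_len - la; i < win_len; i++)  /* fade-out          */
            w[i] = x[i] * (float)sin(1.5707963 * (win_len - i) / la);
    }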

View file

@@ -41,13 +41,12 @@ void silk_find_pred_coefs_FIX(
)
{
opus_int i;
opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];
opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ];
opus_int16 NLSF_Q15[ MAX_LPC_ORDER ];
const opus_int16 *x_ptr;
opus_int16 *x_pre_ptr;
VARDECL( opus_int16, LPC_in_pre );
opus_int32 tmp, min_gain_Q16, minInvGain_Q30;
opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ];
opus_int32 min_gain_Q16, minInvGain_Q30;
SAVE_STACK;
/* weighting for weighted least squares */
@@ -61,13 +60,11 @@
/* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 );
/* Ensure Wght_Q15 a minimum value 1 */
invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 );
/* Limit inverse */
invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 100 );
/* Square the inverted gains */
silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) );
tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] );
Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 );
/* Invert the inverted and normalized gains */
local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] );
@@ -77,23 +74,24 @@
psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder
+ psEnc->sCmn.frame_length, opus_int16 );
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
VARDECL( opus_int32, WLTP );
VARDECL( opus_int32, xXLTP_Q17 );
VARDECL( opus_int32, XXLTP_Q17 );
/**********/
/* VOICED */
/**********/
silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );
ALLOC( xXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER, opus_int32 );
ALLOC( XXLTP_Q17, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );
/* LTP analysis */
silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );
silk_find_LTP_FIX( XXLTP_Q17, xXLTP_Q17, res_pitch,
psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );
/* Quantize LTP gain parameters */
silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
&psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
&psEnc->sCmn.sum_log_gain_Q7, &psEncCtrl->LTPredCodGain_Q7, XXLTP_Q17, xXLTP_Q17, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );
/* Control LTP scaling */
silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
@ -118,16 +116,16 @@ void silk_find_pred_coefs_FIX(
silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );
psEncCtrl->LTPredCodGain_Q7 = 0;
psEnc->sCmn.sum_log_gain_Q7 = 0;
psEnc->sCmn.sum_log_gain_Q7 = 0;
}
/* Limit on total predictive coding gain */
if( psEnc->sCmn.first_frame_after_reset ) {
minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 );
} else {
} else {
minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */
minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30,
silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ),
minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30,
silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ),
silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 );
}
@ -139,7 +137,7 @@ void silk_find_pred_coefs_FIX(
/* Calculate residual energy using quantized LPC coefficients */
silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains,
psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder, psEnc->sCmn.arch );
/* Copy to prediction struct for use in next frame for interpolation */
silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
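The LTP path above now builds correlation matrices (XXLTP_Q17, xXLTP_Q17) and lets silk_quant_LTP_gains pick the codebook directly, replacing the old WLTP weights and Wght_Q15. The gain preparation feeding it is easier to see in float; a loose sketch with the Q-format bookkeeping omitted (the floor of 100 mirrors the new Q16 limit above, lowered from 363):

/* Loose float sketch of the gain normalization: inverse gains are scaled by
   the minimum gain so they stay bounded, floored, then re-inverted. */
static void normalize_gains(float *invGains, float *localGains,
                            const float *gains, int nbSubfr)
{
    int i;
    float minGain = gains[0];
    for (i = 1; i < nbSubfr; i++)
        minGain = gains[i] < minGain ? gains[i] : minGain;
    for (i = 0; i < nbSubfr; i++) {
        float inv    = minGain / gains[i];   /* <= 1.0 by construction */
        float floorv = 100.0f / 65536.0f;    /* the new floor, 100 in Q16 */
        invGains[i]   = inv > floorv ? inv : floorv;
        localGains[i] = 1.0f / invGains[i];
    }
}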

View file

@ -39,14 +39,15 @@ void silk_k2a(
)
{
opus_int k, n;
opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
opus_int32 rc, tmp1, tmp2;
for( k = 0; k < order; k++ ) {
for( n = 0; n < k; n++ ) {
Atmp[ n ] = A_Q24[ n ];
}
for( n = 0; n < k; n++ ) {
A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] );
rc = rc_Q15[ k ];
for( n = 0; n < (k + 1) >> 1; n++ ) {
tmp1 = A_Q24[ n ];
tmp2 = A_Q24[ k - n - 1 ];
A_Q24[ n ] = silk_SMLAWB( tmp1, silk_LSHIFT( tmp2, 1 ), rc );
A_Q24[ k - n - 1 ] = silk_SMLAWB( tmp2, silk_LSHIFT( tmp1, 1 ), rc );
}
A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 );
}
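The rewrite drops the Atmp scratch buffer by walking the coefficient vector from both ends at once; tmp1 and tmp2 latch each pair before either slot is written, so the in-place update stays correct. A float sketch of the same step-up recursion (reflection coefficients to prediction coefficients), with illustrative names:

/* Float sketch of the in-place step-up recursion above: each pair A[n],
   A[k-n-1] must be read into temporaries before either is overwritten. */
static void k2a_float(float *A, const float *rc, int order)
{
    int k, n;
    for (k = 0; k < order; k++) {
        for (n = 0; n < (k + 1) >> 1; n++) {
            float tmp1 = A[n];
            float tmp2 = A[k - n - 1];
            A[n]         = tmp1 + tmp2 * rc[k];
            A[k - n - 1] = tmp2 + tmp1 * rc[k];
        }
        A[k] = -rc[k];
    }
}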

View file

@ -39,15 +39,16 @@ void silk_k2a_Q16(
)
{
opus_int k, n;
opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
opus_int32 rc, tmp1, tmp2;
for( k = 0; k < order; k++ ) {
for( n = 0; n < k; n++ ) {
Atmp[ n ] = A_Q24[ n ];
rc = rc_Q16[ k ];
for( n = 0; n < (k + 1) >> 1; n++ ) {
tmp1 = A_Q24[ n ];
tmp2 = A_Q24[ k - n - 1 ];
A_Q24[ n ] = silk_SMLAWW( tmp1, tmp2, rc );
A_Q24[ k - n - 1 ] = silk_SMLAWW( tmp2, tmp1, rc );
}
for( n = 0; n < k; n++ ) {
A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] );
}
A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 );
A_Q24[ k ] = -silk_LSHIFT( rc, 8 );
}
}

View file

@ -36,6 +36,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "debug.h"
#include "entenc.h"
#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
|| defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "fixed/arm/warped_autocorrelation_FIX_arm.h"
#endif
#ifndef FORCE_CPP_BUILD
#ifdef __cplusplus
extern "C"
@ -47,6 +52,9 @@ extern "C"
#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX
#define silk_encode_frame_Fxx silk_encode_frame_FIX
#define QC 10
#define QS 13
/*********************/
/* Encoder Functions */
/*********************/
@ -81,22 +89,11 @@ opus_int silk_init_encoder(
opus_int silk_control_encoder(
silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */
silk_EncControlStruct *encControl, /* I Control structure */
const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */
const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */
const opus_int channelNb, /* I Channel number */
const opus_int force_fs_kHz
);
/****************/
/* Prefiltering */
/****************/
void silk_prefilter_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state */
const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */
opus_int32 xw_Q10[], /* O Weighted signal */
const opus_int16 x[] /* I Speech signal */
);
/**************************/
/* Noise shaping analysis */
/**************************/
@ -110,7 +107,7 @@ void silk_noise_shape_analysis_FIX(
);
/* Autocorrelations for a warped frequency axis */
void silk_warped_autocorrelation_FIX(
void silk_warped_autocorrelation_FIX_c(
opus_int32 *corr, /* O Result [order + 1] */
opus_int *scale, /* O Scaling of the correlation vector */
const opus_int16 *input, /* I Input data to correlate */
@ -119,6 +116,11 @@ void silk_warped_autocorrelation_FIX(
const opus_int order /* I Correlation order (even) */
);
#if !defined(OVERRIDE_silk_warped_autocorrelation_FIX)
#define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \
((void)(arch), silk_warped_autocorrelation_FIX_c(corr, scale, input, warping_Q16, length, order))
#endif
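This is the arch-dispatch pattern used throughout the tree: the generic macro discards arch and calls the C kernel, while a platform header (such as the ARM header included above) defines OVERRIDE_silk_warped_autocorrelation_FIX and supplies its own mapping first. A hypothetical override sketch; the ARCH_ARM_NEON guard and threshold here are illustrative, not the actual header contents:

/* Hypothetical platform header: pick a NEON kernel when the run-time arch
   allows it, otherwise fall back to the C version. */
#define OVERRIDE_silk_warped_autocorrelation_FIX
#define silk_warped_autocorrelation_FIX(corr, scale, input, warping_Q16, length, order, arch) \
    ((arch) >= ARCH_ARM_NEON /* illustrative guard */ \
        ? silk_warped_autocorrelation_FIX_neon(corr, scale, input, warping_Q16, length, order) \
        : silk_warped_autocorrelation_FIX_c(corr, scale, input, warping_Q16, length, order))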
/* Calculation of LTP state scaling */
void silk_LTP_scale_ctrl_FIX(
silk_encoder_state_FIX *psEnc, /* I/O encoder state */
@ -157,16 +159,13 @@ void silk_find_LPC_FIX(
/* LTP analysis */
void silk_find_LTP_FIX(
opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */
opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */
opus_int *LTPredCodGain_Q7, /* O LTP coding gain */
const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */
opus_int32 XXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Correlation matrix */
opus_int32 xXLTP_Q17[ MAX_NB_SUBFR * LTP_ORDER ], /* O Correlation vector */
const opus_int16 r_lpc[], /* I Residual signal after LPC */
const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */
const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */
const opus_int subfr_length, /* I subframe length */
const opus_int nb_subfr, /* I number of subframes */
const opus_int mem_offset, /* I number of samples in LTP memory */
opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */
const opus_int subfr_length, /* I Subframe length */
const opus_int nb_subfr, /* I Number of subframes */
int arch /* I Run-time architecture */
);
void silk_LTP_analysis_filter_FIX(
@ -190,7 +189,8 @@ void silk_residual_energy_FIX(
const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */
const opus_int subfr_length, /* I Subframe length */
const opus_int nb_subfr, /* I Number of subframes */
const opus_int LPC_order /* I LPC order */
const opus_int LPC_order, /* I LPC order */
int arch /* I Run-time architecture */
);
/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
@ -218,9 +218,10 @@ void silk_corrMatrix_FIX(
const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */
const opus_int L, /* I Length of vectors */
const opus_int order, /* I Max lag for correlation */
const opus_int head_room, /* I Desired headroom */
opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */
opus_int *rshifts /* I/O Right shifts of correlations */
opus_int32 *nrg, /* O Energy of x vector */
opus_int *rshifts, /* O Right shifts of correlations */
int arch /* I Run-time architecture */
);
/* Calculates correlation vector X'*t */
@ -230,23 +231,8 @@ void silk_corrVector_FIX(
const opus_int L, /* I Length of vectors */
const opus_int order, /* I Max lag for correlation */
opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */
const opus_int rshifts /* I Right shifts of correlations */
);
/* Add noise to matrix diagonal */
void silk_regularize_correlations_FIX(
opus_int32 *XX, /* I/O Correlation matrices */
opus_int32 *xx, /* I/O Correlation values */
opus_int32 noise, /* I Noise to add */
opus_int D /* I Dimension of XX */
);
/* Solves Ax = b, assuming A is symmetric */
void silk_solve_LDL_FIX(
opus_int32 *A, /* I Pointer to symmetric square matrix A */
opus_int M, /* I Size of matrix */
const opus_int32 *b, /* I Pointer to b vector */
opus_int32 *x_Q16 /* O Pointer to x solution vector */
const opus_int rshifts, /* I Right shifts of correlations */
int arch /* I Run-time architecture */
);
#ifndef FORCE_CPP_BUILD

View file

@ -57,90 +57,86 @@ static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/
/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */
/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
static OPUS_INLINE void limit_warped_coefs(
opus_int32 *coefs_syn_Q24,
opus_int32 *coefs_ana_Q24,
opus_int32 *coefs_Q24,
opus_int lambda_Q16,
opus_int32 limit_Q24,
opus_int order
) {
opus_int i, iter, ind = 0;
opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16;
opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_Q16;
opus_int32 nom_Q16, den_Q24;
opus_int32 limit_Q20, maxabs_Q20;
/* Convert to monic coefficients */
lambda_Q16 = -lambda_Q16;
for( i = order - 1; i > 0; i-- ) {
coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 );
}
lambda_Q16 = -lambda_Q16;
nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 );
den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 );
den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_Q24[ 0 ], lambda_Q16 );
gain_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
for( i = 0; i < order; i++ ) {
coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] );
}
limit_Q20 = silk_RSHIFT(limit_Q24, 4);
for( iter = 0; iter < 10; iter++ ) {
/* Find maximum absolute value */
maxabs_Q24 = -1;
for( i = 0; i < order; i++ ) {
tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) );
tmp = silk_abs_int32( coefs_Q24[ i ] );
if( tmp > maxabs_Q24 ) {
maxabs_Q24 = tmp;
ind = i;
}
}
if( maxabs_Q24 <= limit_Q24 ) {
/* Use Q20 to avoid any overflow when multiplying by (ind + 1) later. */
maxabs_Q20 = silk_RSHIFT(maxabs_Q24, 4);
if( maxabs_Q20 <= limit_Q20 ) {
/* Coefficients are within range - done */
return;
}
/* Convert back to true warped coefficients */
for( i = 1; i < order; i++ ) {
coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 );
}
gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 );
gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 );
gain_Q16 = silk_INVERSE32_varQ( gain_Q16, 32 );
for( i = 0; i < order; i++ ) {
coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] );
}
/* Apply bandwidth expansion */
chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ(
silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ),
silk_MUL( maxabs_Q24, ind + 1 ), 22 );
silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 );
silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 );
silk_SMULWB( maxabs_Q20 - limit_Q20, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ),
silk_MUL( maxabs_Q20, ind + 1 ), 22 );
silk_bwexpander_32( coefs_Q24, order, chirp_Q16 );
/* Convert to monic warped coefficients */
lambda_Q16 = -lambda_Q16;
for( i = order - 1; i > 0; i-- ) {
coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
coefs_Q24[ i - 1 ] = silk_SMLAWB( coefs_Q24[ i - 1 ], coefs_Q24[ i ], lambda_Q16 );
}
lambda_Q16 = -lambda_Q16;
nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 );
den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_Q24[ 0 ], lambda_Q16 );
gain_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
for( i = 0; i < order; i++ ) {
coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
coefs_Q24[ i ] = silk_SMULWW( gain_Q16, coefs_Q24[ i ] );
}
}
silk_assert( 0 );
}
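limit_warped_coefs now handles a single coefficient set: convert to monic, test the Q20 maximum against the limit, and otherwise chirp the true coefficients and retry. The chirping is ordinary bandwidth expansion; a float sketch of what silk_bwexpander_32 does, ignoring fixed-point rounding:

/* Float sketch of bandwidth expansion: coefficient i is scaled by
   chirp^(i+1), pulling the filter poles toward the origin. */
static void bwexpand(float *ar, int order, float chirp)
{
    int i;
    float c = chirp;
    for (i = 0; i < order; i++) {
        ar[i] *= c;
        c *= chirp;
    }
}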
/* Disable MIPS version until it's updated. */
#if 0 && defined(MIPSr1_ASM)
#include "mips/noise_shape_analysis_FIX_mipsr1.h"
#endif
/**************************************************************/
/* Compute noise shaping coefficients and initial gain values */
/**************************************************************/
#ifndef OVERRIDE_silk_noise_shape_analysis_FIX
void silk_noise_shape_analysis_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
@ -150,14 +146,13 @@ void silk_noise_shape_analysis_FIX(
)
{
silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
opus_int k, i, nSamples, nSegs, Qnrg, b_Q14, warping_Q16, scale = 0;
opus_int32 SNR_adj_dB_Q7, HarmShapeGain_Q16, Tilt_Q16, tmp32;
opus_int32 nrg, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
opus_int32 BWExp_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ];
opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ];
opus_int32 AR_Q24[ MAX_SHAPE_LPC_ORDER ];
VARDECL( opus_int16, x_windowed );
const opus_int16 *x_ptr, *pitch_res_ptr;
SAVE_STACK;
@ -204,14 +199,14 @@ void silk_noise_shape_analysis_FIX(
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
/* Initially set to 0; may be overruled in process_gains(..) */
psEnc->sCmn.indices.quantOffsetType = 0;
psEncCtrl->sparseness_Q8 = 0;
} else {
/* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
energy_variation_Q7 = 0;
log_energy_prev_Q7 = 0;
pitch_res_ptr = pitch_res;
for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
nSegs = silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2;
for( k = 0; k < nSegs; k++ ) {
silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/
@ -223,18 +218,12 @@ void silk_noise_shape_analysis_FIX(
pitch_res_ptr += nSamples;
}
psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
/* Set quantization offset depending on sparseness measure */
if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
if( energy_variation_Q7 > SILK_FIX_CONST( ENERGY_VARIATION_THRESHOLD_QNT_OFFSET, 7 ) * (nSegs-1) ) {
psEnc->sCmn.indices.quantOffsetType = 0;
} else {
psEnc->sCmn.indices.quantOffsetType = 1;
}
/* Increase coding SNR for sparse signals */
SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
}
/*******************************/
@ -242,14 +231,8 @@ void silk_noise_shape_analysis_FIX(
/*******************************/
/* More BWE for signals with high prediction gain */
strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
BWExp_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
/* BWExp1 will be applied after BWExp2, so make it relative */
BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
if( psEnc->sCmn.warping_Q16 > 0 ) {
/* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
@ -279,7 +262,7 @@ void silk_noise_shape_analysis_FIX(
if( psEnc->sCmn.warping_Q16 > 0 ) {
/* Calculate warped auto correlation */
silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder, arch );
} else {
/* Calculate regular auto correlation */
silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
@ -294,7 +277,7 @@ void silk_noise_shape_analysis_FIX(
silk_assert( nrg >= 0 );
/* Convert reflection coefficients to prediction coefficients */
silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
silk_k2a_Q16( AR_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
Qnrg = -scale; /* range: -12...30*/
silk_assert( Qnrg >= -12 );
@ -313,40 +296,34 @@ void silk_noise_shape_analysis_FIX(
if( psEnc->sCmn.warping_Q16 > 0 ) {
/* Adjust gain for warping */
gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
gain_mult_Q16 = warped_gain( AR_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
silk_assert( psEncCtrl->Gains_Q16[ k ] > 0 );
if( psEncCtrl->Gains_Q16[ k ] < SILK_FIX_CONST( 0.25, 16 ) ) {
psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
} else {
psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 );
if ( psEncCtrl->Gains_Q16[ k ] >= ( silk_int32_MAX >> 1 ) ) {
psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
} else {
psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT32( psEncCtrl->Gains_Q16[ k ], 1 );
}
}
silk_assert( psEncCtrl->Gains_Q16[ k ] > 0 );
}
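The new gain path trades a little rounding for overflow safety: gains below 0.25 in Q16 are scaled at full precision, larger ones are halved, multiplied, saturated against silk_int32_MAX, then doubled back. A 64-bit sketch of the same guard; the helper name is illustrative, not the SILK API:

#include <stdint.h>

/* Sketch of the overflow-safe Q16 gain scaling above. */
static int32_t scale_gain_q16(int32_t gain_q16, int32_t mult_q16)
{
    if (gain_q16 < (1 << 14)) {
        /* < 0.25 in Q16: cannot overflow, keep full precision */
        return (int32_t)(((int64_t)gain_q16 * mult_q16) >> 16);
    } else {
        /* halve, multiply, saturate, then double */
        int32_t half = (int32_t)(((int64_t)(gain_q16 >> 1) * mult_q16) >> 16);
        return half >= (INT32_MAX >> 1) ? INT32_MAX : half << 1;
    }
}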
/* Bandwidth expansion for synthesis filter shaping */
silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
/* Bandwidth expansion */
silk_bwexpander_32( AR_Q24, psEnc->sCmn.shapingLPCOrder, BWExp_Q16 );
/* Compute noise shaping filter coefficients */
silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) );
if( psEnc->sCmn.warping_Q16 > 0 ) {
/* Convert to monic warped prediction coefficients and limit absolute values */
limit_warped_coefs( AR_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
/* Bandwidth expansion for analysis filter shaping */
silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
/* Ratio of prediction gains, in energy domain */
pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
/*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
/* Convert to monic warped prediction coefficients and limit absolute values */
limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
/* Convert from Q24 to Q13 and store in int16 */
for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
/* Convert from Q24 to Q13 and store in int16 */
for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
psEncCtrl->AR_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR_Q24[ i ], 11 ) );
}
} else {
silk_LPC_fit( &psEncCtrl->AR_Q13[ k * MAX_SHAPE_LPC_ORDER ], AR_Q24, 13, 24, psEnc->sCmn.shapingLPCOrder );
}
}
@ -363,11 +340,6 @@ void silk_noise_shape_analysis_FIX(
psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
}
gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
}
/************************************************/
/* Control low-frequency shaping and noise tilt */
@ -405,14 +377,6 @@ void silk_noise_shape_analysis_FIX(
/****************************/
/* HARMONIC SHAPING CONTROL */
/****************************/
/* Control boosting of harmonic frequencies */
HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
/* More harmonic boost for noisy input signals */
HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
/* More harmonic noise shaping for high bitrates or noisy input */
HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
@ -430,16 +394,14 @@ void silk_noise_shape_analysis_FIX(
/* Smooth over subframes */
/*************************/
for( k = 0; k < MAX_NB_SUBFR; k++ ) {
psShapeSt->HarmBoost_smth_Q16 =
silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
psShapeSt->HarmShapeGain_smth_Q16 =
silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
psShapeSt->Tilt_smth_Q16 =
silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 );
psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 );
}
RESTORE_STACK;
}
#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */
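The smoothing loop above is a first-order lowpass per shaping parameter; with HarmBoost gone, only HarmShapeGain and Tilt remain. The recurrence silk_SMLAWB( state, target - state, coef ) is exactly this float step:

/* One-pole smoothing, e.g.
   HarmShapeGain_smth = smooth(HarmShapeGain_smth, HarmShapeGain, SUBFR_SMTH_COEF); */
static float smooth(float state, float target, float coef)
{
    return state + coef * (target - state);
}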

View file

@ -72,14 +72,15 @@ static void silk_P_Ana_calc_energy_st3(
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of one 5 ms subframe */
opus_int nb_subfr, /* I number of subframes */
opus_int complexity /* I Complexity setting */
opus_int complexity, /* I Complexity setting */
int arch /* I Run-time architecture */
);
/*************************************************************/
/* FIXED POINT CORE PITCH ANALYSIS FUNCTION */
/*************************************************************/
opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
const opus_int16 *frame_unscaled, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
opus_int *pitch_out, /* O 4 pitch lag values */
opus_int16 *lagIndex, /* O Lag Index */
opus_int8 *contourIndex, /* O Pitch contour Index */
@ -93,16 +94,17 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
int arch /* I Run-time architecture */
)
{
VARDECL( opus_int16, frame_8kHz );
VARDECL( opus_int16, frame_8kHz_buf );
VARDECL( opus_int16, frame_4kHz );
VARDECL( opus_int16, frame_scaled );
opus_int32 filt_state[ 6 ];
const opus_int16 *input_frame_ptr;
const opus_int16 *frame, *frame_8kHz;
opus_int i, k, d, j;
VARDECL( opus_int16, C );
VARDECL( opus_int32, xcorr32 );
const opus_int16 *target_ptr, *basis_ptr;
opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
opus_int32 cross_corr, normalizer, energy, energy_basis, energy_target;
opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp, shift;
VARDECL( opus_int16, d_comp );
opus_int32 sum, threshold, lag_counter;
opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
@ -118,6 +120,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
const opus_int8 *Lag_CB_ptr;
SAVE_STACK;
/* Check for valid sampling frequency */
silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
@ -136,17 +139,33 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
min_lag = PE_MIN_LAG_MS * Fs_kHz;
max_lag = PE_MAX_LAG_MS * Fs_kHz - 1;
/* Downscale input if necessary */
silk_sum_sqr_shift( &energy, &shift, frame_unscaled, frame_length );
shift += 3 - silk_CLZ32( energy ); /* at least two bits headroom */
ALLOC( frame_scaled, frame_length, opus_int16 );
if( shift > 0 ) {
shift = silk_RSHIFT( shift + 1, 1 );
for( i = 0; i < frame_length; i++ ) {
frame_scaled[ i ] = silk_RSHIFT( frame_unscaled[ i ], shift );
}
frame = frame_scaled;
} else {
frame = frame_unscaled;
}
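Input scaling now happens once, up front, instead of separately on the 4 kHz, 8 kHz and original-rate signals as the deleted blocks below show. The idea: measure the frame energy, then right-shift the samples just enough that later correlation sums keep a couple of spare bits in 32-bit accumulators. A standalone sketch; names are illustrative, and the real code derives the shift from silk_CLZ32 on the energy:

#include <stdint.h>

/* Per-sample right shift that keeps sum-of-squares in int32 range with
   a few spare bits; each sample bit removed drops the energy by 2 bits. */
static int headroom_shift(const int16_t *x, int len)
{
    int64_t energy = 0;
    int i, shift = 0;
    for (i = 0; i < len; i++)
        energy += (int32_t)x[i] * x[i];
    while ((energy >> (2 * shift)) > (INT32_MAX >> 3))
        shift++;
    return shift;
}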
ALLOC( frame_8kHz_buf, ( Fs_kHz == 8 ) ? 1 : frame_length_8kHz, opus_int16 );
/* Resample from input sampled at Fs_kHz to 8 kHz */
ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
if( Fs_kHz == 16 ) {
silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length );
frame_8kHz = frame_8kHz_buf;
} else if( Fs_kHz == 12 ) {
silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length );
frame_8kHz = frame_8kHz_buf;
} else {
silk_assert( Fs_kHz == 8 );
silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
frame_8kHz = frame;
}
/* Decimate again to 4 kHz */
@ -159,19 +178,6 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
}
/*******************************************************************************
** Scale 4 kHz signal down to prevent correlation measures from overflowing
** find scaling as max scaling for each 8kHz(?) subframe
*******************************************************************************/
/* Inner product is calculated with different lengths, so scale for the worst case */
silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
if( shift > 0 ) {
shift = silk_RSHIFT( shift, 1 );
for( i = 0; i < frame_length_4kHz; i++ ) {
frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
}
}
/******************************************************************************
* FIRST STAGE, operating in 4 khz
@ -195,8 +201,8 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
/* Calculate first vector products before loop */
cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) );
normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) );
normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
@ -310,18 +316,6 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
*************************************************************************************/
/******************************************************************************
** Scale signal down to avoid correlation measures from overflowing
*******************************************************************************/
/* find scaling as max scaling for each subframe */
silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
if( shift > 0 ) {
shift = silk_RSHIFT( shift, 1 );
for( i = 0; i < frame_length_8kHz; i++ ) {
frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
}
}
/*********************************************************************************
* Find energy of each subframe projected onto its history, for a range of delays
*********************************************************************************/
@ -334,7 +328,7 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
silk_assert( target_ptr >= frame_8kHz );
silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
for( j = 0; j < length_d_comp; j++ ) {
d = d_comp[ j ];
basis_ptr = target_ptr - d;
@ -343,9 +337,9 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
silk_assert( basis_ptr >= frame_8kHz );
silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
if( cross_corr > 0 ) {
energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
(opus_int16)silk_DIV32_varQ( cross_corr,
silk_ADD32( energy_target,
@ -461,24 +455,6 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
silk_assert( *LTPCorr_Q15 >= 0 );
if( Fs_kHz > 8 ) {
VARDECL( opus_int16, scratch_mem );
/***************************************************************************/
/* Scale input signal down to avoid correlation measures from overflowing */
/***************************************************************************/
/* find scaling as max scaling for each subframe */
silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
if( shift > 0 ) {
/* Move signal to scratch mem because the input signal should be unchanged */
shift = silk_RSHIFT( shift, 1 );
for( i = 0; i < frame_length; i++ ) {
scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
}
input_frame_ptr = scratch_mem;
} else {
input_frame_ptr = frame;
}
/* Search in original signal */
CBimax_old = CBimax;
@ -518,15 +494,15 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0
/* Calculate the correlations and energies needed in stage 3 */
ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
lag_counter = 0;
silk_assert( lag == silk_SAT16( lag ) );
contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
for( d = start_lag; d <= end_lag; d++ ) {
for( j = 0; j < nb_cbk_search; j++ ) {
cross_corr = 0;
@ -671,7 +647,8 @@ static void silk_P_Ana_calc_energy_st3(
opus_int start_lag, /* I lag offset to search around */
opus_int sf_length, /* I length of one 5 ms subframe */
opus_int nb_subfr, /* I number of subframes */
opus_int complexity /* I Complexity setting */
opus_int complexity, /* I Complexity setting */
int arch /* I Run-time architecture */
)
{
const opus_int16 *target_ptr, *basis_ptr;
@ -705,7 +682,7 @@ static void silk_P_Ana_calc_energy_st3(
/* Calculate the energy for first lag */
basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
silk_assert( energy >= 0 );
scratch_mem[ lag_counter ] = energy;
lag_counter++;
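Throughout the search, candidate lags are scored by cross-correlation normalized with the summed target and basis energies plus a small bias, a cheap stand-in for true normalized correlation. A float sketch of that score; the helper is illustrative:

/* Float sketch of the lag score used above: cc / (E_target + E_basis),
   clamped to zero for anti-correlated candidates. */
static float lag_score(const short *target, const short *basis, int len)
{
    float cc = 0.0f, et = 1.0f, eb = 0.0f;
    int i;
    for (i = 0; i < len; i++) {
        cc += (float)target[i] * basis[i];
        et += (float)target[i] * target[i];
        eb += (float)basis[i]  * basis[i];
    }
    return cc > 0.0f ? cc / (et + eb) : 0.0f;
}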

View file

@ -1,209 +0,0 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "main_FIX.h"
#include "stack_alloc.h"
#include "tuning_parameters.h"
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
opus_int32 st_res_Q12[], /* I short term residual signal */
opus_int32 xw_Q3[], /* O prefiltered signal */
opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coefficients */
opus_int Tilt_Q14, /* I Tilt shaping coefficient */
opus_int32 LF_shp_Q14, /* I Low-frequency shaping coefficients */
opus_int lag, /* I Lag for harmonic shaping */
opus_int length /* I Length of signals */
);
void silk_warped_LPC_analysis_filter_FIX(
opus_int32 state[], /* I/O State [order + 1] */
opus_int32 res_Q2[], /* O Residual signal [length] */
const opus_int16 coef_Q13[], /* I Coefficients [order] */
const opus_int16 input[], /* I Input signal [length] */
const opus_int16 lambda_Q16, /* I Warping factor */
const opus_int length, /* I Length of input signal */
const opus_int order /* I Filter order (even) */
)
{
opus_int n, i;
opus_int32 acc_Q11, tmp1, tmp2;
/* Order must be even */
silk_assert( ( order & 1 ) == 0 );
for( n = 0; n < length; n++ ) {
/* Output of lowpass section */
tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
state[ 0 ] = silk_LSHIFT( input[ n ], 14 );
/* Output of allpass section */
tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
state[ 1 ] = tmp2;
acc_Q11 = silk_RSHIFT( order, 1 );
acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
/* Loop over allpass sections */
for( i = 2; i < order; i += 2 ) {
/* Output of allpass section */
tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
state[ i ] = tmp1;
acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
/* Output of allpass section */
tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
state[ i + 1 ] = tmp2;
acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
}
state[ order ] = tmp1;
acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
}
}
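For context on this deleted file: the warped analysis filter above replaces each unit delay z^-1 of an ordinary FIR with a first-order allpass D(z) = (z^-1 - lambda) / (1 - lambda*z^-1), which is what the chained tmp1/tmp2 state updates implement. A textbook float sketch of one such section in direct form, not SILK's in-place chaining:

typedef struct { float x1, y1; } allpass_state;

/* y[n] = -lambda*x[n] + x[n-1] + lambda*y[n-1] */
static float warped_delay(allpass_state *s, float x, float lambda)
{
    float y = -lambda * x + s->x1 + lambda * s->y1;
    s->x1 = x;
    s->y1 = y;
    return y;
}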
void silk_prefilter_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state */
const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */
opus_int32 xw_Q3[], /* O Weighted signal */
const opus_int16 x[] /* I Speech signal */
)
{
silk_prefilter_state_FIX *P = &psEnc->sPrefilt;
opus_int j, k, lag;
opus_int32 tmp_32;
const opus_int16 *AR1_shp_Q13;
const opus_int16 *px;
opus_int32 *pxw_Q3;
opus_int HarmShapeGain_Q12, Tilt_Q14;
opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;
VARDECL( opus_int32, x_filt_Q12 );
VARDECL( opus_int32, st_res_Q2 );
opus_int16 B_Q10[ 2 ];
SAVE_STACK;
/* Set up pointers */
px = x;
pxw_Q3 = xw_Q3;
lag = P->lagPrev;
ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );
ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
/* Update Variables that change per sub frame */
if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
lag = psEncCtrl->pitchL[ k ];
}
/* Noise shape parameters */
HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] );
silk_assert( HarmShapeGain_Q12 >= 0 );
HarmShapeFIRPacked_Q12 = silk_RSHIFT( HarmShapeGain_Q12, 2 );
HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 );
Tilt_Q14 = psEncCtrl->Tilt_Q14[ k ];
LF_shp_Q14 = psEncCtrl->LF_shp_Q14[ k ];
AR1_shp_Q13 = &psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER ];
/* Short term FIR filtering*/
silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
/* Reduce (mainly) low frequencies during harmonic emphasis */
B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 );
tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 ); /* Q26 */
tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ); /* Q26 */
tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] ); /* Q24 */
tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 ); /* Q10 */
B_Q10[ 1 ]= silk_SAT16( tmp_32 );
x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] );
for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] );
}
P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ];
silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length );
px += psEnc->sCmn.subfr_length;
pxw_Q3 += psEnc->sCmn.subfr_length;
}
P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
RESTORE_STACK;
}
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
opus_int32 st_res_Q12[], /* I short term residual signal */
opus_int32 xw_Q3[], /* O prefiltered signal */
opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coefficients */
opus_int Tilt_Q14, /* I Tilt shaping coefficient */
opus_int32 LF_shp_Q14, /* I Low-frequency shaping coefficients */
opus_int lag, /* I Lag for harmonic shaping */
opus_int length /* I Length of signals */
)
{
opus_int i, idx, LTP_shp_buf_idx;
opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
opus_int16 *LTP_shp_buf;
/* To speed up, use temp variables instead of the struct */
LTP_shp_buf = P->sLTP_shp;
LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12;
sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12;
for( i = 0; i < length; i++ ) {
if( lag > 0 ) {
/* unrolled loop */
silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
idx = lag + LTP_shp_buf_idx;
n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
} else {
n_LTP_Q12 = 0;
}
n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
}
/* Copy temp variable back to state */
P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12;
P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
}
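Also for context: the two distinct taps of the 3-tap harmonic-shaping FIR (g/4, g/2, g/4 around the pitch lag) were packed into one 32-bit word so SMULBB/SMLABT could select them. A plain float sketch of that FIR over a power-of-two circular history; the indexing convention is simplified and the names are illustrative:

/* 3-tap harmonic shaping: a half-gain tap at the pitch lag flanked by two
   quarter-gain taps, read from a power-of-two circular buffer (mask = size-1). */
static float harm_shape_fir(const float *buf, int idx, int lag,
                            float gain, int mask)
{
    return 0.25f * gain * buf[(idx - lag - 1) & mask]
         + 0.50f * gain * buf[(idx - lag    ) & mask]
         + 0.25f * gain * buf[(idx - lag + 1) & mask];
}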

Some files were not shown because too many files have changed in this diff