#include "qasm.h" #if id386 #ifndef __clang__ .arch athlon #endif .intel_syntax noprefix .text .globl C(S_WriteLinearBlastStereo16_SSE) C(S_WriteLinearBlastStereo16_SSE): push ebx push esi push edi mov esi,snd_p mov edi,snd_out mov ebx,snd_linear_count test ebx,ebx jz LExit mov ecx,esi and ecx,63 jz LMain and ecx,3 jnz LTail shr ecx,2 not ecx add ecx,17 LClamp1: mov eax,[esi] sar eax,8 cmp eax,32767 jg LClampHigh1 cmp eax,-32768 jnl LClampDone1 mov eax,-32768 jmp LClampDone1 LClampHigh1: mov eax,32767 LClampDone1: mov [edi],ax add esi,4 add edi,2 dec ebx jz LExit dec ecx jnz LClamp1 LMain: mov ecx,ebx shr ecx,4 jz LTail and ebx,15 LAgain: movq mm0, qword ptr [esi+ 0] movq mm1, qword ptr [esi+ 8] movq mm2, qword ptr [esi+16] movq mm3, qword ptr [esi+24] movq mm4, qword ptr [esi+32] movq mm5, qword ptr [esi+40] movq mm6, qword ptr [esi+48] movq mm7, qword ptr [esi+56] psrad mm0,8 psrad mm1,8 psrad mm2,8 psrad mm3,8 psrad mm4,8 psrad mm5,8 psrad mm6,8 psrad mm7,8 packssdw mm0, mm1 packssdw mm2, mm3 packssdw mm4, mm5 packssdw mm6, mm7 movntq qword ptr [edi+ 0], mm0 movntq qword ptr [edi+ 8], mm2 movntq qword ptr [edi+16], mm4 movntq qword ptr [edi+24], mm6 add esi, 64 add edi, 32 dec ecx jnz LAgain LTail: test ebx, ebx jz LEnd LClamp2: mov eax,[esi] sar eax,8 cmp eax,32767 jg LClampHigh2 cmp eax,-32768 jnl LClampDone2 mov eax,-32768 jmp LClampDone2 LClampHigh2: mov eax,32767 LClampDone2: mov [edi],ax add esi,4 add edi,2 dec ebx jnz LClamp2 LEnd: sfence emms LExit: pop edi pop esi pop ebx ret #endif