// Filename:- amd3d.h
//
// Macros to support AMD 3DNow! instructions with MS Visual C++
//
//
;/******************************************************************
; *
; * Copyright (c) 1996-1998 ADVANCED MICRO DEVICES, INC.
; * All Rights reserved.
; *
; * This software is unpublished and contains the trade secrets
; * and confidential proprietary information of AMD. Unless
; * otherwise provided in the Software Agreement associated
; * herewith, it is licensed in confidence "AS IS" and
; * is not to be reproduced in whole or part by any means except
; * for backup. Use, duplication, or disclosure by the Government
; * is subject to the restrictions in paragraph(b)(3)(B)of the
; * Rights in Technical Data and Computer Software clause in
; * DFAR 52.227-7013(a)(Oct 1988). Software owned by Advanced
; * Micro Devices Inc., One AMD Place, P.O. Box 3453, Sunnyvale,
; * CA 94088-3453.
; *
; ******************************************************************
 *
 *  AMD3D.H
 *
 *  MACRO FORMAT
 *  ============
 *  This file contains inline assembly macros that
 *  generate AMD-3D instructions in binary format.
 *  Therefore, a C or C++ programmer can use AMD-3D instructions
 *  without any penalty in their C or C++ source code.
 *
 *  The macro's name and format conventions are as follows:
 *
 *
 *  1. First argument of macro is a destination and
 *     second argument is a source operand.
 *       ex) _asm PFCMPEQ (_mm3, _mm4)
 *                          |     |
 *                         dst   src
 *
 *  2. The destination operand can be _mm0 to _mm7 (or _MM0 to _MM7) only.
 *     The source operand can be any one of the registers
 *     _mm0 to _mm7 or _eax, _ecx, _edx, _ebx, _esi, or _edi
 *     that contains effective address.
 *       ex) _asm PFRCP    (_MM7, _MM6)
 *       ex) _asm PFRCPIT2 (_mm0, _mm4)
 *       ex) _asm PFMUL    (_mm3, _edi)
 *
 *  3. The prefetch(w) takes one src operand _eax, _ecx, _edx,
 *     _ebx, _esi, or _edi that contains effective address.
 *       ex) _asm PREFETCH (_edi)
 *
 *  EXAMPLE
 *  =======
 *  Following program doesn't do anything but it shows you
 *  how to use inline assembly AMD-3D instructions in C.
 *  Note that this will only work in a flat memory model in which
 *  the segment registers cs, ds, ss and es point to the same
 *  linear address space, totalling less than 4GB.
 *
 *  Used Microsoft VC++ 5.0
 *
 *  #include <stdio.h>
 *  #include "amd3d.h"
 *
 *  void main ()
 *  {
 *      float x = (float)1.25;
 *      float y = (float)1.25;
 *      float z, zz;
 *
 *      _asm {
 *          movd mm1, x
 *          movd mm2, y
 *          pfmul (_mm1, _mm2)
 *          movd z, mm1
 *          femms
 *      }
 *
 *      printf ("value of z = %f\n", z);
 *
 *      //
 *      // Demonstration of using the memory instead of
 *      // multimedia register
 *      //
 *      _asm {
 *          movd mm3, x
 *          lea esi, y   // load effective address of y
 *          pfmul (_mm3, _esi)
 *          movd zz, mm3
 *          femms
 *      }
 *
 *      printf ("value of zz = %f\n", zz);
 *  }
 ******************************************************************/

// Upper-case operand codes for the eight multimedia registers.
// Each value is 0xc0 plus the register number; the instruction macros
// mask a destination code with 0x3f and OR a source code in unchanged.
#define _MM0 0xc0
#define _MM1 0xc1
#define _MM2 0xc2
#define _MM3 0xc3
#define _MM4 0xc4
#define _MM5 0xc5
#define _MM6 0xc6
#define _MM7 0xc7

// Lower-case spellings of the multimedia register operand codes
// (0xc0 plus the register number).
#define _mm0 0xc0
#define _mm1 0xc1
#define _mm2 0xc2
#define _mm3 0xc3
#define _mm4 0xc4
#define _mm5 0xc5
#define _mm6 0xc6
#define _mm7 0xc7

// Upper-case source-operand codes for a general register that holds the
// effective address of a memory operand. Codes 0x04 and 0x05 are
// deliberately not defined here.
#define _EAX 0x00
#define _ECX 0x01
#define _EDX 0x02
#define _EBX 0x03
#define _ESI 0x06
#define _EDI 0x07

// Lower-case spellings of the address-register source-operand codes.
#define _eax 0x00
#define _ecx 0x01
#define _edx 0x02
#define _ebx 0x03
#define _esi 0x06
#define _edi 0x07

// PF2ID(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 1D.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PF2ID(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x1d \
}
// PFACC(dst, src): emit the bytes 0F 0F, the operand byte, then suffix AE.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFACC(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xae \
}
// PFADD(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 9E.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFADD(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x9e \
}
// PFCMPEQ(dst, src): emit the bytes 0F 0F, the operand byte, then suffix B0.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFCMPEQ(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb0 \
}
// PFCMPGE(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 90.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFCMPGE(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x90 \
}
// PFCMPGT(dst, src): emit the bytes 0F 0F, the operand byte, then suffix A0.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFCMPGT(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa0 \
}
// PFMAX(dst, src): emit the bytes 0F 0F, the operand byte, then suffix A4.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFMAX(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa4 \
}
// PFMIN(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 94.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFMIN(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x94 \
}
// PFMUL(dst, src): emit the bytes 0F 0F, the operand byte, then suffix B4.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFMUL(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb4 \
}
// PFRCP(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 96.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFRCP(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x96 \
}
// PFRCPIT1(dst, src): emit the bytes 0F 0F, the operand byte, then suffix A6.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFRCPIT1(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa6 \
}
// PFRCPIT2(dst, src): emit the bytes 0F 0F, the operand byte, then suffix B6.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFRCPIT2(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb6 \
}
// PFRSQRT(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 97.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFRSQRT(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x97 \
}
// PFRSQIT1(dst, src): emit the bytes 0F 0F, the operand byte, then suffix A7.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFRSQIT1(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa7 \
}
// PFSUB(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 9A.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFSUB(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x9a \
}
// PFSUBR(dst, src): emit the bytes 0F 0F, the operand byte, then suffix AA.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PFSUBR(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xaa \
}
// PI2FD(dst, src): emit the bytes 0F 0F, the operand byte, then suffix 0D.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PI2FD(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x0d \
}
// FEMMS: emit the two bytes 0F 0E. Takes no operands.
#define FEMMS \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0e \
}
// PAVGUSB(dst, src): emit the bytes 0F 0F, the operand byte, then suffix BF.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PAVGUSB(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xbf \
}
// PMULHRW(dst, src): emit the bytes 0F 0F, the operand byte, then suffix B7.
// Operand byte = ((dst & 0x3f) << 3) | src -- the mask strips the 0xc0 bits
// of an _MMn/_mmn code so only the register number lands in bits 3-5.
#define PMULHRW(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb7 \
}
// PREFETCH(src): emit the bytes 0F 0D followed by (0x00 | src).
// src is expected to be one of the address-register codes
// (_EAX.._EDI / _eax.._edi), whose register holds the effective address.
#define PREFETCH(src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0d \
    _asm _emit 0x00 | (src) \
}
// PREFETCHW(src): emit the bytes 0F 0D followed by (0x08 | src).
// src is expected to be one of the address-register codes
// (_EAX.._EDI / _eax.._edi), whose register holds the effective address.
#define PREFETCHW(src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0d \
    _asm _emit 0x08 | (src) \
}

//
// Exactly same as above except macro names are all
// lower case letters.
//
// pf2id(dst, src): lower-case spelling of PF2ID; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix 1D.
#define pf2id(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x1d \
}
// pfacc(dst, src): lower-case spelling of PFACC; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix AE.
#define pfacc(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xae \
}
// pfadd(dst, src): lower-case spelling of PFADD; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix 9E.
#define pfadd(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x9e \
}
// pfcmpeq(dst, src): lower-case spelling of PFCMPEQ; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix B0.
#define pfcmpeq(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb0 \
}
// pfcmpge(dst, src): lower-case spelling of PFCMPGE; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix 90.
#define pfcmpge(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x90 \
}
// pfcmpgt(dst, src): lower-case spelling of PFCMPGT; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix A0.
#define pfcmpgt(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa0 \
}
// pfmax(dst, src): lower-case spelling of PFMAX; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix A4.
#define pfmax(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa4 \
}
// pfmin(dst, src): lower-case spelling of PFMIN; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix 94.
#define pfmin(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x94 \
}
// pfmul(dst, src): lower-case spelling of PFMUL; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix B4.
#define pfmul(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb4 \
}
// pfrcp(dst, src): lower-case spelling of PFRCP; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix 96.
#define pfrcp(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x96 \
}
// pfrcpit1(dst, src): lower-case spelling of PFRCPIT1; emits 0F 0F, the
// operand byte ((dst & 0x3f) << 3) | src, then suffix A6.
#define pfrcpit1(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa6 \
}
// pfrcpit2(dst, src): lower-case spelling of PFRCPIT2; emits 0F 0F, the
// operand byte ((dst & 0x3f) << 3) | src, then suffix B6.
#define pfrcpit2(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb6 \
}
// pfrsqrt(dst, src): lower-case spelling of PFRSQRT; emits 0F 0F, the
// operand byte ((dst & 0x3f) << 3) | src, then suffix 97.
#define pfrsqrt(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x97 \
}
// pfrsqit1(dst, src): lower-case spelling of PFRSQIT1; emits 0F 0F, the
// operand byte ((dst & 0x3f) << 3) | src, then suffix A7.
#define pfrsqit1(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xa7 \
}
// pfsub(dst, src): lower-case spelling of PFSUB; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix 9A.
#define pfsub(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x9a \
}
// pfsubr(dst, src): lower-case spelling of PFSUBR; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix AA.
#define pfsubr(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xaa \
}
// pi2fd(dst, src): lower-case spelling of PI2FD; emits 0F 0F, the operand
// byte ((dst & 0x3f) << 3) | src, then suffix 0D.
#define pi2fd(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0x0d \
}
// femms: lower-case spelling of FEMMS; emits the two bytes 0F 0E.
// Takes no operands.
#define femms \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0e \
}
// pavgusb(dst, src): lower-case spelling of PAVGUSB; emits 0F 0F, the
// operand byte ((dst & 0x3f) << 3) | src, then suffix BF.
#define pavgusb(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xbf \
}
// pmulhrw(dst, src): lower-case spelling of PMULHRW; emits 0F 0F, the
// operand byte ((dst & 0x3f) << 3) | src, then suffix B7.
#define pmulhrw(dst, src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0f \
    _asm _emit (((dst) & 0x3f) << 3) | (src) \
    _asm _emit 0xb7 \
}
// prefetch(src): lower-case spelling of PREFETCH; emits 0F 0D followed by
// (0x00 | src). src is expected to be an address-register code
// (_EAX.._EDI / _eax.._edi).
#define prefetch(src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0d \
    _asm _emit 0x00 | (src) \
}
// prefetchw(src): lower-case spelling of PREFETCHW; emits 0F 0D followed by
// (0x08 | src). src is expected to be an address-register code
// (_EAX.._EDI / _eax.._edi).
#define prefetchw(src) \
{ \
    _asm _emit 0x0f \
    _asm _emit 0x0d \
    _asm _emit 0x08 | (src) \
}

/*

	Last Updated 07/15/98
	(c) 1999 Advanced Micro Devices, Inc.
	Trademark Information

*/

//////////////////////////// eof /////////////////////////////