jedi-academy/code/renderer/amd3d.h
2013-04-23 15:21:39 +10:00

471 lines
12 KiB
C

/******************************************************************
; *
; * Copyright (c) 1996-1998 ADVANCED MICRO DEVICES, INC.
; * All Rights reserved.
; *
; * This software is unpublished and contains the trade secrets
; * and confidential proprietary information of AMD. Unless
; * otherwise provided in the Software Agreement associated
; * herewith, it is licensed in confidence "AS IS" and
; * is not to be reproduced in whole or part by any means except
; * for backup. Use, duplication, or disclosure by the Government
; * is subject to the restrictions in paragraph(b)(3)(B)of the
; * Rights in Technical Data and Computer Software clause in
; * DFAR 52.227-7013(a)(Oct 1988). Software owned by Advanced
; * Micro Devices Inc., One AMD Place, P.O. Box 3453, Sunnyvale,
; * CA 94088-3453.
; *
; ******************************************************************
*
* AMD3D.H
*
* MACRO FORMAT
* ============
* This file contains inline assembly macros that
* generate AMD-3D instructions in binary format.
* Therefore, C or C++ programmers can use AMD-3D instructions
* without any penalty in their C or C++ source code.
*
* The macro's name and format conventions are as follows:
*
*
* 1. First argument of macro is a destination and
* second argument is a source operand.
* ex) _asm PFCMPEQ (m3, m4)
*                   |   |
*                  dst src
*
* 2. The destination operand can be m0 to m7 only.
* The source operand can be any one of the register
* m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi
* that contains effective address.
* ex) _asm PFRCP (M7, M6)
* ex) _asm PFRCPIT2 (m0, m4)
* ex) _asm PFMUL (m3, _edi)
*
* 3. The prefetch(w) takes one src operand _eax, _ecx, _edx,
* _ebx, _esi, or _edi that contains effective address.
* ex) _asm PREFETCH (_edi)
*
* EXAMPLE
* =======
* Following program doesn't do anything but it shows you
* how to use inline assembly AMD-3D instructions in C.
* Note that this will only work in a flat memory model, in which
* the segment registers cs, ds, ss and es all point to the same
* linear address space of less than 4GB in total.
*
* Used Microsoft VC++ 5.0
*
* #include <stdio.h>
* #include "amd3d.h"
*
* void main ()
* {
* float x = (float)1.25;
* float y = (float)1.25;
* float z, zz;
*
* _asm {
* movd mm1, x
* movd mm2, y
* pfmul (m1, m2)
* movd z, mm1
* femms
* }
*
* printf ("value of z = %f\n", z);
*
* //
* // Demonstration of using the memory instead of
* // multimedia register
* //
* _asm {
* movd mm3, x
* lea esi, y // load effective address of y
* pfmul (m3, _esi)
* movd zz, mm3
* femms
* }
*
* printf ("value of zz = %f\n", zz);
* }
******************************************************************/
//
// Operand constants naming the MMX registers 0..7. Every value has its
// top two bits set (0xC0) and carries the register number in its low
// three bits, so it can be OR-ed in directly as the source part of a
// ModRM byte; the instruction macros mask a destination with 0x3f before
// shifting it into place. Each register is available under an upper-case
// and a lower-case name with the same value.
//
#define M0 0xC0
#define m0 0xC0
#define M1 0xC1
#define m1 0xC1
#define M2 0xC2
#define m2 0xC2
#define M3 0xC3
#define m3 0xC3
#define M4 0xC4
#define m4 0xC4
#define M5 0xC5
#define m5 0xC5
#define M6 0xC6
#define m6 0xC6
#define M7 0xC7
#define m7 0xC7
//
// Operand constants for a source operand in memory whose effective
// address is held in the named general-purpose register. The value is
// OR-ed in as the low bits of the ModRM byte with the top two bits left
// clear. Only these six registers are provided; values 4 and 5 are not
// defined (presumably because those r/m codes do not mean a plain
// register-indirect operand in the x86 ModRM encoding -- verify against
// the processor manual). Each register is available under an upper-case
// and a lower-case name with the same value.
//
#define _EAX 0x00
#define _eax 0x00
#define _ECX 0x01
#define _ecx 0x01
#define _EDX 0x02
#define _edx 0x02
#define _EBX 0x03
#define _ebx 0x03
#define _ESI 0x06
#define _esi 0x06
#define _EDI 0x07
#define _edi 0x07
// PF2ID(dst, src) -- 3DNow! PF2ID (packed floating-point to 32-bit
// integer conversion).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x1d. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PF2ID(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x1d \
}
// PFACC(dst, src) -- 3DNow! PFACC (packed floating-point accumulate).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xae. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFACC(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xae \
}
// PFADD(dst, src) -- 3DNow! PFADD (packed floating-point addition).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x9e. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFADD(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x9e \
}
// PFCMPEQ(dst, src) -- 3DNow! PFCMPEQ (packed floating-point comparison,
// equal).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xb0. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFCMPEQ(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb0 \
}
// PFCMPGE(dst, src) -- 3DNow! PFCMPGE (packed floating-point comparison,
// greater than or equal).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x90. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFCMPGE(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x90 \
}
// PFCMPGT(dst, src) -- 3DNow! PFCMPGT (packed floating-point comparison,
// greater than).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xa0. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFCMPGT(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa0 \
}
// PFMAX(dst, src) -- 3DNow! PFMAX (packed floating-point maximum).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xa4. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFMAX(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa4 \
}
// PFMIN(dst, src) -- 3DNow! PFMIN (packed floating-point minimum).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x94. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFMIN(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x94 \
}
// PFMUL(dst, src) -- 3DNow! PFMUL (packed floating-point multiplication).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xb4. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFMUL(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb4 \
}
// PFRCP(dst, src) -- 3DNow! PFRCP (floating-point reciprocal
// approximation).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x96. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFRCP(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x96 \
}
// PFRCPIT1(dst, src) -- 3DNow! PFRCPIT1 (reciprocal, first iteration
// step).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xa6. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFRCPIT1(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa6 \
}
// PFRCPIT2(dst, src) -- 3DNow! PFRCPIT2 (reciprocal / reciprocal square
// root, second iteration step).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xb6. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFRCPIT2(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb6 \
}
// PFRSQRT(dst, src) -- 3DNow! PFRSQRT (floating-point reciprocal square
// root approximation).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x97. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFRSQRT(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x97 \
}
// PFRSQIT1(dst, src) -- 3DNow! PFRSQIT1 (reciprocal square root, first
// iteration step).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xa7. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFRSQIT1(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa7 \
}
// PFSUB(dst, src) -- 3DNow! PFSUB (packed floating-point subtraction).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x9a. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFSUB(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x9a \
}
// PFSUBR(dst, src) -- 3DNow! PFSUBR (packed floating-point reverse
// subtraction).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xaa. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PFSUBR(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xaa \
}
// PI2FD(dst, src) -- 3DNow! PI2FD (packed 32-bit integer to
// floating-point conversion).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0x0d. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PI2FD(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x0d \
}
// FEMMS -- 3DNow! FEMMS (fast switch between MMX and floating-point
// state). Takes no operands; emits the two bytes 0x0f 0x0e.
// This is an object-like macro, so it is written without parentheses.
#define FEMMS \
{\
_asm _emit 0x0f \
_asm _emit 0x0e \
}
// PAVGUSB(dst, src) -- 3DNow! PAVGUSB (packed 8-bit unsigned integer
// averaging).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xbf. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PAVGUSB(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xbf \
}
// PMULHRW(dst, src) -- 3DNow! PMULHRW (packed 16-bit integer multiply
// with rounding).
// Emits 0x0f 0x0f, the ModRM byte ((dst & 0x3f) << 3) | src, then the
// opcode suffix 0xb7. dst is one of M0..M7; src is one of M0..M7 or an
// _EAX.._EDI constant. The arguments are not parenthesized, so pass only
// the operand constants defined in this file.
#define PMULHRW(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb7 \
}
// PREFETCH(src) -- 3DNow! PREFETCH (prefetch the cache line at the
// address held in src).
// Emits 0x0f 0x0d followed by the ModRM byte 0x00 | src, where src is one
// of the _EAX.._EDI address-register constants. The argument is not
// parenthesized, so pass only those constants.
#define PREFETCH(src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0d \
_asm _emit 0x00 | src \
}
// PREFETCHW(src) -- 3DNow! PREFETCHW (prefetch the cache line at the
// address held in src, with intent to write).
// Emits 0x0f 0x0d followed by the ModRM byte 0x08 | src, where src is one
// of the _EAX.._EDI address-register constants; the 0x08 is the only
// difference from PREFETCH. The argument is not parenthesized, so pass
// only those constants.
#define PREFETCHW(src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0d \
_asm _emit 0x08 | src \
}
//
// Exactly the same as above, except that the macro names are all
// lower-case letters.
//
// pf2id(dst, src) -- lower-case spelling of PF2ID; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x1d.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pf2id(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x1d \
}
// pfacc(dst, src) -- lower-case spelling of PFACC; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xae.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfacc(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xae \
}
// pfadd(dst, src) -- lower-case spelling of PFADD; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x9e.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfadd(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x9e \
}
// pfcmpeq(dst, src) -- lower-case spelling of PFCMPEQ; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xb0.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfcmpeq(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb0 \
}
// pfcmpge(dst, src) -- lower-case spelling of PFCMPGE; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x90.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfcmpge(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x90 \
}
// pfcmpgt(dst, src) -- lower-case spelling of PFCMPGT; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xa0.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfcmpgt(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa0 \
}
// pfmax(dst, src) -- lower-case spelling of PFMAX; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xa4.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfmax(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa4 \
}
// pfmin(dst, src) -- lower-case spelling of PFMIN; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x94.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfmin(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x94 \
}
// pfmul(dst, src) -- lower-case spelling of PFMUL; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xb4.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfmul(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb4 \
}
// pfrcp(dst, src) -- lower-case spelling of PFRCP; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x96.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfrcp(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x96 \
}
// pfrcpit1(dst, src) -- lower-case spelling of PFRCPIT1; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xa6.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfrcpit1(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa6 \
}
// pfrcpit2(dst, src) -- lower-case spelling of PFRCPIT2; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xb6.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfrcpit2(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb6 \
}
// pfrsqrt(dst, src) -- lower-case spelling of PFRSQRT; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x97.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfrsqrt(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x97 \
}
// pfrsqit1(dst, src) -- lower-case spelling of PFRSQIT1; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xa7.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfrsqit1(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xa7 \
}
// pfsub(dst, src) -- lower-case spelling of PFSUB; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x9a.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfsub(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x9a \
}
// pfsubr(dst, src) -- lower-case spelling of PFSUBR; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xaa.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pfsubr(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xaa \
}
// pi2fd(dst, src) -- lower-case spelling of PI2FD; emits the same bytes:
// 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0x0d.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pi2fd(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0x0d \
}
// femms -- lower-case spelling of FEMMS; emits the same two bytes
// 0x0f 0x0e. Object-like macro: every later occurrence of the token
// femms in a translation unit that includes this header is replaced.
#define femms \
{\
_asm _emit 0x0f \
_asm _emit 0x0e \
}
// pavgusb(dst, src) -- lower-case spelling of PAVGUSB; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xbf.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pavgusb(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xbf \
}
// pmulhrw(dst, src) -- lower-case spelling of PMULHRW; emits the same
// bytes: 0x0f 0x0f, ModRM ((dst & 0x3f) << 3) | src, opcode suffix 0xb7.
// Arguments are not parenthesized; pass only this file's operand constants.
#define pmulhrw(dst, src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0f \
_asm _emit ((dst & 0x3f) << 3) | src \
_asm _emit 0xb7 \
}
// prefetch(src) -- lower-case spelling of PREFETCH; emits the same bytes:
// 0x0f 0x0d, then ModRM 0x00 | src. src is one of the _eax.._edi
// address-register constants; it is not parenthesized in the expansion.
#define prefetch(src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0d \
_asm _emit 0x00 | src \
}
// prefetchw(src) -- lower-case spelling of PREFETCHW; emits the same
// bytes: 0x0f 0x0d, then ModRM 0x08 | src. src is one of the _eax.._edi
// address-register constants; it is not parenthesized in the expansion.
#define prefetchw(src) \
{\
_asm _emit 0x0f \
_asm _emit 0x0d \
_asm _emit 0x08 | src \
}