mirror of
https://github.com/nzp-team/dquakeplus.git
synced 2024-11-22 11:51:21 +00:00
Use VFPU for most memcpy operations - backported from ADQ
This commit is contained in:
parent
431f5bd09f
commit
a2ce78450d
21 changed files with 387 additions and 63 deletions
|
@ -337,12 +337,12 @@ void SetPal (int i)
|
|||
|
||||
void CL_CopyPlayerInfo (entity_t *ent, entity_t *player)
|
||||
{
|
||||
memcpy (&ent->baseline, &player->baseline, sizeof(entity_state_t));
|
||||
memcpy_vfpu(&ent->baseline, &player->baseline, sizeof(entity_state_t));
|
||||
|
||||
ent->msgtime = player->msgtime;
|
||||
memcpy (ent->msg_origins, player->msg_origins, sizeof(ent->msg_origins));
|
||||
memcpy_vfpu(ent->msg_origins, player->msg_origins, sizeof(ent->msg_origins));
|
||||
VectorCopy (player->origin, ent->origin);
|
||||
memcpy (ent->msg_angles, player->msg_angles, sizeof(ent->msg_angles));
|
||||
memcpy_vfpu(ent->msg_angles, player->msg_angles, sizeof(ent->msg_angles));
|
||||
VectorCopy (player->angles, ent->angles);
|
||||
|
||||
ent->model = (ent == &q3player_body.ent) ? cl.model_precache[cl_modelindex[mi_q3torso]] : cl.model_precache[cl_modelindex[mi_q3head]];
|
||||
|
|
|
@ -205,7 +205,7 @@ void CL_KeepaliveMessage (void)
|
|||
|
||||
// read messages from server, should just be nops
|
||||
old = net_message;
|
||||
memcpy (olddata, net_message.data, net_message.cursize);
|
||||
memcpy_vfpu(olddata, net_message.data, net_message.cursize);
|
||||
|
||||
do
|
||||
{
|
||||
|
@ -227,7 +227,7 @@ void CL_KeepaliveMessage (void)
|
|||
} while (ret);
|
||||
|
||||
net_message = old;
|
||||
memcpy (net_message.data, olddata, net_message.cursize);
|
||||
memcpy_vfpu(net_message.data, olddata, net_message.cursize);
|
||||
|
||||
// check time
|
||||
time = Sys_FloatTime ();
|
||||
|
|
|
@ -124,9 +124,9 @@ void SList_Switch (int a, int b)
|
|||
if (b >= MAX_SERVER_LIST || b < 0)
|
||||
Sys_Error ("SList_Switch: Bad index %d", b);
|
||||
|
||||
memcpy (&temp, &slist[a], sizeof(temp));
|
||||
memcpy (&slist[a], &slist[b], sizeof(temp));
|
||||
memcpy (&slist[b], &temp, sizeof(temp));
|
||||
memcpy_vfpu(&temp, &slist[a], sizeof(temp));
|
||||
memcpy_vfpu(&slist[a], &slist[b], sizeof(temp));
|
||||
memcpy_vfpu(&slist[b], &temp, sizeof(temp));
|
||||
}
|
||||
|
||||
int SList_Length (void)
|
||||
|
|
|
@ -164,7 +164,7 @@ void Cbuf_Execute (void)
|
|||
}
|
||||
|
||||
|
||||
memcpy (line, text, i);
|
||||
memcpy_vfpu(line, text, i);
|
||||
line[i] = 0;
|
||||
|
||||
// delete the text from the command buffer and move remaining commands down
|
||||
|
|
|
@ -406,9 +406,9 @@ void Host_SavegameComment (char *text)
|
|||
|
||||
for (i=0 ; i<SAVEGAME_COMMENT_LENGTH ; i++)
|
||||
text[i] = ' ';
|
||||
memcpy (text, cl.levelname, strlen(cl.levelname));
|
||||
memcpy_vfpu(text, cl.levelname, strlen(cl.levelname));
|
||||
sprintf (kills,"kills:%3i/%3i", cl.stats[STAT_INSTA], cl.stats[STAT_ROUNDCHANGE]);
|
||||
memcpy (text+22, kills, strlen(kills));
|
||||
memcpy_vfpu(text+22, kills, strlen(kills));
|
||||
// convert space to _ to make stdio happy
|
||||
for (i=0 ; i<SAVEGAME_COMMENT_LENGTH ; i++)
|
||||
if (text[i] == ' ')
|
||||
|
|
|
@ -183,7 +183,7 @@ void RotatePointAroundVector( vec3_t dst, const vec3_t dir, const vec3_t point,
|
|||
m[1][2] = vf[1];
|
||||
m[2][2] = vf[2];
|
||||
|
||||
memcpy( im, m, sizeof( im ) );
|
||||
memcpy_vfpu( im, m, sizeof( im ) );
|
||||
|
||||
im[0][1] = m[1][0];
|
||||
im[0][2] = m[2][0];
|
||||
|
|
|
@ -286,16 +286,16 @@ void M_BuildTranslationTable(int top, int bottom)
|
|||
identityTable[j] = j;
|
||||
dest = translationTable;
|
||||
source = identityTable;
|
||||
memcpy (dest, source, 256);
|
||||
memcpy_vfpu(dest, source, 256);
|
||||
|
||||
if (top < 128) // the artists made some backwards ranges. sigh.
|
||||
memcpy (dest + TOP_RANGE, source + top, 16);
|
||||
memcpy_vfpu(dest + TOP_RANGE, source + top, 16);
|
||||
else
|
||||
for (j=0 ; j<16 ; j++)
|
||||
dest[TOP_RANGE+j] = source[top+15-j];
|
||||
|
||||
if (bottom < 128)
|
||||
memcpy (dest + BOTTOM_RANGE, source + bottom, 16);
|
||||
memcpy_vfpu(dest + BOTTOM_RANGE, source + bottom, 16);
|
||||
else
|
||||
for (j=0 ; j<16 ; j++)
|
||||
dest[BOTTOM_RANGE+j] = source[bottom+15-j];
|
||||
|
|
|
@ -940,7 +940,7 @@ int PF_newcheckclient (int check)
|
|||
VectorAdd (ent->v.origin, ent->v.view_ofs, org);
|
||||
leaf = Mod_PointInLeaf (org, sv.worldmodel);
|
||||
pvs = Mod_LeafPVS (leaf, sv.worldmodel);
|
||||
memcpy (checkpvs, pvs, (sv.worldmodel->numleafs+7)>>3 );
|
||||
memcpy_vfpu(checkpvs, pvs, (sv.worldmodel->numleafs+7)>>3 );
|
||||
|
||||
return i;
|
||||
}
|
||||
|
|
|
@ -94,9 +94,9 @@ void IN_Init (void)
|
|||
buttonToGameKeyMap[buttonMaskToShift(PSP_CTRL_RIGHT)] = K_RIGHTARROW;
|
||||
buttonToGameKeyMap[buttonMaskToShift(PSP_CTRL_DOWN)] = K_DOWNARROW;
|
||||
buttonToGameKeyMap[buttonMaskToShift(PSP_CTRL_LEFT)] = K_LEFTARROW;
|
||||
memcpy(buttonToConsoleKeyMap, buttonToGameKeyMap, sizeof(ButtonToKeyMap));
|
||||
memcpy(buttonToMessageKeyMap, buttonToGameKeyMap, sizeof(ButtonToKeyMap));
|
||||
memcpy(buttonToMenuKeyMap, buttonToGameKeyMap, sizeof(ButtonToKeyMap));
|
||||
memcpy_vfpu(buttonToConsoleKeyMap, buttonToGameKeyMap, sizeof(ButtonToKeyMap));
|
||||
memcpy_vfpu(buttonToMessageKeyMap, buttonToGameKeyMap, sizeof(ButtonToKeyMap));
|
||||
memcpy_vfpu(buttonToMenuKeyMap, buttonToGameKeyMap, sizeof(ButtonToKeyMap));
|
||||
|
||||
// Game keys:
|
||||
buttonToGameKeyMap[buttonMaskToShift(PSP_CTRL_LTRIGGER)] = K_AUX1;
|
||||
|
@ -115,7 +115,7 @@ void IN_Init (void)
|
|||
buttonToConsoleKeyMap[buttonMaskToShift(PSP_CTRL_SQUARE)] = K_INS;
|
||||
|
||||
// Message keys:
|
||||
memcpy(buttonToMessageKeyMap, buttonToConsoleKeyMap, sizeof(ButtonToKeyMap));
|
||||
memcpy_vfpu(buttonToMessageKeyMap, buttonToConsoleKeyMap, sizeof(ButtonToKeyMap));
|
||||
|
||||
// Menu keys:
|
||||
buttonToMenuKeyMap[buttonMaskToShift(PSP_CTRL_SQUARE)] = K_INS;
|
||||
|
|
|
@ -77,7 +77,7 @@ static pdpStatStruct gPdpStat;
|
|||
pdpStatStruct *findPdpStat(int socket, pdpStatStruct *pdpStat)
|
||||
{
|
||||
if(socket == pdpStat->pdpId) {
|
||||
memcpy(&gPdpStat, pdpStat, sizeof(pdpStatStruct));
|
||||
memcpy_vfpu(&gPdpStat, pdpStat, sizeof(pdpStatStruct));
|
||||
return &gPdpStat;
|
||||
}
|
||||
if(pdpStat->next) return findPdpStat(socket, pdpStat->next);
|
||||
|
@ -863,7 +863,7 @@ namespace quake
|
|||
pdpStatStruct *tempPdp = findPdpStat(socket, pdpStat);
|
||||
if(tempPdp < 0) return -1;
|
||||
|
||||
memcpy(((struct sockaddr_adhoc *)addr)->mac, tempPdp->mac, 6);
|
||||
memcpy_vfpu(((struct sockaddr_adhoc *)addr)->mac, tempPdp->mac, 6);
|
||||
((struct sockaddr_adhoc *)addr)->port = tempPdp->port;
|
||||
addr->sa_family = ADHOC_NET;
|
||||
return 0;
|
||||
|
|
|
@ -488,7 +488,7 @@ qpic_t *Draw_CacheImg (char *path)
|
|||
// the translatable player picture just for the menu
|
||||
// configuration dialog
|
||||
if (!strcmp (path, "gfx/menuplyr.lmp"))
|
||||
memcpy (menuplyr_pixels, dat->data, dat->width*dat->height);
|
||||
memcpy(menuplyr_pixels, dat->data, dat->width*dat->height);
|
||||
|
||||
pic->pic.width = dat->width;
|
||||
pic->pic.height = dat->height;
|
||||
|
@ -3398,16 +3398,6 @@ void GL_Upload4(int texture_index, const byte *data, int width, int height)
|
|||
memcpy(texture.palette, data + buffer_size, 16 * 4);
|
||||
int i;
|
||||
|
||||
// Copy to VRAM?
|
||||
/*if (texture.vram)
|
||||
{
|
||||
// Copy.
|
||||
memcpy(texture.vram, texture.ram, buffer_size);
|
||||
|
||||
// Flush the data cache.
|
||||
sceKernelDcacheWritebackRange(texture.vram, buffer_size);
|
||||
}*/
|
||||
|
||||
// Flush the data cache.
|
||||
sceKernelDcacheWritebackRange(texture.ram, buffer_size);
|
||||
}
|
||||
|
|
|
@ -130,7 +130,7 @@ qboolean Mod_LoadHLModel (model_t *mod, void *buffer)
|
|||
model = static_cast<hlmodelcache_t*>(Hunk_Alloc(sizeof(hlmodelcache_t)));
|
||||
|
||||
header = static_cast<hlmdl_header_t*>(Hunk_Alloc(com_filesize));
|
||||
memcpy(header, buffer, com_filesize);
|
||||
memcpy_vfpu(header, buffer, com_filesize);
|
||||
|
||||
if (header->version != 10)
|
||||
{
|
||||
|
@ -175,7 +175,7 @@ qboolean Mod_LoadHLModel (model_t *mod, void *buffer)
|
|||
if (!mod->cache.data)
|
||||
return qfalse;
|
||||
|
||||
memcpy (mod->cache.data, model, total);
|
||||
memcpy_vfpu(mod->cache.data, model, total);
|
||||
|
||||
Hunk_FreeToLowMark (start);
|
||||
return qtrue;
|
||||
|
@ -482,7 +482,7 @@ void HL_SetupBones(hlmodel_t *model)
|
|||
}
|
||||
else
|
||||
{
|
||||
memcpy(transform_matrix[i], matrix, 12 * sizeof(float));
|
||||
memcpy_vfpu(transform_matrix[i], matrix, 12 * sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -260,7 +260,7 @@ byte* LoadPCX (FILE *f, int matchwidth, int matchheight)
|
|||
image_width = pcx->xmax+1;
|
||||
image_height = pcx->ymax+1;
|
||||
|
||||
memcpy(image_palette, palette, sizeof(palette));
|
||||
memcpy_vfpu(image_palette, palette, sizeof(palette));
|
||||
image_palette_type = PAL_RGB;
|
||||
|
||||
fclose (f);
|
||||
|
@ -304,7 +304,7 @@ byte *LoadWAL (char *name)
|
|||
size = width * height;
|
||||
|
||||
data = static_cast<byte*>(malloc(size));
|
||||
memcpy(data, (byte *)mt + ofs, size);
|
||||
memcpy_vfpu(data, (byte *)mt + ofs, size);
|
||||
|
||||
image_palette_type = PAL_Q2;
|
||||
|
||||
|
|
|
@ -343,7 +343,7 @@ void GL_MakeAliasModelDisplayListsH2 (model_t *m, aliashdr_t *hdr)
|
|||
|
||||
int* cmds = static_cast<int*>(Hunk_Alloc (numcommands * 4));
|
||||
paliashdr->commands = (byte *)cmds - (byte *)paliashdr;
|
||||
memcpy (cmds, commands, numcommands * 4);
|
||||
memcpy_vfpu(cmds, commands, numcommands * 4);
|
||||
|
||||
trivertx_t* verts = static_cast<trivertx_t*>(Hunk_Alloc (paliashdr->numposes * paliashdr->poseverts
|
||||
* sizeof(trivertx_t) ));
|
||||
|
@ -420,7 +420,7 @@ void GL_MakeAliasModelDisplayLists (model_t *m, aliashdr_t *hdr)
|
|||
|
||||
cmds = static_cast<int*>(Hunk_Alloc (numcommands * 4));
|
||||
paliashdr->commands = (byte *)cmds - (byte *)paliashdr;
|
||||
memcpy (cmds, commands, numcommands * 4);
|
||||
memcpy_vfpu(cmds, commands, numcommands * 4);
|
||||
|
||||
verts = static_cast<trivertx_t*>(Hunk_Alloc (paliashdr->numposes * paliashdr->poseverts
|
||||
* sizeof(trivertx_t) ));
|
||||
|
|
|
@ -537,13 +537,13 @@ void Mod_LoadTextures (lump_t *l)
|
|||
loadmodel->textures[i] = tx;
|
||||
|
||||
|
||||
memcpy (tx->name, mt->name, sizeof(tx->name));
|
||||
memcpy_vfpu(tx->name, mt->name, sizeof(tx->name));
|
||||
tx->width = mt->width;
|
||||
tx->height = mt->height;
|
||||
for (j=0 ; j<MIPLEVELS ; j++)
|
||||
tx->offsets[j] = mt->offsets[j] + sizeof(texture_t) - sizeof(miptex_t);
|
||||
// the pixels immediately follow the structures
|
||||
memcpy ( tx_pixels, mt+1, pixels);
|
||||
memcpy_vfpu( tx_pixels, mt+1, pixels);
|
||||
|
||||
int level = 0;
|
||||
if (r_mipmaps.value > 0)
|
||||
|
@ -776,7 +776,7 @@ void Mod_LoadLighting (lump_t *l)
|
|||
return;
|
||||
}
|
||||
loadmodel->lightdata = static_cast<byte*>(Hunk_AllocName ( l->filelen, loadname));
|
||||
memcpy (loadmodel->lightdata, mod_base + l->fileofs, l->filelen);
|
||||
memcpy_vfpu(loadmodel->lightdata, mod_base + l->fileofs, l->filelen);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -816,7 +816,7 @@ void Mod_LoadLighting (lump_t *l)
|
|||
loadmodel->lightdata = static_cast<byte*>(Hunk_AllocName ( l->filelen*3, litfilename));
|
||||
in = loadmodel->lightdata + l->filelen*2; // place the file at the end, so it will not be overwritten until the very last write
|
||||
out = loadmodel->lightdata;
|
||||
memcpy (in, mod_base + l->fileofs, l->filelen);
|
||||
memcpy_vfpu(in, mod_base + l->fileofs, l->filelen);
|
||||
for (i = 0;i < l->filelen;i++)
|
||||
{
|
||||
d = *in++;
|
||||
|
@ -853,7 +853,7 @@ void Mod_HL_LoadLighting (lump_t *l)
|
|||
}
|
||||
|
||||
loadmodel->lightdata = static_cast<byte*>(Hunk_AllocName ( l->filelen, loadname));
|
||||
memcpy (loadmodel->lightdata, mod_base + l->fileofs, l->filelen);
|
||||
memcpy_vfpu(loadmodel->lightdata, mod_base + l->fileofs, l->filelen);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -871,7 +871,7 @@ void Mod_LoadVisibility (lump_t *l)
|
|||
}
|
||||
|
||||
loadmodel->visdata = static_cast<byte*>(Hunk_AllocName ( l->filelen, loadname));
|
||||
memcpy (loadmodel->visdata, mod_base + l->fileofs, l->filelen);
|
||||
memcpy_vfpu(loadmodel->visdata, mod_base + l->fileofs, l->filelen);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -980,7 +980,7 @@ void Mod_LoadEntities (lump_t *l)
|
|||
}
|
||||
|
||||
loadmodel->entities = static_cast<char*>(Hunk_AllocName ( l->filelen, entfilename));
|
||||
memcpy (loadmodel->entities, mod_base + l->fileofs, l->filelen);
|
||||
memcpy_vfpu(loadmodel->entities, mod_base + l->fileofs, l->filelen);
|
||||
|
||||
if (loadmodel->bspversion == HL_BSPVERSION || loadmodel->bspversion == NZP_BSPVERSION)
|
||||
Mod_ParseWadsFromEntityLump(loadmodel->entities);
|
||||
|
@ -2324,7 +2324,7 @@ void Mod_LoadAliasModel (model_t *mod, void *buffer)
|
|||
if (!mod->cache.data)
|
||||
return;
|
||||
|
||||
memcpy (mod->cache.data, pheader, total);
|
||||
memcpy_vfpu(mod->cache.data, pheader, total);
|
||||
|
||||
Hunk_FreeToLowMark (start);
|
||||
}
|
||||
|
@ -2496,7 +2496,7 @@ void Mod_LoadH2AliasModel (model_t *mod, void *buffer)
|
|||
Cache_Alloc (&mod->cache, total, loadname);
|
||||
if (!mod->cache.data)
|
||||
return;
|
||||
memcpy (mod->cache.data, pheader, total);
|
||||
memcpy_vfpu(mod->cache.data, pheader, total);
|
||||
|
||||
Hunk_FreeToLowMark (start);
|
||||
}
|
||||
|
@ -2650,7 +2650,7 @@ void Mod_LoadQ2AliasModel (model_t *mod, void *buffer)
|
|||
Cache_Alloc (&mod->cache, total, loadname);
|
||||
if (!mod->cache.data)
|
||||
return;
|
||||
memcpy (mod->cache.data, pheader, total);
|
||||
memcpy_vfpu(mod->cache.data, pheader, total);
|
||||
|
||||
Hunk_FreeToLowMark (start);
|
||||
}
|
||||
|
@ -3247,7 +3247,7 @@ void Mod_LoadQ3AliasModel (model_t *mod, void *buffer)
|
|||
if (!mod->cache.data)
|
||||
return;
|
||||
|
||||
memcpy (header, buffer, com_filesize);
|
||||
memcpy_vfpu(header, buffer, com_filesize);
|
||||
base = com_filesize;
|
||||
|
||||
mod->type = mod_md3;
|
||||
|
|
|
@ -74,7 +74,7 @@ static void Image_Resample32 (void *indata, int inwidth, int inheight,void *outd
|
|||
{
|
||||
inrow = (byte *) indata + inwidth4 * yi;
|
||||
if (yi == oldy + 1)
|
||||
memcpy(row1, row2, outwidth4);
|
||||
memcpy_vfpu(row1, row2, outwidth4);
|
||||
else
|
||||
Image_Resample32LerpLine (inrow, row1, inwidth, outwidth);
|
||||
Image_Resample32LerpLine (inrow + inwidth4, row2, inwidth, outwidth);
|
||||
|
@ -116,12 +116,12 @@ static void Image_Resample32 (void *indata, int inwidth, int inheight,void *outd
|
|||
{
|
||||
inrow = (byte *) indata + inwidth4 * yi;
|
||||
if (yi == oldy+1)
|
||||
memcpy(row1, row2, outwidth4);
|
||||
memcpy_vfpu(row1, row2, outwidth4);
|
||||
else
|
||||
Image_Resample32LerpLine (inrow, row1, inwidth, outwidth);
|
||||
oldy = yi;
|
||||
}
|
||||
memcpy(out, row1, outwidth4);
|
||||
memcpy_vfpu(out, row1, outwidth4);
|
||||
}
|
||||
}
|
||||
free(memalloc);
|
||||
|
@ -221,7 +221,7 @@ static void Image_Resample24 (void *indata, int inwidth, int inheight,
|
|||
if (yi != oldy) {
|
||||
inrow = (byte *) indata + inwidth3 * yi;
|
||||
if (yi == oldy + 1)
|
||||
memcpy(row1, row2, outwidth3);
|
||||
memcpy_vfpu(row1, row2, outwidth3);
|
||||
else
|
||||
Image_Resample24LerpLine (inrow, row1, inwidth, outwidth);
|
||||
Image_Resample24LerpLine (inrow + inwidth3, row2, inwidth, outwidth);
|
||||
|
@ -259,12 +259,12 @@ static void Image_Resample24 (void *indata, int inwidth, int inheight,
|
|||
if (yi != oldy) {
|
||||
inrow = (byte *) indata + inwidth3 * yi;
|
||||
if (yi == oldy+1)
|
||||
memcpy(row1, row2, outwidth3);
|
||||
memcpy_vfpu(row1, row2, outwidth3);
|
||||
else
|
||||
Image_Resample24LerpLine (inrow, row1, inwidth, outwidth);
|
||||
oldy = yi;
|
||||
}
|
||||
memcpy(out, row1, outwidth3);
|
||||
memcpy_vfpu(out, row1, outwidth3);
|
||||
}
|
||||
}
|
||||
free(memalloc);
|
||||
|
|
|
@ -232,7 +232,7 @@ void DumpChunks(void)
|
|||
data_p=iff_data;
|
||||
do
|
||||
{
|
||||
memcpy (str, data_p, 4);
|
||||
memcpy_vfpu(str, data_p, 4);
|
||||
data_p += 4;
|
||||
iff_chunk_len = GetLittleLong();
|
||||
Con_Printf ("0x%x : %s (%d)\n", (int)(data_p - 4), str, iff_chunk_len);
|
||||
|
|
|
@ -263,7 +263,7 @@ void SV_ConnectClient (int clientnum)
|
|||
netconnection = client->netconnection;
|
||||
|
||||
if (sv.loadgame)
|
||||
memcpy (spawn_parms, client->spawn_parms, sizeof(spawn_parms));
|
||||
memcpy_vfpu(spawn_parms, client->spawn_parms, sizeof(spawn_parms));
|
||||
memset (client, 0, sizeof(*client));
|
||||
client->netconnection = netconnection;
|
||||
|
||||
|
@ -278,7 +278,7 @@ void SV_ConnectClient (int clientnum)
|
|||
client->privileged = false;
|
||||
|
||||
if (sv.loadgame)
|
||||
memcpy (client->spawn_parms, spawn_parms, sizeof(spawn_parms));
|
||||
memcpy_vfpu(client->spawn_parms, spawn_parms, sizeof(spawn_parms));
|
||||
else
|
||||
{
|
||||
// call the progs to get default spawn parms for the new client
|
||||
|
|
|
@ -1713,7 +1713,7 @@ trace_t SV_Trace_Toss (edict_t *ent, edict_t *ignore)
|
|||
save_frametime = host_frametime;
|
||||
host_frametime = 0.05;
|
||||
|
||||
memcpy (&tempent, ent, sizeof(edict_t));
|
||||
memcpy_vfpu(&tempent, ent, sizeof(edict_t));
|
||||
tent = &tempent;
|
||||
|
||||
while (1)
|
||||
|
|
336
source/zone.c
336
source/zone.c
|
@ -48,6 +48,340 @@ typedef struct
|
|||
void Cache_FreeLow (int new_low_hunk);
|
||||
void Cache_FreeHigh (int new_high_hunk);
|
||||
|
||||
void* memcpy_vfpu( void* dst, void* src, unsigned int size )
|
||||
{
|
||||
u8* src8 = (u8*)src;
|
||||
u8* dst8 = (u8*)dst;
|
||||
|
||||
// < 8 isn't worth trying any optimisations...
|
||||
if (size<8) goto bytecopy;
|
||||
|
||||
// < 64 means we don't gain anything from using vfpu...
|
||||
if (size<64)
|
||||
{
|
||||
// Align dst on 4 bytes or just resume if already done
|
||||
while (((((u32)dst8) & 0x3)!=0) && size) {
|
||||
*dst8++ = *src8++;
|
||||
size--;
|
||||
}
|
||||
if (size<4) goto bytecopy;
|
||||
|
||||
// We are dst aligned now and >= 4 bytes to copy
|
||||
u32* src32 = (u32*)src8;
|
||||
u32* dst32 = (u32*)dst8;
|
||||
switch(((u32)src8)&0x3)
|
||||
{
|
||||
case 0:
|
||||
while (size&0xC)
|
||||
{
|
||||
*dst32++ = *src32++;
|
||||
size -= 4;
|
||||
}
|
||||
if (size==0) return (dst); // fast out
|
||||
while (size>=16)
|
||||
{
|
||||
*dst32++ = *src32++;
|
||||
*dst32++ = *src32++;
|
||||
*dst32++ = *src32++;
|
||||
*dst32++ = *src32++;
|
||||
size -= 16;
|
||||
}
|
||||
if (size==0) return (dst); // fast out
|
||||
src8 = (u8*)src32;
|
||||
dst8 = (u8*)dst32;
|
||||
break;
|
||||
default:
|
||||
{
|
||||
register u32 a, b, c, d;
|
||||
while (size>=4)
|
||||
{
|
||||
a = *src8++;
|
||||
b = *src8++;
|
||||
c = *src8++;
|
||||
d = *src8++;
|
||||
*dst32++ = (d << 24) | (c << 16) | (b << 8) | a;
|
||||
size -= 4;
|
||||
}
|
||||
if (size==0) return (dst); // fast out
|
||||
dst8 = (u8*)dst32;
|
||||
}
|
||||
break;
|
||||
}
|
||||
goto bytecopy;
|
||||
}
|
||||
|
||||
// Align dst on 16 bytes to gain from vfpu aligned stores
|
||||
while ((((u32)dst8) & 0xF)!=0 && size) {
|
||||
*dst8++ = *src8++;
|
||||
size--;
|
||||
}
|
||||
|
||||
// We use uncached dst to use VFPU writeback and free cpu cache for src only
|
||||
u8* udst8 = (u8*)((u32)dst8 | 0x40000000);
|
||||
// We need the 64 byte aligned address to make sure the dcache is invalidated correctly
|
||||
u8* dst64a = ((u32)dst8&~0x3F);
|
||||
// Invalidate the first line that matches up to the dst start
|
||||
if (size>=64)
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"cache 0x1B, 0(%0)\n"
|
||||
"addiu %0, %0, 64\n"
|
||||
"sync\n"
|
||||
".set pop\n"
|
||||
:"+r"(dst64a));
|
||||
switch(((u32)src8&0xF))
|
||||
{
|
||||
// src aligned on 16 bytes too? nice!
|
||||
case 0:
|
||||
while (size>=64)
|
||||
{
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"cache 0x1B, 0(%2)\n" // Dcache writeback invalidate
|
||||
"lv.q c000, 0(%1)\n"
|
||||
"lv.q c010, 16(%1)\n"
|
||||
"lv.q c020, 32(%1)\n"
|
||||
"lv.q c030, 48(%1)\n"
|
||||
"sync\n" // Wait for allegrex writeback
|
||||
"sv.q c000, 0(%0), wb\n"
|
||||
"sv.q c010, 16(%0), wb\n"
|
||||
"sv.q c020, 32(%0), wb\n"
|
||||
"sv.q c030, 48(%0), wb\n"
|
||||
// Lots of variable updates... but get hidden in sv.q latency anyway
|
||||
"addiu %3, %3, -64\n"
|
||||
"addiu %2, %2, 64\n"
|
||||
"addiu %1, %1, 64\n"
|
||||
"addiu %0, %0, 64\n"
|
||||
".set pop\n" // restore assembler option
|
||||
:"+r"(udst8),"+r"(src8),"+r"(dst64a),"+r"(size)
|
||||
:
|
||||
:"memory"
|
||||
);
|
||||
}
|
||||
if (size>16)
|
||||
{
|
||||
// Invalidate the last cache line where the max remaining 63 bytes are
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"cache 0x1B, 0(%0)\n"
|
||||
"sync\n"
|
||||
".set pop\n" // restore assembler option
|
||||
::"r"(dst64a));
|
||||
while (size>=16)
|
||||
{
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"lv.q c000, 0(%1)\n"
|
||||
"sv.q c000, 0(%0), wb\n"
|
||||
// Lots of variable updates... but get hidden in sv.q latency anyway
|
||||
"addiu %2, %2, -16\n"
|
||||
"addiu %1, %1, 16\n"
|
||||
"addiu %0, %0, 16\n"
|
||||
".set pop\n" // restore assembler option
|
||||
:"+r"(udst8),"+r"(src8),"+r"(size)
|
||||
:
|
||||
:"memory"
|
||||
);
|
||||
}
|
||||
}
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"vflush\n" // Flush VFPU writeback cache
|
||||
".set pop\n" // restore assembler option
|
||||
);
|
||||
dst8 = (u8*)((u32)udst8 & ~0x40000000);
|
||||
break;
|
||||
// src is only qword unaligned but word aligned? We can at least use ulv.q
|
||||
case 4:
|
||||
case 8:
|
||||
case 12:
|
||||
while (size>=64)
|
||||
{
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"cache 0x1B, 0(%2)\n" // Dcache writeback invalidate
|
||||
"ulv.q c000, 0(%1)\n"
|
||||
"ulv.q c010, 16(%1)\n"
|
||||
"ulv.q c020, 32(%1)\n"
|
||||
"ulv.q c030, 48(%1)\n"
|
||||
"sync\n" // Wait for allegrex writeback
|
||||
"sv.q c000, 0(%0), wb\n"
|
||||
"sv.q c010, 16(%0), wb\n"
|
||||
"sv.q c020, 32(%0), wb\n"
|
||||
"sv.q c030, 48(%0), wb\n"
|
||||
// Lots of variable updates... but get hidden in sv.q latency anyway
|
||||
"addiu %3, %3, -64\n"
|
||||
"addiu %2, %2, 64\n"
|
||||
"addiu %1, %1, 64\n"
|
||||
"addiu %0, %0, 64\n"
|
||||
".set pop\n" // restore assembler option
|
||||
:"+r"(udst8),"+r"(src8),"+r"(dst64a),"+r"(size)
|
||||
:
|
||||
:"memory"
|
||||
);
|
||||
}
|
||||
if (size>16)
|
||||
// Invalidate the last cache line where the max remaining 63 bytes are
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"cache 0x1B, 0(%0)\n"
|
||||
"sync\n"
|
||||
".set pop\n" // restore assembler option
|
||||
::"r"(dst64a));
|
||||
while (size>=16)
|
||||
{
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"ulv.q c000, 0(%1)\n"
|
||||
"sv.q c000, 0(%0), wb\n"
|
||||
// Lots of variable updates... but get hidden in sv.q latency anyway
|
||||
"addiu %2, %2, -16\n"
|
||||
"addiu %1, %1, 16\n"
|
||||
"addiu %0, %0, 16\n"
|
||||
".set pop\n" // restore assembler option
|
||||
:"+r"(udst8),"+r"(src8),"+r"(size)
|
||||
:
|
||||
:"memory"
|
||||
);
|
||||
}
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"vflush\n" // Flush VFPU writeback cache
|
||||
".set pop\n" // restore assembler option
|
||||
);
|
||||
dst8 = (u8*)((u32)udst8 & ~0x40000000);
|
||||
break;
|
||||
// src not aligned? too bad... have to use unaligned reads
|
||||
default:
|
||||
while (size>=64)
|
||||
{
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"cache 0x1B, 0(%2)\n"
|
||||
|
||||
"lwr $8, 0(%1)\n" //
|
||||
"lwl $8, 3(%1)\n" // $8 = *(s + 0)
|
||||
"lwr $9, 4(%1)\n" //
|
||||
"lwl $9, 7(%1)\n" // $9 = *(s + 4)
|
||||
"lwr $10, 8(%1)\n" //
|
||||
"lwl $10, 11(%1)\n" // $10 = *(s + 8)
|
||||
"lwr $11, 12(%1)\n" //
|
||||
"lwl $11, 15(%1)\n" // $11 = *(s + 12)
|
||||
"mtv $8, s000\n"
|
||||
"mtv $9, s001\n"
|
||||
"mtv $10, s002\n"
|
||||
"mtv $11, s003\n"
|
||||
|
||||
"lwr $8, 16(%1)\n"
|
||||
"lwl $8, 19(%1)\n"
|
||||
"lwr $9, 20(%1)\n"
|
||||
"lwl $9, 23(%1)\n"
|
||||
"lwr $10, 24(%1)\n"
|
||||
"lwl $10, 27(%1)\n"
|
||||
"lwr $11, 28(%1)\n"
|
||||
"lwl $11, 31(%1)\n"
|
||||
"mtv $8, s010\n"
|
||||
"mtv $9, s011\n"
|
||||
"mtv $10, s012\n"
|
||||
"mtv $11, s013\n"
|
||||
|
||||
"lwr $8, 32(%1)\n"
|
||||
"lwl $8, 35(%1)\n"
|
||||
"lwr $9, 36(%1)\n"
|
||||
"lwl $9, 39(%1)\n"
|
||||
"lwr $10, 40(%1)\n"
|
||||
"lwl $10, 43(%1)\n"
|
||||
"lwr $11, 44(%1)\n"
|
||||
"lwl $11, 47(%1)\n"
|
||||
"mtv $8, s020\n"
|
||||
"mtv $9, s021\n"
|
||||
"mtv $10, s022\n"
|
||||
"mtv $11, s023\n"
|
||||
|
||||
"lwr $8, 48(%1)\n"
|
||||
"lwl $8, 51(%1)\n"
|
||||
"lwr $9, 52(%1)\n"
|
||||
"lwl $9, 55(%1)\n"
|
||||
"lwr $10, 56(%1)\n"
|
||||
"lwl $10, 59(%1)\n"
|
||||
"lwr $11, 60(%1)\n"
|
||||
"lwl $11, 63(%1)\n"
|
||||
"mtv $8, s030\n"
|
||||
"mtv $9, s031\n"
|
||||
"mtv $10, s032\n"
|
||||
"mtv $11, s033\n"
|
||||
|
||||
"sync\n"
|
||||
"sv.q c000, 0(%0), wb\n"
|
||||
"sv.q c010, 16(%0), wb\n"
|
||||
"sv.q c020, 32(%0), wb\n"
|
||||
"sv.q c030, 48(%0), wb\n"
|
||||
// Lots of variable updates... but get hidden in sv.q latency anyway
|
||||
"addiu %3, %3, -64\n"
|
||||
"addiu %2, %2, 64\n"
|
||||
"addiu %1, %1, 64\n"
|
||||
"addiu %0, %0, 64\n"
|
||||
".set pop\n" // restore assembler option
|
||||
:"+r"(udst8),"+r"(src8),"+r"(dst64a),"+r"(size)
|
||||
:
|
||||
:"$8","$9","$10","$11","memory"
|
||||
);
|
||||
}
|
||||
if (size>16)
|
||||
// Invalidate the last cache line where the max remaining 63 bytes are
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"cache 0x1B, 0(%0)\n"
|
||||
"sync\n"
|
||||
".set pop\n" // restore assembler option
|
||||
::"r"(dst64a));
|
||||
while (size>=16)
|
||||
{
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"lwr $8, 0(%1)\n" //
|
||||
"lwl $8, 3(%1)\n" // $8 = *(s + 0)
|
||||
"lwr $9, 4(%1)\n" //
|
||||
"lwl $9, 7(%1)\n" // $9 = *(s + 4)
|
||||
"lwr $10, 8(%1)\n" //
|
||||
"lwl $10, 11(%1)\n" // $10 = *(s + 8)
|
||||
"lwr $11, 12(%1)\n" //
|
||||
"lwl $11, 15(%1)\n" // $11 = *(s + 12)
|
||||
"mtv $8, s000\n"
|
||||
"mtv $9, s001\n"
|
||||
"mtv $10, s002\n"
|
||||
"mtv $11, s003\n"
|
||||
|
||||
"sv.q c000, 0(%0), wb\n"
|
||||
// Lots of variable updates... but get hidden in sv.q latency anyway
|
||||
"addiu %2, %2, -16\n"
|
||||
"addiu %1, %1, 16\n"
|
||||
"addiu %0, %0, 16\n"
|
||||
".set pop\n" // restore assembler option
|
||||
:"+r"(udst8),"+r"(src8),"+r"(size)
|
||||
:
|
||||
:"$8","$9","$10","$11","memory"
|
||||
);
|
||||
}
|
||||
asm(".set push\n" // save assembler option
|
||||
".set noreorder\n" // suppress reordering
|
||||
"vflush\n" // Flush VFPU writeback cache
|
||||
".set pop\n" // restore assembler option
|
||||
);
|
||||
dst8 = (u8*)((u32)udst8 & ~0x40000000);
|
||||
break;
|
||||
}
|
||||
|
||||
bytecopy:
|
||||
// Copy the remains byte per byte...
|
||||
while (size--)
|
||||
{
|
||||
*dst8++ = *src8++;
|
||||
}
|
||||
|
||||
return (dst);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
==============================================================================
|
||||
|
@ -374,7 +708,7 @@ void Hunk_Print (qboolean all)
|
|||
//
|
||||
// print the single block
|
||||
//
|
||||
memcpy (name, h->name, 8);
|
||||
memcpy_vfpu(name, h->name, 8);
|
||||
if (all)
|
||||
Con_Printf ("%8p :%8i %8s\n",h, h->size, name);
|
||||
|
||||
|
|
|
@ -127,5 +127,5 @@ void *Cache_Alloc (cache_user_t *c, int size, char *name);
|
|||
|
||||
void Cache_Report (void);
|
||||
|
||||
|
||||
void* memcpy_vfpu(void* dst, void* src, unsigned int size);
|
||||
|
||||
|
|
Loading…
Reference in a new issue