mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-03-22 02:11:19 +00:00
[simd] Correct result for dot2f
It turns out gcc optimizes the obvious code nicely. It doesn't do as well for cmulf, but I decided to use the obvious code there anyway (the instruction counts were the same, so maybe it doesn't get any better for a single pair of operands).
This commit is contained in:
parent
fd298f3601
commit
9084121ad2
1 changed file with 2 additions and 7 deletions
|
@@ -120,9 +120,7 @@ vec2f_t
|
|||
dot2f (vec2f_t a, vec2f_t b)
|
||||
{
|
||||
vec2f_t c = a * b;
|
||||
vec4f_t t = { c[0], c[1], 0, 0 };
|
||||
t = _mm_hadd_ps (t, t);
|
||||
return (vec2f_t) { t[0], t[1] };
|
||||
return (vec2f_t) { c[0] + c[1], c[0] + c[1] };
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC2F_Funcs
|
||||
|
@@ -135,10 +133,7 @@ cmulf (vec2f_t a, vec2f_t b)
|
|||
{
|
||||
vec2f_t c1 = a * b[0];
|
||||
vec2f_t c2 = a * b[1];
|
||||
vec4f_t c14 ={ c1[0], c1[1], 0, 0 };
|
||||
vec4f_t c24 ={ c2[1], c2[0], 0, 0 };
|
||||
vec4f_t c = _mm_addsub_ps (c14, c24);
|
||||
return (vec2f_t) { c[0], c[1] };
|
||||
return (vec2f_t) { c1[0] - c2[1], c1[1] + c2[0] };
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC2F_Funcs
|
||||
|
|
Loading…
Reference in a new issue