This commit is contained in:
Rachael Alexanderson 2017-04-02 02:34:32 -04:00
commit 643bdac514
15 changed files with 563 additions and 124 deletions

View file

@ -1245,6 +1245,7 @@ set (PCH_SOURCES
sound/musicformats/music_cd.cpp
sound/musicformats/music_dumb.cpp
sound/musicformats/music_gme.cpp
sound/musicformats/music_libsndfile.cpp
sound/musicformats/music_mus_midiout.cpp
sound/musicformats/music_smf_midiout.cpp
sound/musicformats/music_hmi_midiout.cpp

View file

@ -1095,7 +1095,7 @@ bool OpenGLSWFrameBuffer::IsValid()
bool OpenGLSWFrameBuffer::Lock(bool buffered)
{
if (LockCount++ > 0)
if (m_Lock++ > 0)
{
return false;
}
@ -1130,16 +1130,16 @@ bool OpenGLSWFrameBuffer::Lock(bool buffered)
void OpenGLSWFrameBuffer::Unlock()
{
if (LockCount == 0)
if (m_Lock == 0)
{
return;
}
if (UpdatePending && LockCount == 1)
if (UpdatePending && m_Lock == 1)
{
Update();
}
else if (--LockCount == 0)
else if (--m_Lock == 0)
{
Buffer = nullptr;
}
@ -1171,13 +1171,13 @@ void OpenGLSWFrameBuffer::Update()
return;
}
if (LockCount != 1)
if (m_Lock != 1)
{
I_FatalError("Framebuffer must have exactly 1 lock to be updated");
if (LockCount > 0)
if (m_Lock > 0)
{
UpdatePending = true;
--LockCount;
--m_Lock;
}
return;
}
@ -1220,7 +1220,7 @@ void OpenGLSWFrameBuffer::Update()
BlitCycles.Clock();
#endif
LockCount = 0;
m_Lock = 0;
Draw3DPart(In2D <= 1);
if (In2D == 0)
{
@ -1276,7 +1276,7 @@ void OpenGLSWFrameBuffer::Flip()
bool OpenGLSWFrameBuffer::PaintToWindow()
{
if (LockCount != 0)
if (m_Lock != 0)
{
return false;
}
@ -1662,7 +1662,7 @@ void OpenGLSWFrameBuffer::GetScreenshotBuffer(const uint8_t *&buffer, int &pitch
void OpenGLSWFrameBuffer::ReleaseScreenshotBuffer()
{
if (LockCount > 0)
if (m_Lock > 0)
{
Super::ReleaseScreenshotBuffer();
}
@ -2410,7 +2410,7 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update()
bool OpenGLSWFrameBuffer::Begin2D(bool copy3d)
{
ClearClipRect();
Super::Begin2D(copy3d);
if (!Accel2D)
{
return false;

View file

@ -48,7 +48,8 @@ void PolySubsectorGBuffer::Resize(int newwidth, int newheight)
{
width = newwidth;
height = newheight;
values.resize(width * height);
int count = BlockWidth() * BlockHeight();
values.resize(count * 64);
}
/////////////////////////////////////////////////////////////////////////////

View file

@ -33,6 +33,8 @@ public:
void Resize(int newwidth, int newheight);
int Width() const { return width; }
int Height() const { return height; }
int BlockWidth() const { return (width + 7) / 8; }
int BlockHeight() const { return (height + 7) / 8; }
uint32_t *Values() { return values.data(); }
private:

View file

@ -96,6 +96,15 @@ private:
__m128i mFDX12;
__m128i mFDX23;
__m128i mFDX31;
__m128i mC1;
__m128i mC2;
__m128i mC3;
__m128i mDX12;
__m128i mDY12;
__m128i mDX23;
__m128i mDY23;
__m128i mDX31;
__m128i mDY31;
#endif
void CoverageTest();
@ -124,7 +133,7 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
subsectorGBuffer = args->subsectorGBuffer;
subsectorDepth = args->uniforms->SubsectorDepth();
subsectorPitch = args->pitch;
subsectorPitch = args->stencilPitch;
// 28.4 fixed-point coordinates
#ifdef NO_SSE
@ -203,6 +212,15 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
mFDX12 = _mm_set1_epi32(FDX12);
mFDX23 = _mm_set1_epi32(FDX23);
mFDX31 = _mm_set1_epi32(FDX31);
mC1 = _mm_set1_epi32(C1);
mC2 = _mm_set1_epi32(C2);
mC3 = _mm_set1_epi32(C3);
mDX12 = _mm_set1_epi32(DX12);
mDY12 = _mm_set1_epi32(DY12);
mDX23 = _mm_set1_epi32(DX23);
mDY23 = _mm_set1_epi32(DY23);
mDX31 = _mm_set1_epi32(DX31);
mDY31 = _mm_set1_epi32(DY31);
#endif
}
@ -270,29 +288,24 @@ void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thre
void TriangleBlock::SubsectorTest()
{
uint32_t *subsector = subsectorGBuffer + X + Y * subsectorPitch;
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
uint32_t *subsector = subsectorGBuffer + block * 64;
uint32_t mask0 = 0;
uint32_t mask1 = 0;
for (int iy = 0; iy < 4; iy++)
for (int i = 0; i < 32; i++)
{
for (int ix = 0; ix < q; ix++)
{
bool covered = subsector[ix] >= subsectorDepth;
mask0 <<= 1;
mask0 |= (uint32_t)covered;
}
subsector += subsectorPitch;
bool covered = *subsector >= subsectorDepth;
mask0 <<= 1;
mask0 |= (uint32_t)covered;
subsector++;
}
for (int iy = 4; iy < q; iy++)
for (int i = 0; i < 32; i++)
{
for (int ix = 0; ix < q; ix++)
{
bool covered = subsector[ix] >= subsectorDepth;
mask1 <<= 1;
mask1 |= (uint32_t)covered;
}
subsector += subsectorPitch;
bool covered = *subsector >= subsectorDepth;
mask1 <<= 1;
mask1 |= (uint32_t)covered;
subsector++;
}
Mask0 = Mask0 & mask0;
@ -303,27 +316,24 @@ void TriangleBlock::SubsectorTest()
void TriangleBlock::SubsectorTest()
{
uint32_t *subsector = subsectorGBuffer + X + Y * subsectorPitch;
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
uint32_t *subsector = subsectorGBuffer + block * 64;
uint32_t mask0 = 0;
uint32_t mask1 = 0;
__m128i msubsectorDepth = _mm_set1_epi32(subsectorDepth);
__m128i mnotxor = _mm_set1_epi32(0xffffffff);
for (int iy = 0; iy < 4; iy++)
for (int iy = 0; iy < 8; iy++)
{
mask0 <<= 4;
mask0 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_xor_si128(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)subsector), msubsectorDepth), mnotxor), _MM_SHUFFLE(0, 1, 2, 3))));
mask0 <<= 4;
mask0 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_xor_si128(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(subsector + 4)), msubsectorDepth), mnotxor), _MM_SHUFFLE(0, 1, 2, 3))));
subsector += subsectorPitch;
subsector += 4;
}
for (int iy = 4; iy < q; iy++)
for (int iy = 0; iy < 8; iy++)
{
mask1 <<= 4;
mask1 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_xor_si128(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)subsector), msubsectorDepth), mnotxor), _MM_SHUFFLE(0, 1, 2, 3))));
mask1 <<= 4;
mask1 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_xor_si128(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(subsector + 4)), msubsectorDepth), mnotxor), _MM_SHUFFLE(0, 1, 2, 3))));
subsector += subsectorPitch;
subsector += 4;
}
Mask0 = Mask0 & mask0;
@ -432,30 +442,44 @@ void TriangleBlock::StencilEqualTest()
uint32_t mask0 = 0;
uint32_t mask1 = 0;
for (int iy = 0; iy < 4; iy++)
for (int iy = 0; iy < 2; iy++)
{
__m128i mstencilBlock = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(stencilBlock + iy * 8)), _mm_setzero_si128());
__m128i mstencilTest = _mm_cmpeq_epi16(mstencilBlock, mstencilTestValue);
__m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock);
__m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue);
__m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest);
__m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest);
__m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask0 <<= 4;
mask0 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))));
mask0 <<= 4;
mask0 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3))));
mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue);
mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest);
mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest);
__m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask0 <<= 16;
mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first));
stencilBlock += 16;
}
for (int iy = 4; iy < q; iy++)
for (int iy = 0; iy < 2; iy++)
{
__m128i mstencilBlock = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(stencilBlock + iy * 8)), _mm_setzero_si128());
__m128i mstencilTest = _mm_cmpeq_epi16(mstencilBlock, mstencilTestValue);
__m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock);
__m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue);
__m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest);
__m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest);
__m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask1 <<= 4;
mask1 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))));
mask1 <<= 4;
mask1 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3))));
mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue);
mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest);
mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest);
__m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask1 <<= 16;
mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first));
stencilBlock += 16;
}
Mask0 = Mask0 & mask0;
@ -617,50 +641,46 @@ void TriangleBlock::CoverageTest()
int y0 = Y << 4;
int y1 = (Y + q - 1) << 4;
__m128i mY = _mm_set_epi32(y0, y0, y1, y1);
__m128i mX = _mm_set_epi32(x0, x0, x1, x1);
// Evaluate half-space functions
bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);
__m128i mCY1 = _mm_sub_epi32(
_mm_add_epi32(mC1, _mm_shuffle_epi32(_mm_mul_epu32(mDX12, mY), _MM_SHUFFLE(0, 0, 2, 2))),
_mm_shuffle_epi32(_mm_mul_epu32(mDY12, mX), _MM_SHUFFLE(0, 2, 0, 2)));
__m128i mA = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128());
bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);
__m128i mCY2 = _mm_sub_epi32(
_mm_add_epi32(mC2, _mm_shuffle_epi32(_mm_mul_epu32(mDX23, mY), _MM_SHUFFLE(0, 0, 2, 2))),
_mm_shuffle_epi32(_mm_mul_epu32(mDY23, mX), _MM_SHUFFLE(0, 2, 0, 2)));
__m128i mB = _mm_cmpgt_epi32(mCY2, _mm_setzero_si128());
bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);
__m128i mCY3 = _mm_sub_epi32(
_mm_add_epi32(mC3, _mm_shuffle_epi32(_mm_mul_epu32(mDX31, mY), _MM_SHUFFLE(0, 0, 2, 2))),
_mm_shuffle_epi32(_mm_mul_epu32(mDY31, mX), _MM_SHUFFLE(0, 2, 0, 2)));
__m128i mC = _mm_cmpgt_epi32(mCY3, _mm_setzero_si128());
if (a == 0 || b == 0 || c == 0) // Skip block when outside an edge
int abc = _mm_movemask_epi8(_mm_packs_epi16(_mm_packs_epi32(mA, mB), _mm_packs_epi32(mC, _mm_setzero_si128())));
if ((abc & 0xf) == 0 || (abc & 0xf0) == 0 || (abc & 0xf00) == 0) // Skip block when outside an edge
{
Mask0 = 0;
Mask1 = 0;
}
else if (a == 0xf && b == 0xf && c == 0xf) // Accept whole block when totally covered
else if (abc == 0xfff) // Accept whole block when totally covered
{
Mask0 = 0xffffffff;
Mask1 = 0xffffffff;
}
else // Partially covered block
{
x0 = X << 4;
x1 = (X + q - 1) << 4;
int CY1 = C1 + DX12 * y0 - DY12 * x0;
int CY2 = C2 + DX23 * y0 - DY23 * x0;
int CY3 = C3 + DX31 * y0 - DY31 * x0;
uint32_t mask0 = 0;
uint32_t mask1 = 0;
__m128i mCY1 = _mm_sub_epi32(_mm_set1_epi32(CY1), mFDY12Offset);
__m128i mCY2 = _mm_sub_epi32(_mm_set1_epi32(CY2), mFDY23Offset);
__m128i mCY3 = _mm_sub_epi32(_mm_set1_epi32(CY3), mFDY31Offset);
for (int iy = 0; iy < 4; iy++)
mCY1 = _mm_sub_epi32(_mm_shuffle_epi32(mCY1, _MM_SHUFFLE(0, 0, 0, 0)), mFDY12Offset);
mCY2 = _mm_sub_epi32(_mm_shuffle_epi32(mCY2, _MM_SHUFFLE(0, 0, 0, 0)), mFDY23Offset);
mCY3 = _mm_sub_epi32(_mm_shuffle_epi32(mCY3, _MM_SHUFFLE(0, 0, 0, 0)), mFDY31Offset);
for (int iy = 0; iy < 2; iy++)
{
__m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128());
mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0);
@ -668,18 +688,27 @@ void TriangleBlock::CoverageTest()
__m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128());
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1);
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1);
mCY1 = _mm_add_epi32(mCY1, mFDX12);
mCY2 = _mm_add_epi32(mCY2, mFDX23);
mCY3 = _mm_add_epi32(mCY3, mFDX31);
__m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask0 <<= 4;
mask0 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))));
mask0 <<= 4;
mask0 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3))));
mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128());
mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0);
mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0);
mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128());
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1);
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1);
mCY1 = _mm_add_epi32(mCY1, mFDX12);
mCY2 = _mm_add_epi32(mCY2, mFDX23);
mCY3 = _mm_add_epi32(mCY3, mFDX31);
__m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask0 <<= 16;
mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first));
}
for (int iy = 4; iy < q; iy++)
for (int iy = 0; iy < 2; iy++)
{
__m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128());
mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0);
@ -687,15 +716,24 @@ void TriangleBlock::CoverageTest()
__m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128());
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1);
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1);
mCY1 = _mm_add_epi32(mCY1, mFDX12);
mCY2 = _mm_add_epi32(mCY2, mFDX23);
mCY3 = _mm_add_epi32(mCY3, mFDX31);
__m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask1 <<= 4;
mask1 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))));
mask1 <<= 4;
mask1 |= _mm_movemask_ps(_mm_castsi128_ps(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3))));
mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128());
mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0);
mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0);
mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128());
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1);
mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1);
mCY1 = _mm_add_epi32(mCY1, mFDX12);
mCY2 = _mm_add_epi32(mCY2, mFDX23);
mCY3 = _mm_add_epi32(mCY3, mFDX31);
__m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3)));
mask1 <<= 16;
mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first));
}
Mask0 = mask0;
@ -755,47 +793,98 @@ void TriangleBlock::StencilWrite()
}
}
#ifdef NO_SSE
void TriangleBlock::SubsectorWrite()
{
auto pitch = subsectorPitch;
uint32_t *subsector = subsectorGBuffer + X + Y * pitch;
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
uint32_t *subsector = subsectorGBuffer + block * 64;
if (Mask0 == 0xffffffff && Mask1 == 0xffffffff)
{
for (int y = 0; y < 8; y++)
for (int i = 0; i < 64; i++)
{
for (int x = 0; x < 8; x++)
subsector[x] = subsectorDepth;
subsector += pitch;
*(subsector++) = subsectorDepth;
}
}
else
{
uint32_t mask0 = Mask0;
uint32_t mask1 = Mask1;
for (int y = 0; y < 4; y++)
for (int i = 0; i < 32; i++)
{
for (int x = 0; x < 8; x++)
{
if (mask0 & (1 << 31))
subsector[x] = subsectorDepth;
mask0 <<= 1;
}
subsector += pitch;
if (mask0 & (1 << 31))
*subsector = subsectorDepth;
mask0 <<= 1;
subsector++;
}
for (int y = 4; y < 8; y++)
for (int i = 0; i < 32; i++)
{
for (int x = 0; x < 8; x++)
{
if (mask1 & (1 << 31))
subsector[x] = subsectorDepth;
mask1 <<= 1;
}
subsector += pitch;
if (mask1 & (1 << 31))
*subsector = subsectorDepth;
mask1 <<= 1;
subsector++;
}
}
}
#else
void TriangleBlock::SubsectorWrite()
{
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
uint32_t *subsector = subsectorGBuffer + block * 64;
__m128i msubsectorDepth = _mm_set1_epi32(subsectorDepth);
if (Mask0 == 0xffffffff && Mask1 == 0xffffffff)
{
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth); subsector += 4;
_mm_storeu_si128((__m128i*)subsector, msubsectorDepth);
}
else
{
__m128i mxormask = _mm_set1_epi32(0xffffffff);
__m128i topfour = _mm_setr_epi32(1 << 31, 1 << 30, 1 << 29, 1 << 28);
__m128i mmask0 = _mm_set1_epi32(Mask0);
__m128i mmask1 = _mm_set1_epi32(Mask1);
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask0 = _mm_slli_epi32(mmask0, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask0 = _mm_slli_epi32(mmask0, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask0 = _mm_slli_epi32(mmask0, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask0 = _mm_slli_epi32(mmask0, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask0 = _mm_slli_epi32(mmask0, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask0 = _mm_slli_epi32(mmask0, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask0 = _mm_slli_epi32(mmask0, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask1 = _mm_slli_epi32(mmask1, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask1 = _mm_slli_epi32(mmask1, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask1 = _mm_slli_epi32(mmask1, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask1 = _mm_slli_epi32(mmask1, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask1 = _mm_slli_epi32(mmask1, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask1 = _mm_slli_epi32(mmask1, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); mmask1 = _mm_slli_epi32(mmask1, 4); subsector += 4;
_mm_maskmoveu_si128(msubsectorDepth, _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)subsector); subsector += 4;
}
}
#endif
void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
{
TriangleBlock block(args);

View file

@ -488,6 +488,11 @@ retry_as_sndsys:
{
info = MOD_OpenSong(*reader);
}
if (info == nullptr)
{
info = SndFile_OpenSong(*reader);
if (info != nullptr) reader = nullptr;
}
if (info == NULL)
{

View file

@ -672,6 +672,7 @@ MusInfo *MOD_OpenSong(FileReader &reader);
const char *GME_CheckFormat(uint32_t header);
MusInfo *GME_OpenSong(FileReader &reader, const char *fmt);
MusInfo *SndFile_OpenSong(FileReader &fr);
// --------------------------------------------------------------------------

View file

@ -160,8 +160,7 @@ public:
virtual MIDIDevice* CreateMIDIDevice() const = 0;
protected:
virtual SoundDecoder *CreateDecoder(FileReader *reader);
static SoundDecoder *CreateDecoder(FileReader *reader);
};
extern SoundRenderer *GSnd;

View file

@ -132,7 +132,7 @@ struct SoundDecoder
virtual size_t read(char *buffer, size_t bytes) = 0;
virtual TArray<char> readAll();
virtual bool seek(size_t ms_offset) = 0;
virtual bool seek(size_t ms_offset, bool ms) = 0;
virtual size_t getSampleOffset() = 0;
virtual size_t getSampleLength() { return 0; }

View file

@ -134,14 +134,14 @@ size_t MPG123Decoder::read(char *buffer, size_t bytes)
return amt;
}
bool MPG123Decoder::seek(size_t ms_offset)
bool MPG123Decoder::seek(size_t ms_offset, bool ms)
{
int enc, channels;
long srate;
if(mpg123_getformat(MPG123, &srate, &channels, &enc) == MPG123_OK)
{
size_t smp_offset = (size_t)((double)ms_offset / 1000. * srate);
size_t smp_offset = ms? (size_t)((double)ms_offset / 1000. * srate) : ms_offset;
if(mpg123_seek(MPG123, (off_t)smp_offset, SEEK_SET) >= 0)
{
Done = false;

View file

@ -16,7 +16,7 @@ struct MPG123Decoder : public SoundDecoder
virtual void getInfo(int *samplerate, ChannelConfig *chans, SampleType *type);
virtual size_t read(char *buffer, size_t bytes);
virtual bool seek(size_t ms_offset);
virtual bool seek(size_t ms_offset, bool ms);
virtual size_t getSampleOffset();
virtual size_t getSampleLength();

View file

@ -0,0 +1,341 @@
/*
** music_libsndfile.cpp
** Uses libsndfile for streaming music formats
**
**---------------------------------------------------------------------------
** Copyright 2017 Christoph Oelckers
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
**
** 1. Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** 3. The name of the author may not be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**---------------------------------------------------------------------------
**
*/
// HEADER FILES ------------------------------------------------------------
#include "i_musicinterns.h"
#include "c_cvars.h"
#include "critsec.h"
#include "v_text.h"
#include "files.h"
#include "templates.h"
#include "sndfile_decoder.h"
#include "mpg123_decoder.h"
#include "m_fixed.h"
// MACROS ------------------------------------------------------------------
// TYPES -------------------------------------------------------------------
// A streamed song that pulls its audio from a SoundDecoder and, when looping,
// wraps from Loop_End back to Loop_Start instead of restarting at the top.
class SndFileSong : public StreamSong
{
public:
	// loop_start/loop_end are sample positions when the matching
	// startass/endass flag is true; otherwise the constructor converts them
	// from milliseconds to samples using the decoder's sample rate.
	SndFileSong(FileReader *reader, SoundDecoder *decoder, uint32_t loop_start, uint32_t loop_end, bool startass, bool endass);
	~SndFileSong();

	// The destructor deletes Reader and Decoder, so a copied object would
	// free both a second time. Copying is therefore disabled.
	SndFileSong(const SndFileSong &) = delete;
	SndFileSong &operator=(const SndFileSong &) = delete;

	bool SetSubsong(int subsong);
	void Play(bool looping, int subsong);
	FString GetStats();

protected:
	FCriticalSection CritSec;	// held for the duration of each Read() callback
	FileReader *Reader;			// deleted in the destructor
	SoundDecoder *Decoder;		// deleted in the destructor
	int Channels;				// 2 for stereo, 1 otherwise
	int SampleRate;				// as reported by Decoder->getInfo()
	uint32_t Loop_Start;		// sample position to seek to when the loop wraps
	uint32_t Loop_End;			// sample position at which the loop wraps

	int CalcSongLength();

	// Stream callback; 'userdata' is the SndFileSong instance.
	static bool Read(SoundStream *stream, void *buff, int len, void *userdata);
};
// EXTERNAL FUNCTION PROTOTYPES --------------------------------------------
// PUBLIC FUNCTION PROTOTYPES ----------------------------------------------
// PRIVATE FUNCTION PROTOTYPES ---------------------------------------------
// EXTERNAL DATA DECLARATIONS ----------------------------------------------
// PUBLIC DATA DEFINITIONS -------------------------------------------------
// PRIVATE DATA DEFINITIONS ------------------------------------------------
// CODE --------------------------------------------------------------------
//==========================================================================
//
// try to find the LOOP_START/LOOP_END tags
//
// This is a brute force implementation, thanks in no small part to the
// fact that no decent documentation of Ogg headers seems to exist and
// all available tag libraries are horrendously bloated.
// So if we want to do this without any new third party dependencies,
// thanks to the lack of anything that would help to do this properly,
// this was the only solution.
//
//==========================================================================
// Scans the first 256 bytes of the file for something that looks like a tag
// list and, if one is found, parses its LOOP_START / LOOP_END entries.
//
// The layout the scan looks for is:
//   [32 bit LE tag count] [32 bit LE length] "NAME=value" [32 bit LE length] ...
// A candidate is located by finding an '=', walking back over the printable
// characters of the tag name and expecting the byte in front of the name (the
// most significant byte of the length) to be zero.
//
// fr       - reader to scan; its position is changed by this function.
// start    - receives the parsed LOOP_START value, untouched if absent.
// startass - flag filled in by S_ParseTimeTag for LOOP_START.
// end      - receives the parsed LOOP_END value, untouched if absent.
// endass   - flag filled in by S_ParseTimeTag for LOOP_END.
void FindLoopTags(FileReader *fr, uint32_t *start, bool *startass, uint32_t *end, bool *endass)
{
	unsigned char testbuf[256];

	fr->Seek(0, SEEK_SET);
	const long got = fr->Read(testbuf, 256);
	if (got <= 0) return;
	// Only scan what was actually read; the rest of the buffer is uninitialized.
	const size_t avail = (size_t)got < sizeof(testbuf) ? (size_t)got : sizeof(testbuf);

	size_t searchfrom = 0;
	int count = 0;
	while (true)
	{
		if (searchfrom >= avail) return;
		unsigned char *eqp = (unsigned char *)memchr(testbuf + searchfrom, '=', avail - searchfrom);
		if (eqp == nullptr) return;	// If there is no '=' in the scanned bytes there's also no metadata.
		searchfrom = size_t(eqp - testbuf) + 1;

		// Walk back over the tag name without ever leaving the buffer.
		unsigned char *c = eqp;
		while (c > testbuf && *c >= 32 && *c < 127) c--;
		if (*c != 0)
		{
			// doesn't look like a valid tag, so try again
			continue;
		}
		// Three more length bytes and the four count bytes must precede 'c'.
		if (c - testbuf < 7)
		{
			continue;
		}

		c -= 3;
		int len;
		memcpy(&len, c, 4);	// the data is not guaranteed to be aligned
		len = LittleLong(len);
		if (len > 1000000 || len <= (eqp - c + 1))
		{
			// length looks fishy so retry with the next '='
			continue;
		}

		c -= 4;
		memcpy(&count, c, 4);
		count = LittleLong(count);
		if (count <= 0 || count > 1000)
		{
			// very unlikely to have 1000 tags
			continue;
		}

		// Position the reader on the first tag's length field.
		c += 4;
		fr->Seek(long(c - testbuf), SEEK_SET);
		break;	// looks like we found something.
	}

	for (int i = 0; i < count; i++)
	{
		int length = 0;
		if (fr->Read(&length, 4) != 4) return;	// ran out of file
		length = LittleLong(length);
		if (length <= 0 || length > 1000000) return;	// looks like we lost it...
		if (length > 25)
		{
			// This tag is too long to be a valid time stamp so don't even bother.
			fr->Seek(length, SEEK_CUR);
			continue;
		}
		if (fr->Read(testbuf, length) != length) return;
		testbuf[length] = 0;
		if (strnicmp((char*)testbuf, "LOOP_START=", 11) == 0)
		{
			S_ParseTimeTag((char*)testbuf + 11, startass, start);
		}
		else if (strnicmp((char*)testbuf, "LOOP_END=", 9) == 0)
		{
			S_ParseTimeTag((char*)testbuf + 9, endass, end);
		}
	}
}
//==========================================================================
//
// SndFile_OpenSong
//
//==========================================================================
// Tries to open the given file as a decoder-backed streaming song.
//
// Returns nullptr if no decoder could be created for the data. On success the
// returned song keeps a pointer to 'fr' and deletes it in its destructor, so
// the caller must give up ownership of the reader in that case.
MusInfo *SndFile_OpenSong(FileReader &fr)
{
	// Zero-filled so that a short read cannot leave garbage for the compares below.
	uint8_t signature[4] = {};

	fr.Seek(0, SEEK_SET);
	fr.Read(signature, 4);
	uint32_t loop_start = 0, loop_end = ~0u;
	bool startass = false, endass = false;

	if (!memcmp(signature, "OggS", 4) || !memcmp(signature, "fLaC", 4))
	{
		// Pick up the loop points from the LOOP_START/LOOP_END tags, if present.
		FindLoopTags(&fr, &loop_start, &startass, &loop_end, &endass);
	}

	// The tag scan moved the read position; the decoder must see the whole file.
	fr.Seek(0, SEEK_SET);
	auto decoder = SoundRenderer::CreateDecoder(&fr);
	if (decoder == nullptr) return nullptr;
	return new SndFileSong(&fr, decoder, loop_start, loop_end, startass, endass);
}
//==========================================================================
//
// SndFileSong - Constructor
//
//==========================================================================
// Takes over 'reader' and 'decoder' (both are deleted by the destructor),
// resolves the loop points to sample positions and creates the output stream.
//
// loop_start/loop_end are interpreted as sample positions when startass/endass
// is true and as milliseconds otherwise.
SndFileSong::SndFileSong(FileReader *reader, SoundDecoder *decoder, uint32_t loop_start, uint32_t loop_end, bool startass, bool endass)
{
	ChannelConfig iChannels;
	SampleType Type;

	decoder->getInfo(&SampleRate, &iChannels, &Type);

	// Millisecond values are converted to samples.
	if (!startass) loop_start = Scale(loop_start, SampleRate, 1000);
	if (!endass) loop_end = Scale(loop_end, SampleRate, 1000);

	Loop_End = clamp<uint32_t>(loop_end, 0, (uint32_t)decoder->getSampleLength());
	// A loop start at or behind the loop end would make Read() seek past the
	// end point and then compute a negative distance to it, so fall back to
	// looping from the very beginning in that case.
	Loop_Start = (loop_start < Loop_End) ? loop_start : 0;

	Reader = reader;
	Decoder = decoder;
	Channels = iChannels == ChannelConfig_Stereo? 2:1;
	m_Stream = GSnd->CreateStream(Read, 32*1024, iChannels == ChannelConfig_Stereo? 0 : SoundStream::Mono, SampleRate, this);
}
//==========================================================================
//
// SndFileSong - Destructor
//
//==========================================================================
// Stops playback, then releases the stream, the decoder and the reader,
// in that order.
SndFileSong::~SndFileSong()
{
	Stop();

	// Deleting a null pointer is a no-op, so no explicit checks are needed.
	delete m_Stream;
	m_Stream = nullptr;

	delete Decoder;
	delete Reader;
}
//==========================================================================
//
// SndFileSong :: Play
//
//==========================================================================
// Starts streaming. 'track' is ignored; SetSubsong() always reports failure
// for this song type.
// The status stays STATE_Stopped if there is no stream or it refuses to play.
void SndFileSong::Play(bool looping, int track)
{
	m_Status = STATE_Stopped;
	m_Looping = looping;
	// The destructor treats m_Stream as possibly null, so do the same here
	// instead of dereferencing it blindly.
	if (m_Stream != nullptr && m_Stream->Play(looping, 1))
	{
		m_Status = STATE_Playing;
	}
}
//==========================================================================
//
// SndFileSong :: SetSubsong
//
//==========================================================================
// Always fails: this song type does not switch subsongs.
bool SndFileSong::SetSubsong(int track)
{
	(void)track;	// intentionally unused
	return false;
}
//==========================================================================
//
// SndFileSong :: GetStats
//
//==========================================================================
// Builds a one-line status text: channel layout, sample rate and the current
// playback position as minutes:seconds, derived from the decoder's sample
// offset.
FString SndFileSong::GetStats()
{
	const size_t position = Decoder->getSampleOffset();
	const int seconds = int (position / SampleRate);
	const char *layout = (Channels == 2) ? "Stereo" : "Mono";

	FString stats;
	stats.Format(
		"Track: " TEXTCOLOR_YELLOW "%s, %dHz" TEXTCOLOR_NORMAL
		" Time:" TEXTCOLOR_YELLOW "%02d:%02d" TEXTCOLOR_NORMAL,
		layout, SampleRate,
		seconds / 60,
		seconds % 60);
	return stats;
}
//==========================================================================
//
// SndFileSong :: Read STATIC
//
//==========================================================================
// Stream callback: fills 'vbuff' with 'ilen' bytes of decoded audio.
//
// stream   - the calling stream (unused).
// vbuff    - destination buffer.
// ilen     - number of bytes to produce.
// userdata - the SndFileSong instance.
//
// Returns false once a non-looping song has nothing more to deliver or when
// the decoder fails; any part of the buffer that could not be filled is
// zeroed. A frame is taken to be Channels * 2 bytes (presumably 16 bit
// samples - confirm against the decoder's sample type).
bool SndFileSong::Read(SoundStream *stream, void *vbuff, int ilen, void *userdata)
{
	char *buff = (char*)vbuff;
	SndFileSong *song = (SndFileSong *)userdata;
	song->CritSec.Enter();

	size_t len = size_t(ilen);
	const size_t framesize = size_t(song->Channels) * 2;
	bool err = false;

	if (!song->m_Looping)
	{
		const size_t currentpos = song->Decoder->getSampleOffset();
		const size_t maxpos = song->Decoder->getSampleLength();
		if (currentpos >= maxpos)
		{
			// At (or, defensively, past) the end. '>=' keeps the subtraction
			// below from wrapping around.
			memset(buff, 0, len);
			err = true;
		}
		else
		{
			// Never ask for more than what is left in the song.
			const size_t remaining = (maxpos - currentpos) * framesize;
			const size_t want = remaining < len ? remaining : len;
			const size_t got = song->Decoder->read(buff, want);
			if (got < len) memset(buff + got, 0, len - got);
			// Coming up short is only an error if a full buffer was expected.
			err = (want == len && got != len);
		}
	}
	else
	{
		const size_t loopend = song->Loop_End;
		// An inverted loop range cannot be played; restart from the top instead.
		const size_t loopstart = song->Loop_Start < loopend ? song->Loop_Start : 0;
		// A loop end of 0 means the end position is unknown: read until the
		// decoder runs dry and rewind then.
		const bool bounded = loopend > 0;
		int stalls = 0;

		while (len > 0)
		{
			size_t want = len;
			if (bounded)
			{
				const size_t currentpos = song->Decoder->getSampleOffset();
				if (currentpos >= loopend)
				{
					want = 0;
				}
				else
				{
					const size_t untilend = (loopend - currentpos) * framesize;
					if (untilend < want) want = untilend;
				}
			}

			const size_t got = want > 0 ? song->Decoder->read(buff, want) : 0;
			buff += got;
			len -= got;

			if (got > 0)
			{
				stalls = 0;
				continue;
			}
			// Nothing was delivered: either the loop end was reached or the
			// decoder ended early. Rewind, but give up if even a fresh start
			// yields no data, so this cannot spin forever.
			if (++stalls > 1 || !song->Decoder->seek(loopstart, false))
			{
				err = true;
				break;
			}
		}
		if (len > 0) memset(buff, 0, len);
	}

	song->CritSec.Leave();
	return !err;
}

View file

@ -212,7 +212,7 @@ class OpenALSoundStream : public SoundStream
size_t got = self->Decoder->read((char*)ptr, length);
if(got < (unsigned int)length)
{
if(!self->Looping || !self->Decoder->seek(0))
if(!self->Looping || !self->Decoder->seek(0, false))
return false;
got += self->Decoder->read((char*)ptr+got, length-got);
}
@ -361,7 +361,7 @@ public:
virtual bool SetPosition(unsigned int ms_pos)
{
std::unique_lock<std::mutex> lock(Renderer->StreamLock);
if(!Decoder->seek(ms_pos))
if(!Decoder->seek(ms_pos, true))
return false;
if(!Playing.load())

View file

@ -132,9 +132,9 @@ TArray<char> SndFileDecoder::readAll()
return output;
}
bool SndFileDecoder::seek(size_t ms_offset)
bool SndFileDecoder::seek(size_t ms_offset, bool ms)
{
size_t smp_offset = (size_t)((double)ms_offset / 1000. * SndInfo.samplerate);
size_t smp_offset = ms? (size_t)((double)ms_offset / 1000. * SndInfo.samplerate) : ms_offset;
if(sf_seek(SndFile, smp_offset, SEEK_SET) < 0)
return false;
return true;

View file

@ -13,7 +13,7 @@ struct SndFileDecoder : public SoundDecoder
virtual size_t read(char *buffer, size_t bytes);
virtual TArray<char> readAll();
virtual bool seek(size_t ms_offset);
virtual bool seek(size_t ms_offset, bool ms);
virtual size_t getSampleOffset();
virtual size_t getSampleLength();