mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-01-18 23:21:41 +00:00
Revert "- started adding ARM support. incomplete. won't compile. don't try."
This reverts commit be8abba344
.
This commit is contained in:
parent
2c27b74e34
commit
effe8a1e80
5 changed files with 0 additions and 1448 deletions
|
@ -502,11 +502,6 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||||
set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" )
|
set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" )
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# ARM processors (Raspberry Pi) - enable ARM NEON support.
|
|
||||||
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
|
|
||||||
set( CMAKE_CXX_FLAGS "-mfpu=neon ${CMAKE_CXX_FLAGS}" )
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# Remove extra warnings when using the official DirectX headers.
|
# Remove extra warnings when using the official DirectX headers.
|
||||||
# Also, TDM-GCC 4.4.0 no longer accepts glibc-style printf formats as valid,
|
# Also, TDM-GCC 4.4.0 no longer accepts glibc-style printf formats as valid,
|
||||||
# which is a royal pain. The previous version I had been using was fine with them.
|
# which is a royal pain. The previous version I had been using was fine with them.
|
||||||
|
|
1198
src/ila/SSE2NEON.h
1198
src/ila/SSE2NEON.h
File diff suppressed because it is too large
Load diff
|
@ -1,56 +0,0 @@
|
||||||
/*******************************************************************************
|
|
||||||
Copyright (c) 2017 Rachael Alexanderson
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
1. Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer in the documentation
|
|
||||||
and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
3. Neither the name of the copyright holder nor the names of its contributors
|
|
||||||
may be used to endorse or promote products derived from this software
|
|
||||||
without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
================================================================================
|
|
||||||
|
|
||||||
Simple In-Line Assembly support for ARM
|
|
||||||
|
|
||||||
The purpose of this file is to provide SSE2 translations to ARM NEON. Note
|
|
||||||
that this requires a special GCC flag (which should automatically be set by
|
|
||||||
CMAKE).
|
|
||||||
|
|
||||||
As stated in the other included file - this will never be as fast as remaking
|
|
||||||
the SSE2 instructions from the ground up. But since I have no interest in
|
|
||||||
doing so, myself - have at it if you want proper ARM support!
|
|
||||||
|
|
||||||
For Raspberry Pi, note that this requires RPI2 or higher.
|
|
||||||
|
|
||||||
*******************************************************************************/
|
|
||||||
|
|
||||||
#if defined(__ARM_NEON__)
|
|
||||||
#include "ila/sse_to_neon.hpp"
|
|
||||||
// credit to Magnus Norddahl
|
|
||||||
inline __m128i _mm_mullo_epi16(const __m128i& a, const __m128i& b){
|
|
||||||
return vqrdmulhq_s16(reinterpret_cast<int16x8_t>(a),reinterpret_cast<int16x8_t>(b));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// Just standard old Intel!
|
|
||||||
#include <emmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,187 +0,0 @@
|
||||||
//
|
|
||||||
// sse_to_neon.hpp
|
|
||||||
// neon_test
|
|
||||||
//
|
|
||||||
// Created by Tim Oberhauser on 11/16/13.
|
|
||||||
// Copyright (c) 2013 Tim Oberhauser. All rights reserved.
|
|
||||||
//
|
|
||||||
|
|
||||||
#ifndef neon_test_sse_to_neon_hpp
|
|
||||||
#define neon_test_sse_to_neon_hpp
|
|
||||||
|
|
||||||
#include <arm_neon.h>
|
|
||||||
|
|
||||||
#if defined(__MM_MALLOC_H)
|
|
||||||
// copied from mm_malloc.h {
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
/* We can't depend on <stdlib.h> since the prototype of posix_memalign
|
|
||||||
may not be visible. */
|
|
||||||
#ifndef __cplusplus
|
|
||||||
extern int posix_memalign (void **, size_t, size_t);
|
|
||||||
#else
|
|
||||||
extern "C" int posix_memalign (void **, size_t, size_t) throw ();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static __inline void *
|
|
||||||
_mm_malloc (size_t size, size_t alignment)
|
|
||||||
{
|
|
||||||
void *ptr;
|
|
||||||
if (alignment == 1)
|
|
||||||
return malloc (size);
|
|
||||||
if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
|
|
||||||
alignment = sizeof (void *);
|
|
||||||
if (posix_memalign (&ptr, alignment, size) == 0)
|
|
||||||
return ptr;
|
|
||||||
else
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline void
|
|
||||||
_mm_free (void * ptr)
|
|
||||||
{
|
|
||||||
free (ptr);
|
|
||||||
}
|
|
||||||
// } copied from mm_malloc.h
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
typedef int16x8_t __m128i;
|
|
||||||
typedef float32x4_t __m128;
|
|
||||||
|
|
||||||
|
|
||||||
// ADDITION
|
|
||||||
inline __m128i _mm_add_epi16(const __m128i& a, const __m128i& b){
|
|
||||||
return vaddq_s16(reinterpret_cast<int16x8_t>(a),reinterpret_cast<int16x8_t>(b));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline __m128 _mm_add_ps(const __m128& a, const __m128& b){
|
|
||||||
return vaddq_f32(a,b);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// SUBTRACTION
|
|
||||||
inline __m128i _mm_sub_epi16(const __m128i& a, const __m128i& b){
|
|
||||||
return vsubq_s16(reinterpret_cast<int16x8_t>(a),reinterpret_cast<int16x8_t>(b));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline __m128 _mm_sub_ps(const __m128& a, const __m128& b){
|
|
||||||
return vsubq_f32(a,b);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// MULTIPLICATION
|
|
||||||
#if 0
|
|
||||||
inline __m128i _mm_mullo_epi16(const __m128i& a, const __m128i& b){
|
|
||||||
return vqrdmulhq_s16(reinterpret_cast<int16x8_t>(a),reinterpret_cast<int16x8_t>(b));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
inline __m128 _mm_mul_ps(const __m128& a, const __m128& b){
|
|
||||||
return vmulq_f32(a,b);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// SET VALUE
|
|
||||||
inline __m128i _mm_set1_epi16(const int16_t w){
|
|
||||||
return vmovq_n_s16(w);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline __m128i _mm_setzero_si128(){
|
|
||||||
return vmovq_n_s16(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline __m128 _mm_set1_ps(const float32_t& w){
|
|
||||||
return vmovq_n_f32(w);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// STORE
|
|
||||||
inline void _mm_storeu_si128(__m128i* p, __m128i& a){
|
|
||||||
vst1q_s16(reinterpret_cast<int16_t*>(p),reinterpret_cast<int16x8_t>(a));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void _mm_store_ps(float32_t* p, __m128&a){
|
|
||||||
vst1q_f32(p,a);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// LOAD
|
|
||||||
inline __m128i _mm_loadu_si128(__m128i* p){//For SSE address p does not need be 16-byte aligned
|
|
||||||
return reinterpret_cast<__m128i>(vld1q_s16(reinterpret_cast<int16_t*>(p)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline __m128i _mm_load_si128(__m128i* p){//For SSE address p must be 16-byte aligned
|
|
||||||
return reinterpret_cast<__m128i>(vld1q_s16(reinterpret_cast<int16_t*>(p)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline __m128 _mm_load_ps(const float32_t* p){
|
|
||||||
return reinterpret_cast<__m128>(vld1q_f32(p));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// SHIFT OPERATIONS
|
|
||||||
inline __m128i _mm_srai_epi16(const __m128i& a, const int count){
|
|
||||||
int16x8_t b = vmovq_n_s16(-count);
|
|
||||||
return reinterpret_cast<__m128i>(vshlq_s16(a,b));
|
|
||||||
// return vrshrq_n_s16(a, count);// TODO Argument to '__builtin_neon_vrshrq_n_v' must be a constant integer
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// MIN/MAX OPERATIONS
|
|
||||||
inline __m128 _mm_max_ps(const __m128& a, const __m128& b){
|
|
||||||
return reinterpret_cast<__m128>(vmaxq_f32(reinterpret_cast<float32x4_t>(a),reinterpret_cast<float32x4_t>(b)));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// SINGLE ELEMENT ACCESS
|
|
||||||
inline int16_t _mm_extract_epi16(__m128i& a, int index){
|
|
||||||
return (reinterpret_cast<int16_t*>(&a))[index];
|
|
||||||
// return vgetq_lane_s16(a,index);// TODO Argument to '__builtin_neon_vgetq_lane_i16' must be a constant integer
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// MISCELLANOUS
|
|
||||||
inline __m128i _mm_sad_epu8 (__m128i a, __m128i b){
|
|
||||||
uint64x2_t sad = reinterpret_cast<uint64x2_t>(vabdq_u8(reinterpret_cast<uint8x16_t>(a),reinterpret_cast<uint8x16_t>(b)));
|
|
||||||
sad = reinterpret_cast<uint64x2_t>(vpaddlq_u8(reinterpret_cast<uint8x16_t>(sad)));
|
|
||||||
sad = reinterpret_cast<uint64x2_t>(vpaddlq_u16(reinterpret_cast<uint16x8_t>(sad)));
|
|
||||||
sad = vpaddlq_u32(reinterpret_cast<uint32x4_t>(sad));
|
|
||||||
return reinterpret_cast<__m128i>(sad);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// LOGICAL OPERATIONS
|
|
||||||
inline __m128 _mm_and_ps(__m128& a, __m128& b){
|
|
||||||
return reinterpret_cast<__m128>(vandq_u32(reinterpret_cast<uint32x4_t>(a),reinterpret_cast<uint32x4_t>(b)));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// CONVERSIONS
|
|
||||||
inline __m128i _mm_packus_epi16 (const __m128i a, const __m128i b){
|
|
||||||
__m128i result = _mm_setzero_si128();
|
|
||||||
int8x8_t* a_narrow = reinterpret_cast<int8x8_t*>(&result);
|
|
||||||
int8x8_t* b_narrow = &a_narrow[1];
|
|
||||||
*a_narrow = reinterpret_cast<int8x8_t>(vqmovun_s16(a));
|
|
||||||
*b_narrow = reinterpret_cast<int8x8_t>(vqmovun_s16(b));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// In my case this function was only needed to convert 8 bit to 16 bit integers by extending with zeros, the general case is not implemented!!!
|
|
||||||
inline __m128i _mm_unpacklo_epi8(__m128i a, const __m128i dummy_zero){
|
|
||||||
// dummy_zero is a dummy variable
|
|
||||||
uint8x8_t* a_low = reinterpret_cast<uint8x8_t*>(&a);
|
|
||||||
return reinterpret_cast<__m128i>(vmovl_u8(*a_low));
|
|
||||||
}
|
|
||||||
|
|
||||||
// In my case this function was only needed to convert 8 bit to 16 bit integers by extending with zeros, the general case is not implemented!!!
|
|
||||||
inline __m128i _mm_unpackhi_epi8(__m128i a, const __m128i dummy_zero){
|
|
||||||
// dummy_zero is a dummy variable
|
|
||||||
uint8x8_t* a_low = reinterpret_cast<uint8x8_t*>(&a);
|
|
||||||
return reinterpret_cast<__m128i>(vmovl_u8(a_low[1]));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -36,8 +36,6 @@
|
||||||
|
|
||||||
#ifndef NO_SSE
|
#ifndef NO_SSE
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#else
|
|
||||||
#include "ila/ila.h"
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct FSpecialColormap;
|
struct FSpecialColormap;
|
||||||
|
|
Loading…
Reference in a new issue