mirror of
https://github.com/ZDoom/zdoom-macos-deps.git
synced 2024-11-28 22:52:17 +00:00
108 lines
3.6 KiB
C++
108 lines
3.6 KiB
C++
// Copyright 2020 Google LLC
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#ifndef HIGHWAY_HWY_CACHE_CONTROL_H_
|
|
#define HIGHWAY_HWY_CACHE_CONTROL_H_
|
|
|
|
#include "hwy/base.h"
|
|
|
|
// Forces HWY_DISABLE_CACHE_CONTROL to be defined whenever the x86 intrinsics
// used by this header cannot be relied upon: they require SSE2, and they fail
// to compile on 32-bit Clang 7 (see
|
|
// https://github.com/gperftools/gperftools/issues/946).
|
|
// Because the first test only looks at __SSE2__, it presumably also fires on
// non-x86 targets, where the x86 code paths are excluded anyway.
// NOTE(review): MSVC does not appear to predefine __SSE2__, which would
// disable the intrinsics on that compiler as well - confirm this is intended.
#if !defined(__SSE2__) || (HWY_COMPILER_CLANG && HWY_ARCH_X86_32)
|
|
// Presumably undefined first so that a value supplied by the build (e.g. via
// -D) does not trigger a macro-redefinition diagnostic.
#undef HWY_DISABLE_CACHE_CONTROL
|
|
#define HWY_DISABLE_CACHE_CONTROL
|
|
#endif
|
|
|
|
// intrin.h is sufficient on MSVC and already included by base.h.
|
|
#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
|
|
#include <emmintrin.h> // SSE2
|
|
#include <xmmintrin.h> // _mm_prefetch
|
|
#endif
|
|
|
|
namespace hwy {
|
|
|
|
// Write granularity of `Stream` (defined elsewhere), expressed in the same
// units as N*sizeof(T): even if N*sizeof(T) is smaller, Stream may write a
// multiple of this size.
// NOTE(review): callers presumably need to pad destination buffers up to a
// multiple of this value - confirm against the documentation of `Stream`.
|
|
#define HWY_STREAM_MULTIPLE 16
|
|
|
|
// Attribute applied to every cache-control function in this header. It is
// empty unless we target x86 with cache control enabled on a non-MSVC
// compiler; in that case it requests the SSE2 target for the function.
#if !HWY_ARCH_X86 || defined(HWY_DISABLE_CACHE_CONTROL) || HWY_COMPILER_MSVC
#define HWY_ATTR_CACHE
#else
#define HWY_ATTR_CACHE __attribute__((target("sse2")))
#endif
|
|
|
|
// Windows.h #defines this, which causes infinite recursion. Temporarily
|
|
// undefine to avoid conflict with our function.
|
|
// TODO(janwas): remove when this function is removed.
|
|
#pragma push_macro("LoadFence")
|
|
#undef LoadFence
|
|
|
|
// Delays subsequent loads until prior loads are visible. Beware of potentially
|
|
// differing behavior across architectures and vendors: on Intel but not
|
|
// AMD CPUs, also serves as a full fence (waits for all prior instructions to
|
|
// complete).
|
|
HWY_INLINE HWY_ATTR_CACHE void LoadFence() {
|
|
#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
|
|
_mm_lfence();
|
|
#endif
|
|
}
|
|
|
|
// TODO(janwas): remove when this function is removed. (See above.)
|
|
#pragma pop_macro("LoadFence")
|
|
|
|
// Ensures values written by previous `Stream` calls are visible on the current
|
|
// core. This is NOT sufficient for synchronizing across cores; when `Stream`
|
|
// outputs are to be consumed by other core(s), the producer must publish
|
|
// availability (e.g. via mutex or atomic_flag) after `FlushStream`.
|
|
HWY_INLINE HWY_ATTR_CACHE void FlushStream() {
|
|
#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
|
|
_mm_sfence();
|
|
#endif
|
|
}
|
|
|
|
// Optionally begins loading the cache line containing "p" to reduce latency of
|
|
// subsequent actual loads.
|
|
template <typename T>
|
|
HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) {
|
|
#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
|
|
_mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
|
|
#elif HWY_COMPILER_GCC // includes clang
|
|
// Hint=0 (NTA) behavior differs, but skipping outer caches is probably not
|
|
// desirable, so use the default 3 (keep in caches).
|
|
__builtin_prefetch(p, /*write=*/0, /*hint=*/3);
|
|
#else
|
|
(void)p;
|
|
#endif
|
|
}
|
|
|
|
// Invalidates and flushes the cache line containing "p", if possible.
|
|
HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) {
|
|
#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
|
|
_mm_clflush(p);
|
|
#else
|
|
(void)p;
|
|
#endif
|
|
}
|
|
|
|
// When called inside a spin-loop, may reduce power consumption.
|
|
HWY_INLINE HWY_ATTR_CACHE void Pause() {
|
|
#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
|
|
_mm_pause();
|
|
#endif
|
|
}
|
|
|
|
} // namespace hwy
|
|
|
|
#endif // HIGHWAY_HWY_CACHE_CONTROL_H_
|