Removed drawergen tool and all LLVM dependencies (don't let the door hit you on your way out, llvm!)

This commit is contained in:
Magnus Norddahl 2017-02-23 08:28:18 +01:00
parent c5683bbde5
commit ad507ca246
76 changed files with 36 additions and 9349 deletions

View file

@ -588,20 +588,6 @@ add_custom_target( revision_check ALL
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
DEPENDS updaterevision )
# Run drawer codegen tool
if ( WIN32 )
add_custom_target( drawergen_target ALL
COMMAND drawergen src/r_drawersasm.obj
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS drawergen )
else()
add_custom_target( drawergen_target ALL
COMMAND drawergen src/r_drawersasm.o
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS drawergen )
endif()
# Libraries ZDoom needs
message( STATUS "Fluid synth libs: ${FLUIDSYNTH_LIBRARIES}" )
@ -857,7 +843,6 @@ set( FASTMATH_PCH_SOURCES
swrenderer/drawers/r_draw.cpp
swrenderer/drawers/r_draw_pal.cpp
swrenderer/drawers/r_draw_rgba.cpp
swrenderer/drawers/r_drawers.cpp
swrenderer/drawers/r_thread.cpp
swrenderer/scene/r_3dfloors.cpp
swrenderer/scene/r_light.cpp
@ -1296,16 +1281,6 @@ set (PCH_SOURCES
)
enable_precompiled_headers( g_pch.h PCH_SOURCES )
if ( WIN32 )
set (CODEGENOBJ_SOURCES
r_drawersasm.obj
)
else()
set (CODEGENOBJ_SOURCES
r_drawersasm.o
)
endif()
add_executable( zdoom WIN32 MACOSX_BUNDLE
${HEADER_FILES}
${NOT_COMPILED_SOURCE_FILES}
@ -1336,11 +1311,8 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE
math/tanh.c
math/fastsin.cpp
zzautozend.cpp
${CMAKE_BINARY_DIR}/src/${CODEGENOBJ_SOURCES}
)
set_source_files_properties( ${CODEGENOBJ_SOURCES} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
set_source_files_properties( ${FASTMATH_SOURCES} PROPERTIES COMPILE_FLAGS ${ZD_FASTMATH_FLAG} )
set_source_files_properties( xlat/parse_xlat.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c" )
set_source_files_properties( sc_man.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/sc_man_scanner.h" )
@ -1378,7 +1350,7 @@ include_directories( .
${CMAKE_BINARY_DIR}/gdtoa
${SYSTEM_SOURCES_DIR} )
add_dependencies( zdoom revision_check drawergen_target )
add_dependencies( zdoom revision_check )
# Due to some quirks, we need to do this in this order
if( NOT ZDOOM_OUTPUT_OLDSTYLE )

View file

@ -85,8 +85,6 @@ void PolyTriangleDrawer::draw(const PolyDrawArgs &args)
PolyRenderer::Instance()->Thread.DrawQueue->Push<DrawPolyTrianglesCommand>(args, mirror);
}
EXTERN_CVAR(Bool, r_phpdrawers);
void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadData *thread)
{
if (drawargs.vcount < 3)
@ -100,21 +98,11 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
if (!r_debug_trisetup) // For profiling how much time is spent in setup vs drawal
{
int bmode = (int)drawargs.blendmode;
if (r_phpdrawers)
{
if (drawargs.writeColor && drawargs.texturePixels)
drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriDraw32[bmode] : ScreenTriangle::TriDraw8[bmode];
else if (drawargs.writeColor)
drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriFill32[bmode] : ScreenTriangle::TriFill8[bmode];
}
else
{
auto llvm = Drawers::Instance();
if (drawargs.writeColor && drawargs.texturePixels)
drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode];
else if (drawargs.writeColor)
drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode];
}
if (drawargs.writeColor && drawargs.texturePixels)
drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriDraw32[bmode] : ScreenTriangle::TriDraw8[bmode];
else if (drawargs.writeColor)
drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriFill32[bmode] : ScreenTriangle::TriFill8[bmode];
}
if (drawargs.writeStencil)

View file

@ -62,104 +62,66 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
// Level of detail texture bias
CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided
CVAR(Bool, r_phpdrawers, false, 0);
namespace swrenderer
{
void SWTruecolorDrawers::DrawWallColumn(const WallDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawWall32Command>(args);
else
Queue->Push<DrawWall1LLVMCommand>(args);
Queue->Push<DrawWall32Command>(args);
}
void SWTruecolorDrawers::DrawWallMaskedColumn(const WallDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawWallMasked32Command>(args);
else
Queue->Push<DrawWallMasked1LLVMCommand>(args);
Queue->Push<DrawWallMasked32Command>(args);
}
void SWTruecolorDrawers::DrawWallAddColumn(const WallDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawWallAddClamp32Command>(args);
else
Queue->Push<DrawWallAdd1LLVMCommand>(args);
Queue->Push<DrawWallAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawWallAddClampColumn(const WallDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawWallAddClamp32Command>(args);
else
Queue->Push<DrawWallAddClamp1LLVMCommand>(args);
Queue->Push<DrawWallAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawWallSubClampColumn(const WallDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawWallSubClamp32Command>(args);
else
Queue->Push<DrawWallSubClamp1LLVMCommand>(args);
Queue->Push<DrawWallSubClamp32Command>(args);
}
void SWTruecolorDrawers::DrawWallRevSubClampColumn(const WallDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawWallRevSubClamp32Command>(args);
else
Queue->Push<DrawWallRevSubClamp1LLVMCommand>(args);
Queue->Push<DrawWallRevSubClamp32Command>(args);
}
void SWTruecolorDrawers::DrawColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSprite32Command>(args);
else
Queue->Push<DrawColumnLLVMCommand>(args);
Queue->Push<DrawSprite32Command>(args);
}
void SWTruecolorDrawers::FillColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<FillSprite32Command>(args);
else
Queue->Push<FillColumnLLVMCommand>(args);
Queue->Push<FillSprite32Command>(args);
}
void SWTruecolorDrawers::FillAddColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<FillSpriteAddClamp32Command>(args);
else
Queue->Push<FillColumnAddLLVMCommand>(args);
Queue->Push<FillSpriteAddClamp32Command>(args);
}
void SWTruecolorDrawers::FillAddClampColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<FillSpriteAddClamp32Command>(args);
else
Queue->Push<FillColumnAddClampLLVMCommand>(args);
Queue->Push<FillSpriteAddClamp32Command>(args);
}
void SWTruecolorDrawers::FillSubClampColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<FillSpriteSubClamp32Command>(args);
else
Queue->Push<FillColumnSubClampLLVMCommand>(args);
Queue->Push<FillSpriteSubClamp32Command>(args);
}
void SWTruecolorDrawers::FillRevSubClampColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<FillSpriteRevSubClamp32Command>(args);
else
Queue->Push<FillColumnRevSubClampLLVMCommand>(args);
Queue->Push<FillSpriteRevSubClamp32Command>(args);
}
void SWTruecolorDrawers::DrawFuzzColumn(const SpriteDrawerArgs &args)
@ -170,410 +132,92 @@ namespace swrenderer
void SWTruecolorDrawers::DrawAddColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteAddClamp32Command>(args);
else
Queue->Push<DrawColumnAddLLVMCommand>(args);
Queue->Push<DrawSpriteAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawTranslatedColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteTranslated32Command>(args);
else
Queue->Push<DrawColumnTranslatedLLVMCommand>(args);
Queue->Push<DrawSpriteTranslated32Command>(args);
}
void SWTruecolorDrawers::DrawTranslatedAddColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteTranslatedAddClamp32Command>(args);
else
Queue->Push<DrawColumnTlatedAddLLVMCommand>(args);
Queue->Push<DrawSpriteTranslatedAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawShadedColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteShaded32Command>(args);
else
Queue->Push<DrawColumnShadedLLVMCommand>(args);
Queue->Push<DrawSpriteShaded32Command>(args);
}
void SWTruecolorDrawers::DrawAddClampColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteAddClamp32Command>(args);
else
Queue->Push<DrawColumnAddClampLLVMCommand>(args);
Queue->Push<DrawSpriteAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteTranslatedAddClamp32Command>(args);
else
Queue->Push<DrawColumnAddClampTranslatedLLVMCommand>(args);
Queue->Push<DrawSpriteTranslatedAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawSubClampColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteSubClamp32Command>(args);
else
Queue->Push<DrawColumnSubClampLLVMCommand>(args);
Queue->Push<DrawSpriteSubClamp32Command>(args);
}
void SWTruecolorDrawers::DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteTranslatedSubClamp32Command>(args);
else
Queue->Push<DrawColumnSubClampTranslatedLLVMCommand>(args);
Queue->Push<DrawSpriteTranslatedSubClamp32Command>(args);
}
void SWTruecolorDrawers::DrawRevSubClampColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteRevSubClamp32Command>(args);
else
Queue->Push<DrawColumnRevSubClampLLVMCommand>(args);
Queue->Push<DrawSpriteRevSubClamp32Command>(args);
}
void SWTruecolorDrawers::DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpriteTranslatedRevSubClamp32Command>(args);
else
Queue->Push<DrawColumnRevSubClampTranslatedLLVMCommand>(args);
Queue->Push<DrawSpriteTranslatedRevSubClamp32Command>(args);
}
void SWTruecolorDrawers::DrawSpan(const SpanDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpan32Command>(args);
else
Queue->Push<DrawSpanLLVMCommand>(args);
Queue->Push<DrawSpan32Command>(args);
}
void SWTruecolorDrawers::DrawSpanMasked(const SpanDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpanMasked32Command>(args);
else
Queue->Push<DrawSpanMaskedLLVMCommand>(args);
Queue->Push<DrawSpanMasked32Command>(args);
}
void SWTruecolorDrawers::DrawSpanTranslucent(const SpanDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpanTranslucent32Command>(args);
else
Queue->Push<DrawSpanTranslucentLLVMCommand>(args);
Queue->Push<DrawSpanTranslucent32Command>(args);
}
void SWTruecolorDrawers::DrawSpanMaskedTranslucent(const SpanDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpanAddClamp32Command>(args);
else
Queue->Push<DrawSpanMaskedTranslucentLLVMCommand>(args);
Queue->Push<DrawSpanAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawSpanAddClamp(const SpanDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpanTranslucent32Command>(args);
else
Queue->Push<DrawSpanAddClampLLVMCommand>(args);
Queue->Push<DrawSpanTranslucent32Command>(args);
}
void SWTruecolorDrawers::DrawSpanMaskedAddClamp(const SpanDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSpanAddClamp32Command>(args);
else
Queue->Push<DrawSpanMaskedAddClampLLVMCommand>(args);
Queue->Push<DrawSpanAddClamp32Command>(args);
}
void SWTruecolorDrawers::DrawSingleSkyColumn(const SkyDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSkySingle32Command>(args);
else
Queue->Push<DrawSingleSky1LLVMCommand>(args);
Queue->Push<DrawSkySingle32Command>(args);
}
void SWTruecolorDrawers::DrawDoubleSkyColumn(const SkyDrawerArgs &args)
{
if (r_phpdrawers)
Queue->Push<DrawSkyDouble32Command>(args);
else
Queue->Push<DrawDoubleSky1LLVMCommand>(args);
}
// Copies everything the span drawer needs out of the SpanDrawerArgs into the
// command's own DrawSpanArgs, so the command no longer depends on drawerargs
// once constructed.
DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs)
{
	// Destination span and the render target it is written to.
	args.x1 = drawerargs.DestX1();
	args.x2 = drawerargs.DestX2();
	args.y = drawerargs.DestY();
	args.destorg = (uint32_t*)RenderViewport::Instance()->GetDest(0, 0);
	args.destpitch = RenderViewport::Instance()->RenderTarget->GetPitch();

	// Texture position, per-pixel steps, size (as bit counts) and pixel data.
	args.xfrac = drawerargs.TextureUPos();
	args.yfrac = drawerargs.TextureVPos();
	args.xstep = drawerargs.TextureUStep();
	args.ystep = drawerargs.TextureVStep();
	args.xbits = drawerargs.TextureWidthBits();
	args.ybits = drawerargs.TextureHeightBits();
	args.source = (const uint32_t*)drawerargs.TexturePixels();

	// Light level and blend factors (alpha values are shifted down to 8 fractional bits).
	args.light = LightBgra::calc_light_multiplier(drawerargs.Light());
	args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8);
	args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8);

	// Colormap shading constants.
	auto shade = drawerargs.ColormapConstants();
	args.light_red = shade.light_red;
	args.light_green = shade.light_green;
	args.light_blue = shade.light_blue;
	args.light_alpha = shade.light_alpha;
	args.fade_red = shade.fade_red;
	args.fade_green = shade.fade_green;
	args.fade_blue = shade.fade_blue;
	args.fade_alpha = shade.fade_alpha;
	args.desaturate = shade.desaturate;

	// Flags. sampler_setup() takes source/xbits/ybits by reference and may
	// change them, so it has to run after those fields were assigned above.
	args.flags = 0;
	if (shade.simple_shade)
		args.flags |= DrawSpanArgs::simple_shade;
	const bool filtered = sampler_setup(drawerargs.TextureLOD(), args.source, args.xbits, args.ybits, drawerargs.MipmappedTexture());
	if (!filtered)
		args.flags |= DrawSpanArgs::nearest_filter;

	// Dynamic light inputs.
	args.viewpos_x = drawerargs.dc_viewpos.X;
	args.step_viewpos_x = drawerargs.dc_viewpos_step.X;
	args.dynlights = drawerargs.dc_lights;
	args.num_dynlights = drawerargs.dc_num_lights;
}
// Runs the DrawSpan drawer for this command unless the calling thread skips row args.y.
void DrawSpanLLVMCommand::Execute(DrawerThread *thread)
{
	if (!thread->skipped_by_thread(args.y))
		Drawers::Instance()->DrawSpan(&args);
}
// Returns the command name followed by a textual dump of its arguments.
FString DrawSpanLLVMCommand::DebugInfo()
{
	const auto details = args.ToString();
	return "DrawSpan\n" + details;
}
// Selects the texture level to sample and reports whether filtering is enabled.
//
// lod       - level of detail; negative values are treated as magnification.
// source    - in/out: advanced past 2^xbits * 2^ybits pixels for every level skipped.
// xbits     - in/out: decremented once per level skipped.
// ybits     - in/out: decremented once per level skipped.
// mipmapped - levels are only skipped when this and r_mipmap are both set.
//
// Returns the value of r_magfilter when magnifying, r_minfilter otherwise.
bool DrawSpanLLVMCommand::sampler_setup(double lod, const uint32_t * &source, int &xbits, int &ybits, bool mipmapped)
{
	const bool is_magnified = lod < 0.0;

	if (r_mipmap && mipmapped)
	{
		// Step down one level per whole LOD unit, but never once either
		// dimension has reached 2 bits or fewer.
		for (int remaining = (int)lod; remaining > 0 && xbits > 2 && ybits > 2; remaining--)
		{
			source += (1 << (xbits)) * (1 << (ybits));
			xbits -= 1;
			ybits -= 1;
		}
	}

	if (is_magnified)
		return !!r_magfilter;
	return !!r_minfilter;
}
/////////////////////////////////////////////////////////////////////////////
// Runs the DrawSpanMasked drawer unless the calling thread skips row args.y.
void DrawSpanMaskedLLVMCommand::Execute(DrawerThread *thread)
{
	if (!thread->skipped_by_thread(args.y))
		Drawers::Instance()->DrawSpanMasked(&args);
}
// Runs the DrawSpanTranslucent drawer unless the calling thread skips row args.y.
void DrawSpanTranslucentLLVMCommand::Execute(DrawerThread *thread)
{
	if (!thread->skipped_by_thread(args.y))
		Drawers::Instance()->DrawSpanTranslucent(&args);
}
// Runs the DrawSpanMaskedTranslucent drawer unless the calling thread skips row args.y.
void DrawSpanMaskedTranslucentLLVMCommand::Execute(DrawerThread *thread)
{
	if (!thread->skipped_by_thread(args.y))
		Drawers::Instance()->DrawSpanMaskedTranslucent(&args);
}
// Runs the DrawSpanAddClamp drawer unless the calling thread skips row args.y.
void DrawSpanAddClampLLVMCommand::Execute(DrawerThread *thread)
{
	if (!thread->skipped_by_thread(args.y))
		Drawers::Instance()->DrawSpanAddClamp(&args);
}
// Runs the DrawSpanMaskedAddClamp drawer unless the calling thread skips row args.y.
void DrawSpanMaskedAddClampLLVMCommand::Execute(DrawerThread *thread)
{
	if (!thread->skipped_by_thread(args.y))
		Drawers::Instance()->DrawSpanMaskedAddClamp(&args);
}
/////////////////////////////////////////////////////////////////////////////
// Builds a WorkerThreadData carrying the thread's core index, core count and
// the start/end rows of its current pass.
WorkerThreadData DrawWall1LLVMCommand::ThreadData(DrawerThread *thread)
{
	WorkerThreadData worker;
	worker.pass_start_y = thread->pass_start_y;
	worker.pass_end_y = thread->pass_end_y;
	worker.num_cores = thread->num_cores;
	worker.core = thread->core;
	return worker;
}
// Copies everything the wall column drawer needs out of the WallDrawerArgs
// into the command's own DrawWallArgs.
DrawWall1LLVMCommand::DrawWall1LLVMCommand(const WallDrawerArgs &drawerargs)
{
	// Destination column.
	args.dest = (uint32_t*)drawerargs.Dest();
	args.dest_y = drawerargs.DestY();
	args.count = drawerargs.Count();
	args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch();

	// Texture sampling state. Only slot 0 of each array is filled in here.
	args.source[0] = (const uint32 *)drawerargs.TexturePixels();
	args.source2[0] = (const uint32 *)drawerargs.TexturePixels2();
	args.textureheight[0] = drawerargs.TextureHeight();
	args.texturefrac[0] = drawerargs.TextureVPos();
	args.texturefracx[0] = drawerargs.TextureUPos();
	args.iscale[0] = drawerargs.TextureVStep();

	// Light level and blend factors (alpha values are shifted down to 8 fractional bits).
	args.light[0] = LightBgra::calc_light_multiplier(drawerargs.Light());
	args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8);
	args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8);

	// Colormap shading constants.
	auto shade = drawerargs.ColormapConstants();
	args.light_red = shade.light_red;
	args.light_green = shade.light_green;
	args.light_blue = shade.light_blue;
	args.light_alpha = shade.light_alpha;
	args.fade_red = shade.fade_red;
	args.fade_green = shade.fade_green;
	args.fade_blue = shade.fade_blue;
	args.fade_alpha = shade.fade_alpha;
	args.desaturate = shade.desaturate;

	// Flags. A missing second texture pointer selects nearest filtering.
	args.flags = 0;
	if (shade.simple_shade)
		args.flags |= DrawWallArgs::simple_shade;
	if (args.source2[0] == nullptr)
		args.flags |= DrawWallArgs::nearest_filter;

	// Dynamic light inputs.
	args.z = drawerargs.dc_viewpos.Z;
	args.step_z = drawerargs.dc_viewpos_step.Z;
	args.dynlights = drawerargs.dc_lights;
	args.num_dynlights = drawerargs.dc_num_lights;
}
// Hands the stored wall arguments and this thread's pass data to the vlinec1 drawer.
void DrawWall1LLVMCommand::Execute(DrawerThread *thread)
{
	WorkerThreadData worker = ThreadData(thread);
	Drawers::Instance()->vlinec1(&args, &worker);
}
// Returns the command name followed by a textual dump of its arguments.
FString DrawWall1LLVMCommand::DebugInfo()
{
	const auto details = args.ToString();
	return "DrawWall1\n" + details;
}
/////////////////////////////////////////////////////////////////////////////
// Builds a WorkerThreadData carrying the thread's core index, core count and
// the start/end rows of its current pass.
WorkerThreadData DrawColumnLLVMCommand::ThreadData(DrawerThread *thread)
{
	WorkerThreadData worker;
	worker.pass_start_y = thread->pass_start_y;
	worker.pass_end_y = thread->pass_end_y;
	worker.num_cores = thread->num_cores;
	worker.core = thread->core;
	return worker;
}
// Returns the command name followed by a textual dump of its arguments.
FString DrawColumnLLVMCommand::DebugInfo()
{
	const auto details = args.ToString();
	return "DrawColumn\n" + details;
}
// Copies everything the sprite column drawers need out of the SpriteDrawerArgs
// into the command's own DrawColumnArgs.
DrawColumnLLVMCommand::DrawColumnLLVMCommand(const SpriteDrawerArgs &drawerargs)
{
	// Destination column.
	args.dest = (uint32_t*)drawerargs.Dest();
	args.dest_y = drawerargs.DestY();
	args.count = drawerargs.Count();
	args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch();

	// Texture pixels, lookup tables and sampling position.
	args.source = drawerargs.TexturePixels();
	args.source2 = drawerargs.TexturePixels2();
	args.colormap = drawerargs.Colormap();
	args.translation = drawerargs.TranslationMap();
	args.basecolors = (const uint32_t *)GPalette.BaseColors;
	args.textureheight = drawerargs.TextureHeight();
	args.texturefrac = drawerargs.TextureVPos();
	args.texturefracx = drawerargs.TextureUPos();
	args.iscale = drawerargs.TextureVStep();

	// Light level first: the shaded solid color below is computed from it.
	args.light = LightBgra::calc_light_multiplier(drawerargs.Light());
	args.color = LightBgra::shade_pal_index_simple(drawerargs.SolidColor(), args.light);
	args.srccolor = drawerargs.SrcColorBgra();
	args.srcalpha = drawerargs.SrcAlpha() >> (FRACBITS - 8);
	args.destalpha = drawerargs.DestAlpha() >> (FRACBITS - 8);

	// Colormap shading constants.
	auto shade = drawerargs.ColormapConstants();
	args.light_red = shade.light_red;
	args.light_green = shade.light_green;
	args.light_blue = shade.light_blue;
	args.light_alpha = shade.light_alpha;
	args.fade_red = shade.fade_red;
	args.fade_green = shade.fade_green;
	args.fade_blue = shade.fade_blue;
	args.fade_alpha = shade.fade_alpha;
	args.desaturate = shade.desaturate;

	// Flags. A missing second texture pointer selects nearest filtering.
	args.flags = 0;
	if (shade.simple_shade)
		args.flags |= DrawColumnArgs::simple_shade;
	if (args.source2 == nullptr)
		args.flags |= DrawColumnArgs::nearest_filter;
}
// Hands the stored column arguments and this thread's pass data to the DrawColumn drawer.
void DrawColumnLLVMCommand::Execute(DrawerThread *thread)
{
	WorkerThreadData worker = ThreadData(thread);
	Drawers::Instance()->DrawColumn(&args, &worker);
}
/////////////////////////////////////////////////////////////////////////////
// Builds a WorkerThreadData carrying the thread's core index, core count and
// the start/end rows of its current pass.
WorkerThreadData DrawSkyLLVMCommand::ThreadData(DrawerThread *thread)
{
	WorkerThreadData worker;
	worker.pass_start_y = thread->pass_start_y;
	worker.pass_end_y = thread->pass_end_y;
	worker.num_cores = thread->num_cores;
	worker.core = thread->core;
	return worker;
}
// Copies everything the sky column drawers need out of the SkyDrawerArgs
// into the command's own DrawSkyArgs.
DrawSkyLLVMCommand::DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs)
{
	// Destination column.
	args.dest = (uint32_t*)drawerargs.Dest();
	args.dest_y = drawerargs.DestY();
	args.count = drawerargs.Count();
	args.pitch = RenderViewport::Instance()->RenderTarget->GetPitch();

	// Front and back sky textures. Only slot 0 of each array is filled in here.
	args.source0[0] = (const uint32_t *)drawerargs.FrontTexturePixels();
	args.source1[0] = (const uint32_t *)drawerargs.BackTexturePixels();
	args.textureheight0 = drawerargs.FrontTextureHeight();
	args.textureheight1 = drawerargs.BackTextureHeight();
	args.texturefrac[0] = drawerargs.TextureVPos();
	args.iscale[0] = drawerargs.TextureVStep();

	// Solid colors for the top and bottom, plus the fade flag.
	args.top_color = drawerargs.SolidTopColor();
	args.bottom_color = drawerargs.SolidBottomColor();
	if (drawerargs.FadeSky())
		args.flags = DrawSkyArgs::fade_sky;
	else
		args.flags = 0;
}
FString DrawSkyLLVMCommand::DebugInfo()
{
return "DrawSky\n" + args.ToString();
Queue->Push<DrawSkyDouble32Command>(args);
}
/////////////////////////////////////////////////////////////////////////////

View file

@ -68,131 +68,6 @@ namespace swrenderer
#endif
#endif
// Declares a class named <name>LLVMCommand that derives from <base>.
// The generated class inherits the base class constructors and overrides
// Execute() to build a WorkerThreadData with ThreadData(thread) and pass it,
// together with the command's args, to Drawers::Instance()-><func>.
//   name - prefix of the generated class name
//   func - member of Drawers to call
//   base - base class; must provide args and ThreadData()
#define DECLARE_DRAW_COMMAND(name, func, base) \
class name##LLVMCommand : public base \
{ \
public: \
using base::base; \
void Execute(DrawerThread *thread) override \
{ \
WorkerThreadData d = ThreadData(thread); \
Drawers::Instance()->func(&args, &d); \
} \
};
// Drawer command for spans. It is constructed from a SpanDrawerArgs and keeps
// its own DrawSpanArgs; the span variants below derive from it and only
// replace Execute().
class DrawSpanLLVMCommand : public DrawerCommand
{
public:
// Fills args from drawerargs.
DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs);
void Execute(DrawerThread *thread) override;
FString DebugInfo() override;
protected:
// Arguments passed to the drawer; protected so derived commands can use them.
DrawSpanArgs args;
private:
// Adjusts source/xbits/ybits (all passed by reference) for the given lod and
// returns a bool used to decide whether the nearest_filter flag is set.
inline static bool sampler_setup(double lod, const uint32_t * &source, int &xbits, int &ybits, bool mipmapped);
};
// Span command variant: reuses the DrawSpanLLVMCommand constructor and
// provides its own Execute().
class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand
{
public:
using DrawSpanLLVMCommand::DrawSpanLLVMCommand;
void Execute(DrawerThread *thread) override;
};
// Span command variant: reuses the DrawSpanLLVMCommand constructor and
// provides its own Execute().
class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand
{
public:
using DrawSpanLLVMCommand::DrawSpanLLVMCommand;
void Execute(DrawerThread *thread) override;
};
// Span command variant: reuses the DrawSpanLLVMCommand constructor and
// provides its own Execute().
class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand
{
public:
using DrawSpanLLVMCommand::DrawSpanLLVMCommand;
void Execute(DrawerThread *thread) override;
};
// Span command variant: reuses the DrawSpanLLVMCommand constructor and
// provides its own Execute().
class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand
{
public:
using DrawSpanLLVMCommand::DrawSpanLLVMCommand;
void Execute(DrawerThread *thread) override;
};
// Span command variant: reuses the DrawSpanLLVMCommand constructor and
// provides its own Execute().
class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand
{
public:
using DrawSpanLLVMCommand::DrawSpanLLVMCommand;
void Execute(DrawerThread *thread) override;
};
// Drawer command for wall columns. It is constructed from a WallDrawerArgs and
// is the base class for the wall commands generated by DECLARE_DRAW_COMMAND.
class DrawWall1LLVMCommand : public DrawerCommand
{
protected:
// Arguments passed to the drawer.
DrawWallArgs args;
// Packs the thread's core and pass information into a WorkerThreadData.
WorkerThreadData ThreadData(DrawerThread *thread);
public:
// Fills args from drawerargs.
DrawWall1LLVMCommand(const WallDrawerArgs &drawerargs);
void Execute(DrawerThread *thread) override;
FString DebugInfo() override;
};
// Drawer command for sprite columns. It is constructed from a SpriteDrawerArgs
// and is the base class for the column/fill commands generated by
// DECLARE_DRAW_COMMAND.
class DrawColumnLLVMCommand : public DrawerCommand
{
protected:
// Arguments passed to the drawer.
DrawColumnArgs args;
// Packs the thread's core and pass information into a WorkerThreadData.
WorkerThreadData ThreadData(DrawerThread *thread);
// NOTE(review): DebugInfo() is protected here but public in the sibling
// command classes - confirm whether that difference is intentional.
FString DebugInfo() override;
public:
// Fills args from drawerargs.
DrawColumnLLVMCommand(const SpriteDrawerArgs &drawerargs);
void Execute(DrawerThread *thread) override;
};
// Drawer command base for sky columns, constructed from a SkyDrawerArgs.
// It declares no Execute() of its own; the classes generated from it by
// DECLARE_DRAW_COMMAND supply one.
class DrawSkyLLVMCommand : public DrawerCommand
{
protected:
// Arguments passed to the drawer.
DrawSkyArgs args;
// Packs the thread's core and pass information into a WorkerThreadData.
WorkerThreadData ThreadData(DrawerThread *thread);
public:
// Fills args from drawerargs.
DrawSkyLLVMCommand(const SkyDrawerArgs &drawerargs);
FString DebugInfo() override;
};
// Each line below generates a <name>LLVMCommand class that forwards Execute()
// to the named Drawers member.

// Wall column commands (base: DrawWall1LLVMCommand).
DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand);
// Sprite column draw commands (base: DrawColumnLLVMCommand).
DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand);
// Sprite column fill commands (base: DrawColumnLLVMCommand).
DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand);
// Sky column commands (base: DrawSkyLLVMCommand).
DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand);
DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand);
class DrawFuzzColumnRGBACommand : public DrawerCommand
{
int _x;

View file

@ -1,287 +0,0 @@
/*
** LLVM code generated drawers
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "i_system.h"
#include "r_drawers.h"
#include "x86.h"
#include "c_cvars.h"
#include "version.h"
#include "m_misc.h"
/////////////////////////////////////////////////////////////////////////////
#if !defined(NO_DRAWERGEN)
extern "C"
{
void DrawColumn_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnShaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnAddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnTlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnAddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void FillColumn_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void FillColumnAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void FillColumnAddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void FillColumnSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void FillColumnRevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1Copy_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1Add_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1Shaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1AddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1SubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1RevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1Translated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1TlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1AddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1SubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt1RevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4Copy_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4Add_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4Shaded_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4AddClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4SubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4RevSubClamp_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4Translated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4TlatedAdd_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4AddClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4SubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawColumnRt4RevSubClampTranslated_SSE2(const DrawColumnArgs *, const WorkerThreadData *);
void DrawSpan_SSE2(const DrawSpanArgs *);
void DrawSpanMasked_SSE2(const DrawSpanArgs *);
void DrawSpanTranslucent_SSE2(const DrawSpanArgs *);
void DrawSpanMaskedTranslucent_SSE2(const DrawSpanArgs *);
void DrawSpanAddClamp_SSE2(const DrawSpanArgs *);
void DrawSpanMaskedAddClamp_SSE2(const DrawSpanArgs *);
void vlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void mvlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline1_add_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline4_add_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline1_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline4_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline1_subclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline4_subclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline1_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline4_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void DrawSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *);
void DrawDoubleSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *);
void TriDraw8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill8_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_3_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_4_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_5_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_6_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_7_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_8_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_9_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_10_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_11_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_12_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_13_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriFill32_14_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
}
#endif
/////////////////////////////////////////////////////////////////////////////
// Binds every drawer entry point to its "_SSE2" implementation.
// When NO_DRAWERGEN is defined nothing is bound: the function pointers keep
// their in-class nullptr defaults and the triangle tables stay empty.
Drawers::Drawers()
{
#if !defined(NO_DRAWERGEN)
	// Column drawers
	DrawColumn = DrawColumn_SSE2;
	DrawColumnAdd = DrawColumnAdd_SSE2;
	DrawColumnShaded = DrawColumnShaded_SSE2;
	DrawColumnAddClamp = DrawColumnAddClamp_SSE2;
	DrawColumnSubClamp = DrawColumnSubClamp_SSE2;
	DrawColumnRevSubClamp = DrawColumnRevSubClamp_SSE2;
	DrawColumnTranslated = DrawColumnTranslated_SSE2;
	DrawColumnTlatedAdd = DrawColumnTlatedAdd_SSE2;
	DrawColumnAddClampTranslated = DrawColumnAddClampTranslated_SSE2;
	DrawColumnSubClampTranslated = DrawColumnSubClampTranslated_SSE2;
	DrawColumnRevSubClampTranslated = DrawColumnRevSubClampTranslated_SSE2;

	// Solid color column fills
	FillColumn = FillColumn_SSE2;
	FillColumnAdd = FillColumnAdd_SSE2;
	FillColumnAddClamp = FillColumnAddClamp_SSE2;
	FillColumnSubClamp = FillColumnSubClamp_SSE2;
	FillColumnRevSubClamp = FillColumnRevSubClamp_SSE2;

	// Span drawers
	DrawSpan = DrawSpan_SSE2;
	DrawSpanMasked = DrawSpanMasked_SSE2;
	DrawSpanTranslucent = DrawSpanTranslucent_SSE2;
	DrawSpanMaskedTranslucent = DrawSpanMaskedTranslucent_SSE2;
	DrawSpanAddClamp = DrawSpanAddClamp_SSE2;
	DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2;

	// Wall drawers
	vlinec1 = vlinec1_SSE2;
	mvlinec1 = mvlinec1_SSE2;
	tmvline1_add = tmvline1_add_SSE2;
	tmvline1_addclamp = tmvline1_addclamp_SSE2;
	tmvline1_subclamp = tmvline1_subclamp_SSE2;
	tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2;

	// Sky drawers
	DrawSky1 = DrawSky1_SSE2;
	DrawDoubleSky1 = DrawDoubleSky1_SSE2;

	// Triangle drawers: the position in each table equals the numeric suffix
	// of the function stored there (0..14).
	TriDraw8 = {
		TriDraw8_0_SSE2, TriDraw8_1_SSE2, TriDraw8_2_SSE2, TriDraw8_3_SSE2, TriDraw8_4_SSE2,
		TriDraw8_5_SSE2, TriDraw8_6_SSE2, TriDraw8_7_SSE2, TriDraw8_8_SSE2, TriDraw8_9_SSE2,
		TriDraw8_10_SSE2, TriDraw8_11_SSE2, TriDraw8_12_SSE2, TriDraw8_13_SSE2, TriDraw8_14_SSE2
	};
	TriDraw32 = {
		TriDraw32_0_SSE2, TriDraw32_1_SSE2, TriDraw32_2_SSE2, TriDraw32_3_SSE2, TriDraw32_4_SSE2,
		TriDraw32_5_SSE2, TriDraw32_6_SSE2, TriDraw32_7_SSE2, TriDraw32_8_SSE2, TriDraw32_9_SSE2,
		TriDraw32_10_SSE2, TriDraw32_11_SSE2, TriDraw32_12_SSE2, TriDraw32_13_SSE2, TriDraw32_14_SSE2
	};
	TriFill8 = {
		TriFill8_0_SSE2, TriFill8_1_SSE2, TriFill8_2_SSE2, TriFill8_3_SSE2, TriFill8_4_SSE2,
		TriFill8_5_SSE2, TriFill8_6_SSE2, TriFill8_7_SSE2, TriFill8_8_SSE2, TriFill8_9_SSE2,
		TriFill8_10_SSE2, TriFill8_11_SSE2, TriFill8_12_SSE2, TriFill8_13_SSE2, TriFill8_14_SSE2
	};
	TriFill32 = {
		TriFill32_0_SSE2, TriFill32_1_SSE2, TriFill32_2_SSE2, TriFill32_3_SSE2, TriFill32_4_SSE2,
		TriFill32_5_SSE2, TriFill32_6_SSE2, TriFill32_7_SSE2, TriFill32_8_SSE2, TriFill32_9_SSE2,
		TriFill32_10_SSE2, TriFill32_11_SSE2, TriFill32_12_SSE2, TriFill32_13_SSE2, TriFill32_14_SSE2
	};
#endif
}
// Returns the single shared Drawers table, constructed on first use.
Drawers *Drawers::Instance()
{
	static Drawers singleton;
	return &singleton;
}
// Formats a short summary of these wall arguments: dest_y, count, flags and
// the first entry of texturefrac and textureheight.
FString DrawWallArgs::ToString()
{
	FString text;
	text.Format(
		"dest_y = %i, count = %i, flags = %i, texturefrac[0] = %u, textureheight[0] = %u",
		dest_y, count, flags, texturefrac[0], textureheight[0]);
	return text;
}
// Formats a short summary of these span arguments: x1, x2, y and flags.
FString DrawSpanArgs::ToString()
{
	FString text;
	text.Format(
		"x1 = %i, x2 = %i, y = %i, flags = %i",
		x1, x2, y, flags);
	return text;
}
// Formats a short summary of these column arguments. iscale and texturefrac
// are printed twice: as raw integers and divided by FRACUNIT.
FString DrawColumnArgs::ToString()
{
	FString text;
	text.Format(
		"dest_y = %i, count = %i, flags = %i, iscale = %i (%f), texturefrac = %i (%f)",
		dest_y, count, flags,
		iscale, ((fixed_t)iscale) / (float)FRACUNIT,
		texturefrac, ((fixed_t)texturefrac) / (float)FRACUNIT);
	return text;
}
// Formats a short summary of these sky arguments: dest_y and count.
FString DrawSkyArgs::ToString()
{
	FString text;
	text.Format(
		"dest_y = %i, count = %i",
		dest_y, count);
	return text;
}

View file

@ -66,149 +66,6 @@ struct TriLight
float radius;
};
// Argument block handed to the wall drawers (vlinec1, mvlinec1, tmvline1_*).
// NOTE(review): presumably read field-by-index by the matching code generator,
// the way DrawColumnArgs is - confirm before reordering or inserting members.
struct DrawWallArgs
{
uint32_t *dest;
const uint32_t *source[4];
const uint32_t *source2[4];
int32_t pitch;
int32_t count;
int32_t dest_y;
uint32_t texturefrac[4];
uint32_t texturefracx[4];
uint32_t iscale[4];
uint32_t textureheight[4];
uint32_t light[4];
uint32_t srcalpha;
uint32_t destalpha;
// Light and fade color components plus desaturation amount
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
uint16_t desaturate;
// Bitwise combination of Flags values
uint32_t flags;
enum Flags
{
simple_shade = 1,
nearest_filter = 2
};
float z, step_z;
// Array of dynamic lights and its element count
TriLight *dynlights;
uint32_t num_dynlights;
// Returns a short human-readable summary of selected fields
FString ToString();
};
// Argument block handed to the span drawers (DrawSpan, DrawSpanMasked, ...).
// NOTE(review): presumably read field-by-index by the matching code generator,
// the way DrawColumnArgs is - confirm before reordering or inserting members.
struct DrawSpanArgs
{
uint32_t *destorg;
const uint32_t *source;
int32_t destpitch;
int32_t xfrac;
int32_t yfrac;
int32_t xstep;
int32_t ystep;
int32_t x1;
int32_t x2;
int32_t y;
int32_t xbits;
int32_t ybits;
uint32_t light;
uint32_t srcalpha;
uint32_t destalpha;
// Light and fade color components plus desaturation amount
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
uint16_t desaturate;
// Bitwise combination of Flags values
uint32_t flags;
enum Flags
{
simple_shade = 1,
nearest_filter = 2
};
float viewpos_x, step_viewpos_x;
// Array of dynamic lights and its element count
TriLight *dynlights;
uint32_t num_dynlights;
// Returns a short human-readable summary of selected fields
FString ToString();
};
// Argument block handed to the column drawers (DrawColumn*, FillColumn*).
//
// The member order is part of the contract with the code generator:
// DrawColumnCodegen::Generate loads the fields by position, from index 0
// (dest) through index 27 (flags). Do not reorder, insert or remove members
// without updating those indices.
struct DrawColumnArgs
{
uint32_t *dest;
const uint8_t *source;
const uint8_t *source2;
uint8_t *colormap;
uint8_t *translation;
const uint32_t *basecolors;
int32_t pitch;
int32_t count;
int32_t dest_y;
uint32_t iscale;
uint32_t texturefracx;
uint32_t textureheight;
uint32_t texturefrac;
uint32_t light;
uint32_t color;
uint32_t srccolor;
uint32_t srcalpha;
uint32_t destalpha;
// Light and fade color components plus desaturation amount
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
uint16_t desaturate;
// Bitwise combination of Flags values
uint32_t flags;
enum Flags
{
simple_shade = 1,
nearest_filter = 2
};
// Returns a short human-readable summary of selected fields
FString ToString();
};
// Argument block handed to the sky drawers (DrawSky1, DrawDoubleSky1).
// NOTE(review): presumably read field-by-index by the matching code generator,
// the way DrawColumnArgs is - confirm before reordering or inserting members.
struct DrawSkyArgs
{
uint32_t *dest;
const uint32_t *source0[4];
const uint32_t *source1[4];
int32_t pitch;
int32_t count;
int32_t dest_y;
uint32_t texturefrac[4];
uint32_t iscale[4];
uint32_t textureheight0;
uint32_t textureheight1;
uint32_t top_color;
uint32_t bottom_color;
// Bitwise combination of Flags values
uint32_t flags;
enum Flags
{
fade_sky = 1
};
// Returns a short human-readable summary of selected fields
FString ToString();
};
struct TriVertex
{
TriVertex() { }
@ -292,51 +149,3 @@ enum class TriBlendMode
};
// Number of TriBlendMode values, computed as the numeric value of Skycap plus one.
// NOTE(review): assumes Skycap is the last enumerator - confirm when adding modes.
inline int NumTriBlendModes()
{
	return static_cast<int>(TriBlendMode::Skycap) + 1;
}
// Table of drawer entry points, reached through Drawers::Instance().
//
// Every function pointer defaults to nullptr and every vector starts empty.
// The private constructor binds them to the "_SSE2" implementations unless
// NO_DRAWERGEN is defined, in which case they keep those defaults.
class Drawers
{
public:
// Returns the shared instance
static Drawers *Instance();
// Column drawers
void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
// Solid color column fills
void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
// Span drawers (these take no WorkerThreadData argument)
void(*DrawSpan)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMaskedTranslucent)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr;
// Wall drawers
void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
// Sky drawers
void(*DrawSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr;
void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr;
// Triangle drawers. The constructor appends the functions suffixed _0 to _14
// in order, so the index into each vector equals that numeric suffix.
// Unlike the drawers above, these take a non-const WorkerThreadData pointer.
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDraw8;
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDraw32;
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriFill8;
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriFill32;
private:
// Private so the table can only be obtained through Instance()
Drawers();
};

View file

@ -7,6 +7,5 @@ if( WIN32 AND NOT CMAKE_SIZEOF_VOID_P MATCHES "8" )
endif()
# Helper tools, one subdirectory each
add_subdirectory( updaterevision )
add_subdirectory( zipdir )
add_subdirectory( drawergen )
# Hand the CROSS_EXPORTS list up to the including directory
set( CROSS_EXPORTS ${CROSS_EXPORTS} PARENT_SCOPE )

View file

@ -1,187 +0,0 @@
# Build script for the drawergen tool: shared setup used by the sections below.
cmake_minimum_required( VERSION 2.8.7 )
include( CheckCXXCompilerFlag )
include(../../precompiled_headers.cmake)
# Path where it looks for the LLVM compiled files on Windows
set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../llvm" )
# X64 is set only when pointers are 8 bytes wide; it later selects the
# 64bit- or 32bit- precompiled LLVM folders
if( CMAKE_SIZEOF_VOID_P MATCHES "8" )
set( X64 64 )
endif()
# Start from an empty library list unless one was already provided
if( NOT DRAWERGEN_LIBS )
set( DRAWERGEN_LIBS "" )
endif()
include_directories( . )
# Headers are collected only so they can be attached to the executable target
file( GLOB HEADER_FILES
*.h
ssa/*.h
fixedfunction/*.h
)
# Locate LLVM.
# - Non-Windows: use the installed LLVMConfig.cmake and append the mapped
#   component libraries to DRAWERGEN_LIBS.
# - Windows: use the precompiled tree under LLVM_PRECOMPILED_DIR and build the
#   per-configuration lists LLVM_RELEASE_LIBS and LLVM_DEBUG_LIBS.
if( NOT WIN32 )
set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen ipo
irreader transformutils instrumentation profiledata runtimedyld
object instcombine linker analysis selectiondag scalaropts vectorize executionengine
mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen
armasmprinter arminfo armdesc armcodegen )
# Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm
find_package(LLVM REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} )
include_directories( ${LLVM_INCLUDE_DIRS} )
set( DRAWERGEN_LIBS ${DRAWERGEN_LIBS} ${llvm_libs} )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti" )
else()
set( LLVM_COMPONENTS core support asmparser asmprinter bitreader bitwriter codegen passes ipo
irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld
object instcombine linker analysis selectiondag scalaropts vectorize executionengine
mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen )
include_directories( "${LLVM_PRECOMPILED_DIR}/include" )
if( X64 )
include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" )
set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/64bit-" )
else()
include_directories( "${LLVM_PRECOMPILED_DIR}/32bit-include" )
set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit-" )
endif()
# For each configuration, search <base>RELEASE or <base>DEBUG for every
# LLVM<component> library and collect the results in LLVM_<config>_LIBS
foreach(buildtype IN ITEMS RELEASE DEBUG)
set( llvm_libs_${buildtype} "${llvm_libs_base}${buildtype}" )
set( LLVM_${buildtype}_LIBS "" )
foreach( llvm_module ${LLVM_COMPONENTS} )
# NOTE(review): the find_library result is appended without checking
# that the library was actually found
find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} )
set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} )
endforeach( llvm_module )
endforeach(buildtype)
endif()
# Warning and language-standard flags for GCC-compatible compilers.
# Every flag is prepended, so flags already present in CMAKE_C_FLAGS and
# CMAKE_CXX_FLAGS come later on the command line.
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.5")
set( CMAKE_C_FLAGS "-Wno-unused-result ${CMAKE_C_FLAGS}" )
set( CMAKE_CXX_FLAGS "-Wno-unused-result ${CMAKE_CXX_FLAGS}" )
endif()
if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" )
if( APPLE OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "3.6" )
set( CMAKE_CXX_FLAGS "-Wno-inconsistent-missing-override ${CMAKE_CXX_FLAGS}" )
endif()
endif()
set( CMAKE_C_FLAGS "-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wno-missing-field-initializers -ffp-contract=off ${CMAKE_C_FLAGS}" )
set( CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wno-missing-field-initializers -ffp-contract=off ${CMAKE_CXX_FLAGS}" )
# Use the highest C++ standard available since VS2015 compiles with C++14
# but we only require C++11. The recommended way to do this in CMake is
# probably to use target_compile_features, but I don't feel like maintaining
# a list of features we use.
# The probes run from newest to oldest; the first accepted flag is used and
# the remaining ones are not tested.
CHECK_CXX_COMPILER_FLAG( "-std=gnu++14" CAN_DO_CPP14 )
if ( CAN_DO_CPP14 )
set ( CMAKE_CXX_FLAGS "-std=gnu++14 ${CMAKE_CXX_FLAGS}" )
else ()
CHECK_CXX_COMPILER_FLAG( "-std=gnu++1y" CAN_DO_CPP1Y )
if ( CAN_DO_CPP1Y )
set ( CMAKE_CXX_FLAGS "-std=gnu++1y ${CMAKE_CXX_FLAGS}" )
else ()
CHECK_CXX_COMPILER_FLAG( "-std=gnu++11" CAN_DO_CPP11 )
if ( CAN_DO_CPP11 )
set ( CMAKE_CXX_FLAGS "-std=gnu++11 ${CMAKE_CXX_FLAGS}" )
else ()
CHECK_CXX_COMPILER_FLAG( "-std=gnu++0x" CAN_DO_CPP0X )
if ( CAN_DO_CPP0X )
set ( CMAKE_CXX_FLAGS "-std=gnu++0x ${CMAKE_CXX_FLAGS}" )
endif ()
endif ()
endif ()
endif ()
# Clang on Apple platforms: compile and link against libc++
if ( APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang" )
set( CMAKE_CXX_FLAGS "-stdlib=libc++ ${CMAKE_CXX_FLAGS}" )
set( CMAKE_EXE_LINKER_FLAGS "-stdlib=libc++ ${CMAKE_EXE_LINKER_FLAGS}" )
endif ()
endif()
# Windows trust-info handling: either merge trustinfo.txt into the generated
# manifest after the build (MT_MERGE) or compile trustinfo.rc into the tool.
# TRUSTINFO is empty on every other platform.
if( WIN32 )
if( MSVC_VERSION GREATER 1399 )
# VC 8+ adds a manifest automatically to the executable. We need to
# merge ours with it.
set( MT_MERGE ON )
else()
set( TRUSTINFO trustinfo.rc )
endif()
else( WIN32 )
set( TRUSTINFO "" )
endif()
# Source files of the drawergen tool
set (SOURCES
drawergen.cpp
llvmprogram.cpp
llvmdrawers.cpp
ssa/ssa_bool.cpp
ssa/ssa_float.cpp
ssa/ssa_float_ptr.cpp
ssa/ssa_for_block.cpp
ssa/ssa_function.cpp
ssa/ssa_if_block.cpp
ssa/ssa_int.cpp
ssa/ssa_int_ptr.cpp
ssa/ssa_short.cpp
ssa/ssa_scope.cpp
ssa/ssa_struct_type.cpp
ssa/ssa_ubyte.cpp
ssa/ssa_ubyte_ptr.cpp
ssa/ssa_value.cpp
ssa/ssa_vec4f.cpp
ssa/ssa_vec4f_ptr.cpp
ssa/ssa_vec4i.cpp
ssa/ssa_vec4i_ptr.cpp
ssa/ssa_vec8s.cpp
ssa/ssa_vec16ub.cpp
fixedfunction/drawercodegen.cpp
fixedfunction/drawspancodegen.cpp
fixedfunction/drawwallcodegen.cpp
fixedfunction/drawcolumncodegen.cpp
fixedfunction/drawskycodegen.cpp
fixedfunction/drawtrianglecodegen.cpp
fixedfunction/setuptrianglecodegen.cpp
)
# enable_precompiled_headers is provided by the precompiled_headers.cmake include;
# it is given the NAME of the source list variable, not its expanded value
enable_precompiled_headers( precomp.h SOURCES )
# The drawergen executable is only built for native builds. Everything that
# configures the target lives inside the same guard: add_custom_command(TARGET)
# and target_link_libraries() fail at configure time when the target was not
# created in this directory, which is what used to happen when cross-compiling.
if( NOT CMAKE_CROSSCOMPILING )
	add_executable( drawergen ${SOURCES} ${TRUSTINFO} ${HEADER_FILES} )
	set( CROSS_EXPORTS ${CROSS_EXPORTS} drawergen PARENT_SCOPE )

	# Merge our trust info into the manifest the compiler generated
	if( MT_MERGE )
		add_custom_command(TARGET drawergen POST_BUILD
			COMMAND mt -inputresource:$<TARGET_FILE:drawergen> -manifest ${CMAKE_CURRENT_SOURCE_DIR}/trustinfo.txt -outputresource:$<TARGET_FILE:drawergen> -nologo
			COMMENT "Embedding trustinfo into drawergen" )
	endif()

	# Linux - add these flags for LLVM compatibility to prevent crashing
	#if ( UNIX AND NOT APPLE )
	#	set( CMAKE_EXE_LINKER_FLAGS "-Wl,--exclude-libs,ALL ${CMAKE_EXE_LINKER_FLAGS}" )
	#endif()

	target_link_libraries( drawergen ${DRAWERGEN_LIBS} )

	# On Windows the precompiled LLVM libraries differ per configuration, so
	# they are linked with the debug/optimized keywords
	if( WIN32 )
		foreach(debuglib ${LLVM_DEBUG_LIBS})
			target_link_libraries( drawergen debug ${debuglib} )
		endforeach(debuglib)
		foreach(releaselib ${LLVM_RELEASE_LIBS})
			target_link_libraries( drawergen optimized ${releaselib} )
		endforeach(releaselib)
	endif()
endif()
# IDE folder layout for the tool's files. The three commented-out entries use
# r_compiler/ paths; the active entries below use paths relative to this directory.
#source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+")
#source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+")
#source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+")
source_group("Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/.+\\.(cpp|h)$")
source_group("Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/ssa/.+")
source_group("Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/fixedfunction/.+")

View file

@ -1,142 +0,0 @@
/*
** LLVM code generated drawers
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "exception.h"
#include "llvmdrawers.h"
// Returns the process-wide string that accumulates the source file timestamps.
// The string is a function-local static, so it is created on first use.
std::string &AllTimestamps()
{
	static std::string accumulated;
	return accumulated;
}
// Appends one timestamp to the accumulated list, separating entries with a
// single space.
void AddSourceFileTimestamp(const char *timestamp)
{
	std::string &all = AllTimestamps();
	if (!all.empty())
	{
		all.push_back(' ');
	}
	all += timestamp;
}
// Entry point of the drawergen tool.
//
// Usage: drawergen <output filename>
//
// Compiles the drawers with LLVM and writes the resulting object file to the
// given path. The accumulated source timestamps are stored next to it in
// "<output filename>.timestamp"; when that file matches the current timestamps
// and the object file still exists, the compile is skipped.
//
// Returns 0 on success (or when nothing had to be done) and 1 on any error.
int main(int argc, char **argv)
{
	try
	{
		if (argc != 2)
		{
			// The leading space keeps the program name and the placeholder apart
			std::cerr << "Usage: " << argv[0] << " <output filename>" << std::endl;
			return 1;
		}

		std::string timestamp_filename = argv[1] + std::string(".timestamp");

		FILE *file = fopen(timestamp_filename.c_str(), "rb");
		if (file != nullptr)
		{
			char buffer[4096];
			size_t bytes_read = fread(buffer, 1, sizeof(buffer), file);
			fclose(file);
			std::string last_timestamp(buffer, bytes_read);

			if (AllTimestamps() == last_timestamp)
			{
				// Only trust the timestamp when the object file it describes is
				// still there; otherwise fall through and regenerate it.
				FILE *existing = fopen(argv[1], "rb");
				if (existing != nullptr)
				{
					fclose(existing);
					std::cout << "Not recompiling drawers because the object file is already up to date." << std::endl;
					return 0;
				}
			}
		}

		llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag)
		{
			std::cerr << "LLVM fatal error: " << reason << std::endl;
			exit(1);
		});

		llvm::InitializeNativeTarget();
		llvm::InitializeNativeTargetAsmPrinter();

		std::string triple = llvm::sys::getDefaultTargetTriple();

#ifdef __APPLE__
		// Target triple is x86_64-apple-darwin15.6.0
		auto pos = triple.find("-apple-darwin");
		if (pos != std::string::npos)
		{
			triple = triple.substr(0, pos) + "-apple-darwin10.11.0";
		}
#endif

		std::cout << "Target triple is " << triple << std::endl;

#ifdef __arm__
		std::string cpuName = llvm::sys::getHostCPUName(); // "armv8";
#else
		std::string cpuName = "pentium4";
#endif
		std::string features;

		std::cout << "Compiling drawer code for " << cpuName << ".." << std::endl;

		LLVMDrawers drawersSSE2(triple, cpuName, features, "_SSE2");

		// Write the object file
		file = fopen(argv[1], "wb");
		if (file == nullptr)
		{
			std::cerr << "Unable to open " << argv[1] << " for writing." << std::endl;
			return 1;
		}

		int result = fwrite(drawersSSE2.ObjectFile.data(), drawersSSE2.ObjectFile.size(), 1, file);
		fclose(file);

		if (result != 1)
		{
			std::cerr << "Could not write data to " << argv[1] << std::endl;
			return 1;
		}

		// Record the timestamps the object file was generated from
		file = fopen(timestamp_filename.c_str(), "wb");
		if (file == nullptr)
		{
			std::cerr << "Could not create timestamp file" << std::endl;
			return 1;
		}

		result = fwrite(AllTimestamps().data(), AllTimestamps().length(), 1, file);
		fclose(file);

		if (result != 1)
		{
			std::cerr << "Could not write timestamp file" << std::endl;
			return 1;
		}

		//LLVMDrawers drawersSSE4("core2");
		//LLVMDrawers drawersAVX("sandybridge");
		//LLVMDrawers drawersAVX2("haswell");

		return 0;
	}
	catch (const std::exception &e)
	{
		std::cerr << e.what() << std::endl;
		return 1;
	}
}

View file

@ -1,36 +0,0 @@
/*
** LLVM code generated drawers
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include <string>
#include <exception>
// Exception type carrying a message string; what() returns that message.
class Exception : public std::exception
{
public:
	Exception(const std::string &text)
		: message(text)
	{
	}

	// The returned pointer stays valid for as long as this object is alive
	const char *what() const noexcept override
	{
		return message.c_str();
	}

private:
	std::string message;
};

View file

@ -1,322 +0,0 @@
/*
** DrawColumn code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "fixedfunction/drawcolumncodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
// Emits the body of one column drawer.
//
// variant     - which drawer to generate; selects the per-pixel operation
// args        - SSA value for the DrawColumnArgs pointer; fields are loaded by
//               position, so the indices below must match the struct's member order
// thread_data - SSA value for the worker thread data pointer
//
// After loading the inputs, the count, destination, pitch, start fraction and
// step are adjusted for the calling thread, and the pixel loop is emitted
// twice under an if/else on is_simple_shade: once for simple shading and once
// for advanced shading.
void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data)
{
// Fields 0..17 of the argument block, in declaration order
dest = args[0][0].load(true);
source = args[0][1].load(true);
source2 = args[0][2].load(true);
colormap = args[0][3].load(true);
translation = args[0][4].load(true);
basecolors = args[0][5].load(true);
pitch = args[0][6].load(true);
count = args[0][7].load(true);
dest_y = args[0][8].load(true);
iscale = args[0][9].load(true);
texturefracx = args[0][10].load(true);
textureheight = args[0][11].load(true);
texturefrac = args[0][12].load(true);
light = args[0][13].load(true);
color = SSAVec4i::unpack(args[0][14].load(true));
srccolor = SSAVec4i::unpack(args[0][15].load(true));
srcalpha = args[0][16].load(true);
destalpha = args[0][17].load(true);
// Fields 18..26: 16-bit light/fade components and desaturation
SSAShort light_alpha = args[0][18].load(true);
SSAShort light_red = args[0][19].load(true);
SSAShort light_green = args[0][20].load(true);
SSAShort light_blue = args[0][21].load(true);
SSAShort fade_alpha = args[0][22].load(true);
SSAShort fade_red = args[0][23].load(true);
SSAShort fade_green = args[0][24].load(true);
SSAShort fade_blue = args[0][25].load(true);
SSAShort desaturate = args[0][26].load(true);
SSAInt flags = args[0][27].load(true);
// The shade constants are packed in blue, green, red, alpha order
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
// Worker thread fields, also loaded by position
thread.core = thread_data[0][0].load(true);
thread.num_cores = thread_data[0][1].load(true);
thread.pass_start_y = thread_data[0][2].load(true);
thread.pass_end_y = thread_data[0][3].load(true);
thread.temp = thread_data[0][4].load(true);
is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade);
is_nearest_filter = (flags & DrawColumnArgs::nearest_filter) == SSAInt(DrawColumnArgs::nearest_filter);
// Restrict the work to this thread's rows. The helpers are defined elsewhere;
// pitch and iscale are multiplied by the core count so each loop step
// advances num_cores rows.
count = count_for_thread(dest_y, count, thread);
dest = dest_for_thread(dest_y, pitch, dest, thread);
pitch = pitch * thread.num_cores;
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
iscale = iscale * thread.num_cores;
SSAIfBlock branch;
branch.if_block(is_simple_shade);
LoopShade(variant, true);
branch.else_block();
LoopShade(variant, false);
branch.end_block();
}
// Emits the pixel loop twice under an if/else on is_nearest_filter: once with
// nearest sampling and once with linear sampling. isSimpleShade is passed
// straight through to Loop.
void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, bool isSimpleShade)
{
SSAIfBlock branch;
branch.if_block(is_nearest_filter);
Loop(variant, isSimpleShade, true);
branch.else_block();
// Linear path only: 'one' is later passed to SampleLinear as the offset of the
// second sample row, and the start fraction is moved back by half of it
one = (1 << 30) / textureheight;
stack_frac.store(stack_frac.load() - (one >> 1));
Loop(variant, isSimpleShade, false);
branch.end_block();
}
// Emits the per-pixel loop: for index 0..count-1 it loads the destination
// pixel, computes the output color with ProcessPixel and stores it back, then
// advances the index by one and the fraction by iscale.
void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter)
{
stack_index.store(SSAInt(0));
{
SSAForBlock loop;
SSAInt index = stack_index.load();
loop.loop_block(index < count);
SSAInt sample_index, frac;
frac = stack_frac.load();
// Palette-input variants index the source with the integer part of the
// fraction; the other variants pass the full fraction on to the sampler
if (IsPaletteInput(variant))
sample_index = frac >> FRACBITS;
else
sample_index = frac;
// The offset is scaled by 4 (one pixel is read and written with the vec4ub accessors)
SSAInt offset = index * pitch * 4;
SSAVec4i bgcolor = dest[offset].load_vec4ub(false);
SSAVec4i outcolor = ProcessPixel(sample_index, bgcolor, variant, isSimpleShade, isNearestFilter);
dest[offset].store_vec4ub(outcolor);
stack_index.store(index.add(SSAInt(1), true, true));
stack_frac.store(frac + iscale);
loop.end_block();
}
}
// Returns true for the variants whose source is indexed with the integer part
// of the texture fraction (the shaded and translated drawers), and false for
// every other variant, including values not listed here.
bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant)
{
	switch (variant)
	{
	case DrawColumnVariant::DrawShaded:
	case DrawColumnVariant::DrawTranslated:
	case DrawColumnVariant::DrawTlatedAdd:
	case DrawColumnVariant::DrawAddClampTranslated:
	case DrawColumnVariant::DrawSubClampTranslated:
	case DrawColumnVariant::DrawRevSubClampTranslated:
		return true;

	default:
		return false;
	}
}
// Emits the color computation for one pixel of the given variant.
//
// sample_index - value produced by Loop (integer index for palette-input
//                variants, raw fraction otherwise)
// bgcolor      - the pixel currently in the destination
//
// Unknown variants are handled like DrawCopy.
SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter)
{
SSAInt alpha, inv_alpha;
SSAVec4i fg;
switch (variant)
{
default:
case DrawColumnVariant::DrawCopy:
return blend_copy(Sample(sample_index, isNearestFilter));
case DrawColumnVariant::Draw:
return blend_copy(Shade(Sample(sample_index, isNearestFilter), isSimpleShade));
// DrawAdd and DrawAddClamp share one implementation
case DrawColumnVariant::DrawAdd:
case DrawColumnVariant::DrawAddClamp:
fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade);
return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
case DrawColumnVariant::DrawShaded:
// The colormapped sample is clamped to 0..64 and scaled by 4 to give an
// alpha in 0..256; the fixed 'color' is blended over the background with it
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4;
inv_alpha = 256 - alpha;
return blend_add(color, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::DrawSubClamp:
fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade);
return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
case DrawColumnVariant::DrawRevSubClamp:
fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade);
return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
// Translated variants read the color through the translation table
case DrawColumnVariant::DrawTranslated:
return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade));
case DrawColumnVariant::DrawTlatedAdd:
case DrawColumnVariant::DrawAddClampTranslated:
fg = Shade(TranslateSample(sample_index), isSimpleShade);
return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
case DrawColumnVariant::DrawSubClampTranslated:
fg = Shade(TranslateSample(sample_index), isSimpleShade);
return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
case DrawColumnVariant::DrawRevSubClampTranslated:
fg = Shade(TranslateSample(sample_index), isSimpleShade);
return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
// Fill variants do not sample the source at all
case DrawColumnVariant::Fill:
return blend_copy(color);
case DrawColumnVariant::FillAdd:
// Alpha comes from component 3 of srccolor; adding (alpha >> 7) maps 255 to 256
alpha = srccolor[3];
alpha = alpha + (alpha >> 7);
inv_alpha = 256 - alpha;
return blend_add(srccolor, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::FillAddClamp:
return blend_add(srccolor, bgcolor, srcalpha, destalpha);
case DrawColumnVariant::FillSubClamp:
return blend_sub(srccolor, bgcolor, srcalpha, destalpha);
case DrawColumnVariant::FillRevSubClamp:
return blend_revsub(srccolor, bgcolor, srcalpha, destalpha);
}
}
// Palette-source counterpart of ProcessPixel: every sampled variant reads a
// palette index through the colormap or translation table and resolves it with
// basecolors/ShadePal, and blending uses srcalpha/destalpha directly.
// Unknown variants are handled like DrawCopy.
// NOTE(review): nothing in this file calls this function - confirm whether it
// is used elsewhere.
SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade)
{
SSAInt alpha, inv_alpha;
switch (variant)
{
default:
case DrawColumnVariant::DrawCopy:
return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub(true));
case DrawColumnVariant::Draw:
return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade));
// DrawAdd and DrawAddClamp share one implementation
case DrawColumnVariant::DrawAdd:
case DrawColumnVariant::DrawAddClamp:
return blend_add(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawShaded:
// The colormapped sample is clamped to 0..64 and scaled by 4 to give an
// alpha in 0..256; the fixed 'color' is blended over the background with it
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4;
inv_alpha = 256 - alpha;
return blend_add(color, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::DrawSubClamp:
return blend_sub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawRevSubClamp:
return blend_revsub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
// Translated variants read the palette index through the translation table
case DrawColumnVariant::DrawTranslated:
return blend_copy(ShadePal(TranslateSamplePal(sample_index), isSimpleShade));
case DrawColumnVariant::DrawTlatedAdd:
case DrawColumnVariant::DrawAddClampTranslated:
return blend_add(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawSubClampTranslated:
return blend_sub(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawRevSubClampTranslated:
return blend_revsub(ShadePal(TranslateSamplePal(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
// Fill variants do not sample the source at all
case DrawColumnVariant::Fill:
return blend_copy(color);
case DrawColumnVariant::FillAdd:
// Alpha comes from component 3 of srccolor; adding (alpha >> 7) maps 255 to 256
alpha = srccolor[3];
alpha = alpha + (alpha >> 7);
inv_alpha = 256 - alpha;
return blend_add(srccolor, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::FillAddClamp:
return blend_add(srccolor, bgcolor, srcalpha, destalpha);
case DrawColumnVariant::FillSubClamp:
return blend_sub(srccolor, bgcolor, srcalpha, destalpha);
case DrawColumnVariant::FillRevSubClamp:
return blend_revsub(srccolor, bgcolor, srcalpha, destalpha);
}
}
// Emits the source texel fetch for the given texture fraction: a single texel
// when isNearestFilter is set, otherwise the blend produced by SampleLinear.
SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, bool isNearestFilter)
{
	if (!isNearestFilter)
		return SampleLinear(source, source2, texturefracx, frac, one, textureheight);

	// Convert the fraction into a row of the texture, then into an offset (4 per texel)
	SSAInt texel_row = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS;
	return source[texel_row * 4].load_vec4ub(false);
}
// Emits a weighted blend of four texels: rows y0 and y1 taken from the two
// columns col0 and col1.
//
// texturefracx - horizontal weight towards col1
//                (assumes a range of 0..16 - TODO confirm against the caller)
// texturefracy - vertical texture position; 'one' is added to it to find the second row
// height       - texture height used to turn the position into a row
SSAVec4i DrawColumnCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
// Clamp to edge
SSAInt frac_y0 = (SSAInt::MAX(SSAInt::MIN(texturefracy, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height;
SSAInt frac_y1 = (SSAInt::MAX(SSAInt::MIN(texturefracy + one, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height;
SSAInt y0 = frac_y0 >> FRACBITS;
SSAInt y1 = frac_y1 >> FRACBITS;
SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);
// Weights are in sixteenths: a + inv_a == 16 and b + inv_b == 16
SSAInt inv_b = texturefracx;
SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
SSAInt a = 16 - inv_a;
SSAInt b = 16 - inv_b;
// The four weight products sum to 256, hence the shift by 8 (127 is added first for rounding)
return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
// Reads the byte at source[sample_index] and maps it through `colormap`,
// returning the mapped byte zero-extended to an int.
SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index)
{
	SSAInt texel = source[sample_index].load(true).zext_int();
	return colormap[texel].load(true).zext_int();
}
// Reads the byte at source[sample_index] and uses it to pick a 4-byte entry
// from the `translation` table.
SSAVec4i DrawColumnCodegen::TranslateSample(SSAInt sample_index)
{
	SSAInt texel = source[sample_index].load(true).zext_int();
	return translation[texel * 4].load_vec4ub(true);
}
// Reads the byte at source[sample_index] and maps it through a one-byte-per-
// entry `translation` table, returning the mapped byte zero-extended.
SSAInt DrawColumnCodegen::TranslateSamplePal(SSAInt sample_index)
{
	SSAInt texel = source[sample_index].load(true).zext_int();
	return translation[texel].load(true).zext_int();
}
// Applies the current light level to a BGRA colour. isSimpleShade is a
// generation-time switch choosing between the simple and the advanced
// (shade_constants based) shading code.
SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{
	return isSimpleShade
		? shade_bgra_simple(fg, light)
		: shade_bgra_advanced(fg, light, shade_constants);
}
// Looks up a palette index in `basecolors` and applies the current light
// level, using either the simple or the advanced shading code depending on
// the generation-time switch isSimpleShade.
SSAVec4i DrawColumnCodegen::ShadePal(SSAInt palIndex, bool isSimpleShade)
{
	return isSimpleShade
		? shade_pal_index_simple(palIndex, light, basecolors)
		: shade_pal_index_advanced(palIndex, light, shade_constants, basecolors);
}

View file

@ -1,92 +0,0 @@
/*
** DrawColumn code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "drawercodegen.h"
// Selects which column drawer DrawColumnCodegen generates.
// The Fill* variants work from a fixed colour, the Draw* variants sample the
// source column; suffixes name the blend operation (Add, Sub, RevSub, ...)
// and whether the sample is first run through the translation table.
// NOTE: enumerator order determines the underlying values; do not reorder
// without checking every user of the numeric values.
enum class DrawColumnVariant
{
Fill,
FillAdd,
FillAddClamp,
FillSubClamp,
FillRevSubClamp,
DrawCopy,
Draw,
DrawAdd,
DrawTranslated,
DrawTlatedAdd,
DrawShaded,
DrawAddClamp,
DrawAddClampTranslated,
DrawSubClamp,
DrawSubClampTranslated,
DrawRevSubClamp,
DrawRevSubClampTranslated
};
// Code generator for the column drawers. Generate() is the entry point; the
// private helpers emit the loop, sampling, shading and blending code for the
// requested DrawColumnVariant.
class DrawColumnCodegen : public DrawerCodegen
{
public:
// Emits the drawer for `variant`; `args` and `thread_data` are the SSA values
// for the drawer's argument and worker-thread structures.
void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data);
private:
void LoopShade(DrawColumnVariant variant, bool isSimpleShade);
void Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter);
// Per-pixel colour computation for truecolor and palette input respectively.
SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter);
SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade);
// Texture sampling helpers (nearest / bilinear).
SSAVec4i Sample(SSAInt frac, bool isNearestFilter);
SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
// Lookups of a source byte through the colormap / translation tables.
// The parameter is used as an index into `source` by the definitions.
SSAInt ColormapSample(SSAInt frac);
SSAVec4i TranslateSample(SSAInt frac);
SSAInt TranslateSamplePal(SSAInt frac);
// Light shading for BGRA colours and palette indices.
SSAVec4i Shade(SSAVec4i fgcolor, bool isSimpleShade);
SSAVec4i ShadePal(SSAInt palIndex, bool isSimpleShade);
bool IsPaletteInput(DrawColumnVariant variant);
// Loop-carried values kept in stack slots.
SSAStack<SSAInt> stack_index, stack_frac;
// Destination pointer, source column(s) and lookup tables.
SSAUBytePtr dest;
SSAUBytePtr source;
SSAUBytePtr source2;
SSAUBytePtr colormap;
SSAUBytePtr translation;
SSAUBytePtr basecolors;
// Drawer arguments.
SSAInt pitch;
SSAInt count;
SSAInt dest_y;
SSAInt iscale;
SSAInt texturefracx;
SSAInt textureheight;
SSAInt one;
SSAInt texturefrac;
SSAInt light;
SSAVec4i color;
SSAVec4i srccolor;
SSAInt srcalpha;
SSAInt destalpha;
// Runtime flags selecting between the generated code paths.
SSABool is_simple_shade;
SSABool is_nearest_filter;
SSAShadeConstants shade_constants;
SSAWorkerThread thread;
};

View file

@ -1,169 +0,0 @@
/*
** Drawer code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "fixedfunction/drawercodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
// Returns true when `line` is NOT rendered by this thread: it lies outside
// the current pass [pass_start_y, pass_end_y) or belongs to another core
// (lines are interleaved between cores by `line % num_cores`).
SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread thread)
{
	SSABool before_pass = line < thread.pass_start_y;
	SSABool after_pass = line >= thread.pass_end_y;
	SSABool owned_by_core = line % thread.num_cores == thread.core;
	return before_pass || after_pass || !owned_by_core;
}
// Number of lines to skip, starting at first_line, to reach the first line
// this thread renders: first the lines before the pass starts, then the
// lines up to the next one assigned to this core.
SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread)
{
	SSAInt lines_before_pass = SSAInt::MAX(thread.pass_start_y - first_line, SSAInt(0));
	SSAInt misalignment = (first_line + lines_before_pass - thread.core) % thread.num_cores;
	// The outer modulo maps a full num_cores step back to zero.
	SSAInt lines_to_core = (thread.num_cores - misalignment) % thread.num_cores;
	return lines_before_pass + lines_to_core;
}
// Number of lines, out of `count` lines starting at first_line, that this
// thread renders. The count is clipped to the end of the pass and never
// negative.
SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread)
{
	SSAInt remaining_in_pass = SSAInt::MAX(thread.pass_end_y - first_line, SSAInt(0));
	SSAInt clipped = SSAInt::MIN(count, remaining_in_pass);
	// Round up: every num_cores-th line after the skipped ones is ours.
	SSAInt owned = (clipped - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores;
	return SSAInt::MAX(owned, SSAInt(0));
}
// Destination address of the first line rendered by this thread. `pitch` is
// in pixels; each pixel occupies 4 bytes.
SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread)
{
	SSAInt byte_offset = skipped_by_thread(first_line, thread) * pitch * 4;
	return dest[byte_offset];
}
// Converts a fixed-point light value (FRACBITS fractional bits) into a
// multiplier: the value is reduced to 8 fractional bits and subtracted
// from 256.
SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light)
{
	SSAInt light_8bit = light >> (FRACBITS - 8);
	return 256 - light_8bit;
}
// Fetches the 4-byte colour for palette `index` from `basecolors`
// (the equivalent of GPalette.BaseColors[index]) and applies simple shading.
SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors)
{
	SSAUBytePtr entry = basecolors[index * 4];
	return shade_bgra_simple(entry.load_vec4ub(true), light);
}
// Fetches the 4-byte colour for palette `index` from `basecolors`
// (the equivalent of GPalette.BaseColors[index]) and applies advanced
// shading with the given shade constants.
SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors)
{
	SSAUBytePtr entry = basecolors[index * 4];
	return shade_bgra_advanced(entry.load_vec4ub(true), light, constants);
}
// Scales all channels of `color` by light/256, then restores the original
// alpha channel (element 3) so shading never affects alpha.
SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light)
{
	SSAInt original_alpha = color[3];
	SSAVec4i lit = color * light / 256;
	return lit.insert(3, original_alpha);
}
// Shades `color` (elements 0..3 = blue, green, red, alpha) in three steps:
// desaturate towards a weighted intensity, mix with the fade colour
// according to `light`, then multiply by the light colour. The original
// alpha is restored at the end.
SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants)
{
	SSAInt b = color[0];
	SSAInt g = color[1];
	SSAInt r = color[2];
	SSAInt original_alpha = color[3];

	// Weighted grey level, pre-multiplied by the desaturation amount.
	SSAInt grey = ((r * 77 + g * 143 + b * 37) >> 8) * constants.desaturate;

	SSAVec4i inv_light = 256 - light;
	SSAVec4i inv_desaturate = 256 - constants.desaturate;

	SSAVec4i desaturated = (color * inv_desaturate + grey) / 256;
	SSAVec4i faded = (constants.fade * inv_light + desaturated * light) / 256;
	SSAVec4i tinted = (faded * constants.light) / 256;
	return tinted.insert(3, original_alpha);
}
// Opaque "blend": the foreground colour replaces the background unchanged.
SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg)
{
return fg;
}
// Additive blend: (fg * srcalpha' + bg * destalpha) / 256, where srcalpha'
// is srcalpha scaled by the foreground's own alpha. The result alpha is
// forced to 255.
SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha)
{
	SSAInt fg_alpha = fg[3];
	SSAInt fg_alpha256 = fg_alpha + (fg_alpha >> 7); // 255 -> 256
	SSAInt src_weight = (fg_alpha256 * srcalpha + 128) >> 8;
	SSAVec4i blended = (fg * src_weight + bg * destalpha) / 256;
	return blended.insert(3, 255);
}
// Subtractive blend: (fg * srcalpha' - bg * destalpha) / 256, where
// srcalpha' is srcalpha scaled by the foreground's own alpha. The result
// alpha is forced to 255.
SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha)
{
	SSAInt fg_alpha = fg[3];
	SSAInt fg_alpha256 = fg_alpha + (fg_alpha >> 7); // 255 -> 256
	SSAInt src_weight = (fg_alpha256 * srcalpha + 128) >> 8;
	SSAVec4i blended = (fg * src_weight - bg * destalpha) / 256;
	return blended.insert(3, 255);
}
// Reverse subtractive blend: (bg * destalpha - fg * srcalpha') / 256, where
// srcalpha' is srcalpha scaled by the foreground's own alpha. The result
// alpha is forced to 255.
SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha)
{
	SSAInt fg_alpha = fg[3];
	SSAInt fg_alpha256 = fg_alpha + (fg_alpha >> 7); // 255 -> 256
	SSAInt src_weight = (fg_alpha256 * srcalpha + 128) >> 8;
	SSAVec4i blended = (bg * destalpha - fg * src_weight) / 256;
	return blended.insert(3, 255);
}
// Standard alpha blend of fg over bg using the foreground's alpha channel.
// The result alpha is forced to 255.
SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg)
{
	SSAInt fg_alpha = fg[3];
	SSAInt fg_weight = fg_alpha + (fg_alpha >> 7); // 255 -> 256
	SSAInt bg_weight = 256 - fg_weight;
	SSAVec4i blended = (fg * fg_weight + bg * bg_weight) / 256;
	return blended.insert(3, 255);
}
// Background weight to use when the foreground texel carries its own alpha:
// interpolates between 256 (fully transparent texel) and destalpha (fully
// opaque texel) according to the foreground alpha.
SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha)
{
	SSAInt fg_alpha = fg[3];
	SSAInt fg_weight = fg_alpha + (fg_alpha >> 7); // 255 -> 256
	SSAInt inv_fg_weight = 256 - fg_weight;
	return (destalpha * fg_weight + 256 * inv_fg_weight + 128) >> 8;
}
// Blends a fixed stencil colour over bg. fgalpha (0..255) scales the source
// weight and pulls the destination weight towards 256 as it approaches
// zero. The result alpha is forced to 255.
SSAVec4i DrawerCodegen::blend_stencil(SSAVec4i stencilcolor, SSAInt fgalpha, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha)
{
	SSAInt fg_weight = fgalpha + (fgalpha >> 7); // 255 -> 256
	SSAInt inv_fg_weight = 256 - fg_weight;
	SSAInt src_weight = (fg_weight * srcalpha + 128) >> 8;
	SSAInt dest_weight = (destalpha * fg_weight + 256 * inv_fg_weight + 128) >> 8;
	SSAVec4i blended = (stencilcolor * src_weight + bg * dest_weight) / 256;
	return blended.insert(3, 255);
}
// Per-channel blend: fg + bg * (1 - fg), with the channel values of fg used
// as the blend factors (scaled so that 255 maps to 256).
SSAVec4i DrawerCodegen::blend_add_srccolor_oneminussrccolor(SSAVec4i fg, SSAVec4i bg)
{
	SSAVec4i fg_weight = fg + (fg >> 7); // 255 -> 256
	SSAVec4i bg_weight = SSAVec4i(256) - fg_weight;
	SSAVec4i bg_part = (bg * bg_weight + 128) >> 8;
	return fg + bg_part;
}

View file

@ -1,96 +0,0 @@
/*
** Drawer code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "precomp.h"
#include "ssa/ssa_value.h"
#include "ssa/ssa_vec4f.h"
#include "ssa/ssa_vec4i.h"
#include "ssa/ssa_vec8s.h"
#include "ssa/ssa_vec16ub.h"
#include "ssa/ssa_int.h"
#include "ssa/ssa_int_ptr.h"
#include "ssa/ssa_short.h"
#include "ssa/ssa_ubyte_ptr.h"
#include "ssa/ssa_vec4f_ptr.h"
#include "ssa/ssa_vec4i_ptr.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_bool.h"
#include "ssa/ssa_barycentric_weight.h"
#include "llvm_include.h"
// SSA-side view of the worker thread state used to split lines between
// threads (see DrawerCodegen::line_skipped_by_thread and friends).
class SSAWorkerThread
{
public:
// Index of this core; a line is owned when line % num_cores == core.
SSAInt core;
// Total number of cores the lines are interleaved across.
SSAInt num_cores;
// The current pass covers lines in [pass_start_y, pass_end_y).
SSAInt pass_start_y;
SSAInt pass_end_y;
// Pointer to a per-thread buffer — presumably scratch memory; confirm with the users of this field.
SSAUBytePtr temp;
};
// Constants consumed by DrawerCodegen::shade_bgra_advanced.
class SSAShadeConstants
{
public:
// Light colour; the shaded colour is multiplied by it (per channel, /256).
SSAVec4i light;
// Fade colour mixed in according to the inverse of the light level.
SSAVec4i fade;
// Desaturation amount on a 0..256 scale (256 - desaturate weights the original colour).
SSAInt desaturate;
};
// Base class for the drawer code generators. Provides the shared helpers
// for splitting work between threads, light shading and colour blending.
class DrawerCodegen
{
public:
// Returns true if the line is skipped (i.e. NOT rendered) by this thread
SSABool line_skipped_by_thread(SSAInt line, SSAWorkerThread thread);
// The number of lines to skip to reach the first line to be rendered by this thread
SSAInt skipped_by_thread(SSAInt first_line, SSAWorkerThread thread);
// The number of lines to be rendered by this thread
SSAInt count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread);
// Calculate the dest address for the first line to be rendered by this thread
SSAUBytePtr dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread);
// LightBgra: light shading of BGRA colours and palette indices
SSAInt calc_light_multiplier(SSAInt light);
SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors);
SSAVec4i shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors);
SSAVec4i shade_bgra_simple(SSAVec4i color, SSAInt light);
SSAVec4i shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants);
// BlendBgra: blending of a foreground colour (fg) with the background (bg)
SSAVec4i blend_copy(SSAVec4i fg);
SSAVec4i blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg);
SSAVec4i blend_stencil(SSAVec4i color, SSAInt fgalpha, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_add_srccolor_oneminussrccolor(SSAVec4i fg, SSAVec4i bg);
// Calculates the final alpha values to be used when combined with the source texture alpha channel
SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha);
};
// Fixed-point format used by the drawers: FRACBITS fractional bits,
// FRACUNIT is the fixed-point representation of 1.
#define FRACBITS 16
#define FRACUNIT (1<<FRACBITS)

View file

@ -1,131 +0,0 @@
/*
** DrawSky code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "fixedfunction/drawskycodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
// Emits the sky column drawer for `variant`.
// Loads the drawer arguments and the worker-thread state, restricts the work
// to the lines owned by this thread and emits one loop with and one without
// the fade-out; is_fade_sky picks between them at run time.
void DrawSkyCodegen::Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data)
{
// Drawer arguments, loaded by field index from the args structure.
dest = args[0][0].load(true);
source0 = args[0][1].load(true);
source1 = args[0][5].load(true);
pitch = args[0][9].load(true);
count = args[0][10].load(true);
dest_y = args[0][11].load(true);
texturefrac = args[0][12].load(true);
iscale = args[0][16].load(true);
textureheight0 = args[0][20].load(true);
SSAInt textureheight1 = args[0][21].load(true);
// Highest valid row of the second texture; used to clamp its sample index.
maxtextureheight1 = textureheight1 - 1;
top_color = SSAVec4i::unpack(args[0][22].load(true));
bottom_color = SSAVec4i::unpack(args[0][23].load(true));
SSAInt flags = args[0][24].load(true);
// Worker-thread state.
thread.core = thread_data[0][0].load(true);
thread.num_cores = thread_data[0][1].load(true);
thread.pass_start_y = thread_data[0][2].load(true);
thread.pass_end_y = thread_data[0][3].load(true);
is_fade_sky = (flags & DrawSkyArgs::fade_sky) == SSAInt(DrawSkyArgs::fade_sky);
// Restrict the column to the lines this thread renders. Consecutive owned
// lines are num_cores apart, so pitch and the texture step are scaled, and
// the start coordinate is advanced by the skipped lines.
count = count_for_thread(dest_y, count, thread);
dest = dest_for_thread(dest_y, pitch, dest, thread);
pitch = pitch * thread.num_cores;
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
fracstep = iscale * thread.num_cores;
// Emit both loop flavours; the branch is taken at run time.
SSAIfBlock branch;
branch.if_block(is_fade_sky);
Loop(variant, true);
branch.else_block();
Loop(variant, false);
branch.end_block();
}
// Emits the per-line loop of the sky drawer. fade_sky is a generation-time
// switch: when set, every sampled colour is passed through FadeOut before it
// is stored.
void DrawSkyCodegen::Loop(DrawSkyVariant variant, bool fade_sky)
{
stack_index.store(SSAInt(0));
{
SSAForBlock loop;
SSAInt index = stack_index.load();
loop.loop_block(index < count);
SSAInt frac = stack_frac.load();
// Byte offset of the current line: pitch is in pixels, 4 bytes per pixel.
SSAInt offset = index * pitch * 4;
if (fade_sky)
{
SSAVec4i color = FadeOut(frac, Sample(frac, variant));
dest[offset].store_vec4ub(color);
}
else
{
dest[offset].store_vec4ub(Sample(frac, variant));
}
// Advance the loop counter and the texture coordinate for the next line.
stack_index.store(index.add(SSAInt(1), true, true));
stack_frac.store(frac + fracstep);
loop.end_block();
}
}
// Samples the sky at texture coordinate `frac`. The Single variant reads
// one texel from source0; otherwise the texel from source0 is alpha-blended
// over the texel from source1, whose row is clamped to maxtextureheight1.
SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, DrawSkyVariant variant)
{
	SSAInt row = (((frac << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
	SSAVec4i front = source0[row * 4].load_vec4ub(false);
	if (variant == DrawSkyVariant::Single)
		return front;

	SSAInt back_row = SSAInt::MIN(row, maxtextureheight1);
	SSAVec4i back = source1[back_row * 4].load_vec4ub(false);
	return blend_alpha_blend(front, back);
}
// Fades `color` towards top_color near the top of the sky texture and
// towards bottom_color near the bottom, based on the texture coordinate
// `frac`. The result alpha is forced to 255.
SSAVec4i DrawSkyCodegen::FadeOut(SSAInt frac, SSAVec4i color)
{
	const int start_fade = 2; // How fast it should fade out
	const int fade_shift = 16 - start_fade;

	SSAInt top_weight = SSAInt::MAX(SSAInt::MIN(frac.ashr(fade_shift), SSAInt(256)), SSAInt(0));
	SSAInt bottom_weight = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(fade_shift), SSAInt(256)), SSAInt(0));
	SSAInt inv_top_weight = 256 - top_weight;
	SSAInt inv_bottom_weight = 256 - bottom_weight;

	SSAVec4i top_faded = (color * top_weight + top_color * inv_top_weight) / 256;
	SSAVec4i both_faded = (top_faded * bottom_weight + bottom_color * inv_bottom_weight) / 256;
	return both_faded.insert(3, 255);
}

View file

@ -1,62 +0,0 @@
/*
** DrawSky code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "drawercodegen.h"
// Selects which sky drawer DrawSkyCodegen generates: Single samples one sky
// texture, Double blends a first texture over a second one.
enum class DrawSkyVariant
{
Single,
Double
};
// Code generator for the sky column drawers.
class DrawSkyCodegen : public DrawerCodegen
{
public:
// Emits the drawer for `variant`; `args` and `thread_data` are the SSA values
// for the drawer's argument and worker-thread structures.
void Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data);
private:
// Emits the per-line loop, with or without the fade-out.
void Loop(DrawSkyVariant variant, bool fade_sky);
// Samples the sky texture(s) at texture coordinate frac.
SSAVec4i Sample(SSAInt frac, DrawSkyVariant variant);
// Fades a colour towards top_color / bottom_color depending on frac.
SSAVec4i FadeOut(SSAInt frac, SSAVec4i color);
// Loop-carried values kept in stack slots.
SSAStack<SSAInt> stack_index, stack_frac;
SSAUBytePtr dest;
SSAUBytePtr source0;
SSAUBytePtr source1;
SSAInt pitch;
SSAInt count;
SSAInt dest_y;
SSAInt texturefrac;
SSAInt iscale;
SSAInt textureheight0;
// Height of the second texture minus one (clamp limit for its row index).
SSAInt maxtextureheight1;
SSAVec4i top_color;
SSAVec4i bottom_color;
SSAWorkerThread thread;
// Texture coordinate step between two lines rendered by this thread.
SSAInt fracstep;
// Runtime flag: true when the fade_sky flag is set in the drawer arguments.
SSABool is_fade_sky;
};

View file

@ -1,298 +0,0 @@
/*
** DrawSpan code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "fixedfunction/drawspancodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
// Emits the span drawer for `variant`.
// Loads the drawer arguments, derives the span length, destination address
// and texture addressing constants, then emits a simple-shade and an
// advanced-shade version of the loops; is_simple_shade picks between them
// at run time.
void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args)
{
// Drawer arguments, loaded by field index from the args structure.
destorg = args[0][0].load(true);
source = args[0][1].load(true);
destpitch = args[0][2].load(true);
stack_xfrac.store(args[0][3].load(true));
stack_yfrac.store(args[0][4].load(true));
xstep = args[0][5].load(true);
ystep = args[0][6].load(true);
x1 = args[0][7].load(true);
x2 = args[0][8].load(true);
y = args[0][9].load(true);
xbits = args[0][10].load(true);
ybits = args[0][11].load(true);
light = args[0][12].load(true);
srcalpha = args[0][13].load(true);
destalpha = args[0][14].load(true);
SSAShort light_alpha = args[0][15].load(true);
SSAShort light_red = args[0][16].load(true);
SSAShort light_green = args[0][17].load(true);
SSAShort light_blue = args[0][18].load(true);
SSAShort fade_alpha = args[0][19].load(true);
SSAShort fade_red = args[0][20].load(true);
SSAShort fade_green = args[0][21].load(true);
SSAShort fade_blue = args[0][22].load(true);
SSAShort desaturate = args[0][23].load(true);
SSAInt flags = args[0][24].load(true);
start_viewpos_x = args[0][25].load(true);
step_viewpos_x = args[0][26].load(true);
dynlights = args[0][27].load(true);
num_dynlights = args[0][28].load(true);
// Shade constants are assembled in blue, green, red, alpha order.
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
// The span covers x1..x2 inclusive on line y; 4 bytes per destination pixel.
count = x2 - x1 + 1;
data = destorg[(x1 + y * destpitch) * 4];
// Shifts and mask used by the nearest-filter texture lookup in Sample().
yshift = 32 - ybits;
xshift = yshift - xbits;
xmask = ((SSAInt(1) << xbits) - 1) << ybits;
// 64x64 is the most common case by far, so special case it.
is_64x64 = xbits == SSAInt(6) && ybits == SSAInt(6);
is_simple_shade = (flags & DrawSpanArgs::simple_shade) == SSAInt(DrawSpanArgs::simple_shade);
is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == SSAInt(DrawSpanArgs::nearest_filter);
// Emit both shading flavours; the branch is taken at run time.
SSAIfBlock branch;
branch.if_block(is_simple_shade);
LoopShade(variant, true);
branch.else_block();
LoopShade(variant, false);
branch.end_block();
}
// Emits the nearest-filter and the linear-filter version of the loops for
// one shading flavour; is_nearest_filter picks between them at run time.
void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade)
{
SSAIfBlock branch;
branch.if_block(is_nearest_filter);
LoopFilter(variant, isSimpleShade, true);
branch.else_block();
// Linear filtering: shift the start coordinates back by 1 << (31 - bits)
// before sampling — presumably half a texel, so the filter is centred on
// the texel; confirm against the non-generated drawer.
stack_xfrac.store(stack_xfrac.load() - (SSAInt(1) << (31 - xbits)));
stack_yfrac.store(stack_yfrac.load() - (SSAInt(1) << (31 - ybits)));
LoopFilter(variant, isSimpleShade, false);
branch.end_block();
}
// Emits the loops for one shading/filter combination, once specialised for
// 64x64 textures and once for the general case; is_64x64 picks between them
// at run time. Each version runs the 4-pixels-at-a-time loop first and then
// the single-pixel loop for the remaining pixels.
void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter)
{
SSAIfBlock branch;
branch.if_block(is_64x64);
{
SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, true);
// Continue at the first pixel not handled by the 4x loop.
Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, true);
}
branch.else_block();
{
SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, false);
// Continue at the first pixel not handled by the 4x loop.
Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, false);
}
branch.end_block();
}
// Emits the loop that processes the span four pixels (16 bytes) per
// iteration. Returns the number of iterations (count / 4) so the caller can
// continue with the remaining pixels.
SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64)
{
SSAInt sseLength = count / 4;
stack_index.store(SSAInt(0));
stack_viewpos_x.store(start_viewpos_x);
{
SSAForBlock loop;
SSAInt index = stack_index.load();
loop.loop_block(index < sseLength);
// Load four background pixels at once and widen them to four SSAVec4i.
SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(false);
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
SSAVec4i bgcolors[4] =
{
SSAVec4i::extendlo(bg0),
SSAVec4i::extendhi(bg0),
SSAVec4i::extendlo(bg1),
SSAVec4i::extendhi(bg1)
};
SSAVec4i colors[4];
// Unrolled at generation time: the per-pixel code is emitted four times.
for (int i = 0; i < 4; i++)
{
SSAInt xfrac = stack_xfrac.load();
SSAInt yfrac = stack_yfrac.load();
// Shade() reads viewpos_x for the dynamic light calculation.
viewpos_x = stack_viewpos_x.load();
colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant);
stack_viewpos_x.store(viewpos_x + step_viewpos_x);
stack_xfrac.store(xfrac + xstep);
stack_yfrac.store(yfrac + ystep);
}
// Pack the four results back into 16 bytes and store them.
SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
data[index * 16].store_unaligned_vec16ub(color);
stack_index.store(index.add(SSAInt(1), true, true));
loop.end_block();
}
return sseLength;
}
// Emits the single-pixel loop that processes pixels `start` .. count-1 of
// the span. It continues from the texture coordinates and view position
// left in the stack slots (by Loop4x when called from LoopFilter).
void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64)
{
stack_index.store(start);
{
SSAForBlock loop;
SSAInt index = stack_index.load();
// Shade() reads viewpos_x for the dynamic light calculation.
viewpos_x = stack_viewpos_x.load();
loop.loop_block(index < count);
SSAInt xfrac = stack_xfrac.load();
SSAInt yfrac = stack_yfrac.load();
SSAVec4i bgcolor = data[index * 4].load_vec4ub(false);
SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant);
data[index * 4].store_vec4ub(color);
// Advance view position, pixel index and texture coordinates.
stack_viewpos_x.store(viewpos_x + step_viewpos_x);
stack_index.store(index.add(SSAInt(1), true, true));
stack_xfrac.store(xfrac + xstep);
stack_yfrac.store(yfrac + ystep);
loop.end_block();
}
}
// Samples the span texture at (xfrac, yfrac). isNearestFilter and is64x64
// are generation-time switches; the 64x64 case uses constant shifts and
// masks instead of the values derived from xbits/ybits.
SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64)
{
	if (!isNearestFilter)
	{
		// SampleLinear expects the shift amounts (32 - bits), not the bit counts.
		if (is64x64)
			return SampleLinear(source, xfrac, yfrac, SSAInt(26), SSAInt(26));
		return SampleLinear(source, xfrac, yfrac, 32 - xbits, 32 - ybits);
	}

	if (is64x64)
	{
		SSAInt texel = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
		return source[texel * 4].load_vec4ub(true);
	}

	SSAInt texel = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
	return source[texel * 4].load_vec4ub(true);
}
// Bilinear lookup in a column-major texture with wrap-around addressing.
// NOTE: despite their names, the xbits/ybits parameters are the right-shift
// amounts that turn xfrac/yfrac into texel coordinates (callers pass
// 32 - bits); the texture size in bits is recovered as 32 - shift.
SSAVec4i DrawSpanCodegen::SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits)
{
	SSAInt size_bits_x = (32 - xbits);
	SSAInt size_bits_y = (32 - ybits);
	SSAInt wrap_x = (SSAInt(1) << size_bits_x) - 1;
	SSAInt wrap_y = (SSAInt(1) << size_bits_y) - 1;
	SSAInt tx = xfrac >> xbits;
	SSAInt ty = yfrac >> ybits;

	// Wrapped column offsets and row indices of the 2x2 neighbourhood.
	SSAInt column0 = (tx & wrap_x) << size_bits_y;
	SSAInt column1 = ((tx + 1) & wrap_x) << size_bits_y;
	SSAInt row0 = ty & wrap_y;
	SSAInt row1 = (ty + 1) & wrap_y;

	SSAVec4i texel00 = texture[(row0 + column0) * 4].load_vec4ub(true);
	SSAVec4i texel01 = texture[(row1 + column0) * 4].load_vec4ub(true);
	SSAVec4i texel10 = texture[(row0 + column1) * 4].load_vec4ub(true);
	SSAVec4i texel11 = texture[(row1 + column1) * 4].load_vec4ub(true);

	// 4-bit blend weights taken from the bits just below the texel index.
	SSAInt weight_x1 = (xfrac >> (xbits - 4)) & 15;
	SSAInt weight_y1 = (yfrac >> (ybits - 4)) & 15;
	SSAInt weight_y0 = 16 - weight_y1;
	SSAInt weight_x0 = 16 - weight_x1;

	SSAVec4i sum = texel00 * (weight_y0 * weight_x0)
		+ texel01 * (weight_y1 * weight_x0)
		+ texel10 * (weight_y0 * weight_x1)
		+ texel11 * (weight_y1 * weight_x1)
		+ 127;
	return sum >> 8;
}
// Shades a texel: applies the sector light (simple or advanced, chosen at
// generation time by isSimpleShade) and then adds the contribution of all
// dynamic lights, which is accumulated by a generated loop over dynlights
// and modulated by the unshaded texel colour.
// Reads the member viewpos_x, which the calling loop must have set for the
// current pixel.
SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{
SSAVec4i c;
if (isSimpleShade)
c = shade_bgra_simple(fg, light);
else
c = shade_bgra_advanced(fg, light, shade_constants);
// Accumulate the dynamic light colour in a stack slot across the loop.
stack_lit_color.store(SSAVec4i(0));
stack_light_index.store(SSAInt(0));
SSAForBlock block;
SSAInt light_index = stack_light_index.load();
SSAVec4i lit_color = stack_lit_color.load();
block.loop_block(light_index < num_dynlights);
{
// Fields of the light entry, by index: colour, x, y, z, 1/radius.
SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true);
SSAFloat light_x = dynlights[light_index][1].load(true);
SSAFloat light_y = dynlights[light_index][2].load(true);
SSAFloat light_z = dynlights[light_index][3].load(true);
SSAFloat light_rcp_radius = dynlights[light_index][4].load(true);
// L = light-pos
// dist = sqrt(dot(L, L))
// attenuation = 1 - MIN(dist * (1/radius), 1)
// NOTE(review): the y field is used directly as the squared y/z distance,
// so the caller presumably stores a precomputed value there — confirm.
SSAFloat Lyz2 = light_y; // L.y*L.y + L.z*L.z
SSAFloat Lx = light_x - viewpos_x;
SSAFloat dist2 = Lyz2 + Lx * Lx;
SSAFloat rcp_dist = SSAFloat::rsqrt(dist2);
// dist = dist2 / sqrt(dist2), computed via the reciprocal square root.
SSAFloat dist = dist2 * rcp_dist;
SSAFloat distance_attenuation = SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f));
// The simple light type
SSAFloat simple_attenuation = distance_attenuation;
// The point light type
// diffuse = dot(N,L) * attenuation
SSAFloat point_attenuation = light_z * rcp_dist * distance_attenuation;
// A z field of exactly zero selects the simple light type.
SSAInt attenuation = SSAInt((light_z == SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true);
SSAVec4i contribution = (light_color * attenuation) >> 8;
stack_lit_color.store(lit_color + contribution);
stack_light_index.store(light_index + 1);
}
block.end_block();
// Dynamic light is modulated by the unshaded texel and added on top.
return c + ((stack_lit_color.load() * fg) >> 8);
}
// Combines the shaded foreground colour with the background according to
// the span variant (a generation-time choice). Opaque and any unlisted
// variant simply copy the foreground.
SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant)
{
	if (variant == DrawSpanVariant::Masked)
		return blend_alpha_blend(fg, bg);

	if (variant == DrawSpanVariant::Translucent || variant == DrawSpanVariant::AddClamp)
		return blend_add(fg, bg, srcalpha, destalpha);

	if (variant == DrawSpanVariant::MaskedTranslucent || variant == DrawSpanVariant::MaskedAddClamp)
		return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha));

	return blend_copy(fg);
}

View file

@ -1,83 +0,0 @@
/*
** DrawSpan code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "drawercodegen.h"
// Selects which span drawer DrawSpanCodegen generates. In
// DrawSpanCodegen::Blend, Opaque copies the texel, Masked alpha-blends it,
// Translucent/AddClamp use an additive blend, and the Masked* translucent
// variants additionally derive the background weight from the texel alpha.
// NOTE: enumerator order determines the underlying values.
enum class DrawSpanVariant
{
Opaque,
Masked,
Translucent,
MaskedTranslucent,
AddClamp,
MaskedAddClamp
};
// Code generator for the horizontal span drawers.
class DrawSpanCodegen : public DrawerCodegen
{
public:
// Emits the drawer for `variant`; `args` is the SSA value for the drawer's
// argument structure.
void Generate(DrawSpanVariant variant, SSAValue args);
private:
// Each level emits both sides of one runtime choice (filter, 64x64 texture).
void LoopShade(DrawSpanVariant variant, bool isSimpleShade);
void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter);
// Four-pixels-per-iteration loop; returns the number of iterations.
SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
// Single-pixel loop for the pixels from `start` to the end of the span.
void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
// NOTE: xbits/ybits here are shift amounts (callers pass 32 - bits).
SSAVec4i SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade);
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant);
// Loop-carried values kept in stack slots.
SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac, stack_light_index;
SSAStack<SSAVec4i> stack_lit_color;
SSAStack<SSAFloat> stack_viewpos_x;
// Drawer arguments.
SSAUBytePtr destorg;
SSAUBytePtr source;
SSAInt destpitch;
SSAInt xstep;
SSAInt ystep;
SSAInt x1;
SSAInt x2;
SSAInt y;
SSAInt xbits;
SSAInt ybits;
SSAInt light;
SSAInt srcalpha;
SSAInt destalpha;
// Values derived in Generate(): span length, destination address of the
// first pixel and the nearest-filter addressing constants.
SSAInt count;
SSAUBytePtr data;
SSAInt yshift;
SSAInt xshift;
SSAInt xmask;
// Runtime flags selecting between the generated code paths.
SSABool is_64x64;
SSABool is_simple_shade;
SSABool is_nearest_filter;
SSAShadeConstants shade_constants;
// Dynamic light inputs: view position at the span start and its per-pixel step.
SSAFloat start_viewpos_x, step_viewpos_x;
SSAValue dynlights; // TriLight*
SSAInt num_dynlights;
// View position of the pixel currently being shaded (set by the loops).
SSAFloat viewpos_x;
};

View file

@ -1,697 +0,0 @@
/*
** DrawTriangle code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "fixedfunction/drawtrianglecodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
// Emits a complete triangle drawer for one (blendmode, truecolor, colorfill)
// combination: loads the arguments, derives the gradients, then emits the
// full-span pass followed by the partial-block pass.
void DrawTriangleCodegen::Generate(TriBlendMode blendmode, bool truecolor, bool colorfill, SSAValue args, SSAValue thread_data)
{
    this->blendmode = blendmode;
    this->truecolor = truecolor;
    this->colorfill = colorfill;
    if (truecolor)
        pixelsize = 4;
    else
        pixelsize = 1;

    LoadArgs(args, thread_data);
    CalculateGradients();

    if (!truecolor)
    {
        // Palette output: the flag is only ever forwarded to ProcessPixel32,
        // so a single copy of the passes is enough.
        DrawFullSpans(true);
        DrawPartialBlocks(true);
        return;
    }

    // True color: emit both shading flavours and let the generated code pick
    // one at run time from is_simple_shade.
    SSAIfBlock shadeBranch;
    shadeBranch.if_block(is_simple_shade);
    DrawFullSpans(true);
    DrawPartialBlocks(true);
    shadeBranch.else_block();
    DrawFullSpans(false);
    DrawPartialBlocks(false);
    shadeBranch.end_block();
}
// Emits the code that draws every entry of fullSpans[0 .. numSpans).
// A span record supplies x (field 0), y (field 1) and a length (field 2); the
// emitted code walks 8 rows per span and `length` blocks of 8 pixels per row.
//
// Values that change between iterations of an emitted loop live in the stack_*
// slots: they are stored before the loop, loaded right before loop_block(),
// and stored back at the end of the loop body.
//
// isSimpleShade is only forwarded to ProcessPixel32 (true-color path).
void DrawTriangleCodegen::DrawFullSpans(bool isSimpleShade)
{
// ---- outer loop: one iteration per span ----
stack_i.store(SSAInt(0));
SSAForBlock loop;
SSAInt i = stack_i.load();
loop.loop_block(i < numSpans, 0);
{
// Span position is stored as 16-bit values and zero-extended to int.
SSAInt spanX = SSAShort(fullSpans[i][0].load(true).v).zext_int();
SSAInt spanY = SSAShort(fullSpans[i][1].load(true).v).zext_int();
SSAInt spanLength = fullSpans[i][2].load(true);
SSAInt width = spanLength;
SSAInt height = SSAInt(8);
// Initial row state: destination pointer plus W and the varyings evaluated
// at the span origin, relative to (startX, startY).
stack_dest.store(destOrg[(spanX + spanY * pitch) * pixelsize]);
stack_posYW.store(start.W + gradientX.W * (spanX - startX) + gradientY.W * (spanY - startY));
for (int j = 0; j < TriVertex::NumVarying; j++)
stack_posYVarying[j].store(start.Varying[j] + gradientX.Varying[j] * (spanX - startX) + gradientY.Varying[j] * (spanY - startY));
// ---- middle loop: one iteration per row of the span ----
stack_y.store(SSAInt(0));
SSAForBlock loop_y;
SSAInt y = stack_y.load();
SSAUBytePtr dest = stack_dest.load();
SSAStepVariables blockPosY;
blockPosY.W = stack_posYW.load();
for (int j = 0; j < TriVertex::NumVarying; j++)
blockPosY.Varying[j] = stack_posYVarying[j].load();
loop_y.loop_block(y < height, 0);
{
// Initial state for the row: start from the row's left edge.
stack_posXW.store(blockPosY.W);
for (int j = 0; j < TriVertex::NumVarying; j++)
stack_posXVarying[j].store(blockPosY.Varying[j]);
// Perspective divide at the left edge; 0x01000000 scales the result into
// the fixed-point range used by the samplers.
SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosY.W;
// Light at the left edge:
//   FRACUNIT - clamp(shade - min(24/32, globVis * W), 0, 31/32) * FRACUNIT
stack_lightpos.store(FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosY.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true));
for (int j = 0; j < TriVertex::NumVarying; j++)
stack_varyingPos[j].store(SSAInt(blockPosY.Varying[j] * rcpW, false));
// ---- inner loop: one iteration per 8-pixel block ----
stack_x.store(SSAInt(0));
SSAForBlock loop_x;
SSAInt x = stack_x.load();
SSAStepVariables blockPosX;
blockPosX.W = stack_posXW.load();
for (int j = 0; j < TriVertex::NumVarying; j++)
blockPosX.Varying[j] = stack_posXVarying[j].load();
SSAInt lightpos = stack_lightpos.load();
SSAInt varyingPos[TriVertex::NumVarying];
for (int j = 0; j < TriVertex::NumVarying; j++)
varyingPos[j] = stack_varyingPos[j].load();
loop_x.loop_block(x < width, 0);
{
// Advance W and the varyings to the right edge of this block (8 pixels on).
blockPosX.W = blockPosX.W + gradientX.W * 8.0f;
for (int j = 0; j < TriVertex::NumVarying; j++)
blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f;
// The divide by W happens only at block edges; inside the block the
// varyings are stepped linearly in fixed point.
rcpW = SSAFloat((float)0x01000000) / blockPosX.W;
SSAInt varyingStep[TriVertex::NumVarying];
for (int j = 0; j < TriVertex::NumVarying; j++)
{
SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false);
varyingStep[j] = (nextPos - varyingPos[j]) / 8;
}
// Light is interpolated across the block the same way.
SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true);
SSAInt lightstep = (lightnext - lightpos) / 8;
if (truecolor)
{
// True color: two groups of four pixels; each group is one 16-byte
// unaligned load, processed per pixel, and one 16-byte store.
// This C++ loop runs at generation time, so the code is emitted unrolled.
for (int ix = 0; ix < 8; ix += 4)
{
SSAUBytePtr destptr = dest[(x * 8 + ix) * 4];
SSAVec16ub pixels16 = destptr.load_unaligned_vec16ub(false);
// Widen 16 bytes -> 2 x 8 shorts -> 4 x 4 ints (one SSAVec4i per pixel).
SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16);
SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16);
SSAVec4i pixels[4] =
{
SSAVec4i::extendlo(pixels8lo),
SSAVec4i::extendhi(pixels8lo),
SSAVec4i::extendlo(pixels8hi),
SSAVec4i::extendhi(pixels8hi)
};
for (int sse = 0; sse < 4; sse++)
{
// A fixed light overrides the interpolated per-pixel light.
currentlight = is_fixed_light.select(light, lightpos >> 8);
pixels[sse] = ProcessPixel32(pixels[sse], varyingPos, isSimpleShade);
for (int j = 0; j < TriVertex::NumVarying; j++)
varyingPos[j] = varyingPos[j] + varyingStep[j];
lightpos = lightpos + lightstep;
}
// Pack the four processed pixels back into 16 bytes and write them out.
destptr.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3])));
}
}
else
{
// Palette output: one byte per pixel, eight pixels emitted unrolled.
for (int ix = 0; ix < 8; ix++)
{
currentlight = is_fixed_light.select(light, lightpos >> 8);
// Pick one of 32 colormaps (capped at 31); each occupies 256 bytes of Colormaps.
SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31));
currentcolormap = Colormaps[colormapindex << 8];
SSAUBytePtr destptr = dest[(x * 8 + ix)];
destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte());
for (int j = 0; j < TriVertex::NumVarying; j++)
varyingPos[j] = varyingPos[j] + varyingStep[j];
lightpos = lightpos + lightstep;
}
}
// Write the state back for the next block of this row.
for (int j = 0; j < TriVertex::NumVarying; j++)
stack_varyingPos[j].store(varyingPos[j]);
stack_lightpos.store(lightpos);
stack_posXW.store(blockPosX.W);
for (int j = 0; j < TriVertex::NumVarying; j++)
stack_posXVarying[j].store(blockPosX.Varying[j]);
stack_x.store(x + 1);
}
loop_x.end_block();
// Step down one row: advance W, the varyings and the destination pointer.
stack_posYW.store(blockPosY.W + gradientY.W);
for (int j = 0; j < TriVertex::NumVarying; j++)
stack_posYVarying[j].store(blockPosY.Varying[j] + gradientY.Varying[j]);
stack_dest.store(dest[pitch * pixelsize]);
stack_y.store(y + 1);
}
loop_y.end_block();
stack_i.store(i + 1);
}
loop.end_block();
}
// Emits the code that draws every entry of partialBlocks[0 .. numBlocks).
// A block record supplies x (field 0), y (field 1) and two coverage masks
// (fields 2 and 3). Each mask covers 4 rows of 8 pixels, so a block is 8x8
// pixels; a pixel is written only when its coverage bit is set.
//
// Only the loop over blocks is an emitted loop. The mask/row/pixel loops are
// plain C++ loops that run at generation time, so the per-pixel code is
// emitted fully unrolled.
//
// isSimpleShade is only forwarded to ProcessPixel32 (true-color path).
void DrawTriangleCodegen::DrawPartialBlocks(bool isSimpleShade)
{
stack_i.store(SSAInt(0));
SSAForBlock loop;
SSAInt i = stack_i.load();
loop.loop_block(i < numBlocks, 0);
{
// Block position is stored as 16-bit values and zero-extended to int.
SSAInt blockX = SSAShort(partialBlocks[i][0].load(true).v).zext_int();
SSAInt blockY = SSAShort(partialBlocks[i][1].load(true).v).zext_int();
SSAInt mask0 = partialBlocks[i][2].load(true);
SSAInt mask1 = partialBlocks[i][3].load(true);
SSAUBytePtr dest = destOrg[(blockX + blockY * pitch) * pixelsize];
// W and the varyings at the block's top-left corner, relative to (startX, startY).
SSAStepVariables blockPosY;
blockPosY.W = start.W + gradientX.W * (blockX - startX) + gradientY.W * (blockY - startY);
for (int j = 0; j < TriVertex::NumVarying; j++)
blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (blockX - startX) + gradientY.Varying[j] * (blockY - startY);
// mask0 covers the first four rows, mask1 the last four.
for (int maskNum = 0; maskNum < 2; maskNum++)
{
SSAInt mask = (maskNum == 0) ? mask0 : mask1;
for (int y = 0; y < 4; y++)
{
// Left edge of the row: perspective divide and light.
SSAStepVariables blockPosX = blockPosY;
SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosX.W;
SSAInt varyingPos[TriVertex::NumVarying];
for (int j = 0; j < TriVertex::NumVarying; j++)
varyingPos[j] = SSAInt(blockPosX.Varying[j] * rcpW, false);
SSAInt lightpos = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true);
// Right edge of the row (8 pixels on): second perspective divide, then
// linear fixed-point steps for the varyings and the light in between.
blockPosX.W = blockPosX.W + gradientX.W * 8.0f;
for (int j = 0; j < TriVertex::NumVarying; j++)
blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f;
rcpW = SSAFloat((float)0x01000000) / blockPosX.W;
SSAInt varyingStep[TriVertex::NumVarying];
for (int j = 0; j < TriVertex::NumVarying; j++)
{
SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false);
varyingStep[j] = (nextPos - varyingPos[j]) / 8;
}
SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f / 32.0f), globVis * blockPosX.W), SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true);
SSAInt lightstep = (lightnext - lightpos) / 8;
for (int x = 0; x < 8; x++)
{
// Coverage bit for pixel (x, y): bit 31 is the first pixel of the first
// row of this mask, and the bit index decreases left-to-right, top-to-bottom.
SSABool covered = !((mask & (1 << (31 - y * 8 - x))) == SSAInt(0));
SSAIfBlock branch;
branch.if_block(covered);
{
if (truecolor)
{
// A fixed light overrides the interpolated per-pixel light.
currentlight = is_fixed_light.select(light, lightpos >> 8);
SSAUBytePtr destptr = dest[x * 4];
destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos, isSimpleShade));
}
else
{
currentlight = is_fixed_light.select(light, lightpos >> 8);
// Pick one of 32 colormaps (capped at 31); each occupies 256 bytes of Colormaps.
SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31));
currentcolormap = Colormaps[colormapindex << 8];
SSAUBytePtr destptr = dest[x];
destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte());
}
}
branch.end_block();
// The interpolators advance for every pixel, covered or not.
for (int j = 0; j < TriVertex::NumVarying; j++)
varyingPos[j] = varyingPos[j] + varyingStep[j];
lightpos = lightpos + lightstep;
}
// Step down one row.
blockPosY.W = blockPosY.W + gradientY.W;
for (int j = 0; j < TriVertex::NumVarying; j++)
blockPosY.Varying[j] = blockPosY.Varying[j] + gradientY.Varying[j];
dest = dest[pitch * pixelsize];
}
}
stack_i.store(i + 1);
}
loop.end_block();
}
// Emits a translated true-color sample: a palette index is looked up in the
// translation table, whose entries are 4 bytes wide.
//   varying - interpolated varyings; [0] and [1] are the texture coordinates.
// Returns the 4-component translated color.
SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying)
{
    // Color fill never reads the texture, so decide first and avoid emitting
    // texture-address arithmetic that nothing would use (Sample32 does the same).
    if (colorfill)
        return translation[color * 4].load_vec4ub(true);

    SSAInt ufrac = varying[0] << 8;
    SSAInt vfrac = varying[1] << 8;
    // Scale the upper 16 bits of each coordinate by the texture size.
    SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16;
    SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
    // Texels are addressed column by column: u selects the column, v the row.
    SSAInt uvoffset = upos * textureHeight + vpos;
    return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true);
}
// Emits a translated palette sample: a palette index is mapped through the
// translation table, whose entries are 1 byte wide here.
//   varying - interpolated varyings; [0] and [1] are the texture coordinates.
// Returns the translated palette index, zero-extended to int.
SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying)
{
    // Color fill never reads the texture, so decide first and avoid emitting
    // texture-address arithmetic that nothing would use (Sample32 does the same).
    if (colorfill)
        return translation[color].load(true).zext_int();

    SSAInt ufrac = varying[0] << 8;
    SSAInt vfrac = varying[1] << 8;
    // Scale the upper 16 bits of each coordinate by the texture size.
    SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16;
    SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
    // Texels are addressed column by column: u selects the column, v the row.
    SSAInt uvoffset = upos * textureHeight + vpos;
    return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int();
}
// Emits a true-color texture sample.
//   varying - interpolated varyings; [0] and [1] are the texture coordinates.
// In color-fill mode the texture is not read at all: the fill color is
// unpacked and returned. Otherwise the nearest texel is returned.
// The bilinear path is disabled (kept below as a comment), so the `linear`
// local is currently declared but never used.
SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying)
{
if (colorfill)
return SSAVec4i::unpack(color);
SSAInt ufrac = varying[0] << 8;
SSAInt vfrac = varying[1] << 8;
SSAVec4i nearest;
SSAVec4i linear;
{
// Scale the upper 16 bits of each coordinate by the texture size.
SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16;
SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
// Texels are addressed column by column, 4 bytes per texel.
SSAInt uvoffset = upos * textureHeight + vpos;
nearest = texturePixels[uvoffset * 4].load_vec4ub(true);
}
return nearest;
// Disabled bilinear filter, retained for reference. It is unreachable and
// refers to AffineLinear, which is not declared in this class.
/*
{
SSAInt uone = (SSAInt(0x01000000) / textureWidth) << 8;
SSAInt vone = (SSAInt(0x01000000) / textureHeight) << 8;
ufrac = ufrac - (uone >> 1);
vfrac = vfrac - (vone >> 1);
SSAInt frac_x0 = (ufrac >> FRACBITS) * textureWidth;
SSAInt frac_x1 = ((ufrac + uone) >> FRACBITS) * textureWidth;
SSAInt frac_y0 = (vfrac >> FRACBITS) * textureHeight;
SSAInt frac_y1 = ((vfrac + vone) >> FRACBITS) * textureHeight;
SSAInt x0 = frac_x0 >> FRACBITS;
SSAInt x1 = frac_x1 >> FRACBITS;
SSAInt y0 = frac_y0 >> FRACBITS;
SSAInt y1 = frac_y1 >> FRACBITS;
SSAVec4i p00 = texturePixels[(x0 * textureHeight + y0) * 4].load_vec4ub(true);
SSAVec4i p01 = texturePixels[(x0 * textureHeight + y1) * 4].load_vec4ub(true);
SSAVec4i p10 = texturePixels[(x1 * textureHeight + y0) * 4].load_vec4ub(true);
SSAVec4i p11 = texturePixels[(x1 * textureHeight + y1) * 4].load_vec4ub(true);
SSAInt inv_b = (frac_x1 >> (FRACBITS - 4)) & 15;
SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
SSAInt a = 16 - inv_a;
SSAInt b = 16 - inv_b;
linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
// // Min filter = linear, Mag filter = nearest:
// AffineLinear = (gradVaryingX[0] / AffineW) > SSAFloat(1.0f) || (gradVaryingX[0] / AffineW) < SSAFloat(-1.0f);
return AffineLinear.select(linear, nearest);
*/
}
// Emits a palette texture sample.
//   varying - interpolated varyings; [0] and [1] are the texture coordinates.
// Returns the fill color in color-fill mode, otherwise the texel byte
// zero-extended to int.
SSAInt DrawTriangleCodegen::Sample8(SSAInt *varying)
{
    // Color fill never reads the texture, so decide first and avoid emitting
    // texture-address arithmetic that nothing would use (Sample32 does the same).
    if (colorfill)
        return color;

    SSAInt ufrac = varying[0] << 8;
    SSAInt vfrac = varying[1] << 8;
    // Scale the upper 16 bits of each coordinate by the texture size.
    SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16;
    SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
    // Texels are addressed column by column: u selects the column, v the row.
    SSAInt uvoffset = upos * textureHeight + vpos;
    return texturePixels[uvoffset].load(true).zext_int();
}
// Emits the lookup of palette index `c` in the currently selected colormap
// and returns the shaded index zero-extended to int.
SSAInt DrawTriangleCodegen::Shade8(SSAInt c)
{
    SSAUBytePtr shadedEntry = currentcolormap[c];
    return shadedEntry.load(true).zext_int();
}
// Emits the true-color light shading of `fg`.
//   fg            - unshaded color.
//   light         - light level to shade with. This parameter hides the `light`
//                   member inside the function; every caller in this file
//                   passes `currentlight`.
//   isSimpleShade - selects the simple shader at generation time; otherwise
//                   the advanced shader using shade_constants is emitted.
// Returns the shaded color.
SSAVec4i DrawTriangleCodegen::Shade32(SSAVec4i fg, SSAInt light, bool isSimpleShade)
{
    // Honour the argument instead of reading the currentlight member directly,
    // so the signature matches what the function actually does.
    if (isSimpleShade)
        return shade_bgra_simple(fg, light);
    else
        return shade_bgra_advanced(fg, light, shade_constants);
}
// Emits sampling, shading and blending for one true-color pixel.
//   bg            - current destination pixel.
//   varying       - interpolated varyings, forwarded to the samplers.
//   isSimpleShade - forwarded to Shade32.
// Returns the color to write back.
//
// `blendmode` is a generation-time value, so only the selected case ends up
// in the generated code. Unlisted modes fall through to Copy.
// Shading always uses `currentlight`, which the caller sets before each pixel.
SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying, bool isSimpleShade)
{
SSAVec4i fg;
SSAVec4i output;
switch (blendmode)
{
default:
case TriBlendMode::Copy:
fg = Sample32(varying);
output = blend_copy(Shade32(fg, currentlight, isSimpleShade));
break;
case TriBlendMode::AlphaBlend:
fg = Sample32(varying);
output = blend_alpha_blend(Shade32(fg, currentlight, isSimpleShade), bg);
break;
// AddSolid passes destalpha unchanged; Add/Sub/RevSub derive the background
// alpha from the unshaded sample via calc_blend_bgalpha.
case TriBlendMode::AddSolid:
fg = Sample32(varying);
output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, destalpha);
break;
case TriBlendMode::Add:
fg = Sample32(varying);
output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
break;
case TriBlendMode::Sub:
fg = Sample32(varying);
output = blend_sub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
break;
case TriBlendMode::RevSub:
fg = Sample32(varying);
output = blend_revsub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
break;
// Stencil: the shaded fill color is drawn, with component 3 of the texture
// sample passed as the second blend_stencil argument.
case TriBlendMode::Stencil:
fg = Sample32(varying);
output = blend_stencil(Shade32(SSAVec4i::unpack(color), currentlight, isSimpleShade), fg[3], bg, srcalpha, destalpha);
break;
// Shaded: as Stencil, but the second argument is the 8-bit sample.
case TriBlendMode::Shaded:
output = blend_stencil(Shade32(SSAVec4i::unpack(color), currentlight, isSimpleShade), Sample8(varying), bg, srcalpha, destalpha);
break;
// Translate* modes: same blends as above, sampled through the translation table.
case TriBlendMode::TranslateCopy:
fg = TranslateSample32(varying);
output = blend_copy(Shade32(fg, currentlight, isSimpleShade));
break;
case TriBlendMode::TranslateAlphaBlend:
fg = TranslateSample32(varying);
output = blend_alpha_blend(Shade32(fg, currentlight, isSimpleShade), bg);
break;
case TriBlendMode::TranslateAdd:
fg = TranslateSample32(varying);
output = blend_add(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
break;
case TriBlendMode::TranslateSub:
fg = TranslateSample32(varying);
output = blend_sub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
break;
case TriBlendMode::TranslateRevSub:
fg = TranslateSample32(varying);
output = blend_revsub(Shade32(fg, currentlight, isSimpleShade), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
break;
case TriBlendMode::AddSrcColorOneMinusSrcColor:
fg = Sample32(varying);
output = blend_add_srccolor_oneminussrccolor(Shade32(fg, currentlight, isSimpleShade), bg);
break;
// Skycap: no light shading; the sample is faded towards the fill color
// according to varying[1] (see FadeOut).
case TriBlendMode::Skycap:
fg = Sample32(varying);
output = FadeOut(varying[1], fg);
break;
}
return output;
}
// Emits the conversion of a palette index to a 4-component color: reads the
// 4-byte BaseColors entry for `index` and forces component 3 to 255.
SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index)
{
    SSAVec4i paletteColor = BaseColors[index * 4].load_vec4ub(true);
    return paletteColor.insert(3, 255);
}
// Emits the conversion of a 4-component color to a palette index.
// Components 0..2 are clamped to 0..255, reduced to 6 bits each, and used to
// index the RGB256k table (component 2 is the most significant part of the
// index, component 0 the least significant).
SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c)
{
    SSAInt red = SSAInt::clamp(c[0], SSAInt(0), SSAInt(255));
    SSAInt green = SSAInt::clamp(c[1], SSAInt(0), SSAInt(255));
    SSAInt blue = SSAInt::clamp(c[2], SSAInt(0), SSAInt(255));

    SSAInt blue6 = blue >> 2;
    SSAInt green6 = green >> 2;
    SSAInt red6 = red >> 2;
    SSAInt tableIndex = (blue6 * 64 + green6) * 64 + red6;

    return RGB256k[tableIndex].load(true).zext_int();
}
// Emits sampling, shading and blending for one palette (8-bit) pixel.
//   bg      - current destination palette index.
//   varying - interpolated varyings, forwarded to the samplers.
// Returns the palette index to write back.
//
// `blendmode` is a generation-time value, so only the selected case ends up
// in the generated code. Unlisted modes fall through to Copy.
// Blends that need color arithmetic convert to a color with ToBgra, blend,
// and convert back with ToPal8. In most modes a sampled palette index of 0
// leaves the background untouched.
// Shading uses `currentcolormap`, which the caller selects before each pixel.
SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying)
{
SSAVec4i fg;
// NOTE: alpha and inv_alpha are declared but not used by any case below.
SSAInt alpha, inv_alpha;
SSAInt output;
SSAInt palindex;
switch (blendmode)
{
default:
case TriBlendMode::Copy:
// Copy is the only sampling mode without the index-0 transparency test.
output = Shade8(Sample8(varying));
break;
case TriBlendMode::AlphaBlend:
palindex = Sample8(varying);
output = Shade8(palindex);
output = (palindex == SSAInt(0)).select(bg, output);
break;
// AddSolid passes destalpha unchanged; Add/Sub/RevSub derive the background
// alpha from the shaded foreground color via calc_blend_bgalpha.
case TriBlendMode::AddSolid:
palindex = Sample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha));
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::Add:
palindex = Sample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::Sub:
palindex = Sample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::RevSub:
palindex = Sample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
output = (palindex == SSAInt(0)).select(bg, output);
break;
// Stencil: the shaded fill color is drawn; the second blend_stencil argument
// is 0 where the sampled index is 0 and 256 everywhere else.
case TriBlendMode::Stencil:
output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(varying) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha));
break;
// Shaded: as Stencil, but the sampled value itself is the second argument.
case TriBlendMode::Shaded:
palindex = Sample8(varying);
output = ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha));
break;
// Translate* modes: same blends as above, sampled through the translation
// table. Unlike Copy, TranslateCopy does apply the index-0 transparency test.
case TriBlendMode::TranslateCopy:
palindex = TranslateSample8(varying);
output = Shade8(palindex);
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::TranslateAlphaBlend:
palindex = TranslateSample8(varying);
output = Shade8(palindex);
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::TranslateAdd:
palindex = TranslateSample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::TranslateSub:
palindex = TranslateSample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::TranslateRevSub:
palindex = TranslateSample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
output = (palindex == SSAInt(0)).select(bg, output);
break;
case TriBlendMode::AddSrcColorOneMinusSrcColor:
palindex = Sample8(varying);
fg = ToBgra(Shade8(palindex));
output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg)));
output = (palindex == SSAInt(0)).select(bg, output);
break;
// Skycap: no colormap shading; the sample is faded towards the fill color
// according to varying[1] (see FadeOut).
case TriBlendMode::Skycap:
fg = ToBgra(Sample8(varying));
output = ToPal8(FadeOut(varying[1], fg));
break;
}
return output;
}
// Emits a fade of `fg` towards the unpacked fill color (`color` member).
//   frac - fixed-point position; alpha rises from 0 as frac moves up from 0
//          and as it moves down from (2 << 24), and is capped at 256.
//   fg   - color to fade.
// Returns the mixed color with component 3 forced to 255.
SSAVec4i DrawTriangleCodegen::FadeOut(SSAInt frac, SSAVec4i fg)
{
    const int start_fade = 2; // How fast it should fade out
    const int fade_shift = 16 - start_fade;

    SSAInt from_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(fade_shift), SSAInt(256)), SSAInt(0));
    SSAInt from_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(fade_shift), SSAInt(256)), SSAInt(0));

    // Use whichever edge is closer.
    SSAInt alpha = SSAInt::MIN(from_top, from_bottom);
    SSAInt inv_alpha = 256 - alpha;

    SSAVec4i mixed = (fg * alpha + SSAVec4i::unpack(color) * inv_alpha) / 256;
    return mixed.insert(3, 255);
}
void DrawTriangleCodegen::CalculateGradients()
{
gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
start.W = v1.w + gradientX.W * (SSAFloat(startX) - v1.x) + gradientY.W * (SSAFloat(startY) - v1.y);
for (int i = 0; i < TriVertex::NumVarying; i++)
{
gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (SSAFloat(startX) - v1.x) + gradientY.Varying[i] * (SSAFloat(startY) - v1.y);
}
shade = (64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f) / 32.0f;
}
// Emits the loads that copy the drawer arguments into member SSA values.
//   args        - pointer to the argument struct.
//   thread_data - pointer to the per-thread data struct.
// The numeric field indices below must match the layout of those two structs,
// which are declared outside this file - keep them in sync.
// Reads the `truecolor` member, so it must be set before this is called
// (Generate does so).
void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data)
{
destOrg = args[0][0].load(true);
pitch = args[0][1].load(true);
// Fields 2..4 are pointers to the three vertices.
v1 = LoadTriVertex(args[0][2].load(true));
v2 = LoadTriVertex(args[0][3].load(true));
v3 = LoadTriVertex(args[0][4].load(true));
texturePixels = args[0][9].load(true);
textureWidth = args[0][10].load(true);
textureHeight = args[0][11].load(true);
translation = args[0][12].load(true);
// Field 13 is a pointer to the uniforms block.
LoadUniforms(args[0][13].load(true));
// The palette lookup tables are only loaded for palette output.
if (!truecolor)
{
Colormaps = args[0][20].load(true);
RGB256k = args[0][21].load(true);
BaseColors = args[0][22].load(true);
}
// Span and block lists consumed by DrawFullSpans / DrawPartialBlocks.
fullSpans = thread_data[0][5].load(true);
partialBlocks = thread_data[0][6].load(true);
numSpans = thread_data[0][7].load(true);
numBlocks = thread_data[0][8].load(true);
// Reference position that start.W / start.Varying are evaluated at.
startX = thread_data[0][9].load(true);
startY = thread_data[0][10].load(true);
}
// Emits the loads for one vertex: fields 0..3 of the pointed-to struct are
// x, y, z and w, followed by TriVertex::NumVarying varyings from field 4 on.
SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr)
{
    const int firstVaryingField = 4;

    SSATriVertex vertex;
    vertex.x = ptr[0][0].load(true);
    vertex.y = ptr[0][1].load(true);
    vertex.z = ptr[0][2].load(true);
    vertex.w = ptr[0][3].load(true);
    for (int n = 0; n < TriVertex::NumVarying; n++)
    {
        vertex.varying[n] = ptr[0][firstVaryingField + n].load(true);
    }
    return vertex;
}
// Emits the loads that copy the uniforms block into member SSA values.
//   uniforms - pointer to the uniforms struct.
// The numeric field indices must match the layout of that struct, which is
// declared outside this file - keep them in sync. Field 1 is not read here.
void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms)
{
light = uniforms[0][0].load(true);
color = uniforms[0][2].load(true);
srcalpha = uniforms[0][3].load(true);
destalpha = uniforms[0][4].load(true);
// Shade constants are stored as 16-bit values.
SSAShort light_alpha = uniforms[0][5].load(true);
SSAShort light_red = uniforms[0][6].load(true);
SSAShort light_green = uniforms[0][7].load(true);
SSAShort light_blue = uniforms[0][8].load(true);
SSAShort fade_alpha = uniforms[0][9].load(true);
SSAShort fade_red = uniforms[0][10].load(true);
SSAShort fade_green = uniforms[0][11].load(true);
SSAShort fade_blue = uniforms[0][12].load(true);
SSAShort desaturate = uniforms[0][13].load(true);
// globVis is pre-scaled by 1/32 once here; the light formula multiplies it by W.
globVis = uniforms[0][14].load(true);
globVis = globVis * SSAFloat(1.0f / 32.0f);
SSAInt flags = uniforms[0][15].load(true);
// Vectors are assembled in blue, green, red, alpha order.
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
// Each flag is true only when all bits of its mask are set.
is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade);
is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter);
is_fixed_light = (flags & TriUniforms::fixed_light) == SSAInt(TriUniforms::fixed_light);
}
// Emits the x-gradient of an attribute that takes the values c0, c1, c2 at the
// triangle corners (x0,y0), (x1,y1), (x2,y2).
// No check is emitted for a zero denominator.
SSAFloat DrawTriangleCodegen::FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2)
{
    // Edge deltas in y relative to the third corner; used by both terms.
    SSAFloat dy02 = y0 - y2;
    SSAFloat dy12 = y1 - y2;

    SSAFloat numerator = (c1 - c2) * dy02 - (c0 - c2) * dy12;
    SSAFloat denominator = (x1 - x2) * dy02 - (x0 - x2) * dy12;
    return numerator / denominator;
}
// Emits the y-gradient of an attribute that takes the values c0, c1, c2 at the
// triangle corners (x0,y0), (x1,y1), (x2,y2).
// No check is emitted for a zero denominator.
SSAFloat DrawTriangleCodegen::FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2)
{
    // Edge deltas in x relative to the third corner; used by both terms.
    SSAFloat dx02 = x0 - x2;
    SSAFloat dx12 = x1 - x2;

    SSAFloat numerator = (c1 - c2) * dx02 - (c0 - c2) * dx12;
    SSAFloat denominator = dx02 * (y1 - y2) - dx12 * (y0 - y2);
    return numerator / denominator;
}

View file

@ -1,112 +0,0 @@
/*
** DrawTriangle code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "drawercodegen.h"
// SSA-side copy of one triangle vertex: position followed by
// TriVertex::NumVarying interpolated attributes.
struct SSATriVertex
{
    SSAFloat x;
    SSAFloat y;
    SSAFloat z;
    SSAFloat w;
    SSAFloat varying[TriVertex::NumVarying];
};
// One set of interpolated quantities: W plus one value per varying.
// Used both for positions and for per-pixel gradients.
struct SSAStepVariables
{
    SSAFloat W;
    SSAFloat Varying[TriVertex::NumVarying];
};
// Code generator for the triangle drawers. One Generate() call emits a
// drawer specialised on blend mode, output format (true color or palette)
// and color-fill mode.
class DrawTriangleCodegen : public DrawerCodegen
{
public:
    void Generate(TriBlendMode blendmode, bool truecolor, bool colorfill, SSAValue args, SSAValue thread_data);

private:
    // Argument loading and per-triangle setup.
    void LoadArgs(SSAValue args, SSAValue thread_data);
    SSATriVertex LoadTriVertex(SSAValue v);
    void LoadUniforms(SSAValue uniforms);
    void CalculateGradients();
    SSAFloat FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2);
    SSAFloat FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2);

    // The two drawing passes.
    void DrawFullSpans(bool isSimpleShade);
    void DrawPartialBlocks(bool isSimpleShade);

    // Per-pixel stages: the 32 variants work on colors, the 8 variants on palette indices.
    SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying, bool isSimpleShade);
    SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying);
    SSAVec4i TranslateSample32(SSAInt *varying);
    SSAInt TranslateSample8(SSAInt *varying);
    SSAVec4i Sample32(SSAInt *varying);
    SSAInt Sample8(SSAInt *varying);
    SSAVec4i Shade32(SSAVec4i fg, SSAInt light, bool isSimpleShade);
    SSAInt Shade8(SSAInt c);
    SSAVec4i ToBgra(SSAInt index);
    SSAInt ToPal8(SSAVec4i c);
    SSAVec4i FadeOut(SSAInt frac, SSAVec4i color);

    // Stack slots holding the values carried across iterations of the emitted loops.
    SSAStack<SSAInt> stack_i;
    SSAStack<SSAInt> stack_y;
    SSAStack<SSAInt> stack_x;
    SSAStack<SSAFloat> stack_posYW;
    SSAStack<SSAFloat> stack_posXW;
    SSAStack<SSAFloat> stack_posYVarying[TriVertex::NumVarying];
    SSAStack<SSAFloat> stack_posXVarying[TriVertex::NumVarying];
    SSAStack<SSAInt> stack_varyingPos[TriVertex::NumVarying];
    SSAStack<SSAInt> stack_lightpos;
    SSAStack<SSAUBytePtr> stack_dest;

    // Results of CalculateGradients() (globVis comes from LoadUniforms()).
    SSAStepVariables gradientX;
    SSAStepVariables gradientY;
    SSAStepVariables start;
    SSAFloat shade;
    SSAFloat globVis;

    // Per-pixel light state, set by the drawing passes before each pixel.
    SSAInt currentlight;
    SSAUBytePtr currentcolormap;

    // Values loaded by LoadArgs() / LoadUniforms().
    SSAUBytePtr destOrg;
    SSAInt pitch;
    SSATriVertex v1;
    SSATriVertex v2;
    SSATriVertex v3;
    SSAUBytePtr texturePixels;
    SSAInt textureWidth;
    SSAInt textureHeight;
    SSAUBytePtr translation;
    SSAInt color;
    SSAInt srcalpha;
    SSAInt destalpha;
    SSAInt light;
    SSAShadeConstants shade_constants;
    SSABool is_simple_shade;
    SSABool is_nearest_filter;
    SSABool is_fixed_light;

    // Palette lookup tables; only loaded for palette output.
    SSAUBytePtr Colormaps;
    SSAUBytePtr RGB256k;
    SSAUBytePtr BaseColors;

    // Per-thread span and block lists.
    SSAInt numSpans;
    SSAInt numBlocks;
    SSAInt startX;
    SSAInt startY;
    SSAValue fullSpans; // TriFullSpan[]
    SSAValue partialBlocks; // TriPartialBlock[]

    // Generation-time configuration, set by Generate().
    TriBlendMode blendmode;
    bool truecolor;
    bool colorfill;
    int pixelsize;
};

View file

@ -1,231 +0,0 @@
/*
** DrawWall code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "fixedfunction/drawwallcodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
// Emits a complete wall column drawer for `variant`.
//   args        - pointer to the argument struct.
//   thread_data - pointer to the per-thread data struct.
// The numeric field indices below must match the layout of those structs,
// which are declared outside this file - keep them in sync.
void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data)
{
dest = args[0][0].load(true);
source = args[0][1].load(true);
source2 = args[0][5].load(true);
pitch = args[0][9].load(true);
count = args[0][10].load(true);
dest_y = args[0][11].load(true);
texturefrac = args[0][12].load(true);
texturefracx = args[0][16].load(true);
iscale = args[0][20].load(true);
textureheight = args[0][24].load(true);
light = args[0][28].load(true);
srcalpha = args[0][32].load(true);
destalpha = args[0][33].load(true);
// Shade constants are stored as 16-bit values.
SSAShort light_alpha = args[0][34].load(true);
SSAShort light_red = args[0][35].load(true);
SSAShort light_green = args[0][36].load(true);
SSAShort light_blue = args[0][37].load(true);
SSAShort fade_alpha = args[0][38].load(true);
SSAShort fade_red = args[0][39].load(true);
SSAShort fade_green = args[0][40].load(true);
SSAShort fade_blue = args[0][41].load(true);
SSAShort desaturate = args[0][42].load(true);
SSAInt flags = args[0][43].load(true);
start_z = args[0][44].load(true);
step_z = args[0][45].load(true);
dynlights = args[0][46].load(true);
num_dynlights = args[0][47].load(true);
// Vectors are assembled in blue, green, red, alpha order.
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
thread.core = thread_data[0][0].load(true);
thread.num_cores = thread_data[0][1].load(true);
thread.pass_start_y = thread_data[0][2].load(true);
thread.pass_end_y = thread_data[0][3].load(true);
// Each flag is true only when all bits of its mask are set.
is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade);
is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter);
// Restrict the column to the rows handled by this thread: count and dest are
// adjusted by the thread helpers, and every per-row step (pitch, texture
// fraction, z) is multiplied by the number of cores.
count = count_for_thread(dest_y, count, thread);
dest = dest_for_thread(dest_y, pitch, dest, thread);
pitch = pitch * thread.num_cores;
// Starting texture fraction, advanced past the rows this thread skips.
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
fracstep = iscale * thread.num_cores;
// NOTE(review): presumably the size of one texel in 32-bit texture-fraction
// units (about 2^32 / textureheight) - confirm.
one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1;
start_z = start_z + step_z * SSAFloat(skipped_by_thread(dest_y, thread));
step_z = step_z * SSAFloat(thread.num_cores);
// Emit both shading flavours; the generated code picks one at run time.
SSAIfBlock branch;
branch.if_block(is_simple_shade);
LoopShade(variant, true);
branch.else_block();
LoopShade(variant, false);
branch.end_block();
}
// Emits the column loop twice, once per texture filter, and lets the
// generated code choose between them at run time from is_nearest_filter.
void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool isSimpleShade)
{
    SSAIfBlock filterBranch;
    filterBranch.if_block(is_nearest_filter);
    {
        Loop(variant, isSimpleShade, true);
    }
    filterBranch.else_block();
    {
        // The linear filter starts half of `one` earlier in the texture.
        SSAInt halfStep = one / 2;
        stack_frac.store(stack_frac.load() - halfStep);
        Loop(variant, isSimpleShade, false);
    }
    filterBranch.end_block();
}
// Emits the per-pixel loop for one column: `count` iterations, each sampling,
// shading and blending one destination pixel.
// Loop-carried values (index, z, texture fraction) live in stack slots: they
// are loaded before loop_block() and stored back at the end of the body.
// stack_frac is expected to hold the starting texture fraction on entry.
void DrawWallCodegen::Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter)
{
stack_index.store(SSAInt(0));
stack_z.store(start_z);
{
SSAForBlock loop;
SSAInt index = stack_index.load();
// z is a member because Shade() reads it for the dynamic lights.
z = stack_z.load();
loop.loop_block(index < count);
SSAInt frac = stack_frac.load();
// Destination pixels are 4 bytes wide and `pitch` pixels apart.
SSAInt offset = index * pitch * 4;
SSAVec4i bgcolor = dest[offset].load_vec4ub(false);
SSAVec4i color = Blend(Shade(Sample(frac, isNearestFilter), isSimpleShade), bgcolor, variant);
dest[offset].store_vec4ub(color);
// Advance the loop-carried values for the next pixel.
stack_z.store(z + step_z);
stack_index.store(index.add(SSAInt(1), true, true));
stack_frac.store(frac + fracstep);
loop.end_block();
}
}
// Emits one texture sample for texture fraction `frac`.
// The filter is chosen at generation time: the nearest filter reads a single
// 4-byte texel from `source`; otherwise SampleLinear blends texels from
// `source` and `source2`.
SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter)
{
    if (!isNearestFilter)
        return SampleLinear(source, source2, texturefracx, frac, one, textureheight);

    // Scale the upper bits of the fraction by the texture height to get the texel row.
    SSAInt texel = ((frac >> FRACBITS) * textureheight) >> FRACBITS;
    return source[texel * 4].load_vec4ub(false);
}
// Emits a bilinear sample from two adjacent texture columns.
//   col0, col1   - the two columns (4 bytes per texel).
//   texturefracx - horizontal weight of col1 on a 0..16 scale (used as inv_b).
//   texturefracy - vertical texture fraction.
//   one          - added to texturefracy to reach the second row.
//   height       - texture height in texels.
// Returns the weighted sum of the four texels; the weights are products of two
// 0..16 factors, so the sum is divided by 256 (with +127 added before the shift).
SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
// Row positions in fixed point, then the integer rows y0 and y1.
SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;
SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
SSAInt y0 = frac_y0 >> FRACBITS;
SSAInt y1 = frac_y1 >> FRACBITS;
SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);
SSAInt inv_b = texturefracx;
// Top 4 fractional bits of the second row position.
// NOTE(review): here these bits are assigned to `a` (the weight of row y0),
// whereas the disabled bilinear code in DrawTriangleCodegen::Sample32 assigns
// the same expression to `inv_a` - confirm which orientation is intended.
SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15;
SSAInt inv_a = 16 - a;
SSAInt b = 16 - inv_b;
return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
// Emits the shading of one sampled color: the regular light shade plus the
// accumulated contribution of all dynamic lights.
//   fg            - unshaded sample.
//   isSimpleShade - selects the simple or the advanced shader at generation time.
// Returns shade(fg) + (sum of light contributions * fg) >> 8.
// Reads the `z` member, which Loop() sets for the current pixel.
SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{
SSAVec4i c;
if (isSimpleShade)
c = shade_bgra_simple(fg, light);
else
c = shade_bgra_advanced(fg, light, shade_constants);
// Emitted loop over dynlights[0 .. num_dynlights); the running sum and the
// index are carried in stack slots.
stack_lit_color.store(SSAVec4i(0));
stack_light_index.store(SSAInt(0));
SSAForBlock block;
SSAInt light_index = stack_light_index.load();
SSAVec4i lit_color = stack_lit_color.load();
block.loop_block(light_index < num_dynlights);
{
// Light record: field 0 color (4 bytes), fields 1..3 x/y/z, field 4 reciprocal radius.
SSAVec4i light_color = SSAUBytePtr(SSAValue(dynlights[light_index][0]).v).load_vec4ub(true);
SSAFloat light_x = dynlights[light_index][1].load(true);
SSAFloat light_y = dynlights[light_index][2].load(true);
SSAFloat light_z = dynlights[light_index][3].load(true);
SSAFloat light_rcp_radius = dynlights[light_index][4].load(true);
// L = light-pos
// dist = sqrt(dot(L, L))
// attenuation = 1 - MIN(dist * (1/radius), 1)
// NOTE(review): the code works on a 0..256 scale rather than 0..1, so
// light_rcp_radius is presumably pre-scaled by the caller - confirm.
// light_x is used directly as the squared xy distance (see the trailing comment).
SSAFloat Lxy2 = light_x; // L.x*L.x + L.y*L.y
SSAFloat Lz = light_z - z;
SSAFloat dist2 = Lxy2 + Lz * Lz;
SSAFloat rcp_dist = SSAFloat::rsqrt(dist2);
// dist2 * (1 / sqrt(dist2)) == sqrt(dist2)
SSAFloat dist = dist2 * rcp_dist;
SSAFloat distance_attenuation = SSAFloat(256.0f) - SSAFloat::MIN(dist * light_rcp_radius, SSAFloat(256.0f));
// The simple light type
SSAFloat simple_attenuation = distance_attenuation;
// The point light type
// diffuse = dot(N,L) * attenuation
SSAFloat point_attenuation = light_y * rcp_dist * distance_attenuation;
// light_y == 0 selects the simple type; any other value is used as the
// point light's diffuse factor.
SSAInt attenuation = SSAInt((light_y == SSAFloat(0.0f)).select(simple_attenuation, point_attenuation), true);
SSAVec4i contribution = (light_color * attenuation) >> 8;
stack_lit_color.store(lit_color + contribution);
stack_light_index.store(light_index + 1);
}
block.end_block();
// The dynamic light sum modulates the unshaded sample, not the shaded one.
return c + ((stack_lit_color.load() * fg) >> 8);
}
// Emits the blend of the shaded texel `fg` with the destination pixel `bg`.
// The blend helper is chosen at code-generation time from `variant`; any
// value not listed below falls back to the opaque copy, as does Opaque.
SSAVec4i DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant)
{
	if (variant == DrawWallVariant::Masked)
		return blend_alpha_blend(fg, bg);

	// Add and AddClamp share the same additive blend.
	if (variant == DrawWallVariant::Add || variant == DrawWallVariant::AddClamp)
		return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha));

	if (variant == DrawWallVariant::SubClamp)
		return blend_sub(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha));

	if (variant == DrawWallVariant::RevSubClamp)
		return blend_revsub(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha));

	// DrawWallVariant::Opaque and every unrecognised value.
	return blend_copy(fg);
}

View file

@ -1,80 +0,0 @@
/*
** DrawWall code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "drawercodegen.h"
// Selects which blend operation DrawWallCodegen bakes into a generated wall
// drawer. The mapping to blend helpers lives in DrawWallCodegen::Blend().
enum class DrawWallVariant
{
Opaque, // blend_copy: source replaces destination
Masked, // blend_alpha_blend
Add, // blend_add (same helper as AddClamp)
AddClamp, // blend_add
SubClamp, // blend_sub
RevSubClamp // blend_revsub
};
// Code generator for the wall column drawers. Generate() emits the body of
// one drawer function for the requested DrawWallVariant.
class DrawWallCodegen : public DrawerCodegen
{
public:
// Emits the drawer body. `args` and `thread_data` are the two parameters of
// the function being generated.
void Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data);
private:
void LoopShade(DrawWallVariant variant, bool isSimpleShade);
void Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter);
// Texel fetch: nearest, or bilinear via SampleLinear().
SSAVec4i Sample(SSAInt frac, bool isNearestFilter);
SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
// Applies the light level and the dynamic lights to a texel.
SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade);
// Combines the shaded texel with the destination pixel.
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant);
// Loop-carried values of the generated loops.
SSAStack<SSAInt> stack_index, stack_frac, stack_light_index;
SSAStack<SSAVec4i> stack_lit_color;
SSAStack<SSAFloat> stack_z;
// Drawer arguments (presumably loaded from `args` by Generate - its body is
// not part of this header).
SSAUBytePtr dest;
SSAUBytePtr source; // texture column read by both filters
SSAUBytePtr source2; // second texture column, used by the linear filter only
SSAInt pitch;
SSAInt count;
SSAInt dest_y;
SSAInt texturefrac;
SSAInt texturefracx; // horizontal blend weight for the linear filter
SSAInt iscale;
SSAInt textureheight;
SSAInt light;
SSAInt srcalpha;
SSAInt destalpha;
SSABool is_simple_shade;
SSABool is_nearest_filter;
SSAShadeConstants shade_constants;
SSAWorkerThread thread;
SSAInt fracstep; // added to frac on every loop iteration
SSAInt one; // offset to the second row sampled by SampleLinear()
SSAFloat start_z, step_z;
SSAValue dynlights; // TriLight*
SSAInt num_dynlights;
SSAFloat z; // compared against each light's z in Shade()
};

View file

@ -1,573 +0,0 @@
/*
** DrawTriangle code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "fixedfunction/setuptrianglecodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
// Emits the triangle setup function body: loads the arguments, computes the
// edge equations and bounding box, then emits the loop over block rows.
// `subsectorTest` is a code-generation-time switch that selects the stencil
// comparison used in LoopBlockX().
void SetupTriangleCodegen::Generate(bool subsectorTest, SSAValue args, SSAValue thread_data)
{
this->subsectorTest = subsectorTest;
LoadArgs(args, thread_data);
Setup();
LoopBlockY();
}
// Emits the conversion of a float coordinate to 28.4 fixed point.
// Stands in for: SSAInt(SSAFloat::round(16.0f * v), false);
// The value is converted at twice the target scale, nudged by one away from
// zero, and then halved with an arithmetic shift.
SSAInt SetupTriangleCodegen::FloatTo28_4(SSAFloat v)
{
	SSAInt doubled = SSAInt(v * 32.0f, false);
	// ashr(31) is 0 for non-negative values and -1 for negative ones, so
	// OR-ing in 1 yields +1 or -1.
	SSAInt sign = doubled.ashr(31);
	SSAInt nudge = sign | SSAInt(1);
	return (doubled + nudge).ashr(1);
}
// Emits the per-triangle setup: fixed-point vertex coordinates, edge deltas,
// the clipped bounding rectangle and the three half-edge constants.
// The results are stored in members that LoopBlockY()/LoopBlockX() read.
void SetupTriangleCodegen::Setup()
{
// 28.4 fixed-point coordinates
Y1 = FloatTo28_4(v1.y);
Y2 = FloatTo28_4(v2.y);
Y3 = FloatTo28_4(v3.y);
X1 = FloatTo28_4(v1.x);
X2 = FloatTo28_4(v2.x);
X3 = FloatTo28_4(v3.x);
// Deltas
DX12 = X1 - X2;
DX23 = X2 - X3;
DX31 = X3 - X1;
DY12 = Y1 - Y2;
DY23 = Y2 - Y3;
DY31 = Y3 - Y1;
// Fixed-point deltas
FDX12 = DX12 << 4;
FDX23 = DX23 << 4;
FDX31 = DX31 << 4;
FDY12 = DY12 << 4;
FDY23 = DY23 << 4;
FDY31 = DY31 << 4;
// Bounding rectangle
// (+0xF then >>4 rounds the 28.4 value up to whole pixels; the result is
// clamped to [0, clipright - 1] and [0, clipbottom - 1].)
minx = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(X1, X2), X3) + 0xF).ashr(4), SSAInt(0));
maxx = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(X1, X2), X3) + 0xF).ashr(4), clipright - 1);
miny = SSAInt::MAX((SSAInt::MIN(SSAInt::MIN(Y1, Y2), Y3) + 0xF).ashr(4), SSAInt(0));
maxy = SSAInt::MIN((SSAInt::MAX(SSAInt::MAX(Y1, Y2), Y3) + 0xF).ashr(4), clipbottom - 1);
// Empty bounding rectangle: nothing to draw.
// (end_retvoid presumably makes the generated function return here - confirm
// against SSAIfBlock.)
SSAIfBlock if0;
if0.if_block(minx >= maxx || miny >= maxy);
if0.end_retvoid();
// Start in corner of 8x8 block
minx = minx & ~(q - 1);
miny = miny & ~(q - 1);
// Half-edge constants
C1 = DY12 * X1 - DX12 * Y1;
C2 = DY23 * X2 - DX23 * Y2;
C3 = DY31 * X3 - DX31 * Y3;
// Correct for fill convention
// (each constant is bumped by one when its edge has DY < 0, or DY == 0 and
// DX > 0; the value travels through an SSAStack so both branches can
// write it.)
SSAIfBlock if1;
if1.if_block(DY12 < SSAInt(0) || (DY12 == SSAInt(0) && DX12 > SSAInt(0)));
stack_C1.store(C1 + 1);
if1.else_block();
stack_C1.store(C1);
if1.end_block();
C1 = stack_C1.load();
SSAIfBlock if2;
if2.if_block(DY23 < SSAInt(0) || (DY23 == SSAInt(0) && DX23 > SSAInt(0)));
stack_C2.store(C2 + 1);
if2.else_block();
stack_C2.store(C2);
if2.end_block();
C2 = stack_C2.load();
SSAIfBlock if3;
if3.if_block(DY31 < SSAInt(0) || (DY31 == SSAInt(0) && DX31 > SSAInt(0)));
stack_C3.store(C3 + 1);
if3.else_block();
stack_C3.store(C3);
if3.end_block();
C3 = stack_C3.load();
}
// Emits the outer loop over rows of q x q blocks, from miny up to maxy.
// Each iteration advances by thread.num_cores block rows, so the block rows
// are interleaved between worker threads.
void SetupTriangleCodegen::LoopBlockY()
{
// skipped_by_thread() is defined outside this file; presumably it returns how
// many block rows belong to other threads before this thread's first row.
SSAInt blocks_skipped = skipped_by_thread(miny / q, thread);
stack_y.store(miny + blocks_skipped * q);
// NOTE(review): the buffer pointer is advanced only by the skipped rows, not
// by miny * pitch, unlike stack_y above - confirm whether that is intended.
stack_subsectorGBuffer.store(subsectorGBuffer[blocks_skipped * q * pitch]);
SSAForBlock loop;
y = stack_y.load();
subsectorGBuffer = stack_subsectorGBuffer.load();
loop.loop_block(y < maxy, 0);
{
LoopBlockX();
// Advance the loop-carried row and buffer pointer to this thread's next block row.
stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch * thread.num_cores]);
stack_y.store(y + thread.num_cores * q);
}
loop.end_block();
}
// Emits the inner loop over the q x q blocks of one block row, from minx up
// to maxx. Each block is classified against the three triangle edges and its
// stencil block, then handed to LoopFullBlock() or LoopPartialBlock().
void SetupTriangleCodegen::LoopBlockX()
{
stack_x.store(minx);
SSAForBlock loop;
x = stack_x.load();
loop.loop_block(x < maxx, 0);
{
// Corners of block
x0 = x << 4;
x1 = (x + q - 1) << 4;
y0 = y << 4;
y1 = (y + q - 1) << 4;
// Evaluate half-space functions
// (one test per corner and edge; the four results of an edge are packed
// into a 4-bit mask: 0 = all corners outside, 0xF = all corners inside)
SSABool a00 = C1 + DX12 * y0 - DY12 * x0 > SSAInt(0);
SSABool a10 = C1 + DX12 * y0 - DY12 * x1 > SSAInt(0);
SSABool a01 = C1 + DX12 * y1 - DY12 * x0 > SSAInt(0);
SSABool a11 = C1 + DX12 * y1 - DY12 * x1 > SSAInt(0);
SSAInt a = (a00.zext_int() << 0) | (a10.zext_int() << 1) | (a01.zext_int() << 2) | (a11.zext_int() << 3);
SSABool b00 = C2 + DX23 * y0 - DY23 * x0 > SSAInt(0);
SSABool b10 = C2 + DX23 * y0 - DY23 * x1 > SSAInt(0);
SSABool b01 = C2 + DX23 * y1 - DY23 * x0 > SSAInt(0);
SSABool b11 = C2 + DX23 * y1 - DY23 * x1 > SSAInt(0);
SSAInt b = (b00.zext_int() << 0) | (b10.zext_int() << 1) | (b01.zext_int() << 2) | (b11.zext_int() << 3);
SSABool c00 = C3 + DX31 * y0 - DY31 * x0 > SSAInt(0);
SSABool c10 = C3 + DX31 * y0 - DY31 * x1 > SSAInt(0);
SSABool c01 = C3 + DX31 * y1 - DY31 * x0 > SSAInt(0);
SSABool c11 = C3 + DX31 * y1 - DY31 * x1 > SSAInt(0);
SSAInt c = (c00.zext_int() << 0) | (c10.zext_int() << 1) | (c01.zext_int() << 2) | (c11.zext_int() << 3);
// Skip block when outside an edge
SSABool process_block = !(a == SSAInt(0) || b == SSAInt(0) || c == SSAInt(0));
// Select this block's stencil data (the literal 8 matches the block size q).
SetStencilBlock(x / 8 + y / 8 * stencilPitch);
// Stencil test the whole block, if possible
// (subsectorTest is a C++ bool, so only one of the two comparisons is emitted)
if (subsectorTest)
{
process_block = process_block && (!StencilIsSingleValue() || SSABool::compare_uge(StencilGetSingle(), stencilTestValue));
}
else
{
process_block = process_block && (!StencilIsSingleValue() || StencilGetSingle() == stencilTestValue);
}
SSAIfBlock branch;
branch.if_block(process_block);
// Check if block needs clipping
SSABool clipneeded = (x + q) > clipright || (y + q) > clipbottom;
SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue();
// Accept whole block when totally covered
SSAIfBlock branch_covered;
branch_covered.if_block(covered);
{
LoopFullBlock();
}
branch_covered.else_block();
{
// Partially covered block: with a single stencil value the stencil test is
// done once for the block, otherwise it is left to LoopPartialBlock(false).
SSAIfBlock branch_covered_stencil;
branch_covered_stencil.if_block(StencilIsSingleValue());
{
SSABool stenciltestpass;
if (subsectorTest)
{
stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue);
}
else
{
stenciltestpass = StencilGetSingle() == stencilTestValue;
}
SSAIfBlock branch_stenciltestpass;
branch_stenciltestpass.if_block(stenciltestpass);
{
LoopPartialBlock(true);
}
branch_stenciltestpass.end_block();
}
branch_covered_stencil.else_block();
{
LoopPartialBlock(false);
}
branch_covered_stencil.end_block();
}
branch_covered.end_block();
branch.end_block();
// Advance to the next block in this row.
stack_x.store(x + q);
}
loop.end_block();
}
// Handles a block that is fully covered by the triangle.
//
// This function currently emits nothing: its whole body is commented out.
// The disabled code refers to names (variant, truecolor, dest,
// ProcessPixel32, ProcessPixel8, SetupAffineBlock, AffineVaryingPosX, ...)
// that SetupTriangleCodegen does not declare, so it cannot be re-enabled
// as-is.
void SetupTriangleCodegen::LoopFullBlock()
{
/*
if (variant == TriDrawVariant::Stencil)
{
StencilClear(stencilWriteValue);
}
else if (variant == TriDrawVariant::StencilClose)
{
StencilClear(stencilWriteValue);
for (int iy = 0; iy < q; iy++)
{
SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch];
for (int ix = 0; ix < q; ix += 4)
{
subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth));
}
}
}
else
{
int pixelsize = truecolor ? 4 : 1;
AffineW = posx_w;
for (int i = 0; i < TriVertex::NumVarying; i++)
AffineVaryingPosY[i] = posx_varying[i];
for (int iy = 0; iy < q; iy++)
{
SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize];
SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch];
SetupAffineBlock();
for (int ix = 0; ix < q; ix += 4)
{
SSAUBytePtr buf = buffer[ix * pixelsize];
if (truecolor)
{
SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false);
SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16);
SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16);
SSAVec4i pixels[4] =
{
SSAVec4i::extendlo(pixels8lo),
SSAVec4i::extendhi(pixels8lo),
SSAVec4i::extendlo(pixels8hi),
SSAVec4i::extendhi(pixels8hi)
};
for (int sse = 0; sse < 4; sse++)
{
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
{
SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth;
pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], AffineVaryingPosX), pixels[sse]);
}
else
{
pixels[sse] = ProcessPixel32(pixels[sse], AffineVaryingPosX);
}
for (int i = 0; i < TriVertex::NumVarying; i++)
AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i];
}
buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3])));
}
else
{
SSAVec4i pixelsvec = buf.load_vec4ub(false);
SSAInt pixels[4] =
{
pixelsvec[0],
pixelsvec[1],
pixelsvec[2],
pixelsvec[3]
};
for (int sse = 0; sse < 4; sse++)
{
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
{
SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth;
pixels[sse] = subsectorTest.select(ProcessPixel8(pixels[sse], AffineVaryingPosX), pixels[sse]);
}
else
{
pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX);
}
for (int i = 0; i < TriVertex::NumVarying; i++)
AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i];
}
buf.store_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3]));
}
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth));
}
AffineW = AffineW + gradWY;
for (int i = 0; i < TriVertex::NumVarying; i++)
AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i];
}
}
*/
}
// Handles a block that is only partially covered by the triangle.
// isSingleStencilValue tells whether the caller found the stencil block to
// hold a single value.
//
// This function currently emits nothing: its whole body is commented out,
// so the parameter is unused. The disabled code refers to names (variant,
// truecolor, dest, ProcessPixel32, stack_stencilblock_restored, ...) that
// SetupTriangleCodegen does not declare, so it cannot be re-enabled as-is.
void SetupTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue)
{
/*
int pixelsize = truecolor ? 4 : 1;
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
if (isSingleStencilValue)
{
SSAInt stencilMask = StencilBlockMask.load(false);
SSAUByte val0 = stencilMask.trunc_ubyte();
for (int i = 0; i < 8 * 8; i++)
StencilBlock[i].store(val0);
StencilBlockMask.store(SSAInt(0));
}
SSAUByte lastStencilValue = StencilBlock[0].load(false);
stack_stencilblock_restored.store(SSABool(true));
stack_stencilblock_lastval.store(lastStencilValue);
}
stack_CY1.store(C1 + DX12 * y0 - DY12 * x0);
stack_CY2.store(C2 + DX23 * y0 - DY23 * x0);
stack_CY3.store(C3 + DX31 * y0 - DY31 * x0);
stack_iy.store(SSAInt(0));
stack_buffer.store(dest[x * pixelsize]);
stack_subsectorbuffer.store(subsectorGBuffer[x]);
stack_AffineW.store(posx_w);
for (int i = 0; i < TriVertex::NumVarying; i++)
{
stack_AffineVaryingPosY[i].store(posx_varying[i]);
}
SSAForBlock loopy;
SSAInt iy = stack_iy.load();
SSAUBytePtr buffer = stack_buffer.load();
SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load();
SSAInt CY1 = stack_CY1.load();
SSAInt CY2 = stack_CY2.load();
SSAInt CY3 = stack_CY3.load();
AffineW = stack_AffineW.load();
for (int i = 0; i < TriVertex::NumVarying; i++)
AffineVaryingPosY[i] = stack_AffineVaryingPosY[i].load();
loopy.loop_block(iy < SSAInt(q), q);
{
SetupAffineBlock();
for (int i = 0; i < TriVertex::NumVarying; i++)
stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i]);
stack_CX1.store(CY1);
stack_CX2.store(CY2);
stack_CX3.store(CY3);
stack_ix.store(SSAInt(0));
SSAForBlock loopx;
SSABool stencilblock_restored;
SSAUByte lastStencilValue;
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
stencilblock_restored = stack_stencilblock_restored.load();
lastStencilValue = stack_stencilblock_lastval.load();
}
SSAInt ix = stack_ix.load();
SSAInt CX1 = stack_CX1.load();
SSAInt CX2 = stack_CX2.load();
SSAInt CX3 = stack_CX3.load();
for (int i = 0; i < TriVertex::NumVarying; i++)
AffineVaryingPosX[i] = stack_AffineVaryingPosX[i].load();
loopx.loop_block(ix < SSAInt(q), q);
{
SSABool visible = (ix + x < clipright) && (iy + y < clipbottom);
SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible;
if (!isSingleStencilValue)
{
SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false);
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
{
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth;
}
else if (variant == TriDrawVariant::StencilClose)
{
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue);
}
else
{
covered = covered && stencilValue == stencilTestValue;
}
}
else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
{
covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth;
}
SSAIfBlock branch;
branch.if_block(covered);
{
if (variant == TriDrawVariant::Stencil)
{
StencilBlock[ix + iy * 8].store(stencilWriteValue);
}
else if (variant == TriDrawVariant::StencilClose)
{
StencilBlock[ix + iy * 8].store(stencilWriteValue);
subsectorbuffer[ix].store(subsectorDepth);
}
else
{
SSAUBytePtr buf = buffer[ix * pixelsize];
if (truecolor)
{
SSAVec4i bg = buf.load_vec4ub(false);
buf.store_vec4ub(ProcessPixel32(bg, AffineVaryingPosX));
}
else
{
SSAUByte bg = buf.load(false);
buf.store(ProcessPixel8(bg.zext_int(), AffineVaryingPosX).trunc_ubyte());
}
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
subsectorbuffer[ix].store(subsectorDepth);
}
}
branch.end_block();
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false);
stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue);
stack_stencilblock_lastval.store(newStencilValue);
}
for (int i = 0; i < TriVertex::NumVarying; i++)
stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]);
stack_CX1.store(CX1 - FDY12);
stack_CX2.store(CX2 - FDY23);
stack_CX3.store(CX3 - FDY31);
stack_ix.store(ix + 1);
}
loopx.end_block();
stack_AffineW.store(AffineW + gradWY);
for (int i = 0; i < TriVertex::NumVarying; i++)
stack_AffineVaryingPosY[i].store(AffineVaryingPosY[i] + gradVaryingY[i]);
stack_CY1.store(CY1 + FDX12);
stack_CY2.store(CY2 + FDX23);
stack_CY3.store(CY3 + FDX31);
stack_buffer.store(buffer[pitch * pixelsize]);
stack_subsectorbuffer.store(subsectorbuffer[pitch]);
stack_iy.store(iy + 1);
}
loopy.end_block();
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
SSAIfBlock branch;
SSABool restored = stack_stencilblock_restored.load();
branch.if_block(restored);
{
SSAUByte lastStencilValue = stack_stencilblock_lastval.load();
StencilClear(lastStencilValue);
}
branch.end_block();
}
*/
}
// Points StencilBlock and StencilBlockMask at the data of stencil block
// number `block`. Each block owns 64 stencil values and one mask word.
void SetupTriangleCodegen::SetStencilBlock(SSAInt block)
{
	SSAInt firstValue = block * 64;
	StencilBlock = stencilValues[firstValue];
	StencilBlockMask = stencilMasks[block];
}
// Emits a load of the current block's mask word and returns its low byte,
// which holds the stencil value when the block is in single-value form.
SSAUByte SetupTriangleCodegen::StencilGetSingle()
{
	SSAInt maskWord = StencilBlockMask.load(false);
	return maskWord.trunc_ubyte();
}
// Emits a store that puts the current stencil block into single-value form:
// the upper 24 bits of the mask word are set and the low byte carries `value`.
void SetupTriangleCodegen::StencilClear(SSAUByte value)
{
	SSAInt packed = SSAInt(0xffffff00) | value.zext_int();
	StencilBlockMask.store(packed);
}
// Emits the test for single-value form: true when the upper 24 bits of the
// current block's mask word are all set (the form StencilClear() writes).
SSABool SetupTriangleCodegen::StencilIsSingleValue()
{
	SSAInt maskWord = StencilBlockMask.load(false);
	SSAInt upperBits = maskWord & SSAInt(0xffffff00);
	return upperBits == SSAInt(0xffffff00);
}
// Emits the loads of the drawer arguments and the worker thread data.
// The numeric indices are field positions inside the argument struct; the
// struct itself is declared outside this file, so the indices must be kept
// in sync with it. Fields not listed here are not needed by the setup code.
void SetupTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data)
{
pitch = args[0][1].load(true);
// Fields 2..4 are pointers to the three triangle vertices.
v1 = LoadTriVertex(args[0][2].load(true));
v2 = LoadTriVertex(args[0][3].load(true));
v3 = LoadTriVertex(args[0][4].load(true));
clipright = args[0][6].load(true);
clipbottom = args[0][8].load(true);
stencilValues = args[0][14].load(true);
stencilMasks = args[0][15].load(true);
stencilPitch = args[0][16].load(true);
stencilTestValue = args[0][17].load(true);
stencilWriteValue = args[0][18].load(true);
subsectorGBuffer = args[0][19].load(true);
thread.core = thread_data[0][0].load(true);
thread.num_cores = thread_data[0][1].load(true);
// The pass range is hard-coded instead of being read from thread_data.
thread.pass_start_y = SSAInt(0);
thread.pass_end_y = SSAInt(32000);
}
// Emits loads of the first four fields of the vertex that `ptr` points to
// and returns them as x, y, z and w.
SSASetupVertex SetupTriangleCodegen::LoadTriVertex(SSAValue ptr)
{
SSASetupVertex v;
v.x = ptr[0][0].load(true);
v.y = ptr[0][1].load(true);
v.z = ptr[0][2].load(true);
v.w = ptr[0][3].load(true);
return v;
}

View file

@ -1,98 +0,0 @@
/*
** SetupTriangle code generation
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "drawercodegen.h"
// The position of one triangle vertex as SSA values, filled in by
// SetupTriangleCodegen::LoadTriVertex().
struct SSASetupVertex
{
SSAFloat x, y, z, w;
};
// Code generator for the triangle setup function: it walks the triangle's
// bounding box in q x q blocks and classifies each block against the three
// edges and the stencil buffer.
class SetupTriangleCodegen : public DrawerCodegen
{
public:
// Emits the function body. `subsectorTest` selects the stencil comparison
// at code-generation time.
void Generate(bool subsectorTest, SSAValue args, SSAValue thread_data);
private:
void LoadArgs(SSAValue args, SSAValue thread_data);
SSASetupVertex LoadTriVertex(SSAValue v);
void Setup();
SSAInt FloatTo28_4(SSAFloat v);
void LoopBlockY();
void LoopBlockX();
void LoopFullBlock();
void LoopPartialBlock(bool isSingleStencilValue);
// Helpers operating on the stencil block selected by SetStencilBlock().
void SetStencilBlock(SSAInt block);
void StencilClear(SSAUByte value);
SSAUByte StencilGetSingle();
SSABool StencilIsSingleValue();
bool subsectorTest;
// Loop-carried values and values written from both sides of a branch.
SSAStack<SSAInt> stack_C1, stack_C2, stack_C3;
SSAStack<SSAInt> stack_y;
SSAStack<SSAIntPtr> stack_subsectorGBuffer;
SSAStack<SSAInt> stack_x;
SSAStack<SSAUBytePtr> stack_buffer;
SSAStack<SSAInt> stack_iy, stack_ix;
SSAStack<SSAInt> stack_CY1, stack_CY2, stack_CY3;
SSAStack<SSAInt> stack_CX1, stack_CX2, stack_CX3;
//SSAStack<SSABool> stack_stencilblock_restored;
//SSAStack<SSAUByte> stack_stencilblock_lastval;
// Values loaded from the drawer arguments by LoadArgs().
SSAIntPtr subsectorGBuffer;
SSAInt pitch;
SSASetupVertex v1;
SSASetupVertex v2;
SSASetupVertex v3;
SSAInt clipright;
SSAInt clipbottom;
SSAUBytePtr stencilValues;
SSAIntPtr stencilMasks;
SSAInt stencilPitch;
SSAUByte stencilTestValue;
SSAUByte stencilWriteValue;
SSAWorkerThread thread;
// Block size, standard 8x8 (must be power of two)
const int q = 8;
// 28.4 fixed-point vertex coordinates, edge deltas and the deltas shifted
// left by 4, all computed in Setup().
SSAInt Y1, Y2, Y3;
SSAInt X1, X2, X3;
SSAInt DX12, DX23, DX31;
SSAInt DY12, DY23, DY31;
SSAInt FDX12, FDX23, FDX31;
SSAInt FDY12, FDY23, FDY31;
// Clipped bounding rectangle and half-edge constants.
SSAInt minx, maxx, miny, maxy;
SSAInt C1, C2, C3;
// Current block position and its corners in 28.4 fixed point.
SSAInt x, y;
SSAInt x0, x1, y0, y1;
// Stencil values and mask word of the current block.
SSAUBytePtr StencilBlock;
SSAIntPtr StencilBlockMask;
};

View file

@ -1,99 +0,0 @@
/*
** LLVM includes
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
// Wrapper around the LLVM headers. It hides the min/max macros from LLVM,
// silences warnings the LLVM headers trigger, and restores the macros at
// the end.
#ifdef _MSC_VER
// Stash any min/max macros so they cannot clash with identifiers in the
// LLVM headers; they are restored at the bottom of this file.
#if defined(min)
#define llvm_min_bug min
#undef min
#endif
#if defined(max)
#define llvm_max_bug max
#undef max
#endif
// NOTE: these warnings are disabled without a push/pop, so they stay disabled
// for everything that includes this header.
#pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned
#pragma warning(disable: 4624) // warning C4624: 'llvm::AugmentedUse' : destructor could not be generated because a base class destructor is inaccessible
#pragma warning(disable: 4355) // warning C4355: 'this' : used in base member initializer list
#pragma warning(disable: 4800) // warning C4800: 'const unsigned int' : forcing value to bool 'true' or 'false' (performance warning)
#pragma warning(disable: 4996) // warning C4996: 'std::_Copy_impl': Function call with parameters that may be unsafe - this call relies on the caller to check that the passed values are correct. To disable this warning, use -D_SCL_SECURE_NO_WARNINGS. See documentation on how to use Visual C++ 'Checked Iterators'
#pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data
#pragma warning(disable: 4141) // warning C4141: 'inline': used more than once
#pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception
#pragma warning(disable: 4267) // warning C4267: 'return': conversion from 'size_t' to 'unsigned int', possible loss of data
#pragma warning(disable: 4244) // warning C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data
#endif
#if defined(__APPLE__) || defined(__clang__)
#define __STDC_LIMIT_MACROS // DataTypes.h:57:3: error: "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
#define __STDC_CONSTANT_MACROS // DataTypes.h:61:3: error: "Must #define __STDC_CONSTANT_MACROS before " "#including Support/DataTypes.h"
// -Wredundant-move is ignored only while the LLVM headers are being included.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wredundant-move"
#endif
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/Verifier.h>
//#include <llvm/IR/PassManager.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/Host.h>
#include <llvm/CodeGen/AsmPrinter.h>
#include <llvm/MC/MCAsmInfo.h>
#include <llvm/Target/TargetSubtargetInfo.h>
#include <llvm/Bitcode/ReaderWriter.h>
// FormattedStream.h is only included for LLVM versions older than 3.8.
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8)
#include <llvm/Support/FormattedStream.h>
#endif
#if defined(__APPLE__) || defined(__clang__)
#pragma clang diagnostic pop
#endif
#ifdef _MSC_VER
// Restore the min/max macros stashed at the top of this file.
#if defined(llvm_min_bug)
#define min llvm_min_bug
#undef llvm_min_bug
#endif
#if defined(llvm_max_bug)
#define max llvm_max_bug
#undef llvm_max_bug
#endif
#endif

View file

@ -1,428 +0,0 @@
/*
** LLVM code generated drawers
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "llvmdrawers.h"
#include "exception.h"
// Creates the module, generates every drawer function into it and compiles
// the module to an object file for the given target triple, CPU and feature
// string. Each generated function is named "<drawer name><namePostfix>".
// The drawers are generated in the order of the tables below.
LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix) : mNamePostfix(namePostfix)
{
	mProgram.CreateModule();

	// Column drawers.
	const struct { const char *name; DrawColumnVariant variant; } columnDrawers[] =
	{
		{ "FillColumn", DrawColumnVariant::Fill },
		{ "FillColumnAdd", DrawColumnVariant::FillAdd },
		{ "FillColumnAddClamp", DrawColumnVariant::FillAddClamp },
		{ "FillColumnSubClamp", DrawColumnVariant::FillSubClamp },
		{ "FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp },
		{ "DrawColumn", DrawColumnVariant::Draw },
		{ "DrawColumnAdd", DrawColumnVariant::DrawAdd },
		{ "DrawColumnShaded", DrawColumnVariant::DrawShaded },
		{ "DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp },
		{ "DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp },
		{ "DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp },
		{ "DrawColumnTranslated", DrawColumnVariant::DrawTranslated },
		{ "DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd },
		{ "DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated },
		{ "DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated },
		{ "DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated }
	};
	for (const auto &drawer : columnDrawers)
		CodegenDrawColumn(drawer.name, drawer.variant);

	// Span drawers.
	const struct { const char *name; DrawSpanVariant variant; } spanDrawers[] =
	{
		{ "DrawSpan", DrawSpanVariant::Opaque },
		{ "DrawSpanMasked", DrawSpanVariant::Masked },
		{ "DrawSpanTranslucent", DrawSpanVariant::Translucent },
		{ "DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent },
		{ "DrawSpanAddClamp", DrawSpanVariant::AddClamp },
		{ "DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp }
	};
	for (const auto &drawer : spanDrawers)
		CodegenDrawSpan(drawer.name, drawer.variant);

	// Wall drawers.
	const struct { const char *name; DrawWallVariant variant; } wallDrawers[] =
	{
		{ "vlinec1", DrawWallVariant::Opaque },
		{ "mvlinec1", DrawWallVariant::Masked },
		{ "tmvline1_add", DrawWallVariant::Add },
		{ "tmvline1_addclamp", DrawWallVariant::AddClamp },
		{ "tmvline1_subclamp", DrawWallVariant::SubClamp },
		{ "tmvline1_revsubclamp", DrawWallVariant::RevSubClamp }
	};
	for (const auto &drawer : wallDrawers)
		CodegenDrawWall(drawer.name, drawer.variant);

	// Sky drawers.
	CodegenDrawSky("DrawSky1", DrawSkyVariant::Single);
	CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double);

	// Triangle drawers: four per blend mode (8-bit/32-bit, textured/filled),
	// with the numeric blend mode appended to the name.
	for (int mode = 0; mode < NumTriBlendModes(); mode++)
	{
		const std::string suffix = std::to_string(mode);
		const TriBlendMode blendmode = (TriBlendMode)mode;
		CodegenDrawTriangle("TriDraw8_" + suffix, blendmode, false, false);
		CodegenDrawTriangle("TriDraw32_" + suffix, blendmode, true, false);
		CodegenDrawTriangle("TriFill8_" + suffix, blendmode, false, true);
		CodegenDrawTriangle("TriFill32_" + suffix, blendmode, true, true);
	}

	ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features);
}
void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
SSAFunction function(name + mNamePostfix);
function.add_parameter(GetDrawColumnArgsStruct(mProgram.context()));
function.add_parameter(GetWorkerThreadDataStruct(mProgram.context()));
function.create_public();
DrawColumnCodegen codegen;
codegen.Generate(variant, function.parameter(0), function.parameter(1));
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
throw Exception("verifyFunction failed for CodegenDrawColumn()");
}
void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
SSAFunction function(name + mNamePostfix);
function.add_parameter(GetDrawSpanArgsStruct(mProgram.context()));
function.create_public();
DrawSpanCodegen codegen;
codegen.Generate(variant, function.parameter(0));
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
throw Exception("verifyFunction failed for CodegenDrawSpan()");
}
void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
SSAFunction function(name + mNamePostfix);
function.add_parameter(GetDrawWallArgsStruct(mProgram.context()));
function.add_parameter(GetWorkerThreadDataStruct(mProgram.context()));
function.create_public();
DrawWallCodegen codegen;
codegen.Generate(variant, function.parameter(0), function.parameter(1));
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
throw Exception("verifyFunction failed for CodegenDrawWall()");
}
void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
SSAFunction function(name + mNamePostfix);
function.add_parameter(GetDrawSkyArgsStruct(mProgram.context()));
function.add_parameter(GetWorkerThreadDataStruct(mProgram.context()));
function.create_public();
DrawSkyCodegen codegen;
codegen.Generate(variant, function.parameter(0), function.parameter(1));
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
throw Exception("verifyFunction failed for CodegenDrawSky()");
}
void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
SSAFunction function(name + mNamePostfix);
function.add_parameter(GetTriDrawTriangleArgs(mProgram.context()));
function.add_parameter(GetWorkerThreadDataStruct(mProgram.context()));
function.create_public();
DrawTriangleCodegen codegen;
codegen.Generate(blendmode, truecolor, colorfill, function.parameter(0), function.parameter(1));
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
throw Exception("verifyFunction failed for CodegenDrawTriangle()");
}
// Returns the cached LLVM type "DrawColumnArgs *", building it on first use.
// The struct is created non-packed. The trailing comments carry over the
// original annotations naming the C++ member each slot stands for; every
// pointer member is modelled as i8*.
llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
{
	if (!DrawColumnArgsStruct)
	{
		llvm::Type *const bytePtr = llvm::Type::getInt8PtrTy(context);
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);
		llvm::Type *const int16 = llvm::Type::getInt16Ty(context);

		const std::vector<llvm::Type *> fields =
		{
			bytePtr, // uint32_t *dest;
			bytePtr, // const uint32_t *source;
			bytePtr, // const uint32_t *source2;
			bytePtr, // uint8_t *colormap;
			bytePtr, // uint8_t *translation;
			bytePtr, // const uint32_t *basecolors;
			int32,   // int32_t pitch;
			int32,   // int32_t count;
			int32,   // int32_t dest_y;
			int32,   // uint32_t iscale;
			int32,   // uint32_t texturefracx;
			int32,   // uint32_t textureheight;
			int32,   // uint32_t texturefrac;
			int32,   // uint32_t light;
			int32,   // uint32_t color;
			int32,   // uint32_t srccolor;
			int32,   // uint32_t srcalpha;
			int32,   // uint32_t destalpha;
			int16,   // uint16_t light_alpha;
			int16,   // uint16_t light_red;
			int16,   // uint16_t light_green;
			int16,   // uint16_t light_blue;
			int16,   // uint16_t fade_alpha;
			int16,   // uint16_t fade_red;
			int16,   // uint16_t fade_green;
			int16,   // uint16_t fade_blue;
			int16,   // uint16_t desaturate;
			int32    // uint32_t flags;
		};
		DrawColumnArgsStruct = llvm::StructType::create(context, fields, "DrawColumnArgs", false)->getPointerTo();
	}
	return DrawColumnArgsStruct;
}
// Returns the cached LLVM type "DrawSpanArgs *", building it on first use.
// The struct is created non-packed. The trailing comments carry over the
// original annotations naming the C++ member each slot stands for.
llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
{
	if (!DrawSpanArgsStruct)
	{
		llvm::Type *const bytePtr = llvm::Type::getInt8PtrTy(context);
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);
		llvm::Type *const int16 = llvm::Type::getInt16Ty(context);
		llvm::Type *const float32 = llvm::Type::getFloatTy(context);

		const std::vector<llvm::Type *> fields =
		{
			bytePtr, // uint8_t *destorg;
			bytePtr, // const uint32_t *source;
			int32,   // int32_t destpitch;
			int32,   // int32_t xfrac;
			int32,   // int32_t yfrac;
			int32,   // int32_t xstep;
			int32,   // int32_t ystep;
			int32,   // int32_t x1;
			int32,   // int32_t x2;
			int32,   // int32_t y;
			int32,   // int32_t xbits;
			int32,   // int32_t ybits;
			int32,   // uint32_t light;
			int32,   // uint32_t srcalpha;
			int32,   // uint32_t destalpha;
			int16,   // uint16_t light_alpha;
			int16,   // uint16_t light_red;
			int16,   // uint16_t light_green;
			int16,   // uint16_t light_blue;
			int16,   // uint16_t fade_alpha;
			int16,   // uint16_t fade_red;
			int16,   // uint16_t fade_green;
			int16,   // uint16_t fade_blue;
			int16,   // uint16_t desaturate;
			int32,   // uint32_t flags;
			float32, // float viewpos_x;
			float32, // float step_viewpos_x;
			GetTriLightStruct(context), // TriLight *dynlights;
			int32    // uint32_t num_dynlights;
		};
		DrawSpanArgsStruct = llvm::StructType::create(context, fields, "DrawSpanArgs", false)->getPointerTo();
	}
	return DrawSpanArgsStruct;
}
// Returns the cached LLVM type "DrawWallArgs *", building it on first use.
// Layout: 9 byte pointers, 25 32-bit integers, then the light/fade block,
// flags, two floats, the dynamic light pointer and its count.
// The struct is created non-packed.
llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context)
{
	if (!DrawWallArgsStruct)
	{
		llvm::Type *const bytePtr = llvm::Type::getInt8PtrTy(context);
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);
		llvm::Type *const int16 = llvm::Type::getInt16Ty(context);
		llvm::Type *const float32 = llvm::Type::getFloatTy(context);

		// One leading pointer followed by eight more pointers.
		std::vector<llvm::Type *> fields(9, bytePtr);
		fields.insert(fields.end(), 25, int32);

		const std::vector<llvm::Type *> tail =
		{
			int16,   // uint16_t light_alpha;
			int16,   // uint16_t light_red;
			int16,   // uint16_t light_green;
			int16,   // uint16_t light_blue;
			int16,   // uint16_t fade_alpha;
			int16,   // uint16_t fade_red;
			int16,   // uint16_t fade_green;
			int16,   // uint16_t fade_blue;
			int16,   // uint16_t desaturate;
			int32,   // uint32_t flags;
			float32, // float z;
			float32, // float step_z;
			GetTriLightStruct(context), // TriLight *dynlights;
			int32    // uint32_t num_dynlights;
		};
		fields.insert(fields.end(), tail.begin(), tail.end());

		DrawWallArgsStruct = llvm::StructType::create(context, fields, "DrawWallArgs", false)->getPointerTo();
	}
	return DrawWallArgsStruct;
}
// Returns the cached LLVM type "DrawSkyArgs *", building it on first use.
// Layout: 9 byte pointers followed by 16 32-bit integers, non-packed.
llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context)
{
	if (!DrawSkyArgsStruct)
	{
		// One leading pointer followed by eight more pointers.
		std::vector<llvm::Type *> fields(9, llvm::Type::getInt8PtrTy(context));
		fields.insert(fields.end(), 16, llvm::Type::getInt32Ty(context));

		DrawSkyArgsStruct = llvm::StructType::create(context, fields, "DrawSkyArgs", false)->getPointerTo();
	}
	return DrawSkyArgsStruct;
}
// Returns the cached LLVM type "ThreadData *", building it on first use.
// Layout: 4 x i32, one byte pointer, a TriFullSpan pointer, a TriPartialBlock
// pointer, then 4 more i32 values. The struct is created non-packed.
llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context)
{
	if (!WorkerThreadDataStruct)
	{
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);

		std::vector<llvm::Type *> fields(4, int32);
		fields.push_back(llvm::Type::getInt8PtrTy(context));
		fields.push_back(GetTriFullSpanStruct(context));
		fields.push_back(GetTriPartialBlockStruct(context));
		fields.insert(fields.end(), 4, int32);

		WorkerThreadDataStruct = llvm::StructType::create(context, fields, "ThreadData", false)->getPointerTo();
	}
	return WorkerThreadDataStruct;
}
// Returns the cached LLVM type "TriLight *", building it on first use.
// Layout: one i32 followed by four floats, non-packed.
llvm::Type *LLVMDrawers::GetTriLightStruct(llvm::LLVMContext &context)
{
	if (!TriLightStruct)
	{
		std::vector<llvm::Type *> fields(1, llvm::Type::getInt32Ty(context));
		fields.insert(fields.end(), 4, llvm::Type::getFloatTy(context));

		TriLightStruct = llvm::StructType::create(context, fields, "TriLight", false)->getPointerTo();
	}
	return TriLightStruct;
}
// Returns the cached LLVM type "TriVertex *", building it on first use.
// Layout: (4 + TriVertex::NumVarying) floats, non-packed.
llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context)
{
	if (!TriVertexStruct)
	{
		llvm::Type *const float32 = llvm::Type::getFloatTy(context);
		std::vector<llvm::Type *> fields;
		for (int slot = 0; slot < 4 + TriVertex::NumVarying; ++slot)
			fields.push_back(float32);

		TriVertexStruct = llvm::StructType::create(context, fields, "TriVertex", false)->getPointerTo();
	}
	return TriVertexStruct;
}
// Returns the cached LLVM type "TriMatrix *", building it on first use.
// Layout: 4 * 4 floats, non-packed.
llvm::Type *LLVMDrawers::GetTriMatrixStruct(llvm::LLVMContext &context)
{
	if (!TriMatrixStruct)
	{
		const std::vector<llvm::Type *> fields(4 * 4, llvm::Type::getFloatTy(context));

		TriMatrixStruct = llvm::StructType::create(context, fields, "TriMatrix", false)->getPointerTo();
	}
	return TriMatrixStruct;
}
// Returns the cached LLVM type "TriUniforms *", building it on first use.
// The struct is created non-packed. The trailing comments carry over the
// original annotations naming the C++ member each slot stands for.
llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context)
{
	if (!TriUniformsStruct)
	{
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);
		llvm::Type *const int16 = llvm::Type::getInt16Ty(context);
		llvm::Type *const float32 = llvm::Type::getFloatTy(context);

		const std::vector<llvm::Type *> fields =
		{
			int32,   // uint32_t light;
			int32,   // uint32_t subsectorDepth;
			int32,   // uint32_t color;
			int32,   // uint32_t srcalpha;
			int32,   // uint32_t destalpha;
			int16,   // uint16_t light_alpha;
			int16,   // uint16_t light_red;
			int16,   // uint16_t light_green;
			int16,   // uint16_t light_blue;
			int16,   // uint16_t fade_alpha;
			int16,   // uint16_t fade_red;
			int16,   // uint16_t fade_green;
			int16,   // uint16_t fade_blue;
			int16,   // uint16_t desaturate;
			float32, // float globvis;
			int32,   // uint32_t flags;
			// TriMatrix objectToClip
			// NOTE(review): GetTriMatrixStruct() yields a pointer type, while the
			// annotation reads like a by-value member - confirm against the C++ struct.
			GetTriMatrixStruct(context)
		};
		TriUniformsStruct = llvm::StructType::create(context, fields, "TriUniforms", false)->getPointerTo();
	}
	return TriUniformsStruct;
}
// Returns the cached LLVM type "TriFullSpan *", building it on first use.
// Layout: two 16-bit integers (X, Y) followed by a 32-bit Length, non-packed.
// NOTE(review): earlier annotations labelled X and Y as uint32_t although the
// emitted slots are i16 - confirm the widths against the C++ TriFullSpan.
llvm::Type *LLVMDrawers::GetTriFullSpanStruct(llvm::LLVMContext &context)
{
	if (!TriFullSpanStruct)
	{
		llvm::Type *const int16 = llvm::Type::getInt16Ty(context);
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);

		const std::vector<llvm::Type *> fields =
		{
			int16, // X (16-bit slot)
			int16, // Y (16-bit slot)
			int32  // uint32_t Length;
		};
		TriFullSpanStruct = llvm::StructType::create(context, fields, "TriFullSpan", false)->getPointerTo();
	}
	return TriFullSpanStruct;
}
// Returns the cached LLVM type "TriPartialBlock *", building it on first use.
// Layout: two 16-bit integers (X, Y) followed by two 32-bit masks, non-packed.
// NOTE(review): earlier annotations labelled X and Y as uint32_t although the
// emitted slots are i16 - confirm the widths against the C++ TriPartialBlock.
llvm::Type *LLVMDrawers::GetTriPartialBlockStruct(llvm::LLVMContext &context)
{
	if (!TriPartialBlockStruct)
	{
		llvm::Type *const int16 = llvm::Type::getInt16Ty(context);
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);

		const std::vector<llvm::Type *> fields =
		{
			int16, // X (16-bit slot)
			int16, // Y (16-bit slot)
			int32, // uint32_t Mask0;
			int32  // uint32_t Mask1;
		};
		TriPartialBlockStruct = llvm::StructType::create(context, fields, "TriPartialBlock", false)->getPointerTo();
	}
	return TriPartialBlockStruct;
}
// Returns the cached LLVM type "TriDrawTriangle *", building it on first use.
// The struct is created non-packed. The trailing comments carry over the
// original annotations naming the C++ member each slot stands for.
llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context)
{
	if (!TriDrawTriangleArgs)
	{
		llvm::Type *const bytePtr = llvm::Type::getInt8PtrTy(context);
		llvm::Type *const int32Ptr = llvm::Type::getInt32PtrTy(context);
		llvm::Type *const int32 = llvm::Type::getInt32Ty(context);
		llvm::Type *const int8 = llvm::Type::getInt8Ty(context);

		const std::vector<llvm::Type *> fields =
		{
			bytePtr,  // uint8_t *dest;
			int32,    // int32_t pitch;
			GetTriVertexStruct(context), // TriVertex *v1;
			GetTriVertexStruct(context), // TriVertex *v2;
			GetTriVertexStruct(context), // TriVertex *v3;
			int32,    // int32_t clipleft;
			int32,    // int32_t clipright;
			int32,    // int32_t cliptop;
			int32,    // int32_t clipbottom;
			bytePtr,  // const uint8_t *texturePixels;
			int32,    // uint32_t textureWidth;
			int32,    // uint32_t textureHeight;
			bytePtr,  // const uint8_t *translation;
			GetTriUniformsStruct(context), // const TriUniforms *uniforms;
			bytePtr,  // uint8_t *stencilValues;
			int32Ptr, // uint32_t *stencilMasks;
			int32,    // int32_t stencilPitch;
			int8,     // uint8_t stencilTestValue;
			int8,     // uint8_t stencilWriteValue;
			int32Ptr, // uint32_t *subsectorGBuffer;
			bytePtr,  // const uint8_t *colormaps;
			bytePtr,  // const uint8_t *RGB256k;
			bytePtr   // const uint8_t *BaseColors;
		};
		TriDrawTriangleArgs = llvm::StructType::create(context, fields, "TriDrawTriangle", false)->getPointerTo();
	}
	return TriDrawTriangleArgs;
}

View file

@ -1,84 +0,0 @@
/*
** LLVM code generated drawers
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "fixedfunction/drawspancodegen.h"
#include "fixedfunction/drawwallcodegen.h"
#include "fixedfunction/drawcolumncodegen.h"
#include "fixedfunction/drawskycodegen.h"
#include "fixedfunction/drawtrianglecodegen.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_scope.h"
#include "ssa/ssa_for_block.h"
#include "ssa/ssa_if_block.h"
#include "ssa/ssa_stack.h"
#include "ssa/ssa_function.h"
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
#include "ssa/ssa_barycentric_weight.h"
#include "llvmprogram.h"
#include <iostream>
// Generates the LLVM drawer functions and holds the object file produced from
// them in ObjectFile.
class LLVMDrawers
{
public:
	LLVMDrawers(const std::string &triple, const std::string &cpuName, const std::string &features, const std::string namePostfix);

	// Bytes of the object file generated from the drawer module.
	std::vector<uint8_t> ObjectFile;

private:
	// Per-drawer code generation. Each emits one LLVM function whose name is
	// `name` followed by mNamePostfix and throws if the verifier rejects it.
	void CodegenDrawColumn(const char *name, DrawColumnVariant variant);
	void CodegenDrawSpan(const char *name, DrawSpanVariant variant);
	void CodegenDrawWall(const char *name, DrawWallVariant variant);
	void CodegenDrawSky(const char *name, DrawSkyVariant variant);
	void CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill);

	// Lazily created LLVM struct pointer types; each result is cached in the
	// member of the matching name below.
	llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context);
	llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context);
	llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context);
	llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context);
	llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context);
	llvm::Type *GetTriLightStruct(llvm::LLVMContext &context);
	llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context);
	llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context);
	llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context);
	llvm::Type *GetTriFullSpanStruct(llvm::LLVMContext &context);
	llvm::Type *GetTriPartialBlockStruct(llvm::LLVMContext &context);
	llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context);

	// Type caches; nullptr until the matching getter runs for the first time.
	llvm::Type *DrawColumnArgsStruct = nullptr;
	llvm::Type *DrawSpanArgsStruct = nullptr;
	llvm::Type *DrawWallArgsStruct = nullptr;
	llvm::Type *DrawSkyArgsStruct = nullptr;
	llvm::Type *WorkerThreadDataStruct = nullptr;
	llvm::Type *TriLightStruct = nullptr;
	llvm::Type *TriVertexStruct = nullptr;
	llvm::Type *TriMatrixStruct = nullptr;
	llvm::Type *TriUniformsStruct = nullptr;
	llvm::Type *TriFullSpanStruct = nullptr;
	llvm::Type *TriPartialBlockStruct = nullptr;
	llvm::Type *TriDrawTriangleArgs = nullptr;

	// LLVM context/module wrapper the drawers are generated into.
	LLVMProgram mProgram;
	// Suffix appended to every generated function name.
	std::string mNamePostfix;
};

View file

@ -1,171 +0,0 @@
/*
** LLVM code generated drawers
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "timestamp.h"
#include "llvmprogram.h"
#include <stdexcept>
// Creates the LLVM context owned by this program. No module exists until
// CreateModule() is called.
LLVMProgram::LLVMProgram()
	: mContext(std::make_unique<llvm::LLVMContext>())
{
}
void LLVMProgram::CreateModule()
{
mModule = std::make_unique<llvm::Module>("render", context());
}
// Optimizes the current module and compiles it to a native object file.
//
// triple   - LLVM target triple to generate code for.
// cpuName  - CPU name handed to the target machine.
// features - target feature string handed to the target machine.
//
// Returns the raw bytes of the generated object file.
//
// Throws std::runtime_error when no module has been created, when the triple
// does not resolve to a registered target, when the target machine cannot be
// created, or when the target cannot emit object files. Previously each of
// these cases dereferenced a null pointer or silently produced no output.
//
// NOTE(review): `machine` is a raw owning pointer that is never deleted here;
// a second call overwrites it. Confirm whether the class should own it.
std::vector<uint8_t> LLVMProgram::GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features)
{
	using namespace llvm;

	llvm::Module *module = mModule.get();
	if (!module)
		throw std::runtime_error("GenerateObjectFile called before CreateModule");

	std::string errorstring;
	const Target *target = TargetRegistry::lookupTarget(triple, errorstring);
	if (!target)
		throw std::runtime_error("Could not find LLVM target for triple '" + triple + "': " + errorstring);

#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
	Reloc::Model relocationModel = Reloc::PIC_;
#else
	Optional<Reloc::Model> relocationModel = Reloc::PIC_;
#endif

	CodeModel::Model codeModel = CodeModel::Model::Default;

	// Fast-math style code generation options.
	TargetOptions options;
	options.LessPreciseFPMADOption = true;
	options.AllowFPOpFusion = FPOpFusion::Fast;
	options.UnsafeFPMath = true;
	options.NoInfsFPMath = true;
	options.NoNaNsFPMath = true;
	options.HonorSignDependentRoundingFPMathOption = false;
	options.NoZerosInBSS = false;
	options.GuaranteedTailCallOpt = false;
	options.StackAlignmentOverride = 0;
	options.UseInitArray = true;
	options.DataSections = false;
	options.FunctionSections = false;
	options.JTType = JumpTable::Single; // Create a single table for all jumptable functions
	options.ThreadModel = ThreadModel::POSIX;
	options.DisableIntegratedAS = false;
	options.MCOptions.SanitizeAddress = false;
	options.MCOptions.MCRelaxAll = false; // relax all fixups in the emitted object file
	options.MCOptions.DwarfVersion = 0;
	options.MCOptions.ShowMCInst = false;
	options.MCOptions.ABIName = "";
	options.MCOptions.MCFatalWarnings = false;
	options.MCOptions.ShowMCEncoding = false; // Show encoding in .s output
	options.MCOptions.MCUseDwarfDirectory = false;
	options.MCOptions.AsmVerbose = true;

#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
	options.Reciprocals = TargetRecip({ "all" });
	options.StackSymbolOrdering = true;
	options.UniqueSectionNames = true;
	options.EmulatedTLS = false;
	options.ExceptionModel = ExceptionHandling::None;
	options.EABIVersion = EABI::Default;
	options.DebuggerTuning = DebuggerKind::Default;
	options.MCOptions.MCIncrementalLinkerCompatible = false;
	options.MCOptions.MCNoWarn = false;
	options.MCOptions.PreserveAsmComments = true;
#endif

	CodeGenOpt::Level optimizationLevel = CodeGenOpt::Aggressive;
	machine = target->createTargetMachine(triple, cpuName, features, options, relocationModel, codeModel, optimizationLevel);
	if (!machine)
		throw std::runtime_error("Could not create LLVM target machine for triple '" + triple + "'");

#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8)
	std::string targetTriple = machine->getTargetTriple();
#else
	std::string targetTriple = machine->getTargetTriple().getTriple();
#endif

	module->setTargetTriple(targetTriple);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8)
	module->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout()));
#else
	module->setDataLayout(machine->createDataLayout());
#endif

	legacy::FunctionPassManager PerFunctionPasses(module);
	legacy::PassManager PerModulePasses;

#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8)
	PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
	PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
#endif

	// The object file is emitted into this in-memory buffer.
	SmallString<16 * 1024> str;
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8)
	raw_svector_ostream vecstream(str);
	formatted_raw_ostream stream(vecstream);
#else
	raw_svector_ostream stream(str);
#endif

	// addPassesToEmitFile() returns true when the target cannot emit this file type.
	if (machine->addPassesToEmitFile(PerModulePasses, stream, TargetMachine::CGFT_ObjectFile))
		throw std::runtime_error("LLVM target machine cannot emit an object file for triple '" + triple + "'");

	PassManagerBuilder passManagerBuilder;
	passManagerBuilder.OptLevel = 3;
	passManagerBuilder.SizeLevel = 0;
	passManagerBuilder.Inliner = createFunctionInliningPass();
	passManagerBuilder.SLPVectorize = true;
	passManagerBuilder.LoopVectorize = true;
	passManagerBuilder.LoadCombine = true;
	passManagerBuilder.populateModulePassManager(PerModulePasses);
	passManagerBuilder.populateFunctionPassManager(PerFunctionPasses);

	// Run function passes:
	PerFunctionPasses.doInitialization();
	for (llvm::Function &func : *module)
	{
		if (!func.isDeclaration())
			PerFunctionPasses.run(func);
	}
	PerFunctionPasses.doFinalization();

	// Run module passes:
	PerModulePasses.run(*module);

	// Return the resulting object file
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8)
	stream.flush();
	vecstream.flush();
#endif

	// Range construction is well-defined for an empty buffer, unlike memcpy
	// through a possibly null data() pointer.
	return std::vector<uint8_t>(str.begin(), str.end());
}
std::string LLVMProgram::DumpModule()
{
std::string str;
llvm::raw_string_ostream stream(str);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 8)
mModule->print(stream, nullptr);
#else
mModule->print(stream, nullptr, false, true);
#endif
return stream.str();
}

View file

@ -1,41 +0,0 @@
/*
** LLVM code generated drawers
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
// Owns an LLVM context and (after CreateModule) a module, and turns that
// module into an object file or a textual dump.
class LLVMProgram
{
public:
	LLVMProgram();

	// Creates the module that generated functions are added to.
	void CreateModule();

	// Compiles the module for the given target and returns the object file bytes.
	std::vector<uint8_t> GenerateObjectFile(const std::string &triple, const std::string &cpuName, const std::string &features);

	// Returns the module printed as text.
	std::string DumpModule();

	llvm::LLVMContext &context() { return *mContext; }
	llvm::Module *module() { return mModule.get(); }

private:
	// Set by GenerateObjectFile(); nullptr before that.
	llvm::TargetMachine *machine = nullptr;
	std::unique_ptr<llvm::LLVMContext> mContext;
	std::unique_ptr<llvm::Module> mModule;
};

View file

@ -1,11 +0,0 @@
#pragma once
#include "llvm_include.h"
#include "../../src/swrenderer/drawers/r_drawers.h"
// Select the code generation target family from the compiler's predefined
// macros: ARM_TARGET when __arm__ is defined, X86_TARGET otherwise.
// NOTE(review): 64-bit ARM compilers define __aarch64__ rather than __arm__,
// so such builds fall through to X86_TARGET here - confirm this is intended.
#ifdef __arm__
#define ARM_TARGET
#else
#define X86_TARGET
#endif

View file

@ -1,118 +0,0 @@
/*
** SSA barycentric weight and viewport calculations
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_vec4f.h"
#include "ssa_float.h"
#include "ssa_int.h"
// Viewport rectangle expressed as SSA values, together with the derived
// quantities needed to map clip-space and normalized coordinates to window
// coordinates.
class SSAViewport
{
public:
	SSAViewport(SSAInt x, SSAInt y, SSAInt width, SSAInt height)
		: x(x),
		  y(y),
		  width(width),
		  height(height),
		  right(x + width),
		  bottom(y + height),
		  half_width(SSAFloat(width) * 0.5f),
		  half_height(SSAFloat(height) * 0.5f),
		  rcp_half_width(1.0f / (SSAFloat(width) * 0.5f)),
		  rcp_half_height(1.0f / (SSAFloat(height) * 0.5f))
	{
	}

	// Divides the clip-space vector by its w component, puts the original w
	// back into lane 3, and maps the result to window coordinates.
	SSAVec4f clip_to_window(SSAVec4f clip) const
	{
		SSAFloat w = clip[3];
		SSAVec4f divided = clip / SSAVec4f::shuffle(clip, 3, 3, 3, 3);
		SSAVec4f normalized = SSAVec4f::insert_element(divided, w, 3);
		return normalized_to_window(normalized);
	}

	// Maps normalized x/y from [-1, 1] onto the viewport rectangle, negates z
	// and passes lane 3 through unchanged.
	SSAVec4f normalized_to_window(SSAVec4f normalized) const
	{
		SSAFloat window_x = SSAFloat(x) + (normalized[0] + 1.0f) * half_width;
		SSAFloat window_y = SSAFloat(y) + (normalized[1] + 1.0f) * half_height;
		SSAFloat window_z = 0.0f - normalized[2];
		return SSAVec4f(window_x, window_y, window_z, normalized[3]);
	}

	// Top-left corner and size of the viewport.
	SSAInt x, y;
	SSAInt width, height;
	// x + width and y + height.
	SSAInt right, bottom;
	// Half extents and their reciprocals, as floats.
	SSAFloat half_width;
	SSAFloat half_height;
	SSAFloat rcp_half_width;
	SSAFloat rcp_half_height;
};
// Computes the interpolation weight between two clip-space vertices for a
// given window-space x or y coordinate.
class SSABarycentricWeight
{
public:
	SSABarycentricWeight(SSAViewport vp, SSAVec4f v1, SSAVec4f v2);

	// Weight for window column x / window row y; see the inline definitions.
	SSAFloat from_window_x(SSAInt x) const;
	SSAFloat from_window_y(SSAInt y) const;

	SSAViewport viewport;
	// The two vertices being interpolated between.
	SSAVec4f v1;
	SSAVec4f v2;
};
// Stores the viewport and the two vertices; no other work is done here.
inline SSABarycentricWeight::SSABarycentricWeight(SSAViewport vp, SSAVec4f first, SSAVec4f second)
	: viewport(vp),
	  v1(first),
	  v2(second)
{
}
// Returns 1 - t, where t solves
//   xn = (v1.x + t * dx) / (v1.w + t * dw)
// for the normalized x coordinate xn of the centre of window column x.
inline SSAFloat SSABarycentricWeight::from_window_x(SSAInt x) const
{
	// Pixel centre mapped to [-1, 1] across the viewport width.
	SSAFloat ndc_x = (SSAFloat(x) + 0.5f - SSAFloat(viewport.x)) * viewport.rcp_half_width - 1.0f;

	SSAFloat delta_x = v2[0] - v1[0];
	SSAFloat delta_w = v2[3] - v1[3];

	SSAFloat numerator = ndc_x * v1[3] - v1[0];
	SSAFloat denominator = delta_x - ndc_x * delta_w;
	SSAFloat t = numerator / denominator;
	return 1.0f - t;
}
// Returns 1 - t, where t solves
//   yn = (v1.y + t * dy) / (v1.w + t * dw)
// for the normalized y coordinate yn of the centre of window row y.
inline SSAFloat SSABarycentricWeight::from_window_y(SSAInt y) const
{
	// Pixel centre mapped to [-1, 1] across the viewport height.
	SSAFloat ndc_y = (SSAFloat(y) + 0.5f - SSAFloat(viewport.y)) * viewport.rcp_half_height - 1.0f;

	SSAFloat delta_y = v2[1] - v1[1];
	SSAFloat delta_w = v2[3] - v1[3];

	SSAFloat numerator = ndc_y * v1[3] - v1[1];
	SSAFloat denominator = delta_y - ndc_y * delta_w;
	SSAFloat t = numerator / denominator;
	return 1.0f - t;
}
/*
y = (v1.y + t * dy) / (v1.w + t * dw)
y * v1.w + y * t * dw = v1.y + t * dy
y * v1.w - v1.y = t * (dy - y * dw)
t = (y * v1.w - v1.y) / (dy - y * dw)
*/

View file

@ -1,173 +0,0 @@
/*
** SSA boolean
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_bool.h"
#include "ssa_ubyte.h"
#include "ssa_vec4i.h"
#include "ssa_value.h"
#include "ssa_scope.h"
// Creates an SSABool that wraps no LLVM value yet.
SSABool::SSABool()
	: v(nullptr)
{
}

// Creates an SSABool wrapping a 1-bit integer constant with the given value.
SSABool::SSABool(bool constant)
	: v(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, constant, false)))
{
}

// Wraps an existing LLVM value.
SSABool::SSABool(llvm::Value *v)
	: v(v)
{
}
// LLVM type used for SSABool values: the 1-bit integer type of the active scope's context.
llvm::Type *SSABool::llvm_type()
{
	llvm::LLVMContext &ctx = SSAScope::context();
	return llvm::Type::getInt1Ty(ctx);
}
// Zero-extends this boolean to the SSAInt LLVM type.
SSAInt SSABool::zext_int()
{
	llvm::Value *widened = SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint());
	return SSAInt::from_llvm(widened);
}
// select(a, b): emits an LLVM select on this boolean with `a` as the first
// operand and `b` as the second. One overload per supported value type.
SSAInt SSABool::select(SSAInt a, SSAInt b)
{
	llvm::Value *chosen = SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint());
	return SSAValue::from_llvm(chosen);
}

SSAFloat SSABool::select(SSAFloat a, SSAFloat b)
{
	llvm::Value *chosen = SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint());
	return SSAValue::from_llvm(chosen);
}

SSAUByte SSABool::select(SSAUByte a, SSAUByte b)
{
	llvm::Value *chosen = SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint());
	return SSAValue::from_llvm(chosen);
}

SSAVec4i SSABool::select(SSAVec4i a, SSAVec4i b)
{
	llvm::Value *chosen = SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint());
	return SSAValue::from_llvm(chosen);
}
// Unsigned "a >= b" comparison of two byte values.
SSABool SSABool::compare_uge(const SSAUByte &a, const SSAUByte &b)
{
	llvm::Value *result = SSAScope::builder().CreateICmpUGE(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}
// Logical operators on SSABool. Both operands are always emitted; these are
// bitwise and/or/not instructions, not short-circuit evaluation.
SSABool operator&&(const SSABool &a, const SSABool &b)
{
	llvm::Value *result = SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}

SSABool operator||(const SSABool &a, const SSABool &b)
{
	llvm::Value *result = SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}

SSABool operator!(const SSABool &a)
{
	llvm::Value *result = SSAScope::builder().CreateNot(a.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}
// Signed integer comparisons on SSAInt (equality is sign-agnostic).
SSABool operator<(const SSAInt &a, const SSAInt &b)
{
	llvm::Value *result = SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}

SSABool operator<=(const SSAInt &a, const SSAInt &b)
{
	llvm::Value *result = SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}

SSABool operator==(const SSAInt &a, const SSAInt &b)
{
	llvm::Value *result = SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}

SSABool operator>=(const SSAInt &a, const SSAInt &b)
{
	llvm::Value *result = SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}

SSABool operator>(const SSAInt &a, const SSAInt &b)
{
	llvm::Value *result = SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint());
	return SSABool::from_llvm(result);
}
/////////////////////////////////////////////////////////////////////////////
// Comparison operators for SSAUByte.
//
// SSAUByte models an unsigned byte, so ordering comparisons must use the
// unsigned integer predicates (ULT/ULE/UGE/UGT). Signed predicates would
// treat any value with the top bit set (>= 128) as negative and order it
// below small values. This matches SSABool::compare_uge, which already uses
// the unsigned predicate. Equality is sign-agnostic and uses EQ.

// a < b (unsigned).
SSABool operator<(const SSAUByte &a, const SSAUByte &b)
{
    return SSABool::from_llvm(SSAScope::builder().CreateICmpULT(a.v, b.v, SSAScope::hint()));
}

// a <= b (unsigned).
SSABool operator<=(const SSAUByte &a, const SSAUByte &b)
{
    return SSABool::from_llvm(SSAScope::builder().CreateICmpULE(a.v, b.v, SSAScope::hint()));
}

// a == b.
SSABool operator==(const SSAUByte &a, const SSAUByte &b)
{
    return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint()));
}

// a >= b (unsigned).
SSABool operator>=(const SSAUByte &a, const SSAUByte &b)
{
    return SSABool::from_llvm(SSAScope::builder().CreateICmpUGE(a.v, b.v, SSAScope::hint()));
}

// a > b (unsigned).
SSABool operator>(const SSAUByte &a, const SSAUByte &b)
{
    return SSABool::from_llvm(SSAScope::builder().CreateICmpUGT(a.v, b.v, SSAScope::hint()));
}
/////////////////////////////////////////////////////////////////////////////
// Comparison operators for SSAFloat. All of them use LLVM's "ordered"
// floating-point predicates (the FCmpO* family).

// a < b (ordered).
SSABool operator<(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *result = SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint());
    return SSABool::from_llvm(result);
}

// a <= b (ordered).
SSABool operator<=(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *result = SSAScope::builder().CreateFCmpOLE(a.v, b.v, SSAScope::hint());
    return SSABool::from_llvm(result);
}

// a == b (ordered).
SSABool operator==(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *result = SSAScope::builder().CreateFCmpOEQ(a.v, b.v, SSAScope::hint());
    return SSABool::from_llvm(result);
}

// a >= b (ordered).
SSABool operator>=(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *result = SSAScope::builder().CreateFCmpOGE(a.v, b.v, SSAScope::hint());
    return SSABool::from_llvm(result);
}

// a > b (ordered).
SSABool operator>(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *result = SSAScope::builder().CreateFCmpOGT(a.v, b.v, SSAScope::hint());
    return SSABool::from_llvm(result);
}

View file

@ -1,75 +0,0 @@
/*
** SSA boolean
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_int.h"
#include "ssa_ubyte.h"
#include "ssa_float.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec4i;
// Thin wrapper around an llvm::Value* that represents a boolean in the
// code being generated. Operations on it emit LLVM instructions through
// SSAScope's builder rather than computing anything at C++ run time.
class SSABool
{
public:
// Default constructor (defined in the .cpp; presumably leaves `v` null -- TODO confirm).
SSABool();
// Builds a boolean from a C++ constant (defined in the .cpp).
explicit SSABool(bool constant);
// Wraps an existing LLVM value without emitting any instruction.
explicit SSABool(llvm::Value *v);
// Named form of the wrapping constructor.
static SSABool from_llvm(llvm::Value *v) { return SSABool(v); }
// LLVM type used for values of this wrapper (defined in the .cpp).
static llvm::Type *llvm_type();
// Zero-extends this boolean to an SSAInt.
SSAInt zext_int();
// Emit an LLVM select: yields `a` when this boolean is true, `b` otherwise.
SSAInt select(SSAInt a, SSAInt b);
SSAFloat select(SSAFloat a, SSAFloat b);
SSAUByte select(SSAUByte a, SSAUByte b);
SSAVec4i select(SSAVec4i a, SSAVec4i b);
// Unsigned a >= b comparison between two SSAUByte values.
static SSABool compare_uge(const SSAUByte &a, const SSAUByte &b);
// The wrapped LLVM value.
llvm::Value *v;
};
// Logical operators on booleans. Operands are already-built values, so
// && and || do not short-circuit; they emit and/or instructions.
SSABool operator&&(const SSABool &a, const SSABool &b);
SSABool operator||(const SSABool &a, const SSABool &b);
SSABool operator!(const SSABool &a);
// Integer comparisons on SSAInt; each emits an LLVM integer compare.
SSABool operator<(const SSAInt &a, const SSAInt &b);
SSABool operator<=(const SSAInt &a, const SSAInt &b);
SSABool operator==(const SSAInt &a, const SSAInt &b);
SSABool operator>=(const SSAInt &a, const SSAInt &b);
SSABool operator>(const SSAInt &a, const SSAInt &b);
// Integer comparisons on SSAUByte; each emits an LLVM integer compare.
SSABool operator<(const SSAUByte &a, const SSAUByte &b);
SSABool operator<=(const SSAUByte &a, const SSAUByte &b);
SSABool operator==(const SSAUByte &a, const SSAUByte &b);
SSABool operator>=(const SSAUByte &a, const SSAUByte &b);
SSABool operator>(const SSAUByte &a, const SSAUByte &b);
// Floating-point comparisons on SSAFloat; each emits an LLVM float compare.
SSABool operator<(const SSAFloat &a, const SSAFloat &b);
SSABool operator<=(const SSAFloat &a, const SSAFloat &b);
SSABool operator==(const SSAFloat &a, const SSAFloat &b);
SSABool operator>=(const SSAFloat &a, const SSAFloat &b);
SSABool operator>(const SSAFloat &a, const SSAFloat &b);

View file

@ -1,143 +0,0 @@
/*
** SSA float32
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_scope.h"
#include "ssa_bool.h"
#include "ssa_vec4f.h"
// Creates an empty wrapper that does not refer to any LLVM value yet.
SSAFloat::SSAFloat()
    : v(nullptr)
{
}

// Creates a float constant in the current SSAScope context.
SSAFloat::SSAFloat(float constant)
    : v(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)))
{
}

// Converts a signed integer value to float by emitting a signed
// int-to-floating-point instruction.
SSAFloat::SSAFloat(SSAInt i)
    : v(nullptr)
{
    llvm::Type *float_type = llvm::Type::getFloatTy(SSAScope::context());
    v = SSAScope::builder().CreateSIToFP(i.v, float_type, SSAScope::hint());
}

// Wraps an existing LLVM value; no instruction is emitted.
SSAFloat::SSAFloat(llvm::Value *v)
    : v(v)
{
}

// Returns the LLVM float type for the current SSAScope context.
llvm::Type *SSAFloat::llvm_type()
{
    llvm::Type *float_type = llvm::Type::getFloatTy(SSAScope::context());
    return float_type;
}
// Emits code for a reciprocal square root of `f`.
//
// Non-ARM builds: `f` is inserted into lane 0 of an undefined SSAVec4f-typed
// vector, the x86 SSE rsqrt_ss intrinsic is called on that vector, and lane 0
// of the result is extracted and returned.
//
// ARM builds (ARM_TARGET defined): the NEON intrinsic call is commented out
// and the function returns 1.0 / (f * 0.01) instead.
// NOTE(review): that expression is not a reciprocal square root (it is
// 100 / f); it looks like a placeholder -- confirm before relying on it.
SSAFloat SSAFloat::rsqrt(SSAFloat f)
{
#ifdef ARM_TARGET
//return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::aarch64_neon_frsqrts), f.v, SSAScope::hint()));
return SSAFloat(1.0f) / (f * SSAFloat(0.01f));
#else
// Place f in element 0; the remaining elements stay undefined.
llvm::Value *f_ss = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4f::llvm_type()), f.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0)));
f_ss = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rsqrt_ss), f_ss, SSAScope::hint());
// Only element 0 carries the result we care about.
return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(f_ss, SSAInt(0).v, SSAScope::hint()));
#endif
}
// Returns the smaller of a and b: emits `a < b` followed by a select that
// picks a when the comparison is true and b otherwise.
SSAFloat SSAFloat::MIN(SSAFloat a, SSAFloat b)
{
    SSABool a_is_less = a < b;
    llvm::Value *picked = SSAScope::builder().CreateSelect(a_is_less.v, a.v, b.v, SSAScope::hint());
    return SSAFloat::from_llvm(picked);
}

// Returns the larger of a and b: emits `a > b` followed by a select that
// picks a when the comparison is true and b otherwise.
SSAFloat SSAFloat::MAX(SSAFloat a, SSAFloat b)
{
    SSABool a_is_greater = a > b;
    llvm::Value *picked = SSAScope::builder().CreateSelect(a_is_greater.v, a.v, b.v, SSAScope::hint());
    return SSAFloat::from_llvm(picked);
}

// Clamps a to the range [b, c]: first caps a at the upper bound c, then
// raises the result to at least the lower bound b.
SSAFloat SSAFloat::clamp(SSAFloat a, SSAFloat b, SSAFloat c)
{
    SSAFloat capped = SSAFloat::MIN(a, c);
    return SSAFloat::MAX(capped, b);
}
// Arithmetic on two SSAFloat values; each operator emits the matching LLVM
// floating-point instruction and wraps the result.

// a + b.
SSAFloat operator+(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *sum = SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint());
    return SSAFloat::from_llvm(sum);
}

// a - b.
SSAFloat operator-(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *difference = SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint());
    return SSAFloat::from_llvm(difference);
}

// a * b.
SSAFloat operator*(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *product = SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint());
    return SSAFloat::from_llvm(product);
}

// a / b.
SSAFloat operator/(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *quotient = SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint());
    return SSAFloat::from_llvm(quotient);
}
// Mixed-mode arithmetic: a plain C++ float on one side is first turned into
// an SSAFloat constant, then the SSAFloat/SSAFloat operator is applied.

// float (op) SSAFloat
SSAFloat operator+(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs + b;
}

SSAFloat operator-(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs - b;
}

SSAFloat operator*(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs * b;
}

SSAFloat operator/(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs / b;
}

// SSAFloat (op) float
SSAFloat operator+(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a + rhs;
}

SSAFloat operator-(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a - rhs;
}

SSAFloat operator*(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a * rhs;
}

SSAFloat operator/(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a / rhs;
}

View file

@ -1,60 +0,0 @@
/*
** SSA float32
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAInt;
// Thin wrapper around an llvm::Value* that represents a 32-bit float in the
// code being generated. Operations emit LLVM instructions through SSAScope's
// builder instead of computing values at C++ run time.
class SSAFloat
{
public:
// Leaves `v` null.
SSAFloat();
// Implicit conversion from SSAInt; emits a signed int-to-float instruction.
SSAFloat(SSAInt i);
// Builds a float constant.
explicit SSAFloat(float constant);
// Wraps an existing LLVM value without emitting any instruction.
explicit SSAFloat(llvm::Value *v);
// Named form of the wrapping constructor.
static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); }
// LLVM float type in the current SSAScope context.
static llvm::Type *llvm_type();
// Reciprocal square root (platform-dependent implementation).
static SSAFloat rsqrt(SSAFloat f);
// Select-based minimum / maximum of two values.
static SSAFloat MIN(SSAFloat a, SSAFloat b);
static SSAFloat MAX(SSAFloat a, SSAFloat b);
// Clamps a to [b, c]; implemented as MAX(MIN(a, c), b).
static SSAFloat clamp(SSAFloat a, SSAFloat b, SSAFloat c);
// The wrapped LLVM value.
llvm::Value *v;
};
// SSAFloat (op) SSAFloat: emit the corresponding LLVM floating-point instruction.
SSAFloat operator+(const SSAFloat &a, const SSAFloat &b);
SSAFloat operator-(const SSAFloat &a, const SSAFloat &b);
SSAFloat operator*(const SSAFloat &a, const SSAFloat &b);
SSAFloat operator/(const SSAFloat &a, const SSAFloat &b);
// float (op) SSAFloat: the C++ float is converted to an SSAFloat constant first.
SSAFloat operator+(float a, const SSAFloat &b);
SSAFloat operator-(float a, const SSAFloat &b);
SSAFloat operator*(float a, const SSAFloat &b);
SSAFloat operator/(float a, const SSAFloat &b);
// SSAFloat (op) float: the C++ float is converted to an SSAFloat constant first.
SSAFloat operator+(const SSAFloat &a, float b);
SSAFloat operator-(const SSAFloat &a, float b);
SSAFloat operator*(const SSAFloat &a, float b);
SSAFloat operator/(const SSAFloat &a, float b);

View file

@ -1,91 +0,0 @@
/*
** SSA float32 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_float_ptr.h"
#include "ssa_scope.h"
// Creates an empty wrapper that does not refer to any LLVM value yet.
SSAFloatPtr::SSAFloatPtr()
    : v(nullptr)
{
}

// Wraps an existing LLVM pointer value; no instruction is emitted.
SSAFloatPtr::SSAFloatPtr(llvm::Value *v)
    : v(v)
{
}

// Returns the LLVM float-pointer type for the current SSAScope context.
llvm::Type *SSAFloatPtr::llvm_type()
{
    llvm::Type *float_ptr_type = llvm::Type::getFloatPtrTy(SSAScope::context());
    return float_ptr_type;
}
// Pointer arithmetic: emits a GEP on this pointer with the given index and
// returns the resulting pointer. No memory is accessed.
SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const
{
    llvm::Value *element_ptr = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
    return SSAFloatPtr::from_llvm(element_ptr);
}
// Emits a non-volatile load of one float through this pointer.
// When constantScopeDomain is true, the load is tagged with alias-scope
// metadata taken from SSAScope::constant_scope_list().
SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const
{
    auto load_inst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
    if (constantScopeDomain)
    {
        load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
    }
    return SSAFloat::from_llvm(load_inst);
}
// Loads four consecutive floats as a <4 x float> vector. This pointer is
// bitcast to a vector pointer and read with a non-volatile load declared
// with 16-byte alignment. When constantScopeDomain is true, the load is
// tagged with alias-scope metadata from SSAScope::constant_scope_list().
SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const
{
    llvm::Type *float_type = llvm::Type::getFloatTy(SSAScope::context());
    llvm::PointerType *vec4_ptr_type = llvm::VectorType::get(float_type, 4)->getPointerTo();
    llvm::Value *vec4_ptr = SSAScope::builder().CreateBitCast(v, vec4_ptr_type, SSAScope::hint());
    auto load_inst = SSAScope::builder().CreateAlignedLoad(vec4_ptr, 16, false, SSAScope::hint());
    if (constantScopeDomain)
    {
        load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
    }
    return SSAVec4f::from_llvm(load_inst);
}

// Same as load_vec4f, except the load is declared with 1-byte alignment so
// no alignment of the address is assumed.
SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const
{
    llvm::Type *float_type = llvm::Type::getFloatTy(SSAScope::context());
    llvm::PointerType *vec4_ptr_type = llvm::VectorType::get(float_type, 4)->getPointerTo();
    llvm::Value *vec4_ptr = SSAScope::builder().CreateBitCast(v, vec4_ptr_type, SSAScope::hint());
    auto load_inst = SSAScope::builder().CreateAlignedLoad(vec4_ptr, 1, false, SSAScope::hint());
    if (constantScopeDomain)
    {
        load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
    }
    return SSAVec4f::from_llvm(load_inst);
}
void SSAFloatPtr::store(const SSAFloat &new_value)
{
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value)
{
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value)
{
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -1,49 +0,0 @@
/*
** SSA float32 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_vec4f.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Thin wrapper around an llvm::Value* that represents a pointer to float in
// the code being generated. Loads, stores and indexing emit LLVM
// instructions through SSAScope's builder.
class SSAFloatPtr
{
public:
// Leaves `v` null.
SSAFloatPtr();
// Wraps an existing LLVM pointer value without emitting any instruction.
explicit SSAFloatPtr(llvm::Value *v);
// Named form of the wrapping constructor.
static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); }
// LLVM float-pointer type in the current SSAScope context.
static llvm::Type *llvm_type();
// Pointer offset (GEP); returns a pointer, does not read memory.
SSAFloatPtr operator[](SSAInt index) const;
// Convenience overload: wraps the C++ int in an SSAInt constant.
SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; }
// Loads. constantScopeDomain = true tags the load with alias-scope metadata.
// load_vec4f declares 16-byte alignment; load_unaligned_vec4f declares 1-byte.
SSAFloat load(bool constantScopeDomain) const;
SSAVec4f load_vec4f(bool constantScopeDomain) const;
SSAVec4f load_unaligned_vec4f(bool constantScopeDomain) const;
// Stores; each is tagged with noalias metadata.
// store_vec4f declares 16-byte alignment; store_unaligned_vec4f declares 4-byte.
void store(const SSAFloat &new_value);
void store_vec4f(const SSAVec4f &new_value);
void store_unaligned_vec4f(const SSAVec4f &new_value);
// The wrapped LLVM pointer value.
llvm::Value *v;
};

View file

@ -1,62 +0,0 @@
/*
** LLVM for loop branching
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_for_block.h"
#include "ssa_scope.h"
// Starts a loop: creates the "forbegin", "forloop" and "forend" basic blocks
// in the function currently being built, branches from the current block to
// "forbegin", and moves the insert point there so the caller can emit the
// loop condition.
SSAForBlock::SSAForBlock()
    : if_basic_block(nullptr), loop_basic_block(nullptr), end_basic_block(nullptr)
{
    llvm::LLVMContext &ctx = SSAScope::context();
    llvm::Function *owner = SSAScope::builder().GetInsertBlock()->getParent();

    if_basic_block = llvm::BasicBlock::Create(ctx, "forbegin", owner);
    loop_basic_block = llvm::BasicBlock::Create(ctx, "forloop", owner);
    end_basic_block = llvm::BasicBlock::Create(ctx, "forend", owner);

    SSAScope::builder().CreateBr(if_basic_block);
    SSAScope::builder().SetInsertPoint(if_basic_block);
}
// Ends the condition part of the loop: emits a conditional branch that
// enters the loop body when true_condition holds and jumps to the end block
// otherwise, then moves the insert point into the loop body.
//
// On LLVM 3.9 or newer, a positive unroll_count attaches loop metadata
// ("llvm.loop.unroll.enable" plus "llvm.loop.unroll.count") to that branch.
// On older LLVM versions unroll_count is ignored.
void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
{
    auto cond_branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block);
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
    if (unroll_count > 0)
    {
        llvm::LLVMContext &ctx = SSAScope::context();
        auto enable_hint = llvm::MDNode::get(ctx, {
            llvm::MDString::get(ctx, "llvm.loop.unroll.enable")
        });
        auto count_hint = llvm::MDNode::get(ctx, {
            llvm::MDString::get(ctx, "llvm.loop.unroll.count"),
            llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(ctx, llvm::APInt(32, unroll_count)))
        });
        auto loop_hints = llvm::MDNode::getDistinct(ctx, { enable_hint, count_hint });
        cond_branch->setMetadata(llvm::LLVMContext::MD_loop, loop_hints);
    }
#endif
    SSAScope::builder().SetInsertPoint(loop_basic_block);
}
// Closes the loop body: emits the back-edge branch to the condition block
// and moves the insert point to the block that follows the loop.
void SSAForBlock::end_block()
{
    auto &builder = SSAScope::builder();
    builder.CreateBr(if_basic_block);
    builder.SetInsertPoint(end_basic_block);
}

View file

@ -1,38 +0,0 @@
/*
** LLVM for loop branching
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_bool.h"
// Helper for emitting a loop in the generated code.
//
// Usage order: construct (insert point moves to the condition block), emit
// the condition, call loop_block(), emit the body, call end_block().
class SSAForBlock
{
public:
// Creates the three basic blocks and branches into the condition block.
SSAForBlock();
// Emits the conditional branch into the body; unroll_count > 0 requests
// loop unrolling metadata when the LLVM version supports it.
void loop_block(SSABool true_condition, int unroll_count = 8);
// Emits the back edge and continues after the loop.
void end_block();
private:
llvm::BasicBlock *if_basic_block;   // "forbegin": loop condition
llvm::BasicBlock *loop_basic_block; // "forloop": loop body
llvm::BasicBlock *end_basic_block;  // "forend": code after the loop
};

View file

@ -1,76 +0,0 @@
/*
** LLVM function
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_function.h"
#include "ssa_int.h"
#include "ssa_scope.h"
#include "ssa_value.h"
// Prepares a function description with the given name. The return type
// defaults to void and the parameter list starts empty; no LLVM function is
// created until create_public() or create_private() is called.
SSAFunction::SSAFunction(const std::string name)
    : func(nullptr),
      name(name),
      return_type(llvm::Type::getVoidTy(SSAScope::context()))
{
}

// Overrides the return type used when the function is created.
void SSAFunction::set_return_type(llvm::Type *type)
{
    this->return_type = type;
}

// Appends one parameter type; parameters keep the order they were added in.
void SSAFunction::add_parameter(llvm::Type *type)
{
    this->parameters.push_back(type);
}
void SSAFunction::create_public()
{
func = SSAScope::module()->getFunction(name.c_str());
if (func == 0)
{
llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false);
func = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name.c_str(), SSAScope::module());
//func->setCallingConv(llvm::CallingConv::X86_StdCall);
}
llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func);
SSAScope::builder().SetInsertPoint(entry);
}
void SSAFunction::create_private()
{
func = SSAScope::module()->getFunction(name.c_str());
if (func == 0)
{
llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false);
func = llvm::Function::Create(function_type, llvm::Function::PrivateLinkage, name.c_str(), SSAScope::module());
func->addFnAttr(llvm::Attribute::AlwaysInline);
}
llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func);
SSAScope::builder().SetInsertPoint(entry);
}
// Returns the index-th formal argument (0-based) of the created function,
// wrapped as an SSAValue.
//
// The function must already have been created via create_public() or
// create_private(), and index must be smaller than the number of parameters;
// no bounds checking is performed.
SSAValue SSAFunction::parameter(int index)
{
    llvm::Function::arg_iterator arg_it = func->arg_begin();
    for (int i = 0; i < index; i++)
        ++arg_it;
    // Dereference and take the address instead of casting the iterator:
    // this is valid whether arg_iterator is an ilist iterator or a raw
    // llvm::Argument pointer, so it does not depend on the LLVM version
    // providing an iterator-to-pointer conversion.
    return SSAValue::from_llvm(&*arg_it);
}

View file

@ -1,51 +0,0 @@
/*
** LLVM function
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include <string>
#include <vector>
namespace llvm { class Value; }
namespace llvm { class Type; }
namespace llvm { class Function; }
class SSAInt;
class SSAValue;
// Describes and creates one LLVM function in the current SSAScope module.
//
// Usage order: construct with a name, optionally call set_return_type() and
// add_parameter(), then call create_public() or create_private(). After
// that, parameter() gives access to the function's arguments.
class SSAFunction
{
public:
// Return type defaults to void; no LLVM function is created yet.
SSAFunction(const std::string name);
void set_return_type(llvm::Type *type);
// Appends a parameter type; order of calls defines parameter order.
void add_parameter(llvm::Type *type);
// Creates (or reuses by name) the function with external linkage and
// starts emitting into a new "entry" block.
void create_public();
// Same, but with private linkage and the always-inline attribute.
void create_private();
// Returns the index-th argument (0-based); no bounds checking.
SSAValue parameter(int index);
// The LLVM function; null until one of the create_* methods has run.
llvm::Function *func;
private:
std::string name;
llvm::Type *return_type;
std::vector<llvm::Type *> parameters;
};

View file

@ -1,58 +0,0 @@
/*
** LLVM if statement branching
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_if_block.h"
#include "ssa_scope.h"
// Creates an if-helper with no basic blocks yet; if_block() creates them.
SSAIfBlock::SSAIfBlock()
    : if_basic_block(nullptr), else_basic_block(nullptr), end_basic_block(nullptr)
{
}

// Opens the "then" branch: creates the "if" and "else" blocks in the current
// function, emits a conditional branch on true_condition, and moves the
// insert point into the "if" block.
//
// Until else_block() is called, the "else" block also serves as the end
// block, so end_block() without else_block() simply falls through to it.
void SSAIfBlock::if_block(SSABool true_condition)
{
    llvm::LLVMContext &ctx = SSAScope::context();
    llvm::Function *owner = SSAScope::builder().GetInsertBlock()->getParent();

    if_basic_block = llvm::BasicBlock::Create(ctx, "if", owner);
    else_basic_block = llvm::BasicBlock::Create(ctx, "else", owner);
    end_basic_block = else_basic_block;

    SSAScope::builder().CreateCondBr(true_condition.v, if_basic_block, else_basic_block);
    SSAScope::builder().SetInsertPoint(if_basic_block);
}
void SSAIfBlock::else_block()
{
end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "end", SSAScope::builder().GetInsertBlock()->getParent());
SSAScope::builder().CreateBr(end_basic_block);
SSAScope::builder().SetInsertPoint(else_basic_block);
}
// Closes the current branch by branching to the end block, then continues
// emitting code there.
void SSAIfBlock::end_block()
{
    auto &builder = SSAScope::builder();
    builder.CreateBr(end_basic_block);
    builder.SetInsertPoint(end_basic_block);
}

// Closes the current branch with a `ret void` instead of a branch, then
// continues emitting code in the end block.
void SSAIfBlock::end_retvoid()
{
    auto &builder = SSAScope::builder();
    builder.CreateRetVoid();
    builder.SetInsertPoint(end_basic_block);
}

View file

@ -1,67 +0,0 @@
/*
** LLVM if statement branching
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_bool.h"
#include "ssa_phi.h"
// Helper for emitting if / if-else control flow in the generated code.
//
// Usage: if_block(cond); ...then code...; [else_block(); ...else code...;]
// followed by end_block() or end_retvoid().
class SSAIfBlock
{
public:
// Does not create any basic blocks; if_block() does.
SSAIfBlock();
// Emits the conditional branch and starts the "then" branch.
void if_block(SSABool true_condition);
// Ends the "then" branch and starts the "else" branch.
void else_block();
// Ends the current branch with a jump to the end block.
void end_block();
// Ends the current branch with `ret void`; emission continues in the end block.
void end_retvoid();
private:
llvm::BasicBlock *if_basic_block;
llvm::BasicBlock *else_basic_block;
// Equals else_basic_block until else_block() creates a separate "end" block.
llvm::BasicBlock *end_basic_block;
};
template<typename T>
T ssa_min(T a, T b)
{
SSAPhi<T> phi;
SSAIfBlock if_block;
if_block.if_block(a <= b);
phi.add_incoming(a);
if_block.else_block();
phi.add_incoming(b);
if_block.end_block();
return phi.create();
}
template<typename T>
T ssa_max(T a, T b)
{
SSAPhi<T> phi;
SSAIfBlock if_block;
if_block.if_block(a >= b);
phi.add_incoming(a);
if_block.else_block();
phi.add_incoming(b);
if_block.end_block();
return phi.create();
}

View file

@ -1,208 +0,0 @@
/*
** SSA int32
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_int.h"
#include "ssa_float.h"
#include "ssa_ubyte.h"
#include "ssa_bool.h"
#include "ssa_scope.h"
// Creates an empty wrapper that does not refer to any LLVM value yet.
SSAInt::SSAInt()
    : v(nullptr)
{
}

// Creates a 32-bit integer constant; the value is passed to APInt as signed.
SSAInt::SSAInt(int constant)
    : v(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)))
{
}

// Converts a float to a 32-bit integer. When `uint` is true an unsigned
// conversion (FPToUI) is emitted, otherwise a signed one (FPToSI).
SSAInt::SSAInt(SSAFloat f, bool uint)
    : v(nullptr)
{
    llvm::Type *int32_type = llvm::Type::getInt32Ty(SSAScope::context());
    v = uint
        ? SSAScope::builder().CreateFPToUI(f.v, int32_type, SSAScope::hint())
        : SSAScope::builder().CreateFPToSI(f.v, int32_type, SSAScope::hint());
}

// Wraps an existing LLVM value; no instruction is emitted.
SSAInt::SSAInt(llvm::Value *v)
    : v(v)
{
}

// Returns the LLVM 32-bit integer type for the current SSAScope context.
llvm::Type *SSAInt::llvm_type()
{
    llvm::Type *int32_type = llvm::Type::getInt32Ty(SSAScope::context());
    return int32_type;
}
// Returns the smaller of a and b: emits `a < b` followed by a select that
// picks a when the comparison is true and b otherwise.
SSAInt SSAInt::MIN(SSAInt a, SSAInt b)
{
    SSABool a_is_less = a < b;
    llvm::Value *picked = SSAScope::builder().CreateSelect(a_is_less.v, a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(picked);
}

// Returns the larger of a and b: emits `a > b` followed by a select that
// picks a when the comparison is true and b otherwise.
SSAInt SSAInt::MAX(SSAInt a, SSAInt b)
{
    SSABool a_is_greater = a > b;
    llvm::Value *picked = SSAScope::builder().CreateSelect(a_is_greater.v, a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(picked);
}

// Clamps a to the range [b, c]: first caps a at the upper bound c, then
// raises the result to at least the lower bound b.
SSAInt SSAInt::clamp(SSAInt a, SSAInt b, SSAInt c)
{
    SSAInt capped = SSAInt::MIN(a, c);
    return SSAInt::MAX(capped, b);
}
// Integer addition with explicit control over the no-unsigned-wrap and
// no-signed-wrap flags passed to the emitted add instruction.
SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap)
{
    llvm::Value *sum = SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap);
    return SSAInt::from_llvm(sum);
}

// Arithmetic shift right by a constant number of bits.
SSAInt SSAInt::ashr(int bits)
{
    llvm::Value *shifted = SSAScope::builder().CreateAShr(v, bits, SSAScope::hint());
    return SSAInt::from_llvm(shifted);
}

// Truncates this integer to SSAUByte's LLVM type.
SSAUByte SSAInt::trunc_ubyte()
{
    llvm::Value *narrowed = SSAScope::builder().CreateTrunc(v, SSAUByte::llvm_type(), SSAScope::hint());
    return SSAUByte::from_llvm(narrowed);
}
// Arithmetic on two SSAInt values; each operator emits the matching LLVM
// integer instruction. Division and remainder use the signed forms.

// a + b.
SSAInt operator+(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(sum);
}

// a - b.
SSAInt operator-(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(difference);
}

// a * b.
SSAInt operator*(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(product);
}

// a / b (signed division).
SSAInt operator/(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *quotient = SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(quotient);
}

// a % b (signed remainder).
SSAInt operator%(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *remainder = SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(remainder);
}
// Mixed-mode arithmetic: a plain C++ int on one side is first turned into an
// SSAInt constant, then the SSAInt/SSAInt operator is applied.

// int (op) SSAInt
SSAInt operator+(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs + b;
}

SSAInt operator-(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs - b;
}

SSAInt operator*(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs * b;
}

SSAInt operator/(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs / b;
}

SSAInt operator%(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs % b;
}

// SSAInt (op) int
SSAInt operator+(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a + rhs;
}

SSAInt operator-(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a - rhs;
}

SSAInt operator*(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a * rhs;
}

SSAInt operator/(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a / rhs;
}

SSAInt operator%(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a % rhs;
}
// Shift operators. Note that >> emits a *logical* shift right (LShr);
// SSAInt::ashr() is the arithmetic variant.

// a << bits, constant shift amount.
SSAInt operator<<(const SSAInt &a, int bits)
{
    llvm::Value *shifted = SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint());
    return SSAInt::from_llvm(shifted);
}

// a >> bits (logical), constant shift amount.
SSAInt operator>>(const SSAInt &a, int bits)
{
    llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint());
    return SSAInt::from_llvm(shifted);
}

// a << bits, shift amount computed in the generated code.
SSAInt operator<<(const SSAInt &a, const SSAInt &bits)
{
    llvm::Value *shifted = SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint());
    return SSAInt::from_llvm(shifted);
}

// a >> bits (logical), shift amount computed in the generated code.
SSAInt operator>>(const SSAInt &a, const SSAInt &bits)
{
    llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint());
    return SSAInt::from_llvm(shifted);
}
// Bitwise operators. The overloads taking a plain int pass the constant
// straight to the IRBuilder call.

// a & b, constant mask.
SSAInt operator&(const SSAInt &a, int b)
{
    llvm::Value *masked = SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint());
    return SSAInt::from_llvm(masked);
}

// a & b.
SSAInt operator&(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *masked = SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(masked);
}

// a | b, constant bits.
SSAInt operator|(const SSAInt &a, int b)
{
    llvm::Value *combined = SSAScope::builder().CreateOr(a.v, b, SSAScope::hint());
    return SSAInt::from_llvm(combined);
}

// a | b.
SSAInt operator|(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *combined = SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint());
    return SSAInt::from_llvm(combined);
}

// ~a (bitwise complement via IRBuilder::CreateNot).
SSAInt operator~(const SSAInt &a)
{
    llvm::Value *inverted = SSAScope::builder().CreateNot(a.v, SSAScope::hint());
    return SSAInt::from_llvm(inverted);
}

View file

@ -1,80 +0,0 @@
/*
** SSA int32
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAFloat;
class SSAUByte;
// Thin value-type wrapper around an llvm::Value* representing an integer
// (the file banner calls it "SSA int32"). Arithmetic on SSAInt objects is
// provided by the free operator overloads declared after this class.
class SSAInt
{
public:
// Default-constructed wrapper.
SSAInt();
// Wraps a C++ int constant; explicit, so plain ints never convert silently.
explicit SSAInt(int constant);
// Conversion from an SSAFloat.
// NOTE(review): `uint` presumably selects an unsigned conversion - confirm in ssa_int.cpp.
SSAInt(SSAFloat f, bool uint);
// Wraps an existing LLVM value.
explicit SSAInt(llvm::Value *v);
// Named alternative to the explicit llvm::Value* constructor.
static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); }
// LLVM type corresponding to this wrapper.
static llvm::Type *llvm_type();
// NOTE(review): MIN/MAX/clamp are defined outside this header; the argument
// order of clamp (value, low, high?) should be confirmed in ssa_int.cpp.
static SSAInt MIN(SSAInt a, SSAInt b);
static SSAInt MAX(SSAInt a, SSAInt b);
static SSAInt clamp(SSAInt a, SSAInt b, SSAInt c);
// Addition with explicit no-unsigned-wrap / no-signed-wrap flags.
SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap);
// NOTE(review): presumably an arithmetic shift right by `bits` - confirm in ssa_int.cpp.
SSAInt ashr(int bits);
// NOTE(review): presumably truncates to an 8-bit SSAUByte - confirm in ssa_int.cpp.
SSAUByte trunc_ubyte();
// The wrapped LLVM value (public; read directly by the operator overloads).
llvm::Value *v;
};
// Arithmetic on two SSA integers.
SSAInt operator+(const SSAInt &a, const SSAInt &b);
SSAInt operator-(const SSAInt &a, const SSAInt &b);
SSAInt operator*(const SSAInt &a, const SSAInt &b);
SSAInt operator/(const SSAInt &a, const SSAInt &b);
SSAInt operator%(const SSAInt &a, const SSAInt &b);
// Arithmetic with a C++ int constant on the left (the constant is wrapped in an SSAInt).
SSAInt operator+(int a, const SSAInt &b);
SSAInt operator-(int a, const SSAInt &b);
SSAInt operator*(int a, const SSAInt &b);
SSAInt operator/(int a, const SSAInt &b);
SSAInt operator%(int a, const SSAInt &b);
// Arithmetic with a C++ int constant on the right (the constant is wrapped in an SSAInt).
SSAInt operator+(const SSAInt &a, int b);
SSAInt operator-(const SSAInt &a, int b);
SSAInt operator*(const SSAInt &a, int b);
SSAInt operator/(const SSAInt &a, int b);
SSAInt operator%(const SSAInt &a, int b);
// Shifts by a constant or SSA bit count; >> is implemented with LLVM's logical shift right.
SSAInt operator<<(const SSAInt &a, int bits);
SSAInt operator>>(const SSAInt &a, int bits);
SSAInt operator<<(const SSAInt &a, const SSAInt &bits);
SSAInt operator>>(const SSAInt &a, const SSAInt &bits);
// Bitwise and / or / complement.
SSAInt operator&(const SSAInt &a, int b);
SSAInt operator&(const SSAInt &a, const SSAInt &b);
SSAInt operator|(const SSAInt &a, int b);
SSAInt operator|(const SSAInt &a, const SSAInt &b);
SSAInt operator~(const SSAInt &a);

View file

@ -1,111 +0,0 @@
/*
** SSA int32 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_int_ptr.h"
#include "ssa_scope.h"
#include "ssa_bool.h"
// Default-construct a wrapper with no LLVM value attached.
SSAIntPtr::SSAIntPtr()
    : v(nullptr)
{
}

// Wrap an existing LLVM pointer value.
SSAIntPtr::SSAIntPtr(llvm::Value *v)
    : v(v)
{
}
// LLVM type of this wrapper: pointer to a 32-bit integer in the active
// scope's context.
llvm::Type *SSAIntPtr::llvm_type()
{
    auto &ctx = SSAScope::context();
    return llvm::Type::getInt32PtrTy(ctx);
}
// Pointer arithmetic: returns a pointer to element `index` (emits a GEP); it
// does not load the element.
SSAIntPtr SSAIntPtr::operator[](SSAInt index) const
{
    auto *element = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
    return SSAIntPtr::from_llvm(element);
}
// Load the pointed-to integer (non-volatile). When constantScopeDomain is
// true the load is tagged with the scope's alias-scope metadata list.
SSAInt SSAIntPtr::load(bool constantScopeDomain) const
{
    auto *inst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
    if (constantScopeDomain)
    {
        inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
    }
    return SSAInt::from_llvm(inst);
}
// Load four consecutive 32-bit integers as one <4 x i32> vector, telling LLVM
// the address is 16-byte aligned. When constantScopeDomain is true the load
// is tagged with the scope's alias-scope metadata list.
SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const
{
    auto *int32Type = llvm::Type::getInt32Ty(SSAScope::context());
    llvm::PointerType *vecPtrType = llvm::VectorType::get(int32Type, 4)->getPointerTo();
    auto *vecPtr = SSAScope::builder().CreateBitCast(v, vecPtrType, SSAScope::hint());
    auto *inst = SSAScope::builder().CreateAlignedLoad(vecPtr, 16, false, SSAScope::hint());
    if (constantScopeDomain)
    {
        inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
    }
    return SSAVec4i::from_llvm(inst);
}
// Load four consecutive 32-bit integers as one <4 x i32> vector with a
// declared alignment of 1 (no alignment assumption). When constantScopeDomain
// is true the load is tagged with the scope's alias-scope metadata list.
SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const
{
    auto *int32Type = llvm::Type::getInt32Ty(SSAScope::context());
    llvm::PointerType *vecPtrType = llvm::VectorType::get(int32Type, 4)->getPointerTo();
    auto *vecPtr = SSAScope::builder().CreateBitCast(v, vecPtrType, SSAScope::hint());
    auto *inst = SSAScope::builder().CreateAlignedLoad(vecPtr, 1, false, SSAScope::hint());
    if (constantScopeDomain)
    {
        inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
    }
    return SSAVec4i::from_llvm(inst);
}
// Store one integer through this pointer (non-volatile). The store is always
// tagged noalias against the scope's constant-scope list.
void SSAIntPtr::store(const SSAInt &new_value)
{
    auto *storeInst = SSAScope::builder().CreateStore(new_value.v, v, false);
    storeInst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAIntPtr::store_vec4i(const SSAVec4i &new_value)
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value)
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAIntPtr::store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4])
{
// Create mask vector
std::vector<llvm::Constant*> maskconstants;
maskconstants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false)));
llvm::Value *maskValue = llvm::ConstantVector::get(maskconstants);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
for (int i = 0; i < 4; i++)
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint());
#else
for (int i = 0; i < 4; i++)
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint());
#endif
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1, maskValue);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -1,52 +0,0 @@
/*
** SSA int32 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_vec4i.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSABool;
// Wrapper around an llvm::Value* that points at 32-bit integers. Provides
// indexing (pointer arithmetic) plus scalar and 4-wide vector loads/stores.
class SSAIntPtr
{
public:
// Default-constructed wrapper with no value attached (v == 0).
SSAIntPtr();
// Wraps an existing LLVM pointer value.
explicit SSAIntPtr(llvm::Value *v);
// Named alternative to the explicit llvm::Value* constructor.
static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); }
// Returns the LLVM int32-pointer type.
static llvm::Type *llvm_type();
// Returns a pointer to element `index` (GEP); does not load.
SSAIntPtr operator[](SSAInt index) const;
SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; }
// Loads: constantScopeDomain == true tags the load with the scope's
// alias-scope metadata. load_vec4i declares 16-byte alignment,
// load_unaligned_vec4i declares alignment 1.
SSAInt load(bool constantScopeDomain) const;
SSAVec4i load_vec4i(bool constantScopeDomain) const;
SSAVec4i load_unaligned_vec4i(bool constantScopeDomain) const;
// Stores: always tagged noalias against the scope's constant-scope list.
// store_vec4i declares 16-byte alignment, store_unaligned_vec4i declares 4,
// store_masked_vec4i writes only lanes whose mask entry is set.
void store(const SSAInt &new_value);
void store_vec4i(const SSAVec4i &new_value);
void store_unaligned_vec4i(const SSAVec4i &new_value);
void store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4]);
// The wrapped LLVM pointer value.
llvm::Value *v;
};

View file

@ -1,54 +0,0 @@
/*
** SSA phi node
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_scope.h"
class SSAIfBlock;
template <typename SSAVariable>
class SSAPhi
{
public:
void add_incoming(SSAVariable var)
{
incoming.push_back(Incoming(var.v, SSAScope::builder().GetInsertBlock()));
}
SSAVariable create()
{
llvm::PHINode *phi_node = SSAScope::builder().CreatePHI(SSAVariable::llvm_type(), (unsigned int)incoming.size(), SSAScope::hint());
for (size_t i = 0; i < incoming.size(); i++)
phi_node->addIncoming(incoming[i].v, incoming[i].bb);
return SSAVariable::from_llvm(phi_node);
}
private:
struct Incoming
{
Incoming(llvm::Value *v, llvm::BasicBlock *bb) : v(v), bb(bb) { }
llvm::Value *v;
llvm::BasicBlock *bb;
};
std::vector<Incoming> incoming;
};

View file

@ -1,96 +0,0 @@
/*
** SSA scope data
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_scope.h"
#include "ssa_int.h"
// Install this scope as the process-wide active scope and create the metadata
// nodes (domain -> distinct scope -> scope list) used to tag loads and stores.
SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder)
    : _context(context), _module(module), _builder(builder)
{
    // Must happen first: SSAScope::context() below reads through `instance`.
    instance = this;

    auto &ctx = SSAScope::context();
    _constant_scope_domain = llvm::MDNode::get(ctx, { llvm::MDString::get(ctx, "ConstantScopeDomain") });
    _constant_scope = llvm::MDNode::getDistinct(ctx, { _constant_scope_domain });
    _constant_scope_list = llvm::MDNode::get(ctx, { _constant_scope });
}
// Clear the active-scope pointer. The previous scope is not restored, so
// scopes do not nest.
SSAScope::~SSAScope()
{
    instance = nullptr;
}
// Accessors for the objects handed to the active scope's constructor. All of
// them dereference `instance`, so an SSAScope must currently exist.

llvm::LLVMContext &SSAScope::context()
{
    SSAScope *scope = instance;
    return *scope->_context;
}

llvm::Module *SSAScope::module()
{
    SSAScope *scope = instance;
    return scope->_module;
}

llvm::IRBuilder<> &SSAScope::builder()
{
    SSAScope *scope = instance;
    return *scope->_builder;
}
// Return the declaration of intrinsic `id` (specialised on parameter_types)
// in the active module, creating an external declaration if the module does
// not have one yet.
llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types)
{
    const std::string name = llvm::Intrinsic::getName(id, parameter_types);

    if (llvm::Function *existing = module()->getFunction(name))
        return existing;

    return llvm::Function::Create(llvm::Intrinsic::getType(context(), id, parameter_types), llvm::Function::ExternalLinkage, name, module());
}
// Allocate stack space for a single element of `type`.
llvm::Value *SSAScope::alloc_stack(llvm::Type *type)
{
    const SSAInt one(1);
    return alloc_stack(type, one);
}

// Allocate stack space for `size` elements of `type`. The alloca is inserted
// at the very start of the current function's entry block, regardless of
// where the main builder is positioned.
llvm::Value *SSAScope::alloc_stack(llvm::Type *type, SSAInt size)
{
    // Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
    llvm::Function *function = SSAScope::builder().GetInsertBlock()->getParent();
    llvm::BasicBlock &entry = function->getEntryBlock();
    llvm::IRBuilder<> alloca_builder(&entry, entry.begin());
    return alloca_builder.CreateAlloca(type, size.v, hint());
}
llvm::MDNode *SSAScope::constant_scope_list()
{
return instance->_constant_scope_list;
}
const std::string &SSAScope::hint()
{
return instance->_hint;
}
// Set the name used for subsequently created IR values. An empty hint is
// replaced by the default name "tmp".
void SSAScope::set_hint(const std::string &new_hint)
{
    instance->_hint = new_hint.empty() ? std::string("tmp") : new_hint;
}
// The currently active scope; null while no SSAScope object exists.
SSAScope *SSAScope::instance = nullptr;

View file

@ -1,64 +0,0 @@
/*
** SSA scope data
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
class SSAInt;
// Process-wide access point for the LLVM context, module and IR builder used
// by the SSA wrapper classes. Constructing an SSAScope makes it the active
// scope (stored in the static `instance`); destroying it clears the pointer.
// The static accessors dereference `instance`, so they may only be called
// while an SSAScope object is alive.
class SSAScope
{
public:
// Stores the three pointers (not owned - the destructor does not delete them)
// and creates the alias-scope metadata nodes.
SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder);
~SSAScope();
// Objects supplied to the active scope's constructor.
static llvm::LLVMContext &context();
static llvm::Module *module();
static llvm::IRBuilder<> &builder();
// Looks up intrinsic `id` in the module, declaring it if missing.
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
// Creates an alloca at the top of the current function's entry block; the
// single-argument form allocates one element.
static llvm::Value *alloc_stack(llvm::Type *type);
static llvm::Value *alloc_stack(llvm::Type *type, SSAInt size);
// Metadata list used to tag loads (alias.scope) and stores (noalias).
static llvm::MDNode *constant_scope_list();
// Name given to newly created IR values; set_hint("") selects "tmp".
static const std::string &hint();
static void set_hint(const std::string &hint);
private:
// The active scope, or 0 when none exists.
static SSAScope *instance;
llvm::LLVMContext *_context;
llvm::Module *_module;
llvm::IRBuilder<> *_builder;
// Metadata chain: domain -> distinct scope -> list containing that scope.
llvm::MDNode *_constant_scope_domain;
llvm::MDNode *_constant_scope;
llvm::MDNode *_constant_scope_list;
// Current value-name hint.
std::string _hint;
};
// Scoped guard for the SSAScope value-name hint: remembers the hint in effect
// at construction and reinstates it on destruction. Because restoring goes
// through SSAScope::set_hint(), a remembered empty hint comes back as "tmp".
class SSAScopeHint
{
public:
    // Remember the current hint without changing it.
    SSAScopeHint()
        : old_hint(SSAScope::hint())
    {
    }

    // Remember the current hint, then switch to `hint`.
    SSAScopeHint(const std::string &hint)
        : old_hint(SSAScope::hint())
    {
        SSAScope::set_hint(hint);
    }

    // Reinstate the remembered hint.
    ~SSAScopeHint()
    {
        SSAScope::set_hint(old_hint);
    }

    // Switch to a different hint; the remembered one is unaffected.
    void set(const std::string &hint)
    {
        SSAScope::set_hint(hint);
    }

    // Reinstate the remembered hint early.
    void clear()
    {
        SSAScope::set_hint(old_hint);
    }

private:
    std::string old_hint;
};

View file

@ -1,174 +0,0 @@
/*
** SSA int16
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_short.h"
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_scope.h"
// Default-construct a wrapper with no LLVM value attached.
SSAShort::SSAShort()
    : v(nullptr)
{
}

// Wrap a 16-bit signed integer constant.
SSAShort::SSAShort(int constant)
    : v(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true)))
{
}

// Convert a float to a signed 16-bit integer (emits fptosi).
SSAShort::SSAShort(SSAFloat f)
    : v(SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt16Ty(SSAScope::context()), SSAScope::hint()))
{
}

// Wrap an existing LLVM value.
SSAShort::SSAShort(llvm::Value *v)
    : v(v)
{
}
// LLVM type of this wrapper: 16-bit integer in the active scope's context.
llvm::Type *SSAShort::llvm_type()
{
    auto &ctx = SSAScope::context();
    return llvm::Type::getInt16Ty(ctx);
}
// Widen to SSAInt by zero extension (the upper bits are filled with zeros,
// not with the sign bit).
SSAInt SSAShort::zext_int()
{
    auto *widened = SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint());
    return SSAInt::from_llvm(widened);
}
// Arithmetic on two 16-bit SSA values. Division and remainder are the signed
// variants (sdiv / srem).
SSAShort operator+(const SSAShort &a, const SSAShort &b)
{
    auto *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
    return SSAShort::from_llvm(sum);
}

SSAShort operator-(const SSAShort &a, const SSAShort &b)
{
    auto *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
    return SSAShort::from_llvm(difference);
}

SSAShort operator*(const SSAShort &a, const SSAShort &b)
{
    auto *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
    return SSAShort::from_llvm(product);
}

SSAShort operator/(const SSAShort &a, const SSAShort &b)
{
    auto *quotient = SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint());
    return SSAShort::from_llvm(quotient);
}

SSAShort operator%(const SSAShort &a, const SSAShort &b)
{
    auto *remainder = SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint());
    return SSAShort::from_llvm(remainder);
}
// Arithmetic with a plain C++ int on the left-hand side. Each overload wraps
// the constant in an SSAShort and forwards to the SSAShort/SSAShort operator.
SSAShort operator+(int a, const SSAShort &b)
{
    const SSAShort lhs(a);
    return lhs + b;
}

SSAShort operator-(int a, const SSAShort &b)
{
    const SSAShort lhs(a);
    return lhs - b;
}

SSAShort operator*(int a, const SSAShort &b)
{
    const SSAShort lhs(a);
    return lhs * b;
}

SSAShort operator/(int a, const SSAShort &b)
{
    const SSAShort lhs(a);
    return lhs / b;
}

SSAShort operator%(int a, const SSAShort &b)
{
    const SSAShort lhs(a);
    return lhs % b;
}
// Arithmetic with a plain C++ int on the right-hand side. Each overload wraps
// the constant in an SSAShort and forwards to the SSAShort/SSAShort operator.
SSAShort operator+(const SSAShort &a, int b)
{
    const SSAShort rhs(b);
    return a + rhs;
}

SSAShort operator-(const SSAShort &a, int b)
{
    const SSAShort rhs(b);
    return a - rhs;
}

SSAShort operator*(const SSAShort &a, int b)
{
    const SSAShort rhs(b);
    return a * rhs;
}

SSAShort operator/(const SSAShort &a, int b)
{
    const SSAShort rhs(b);
    return a / rhs;
}

SSAShort operator%(const SSAShort &a, int b)
{
    const SSAShort rhs(b);
    return a % rhs;
}
// Shift left by a constant bit count.
SSAShort operator<<(const SSAShort &a, int bits)
{
    auto *shifted = SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint());
    return SSAShort::from_llvm(shifted);
}

// Shift right by a constant bit count (CreateLShr, LLVM's logical shift).
SSAShort operator>>(const SSAShort &a, int bits)
{
    auto *shifted = SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint());
    return SSAShort::from_llvm(shifted);
}
// Shift left by a run-time SSA bit count.
//
// LLVM requires both operands of a shift to have the same integer type. `a`
// is a 16-bit value while `bits` is an SSAInt, so the shift amount is first
// converted to SSAShort's type (CreateZExtOrTrunc is a no-op if the types
// already match). Any shift amount that is valid for a 16-bit value survives
// the conversion unchanged.
SSAShort operator<<(const SSAShort &a, const SSAInt &bits)
{
    auto *amount = SSAScope::builder().CreateZExtOrTrunc(bits.v, SSAShort::llvm_type(), SSAScope::hint());
    auto *shifted = SSAScope::builder().CreateShl(a.v, amount, SSAScope::hint());
    return SSAShort::from_llvm(shifted);
}

// Shift right (CreateLShr, logical) by a run-time SSA bit count. The shift
// amount is converted to SSAShort's type for the same reason as in operator<<.
SSAShort operator>>(const SSAShort &a, const SSAInt &bits)
{
    auto *amount = SSAScope::builder().CreateZExtOrTrunc(bits.v, SSAShort::llvm_type(), SSAScope::hint());
    auto *shifted = SSAScope::builder().CreateLShr(a.v, amount, SSAScope::hint());
    return SSAShort::from_llvm(shifted);
}
// Bitwise AND with a constant mask.
SSAShort operator&(const SSAShort &a, int b)
{
    auto *masked = SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint());
    return SSAShort::from_llvm(masked);
}

// Bitwise AND of two 16-bit SSA values.
SSAShort operator&(const SSAShort &a, const SSAShort &b)
{
    auto *masked = SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint());
    return SSAShort::from_llvm(masked);
}

// Bitwise OR with a constant.
SSAShort operator|(const SSAShort &a, int b)
{
    auto *combined = SSAScope::builder().CreateOr(a.v, b, SSAScope::hint());
    return SSAShort::from_llvm(combined);
}

// Bitwise OR of two 16-bit SSA values.
SSAShort operator|(const SSAShort &a, const SSAShort &b)
{
    auto *combined = SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint());
    return SSAShort::from_llvm(combined);
}

View file

@ -1,72 +0,0 @@
/*
** SSA int16
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAFloat;
class SSAInt;
// Thin value-type wrapper around an llvm::Value* holding a 16-bit integer.
// Arithmetic is provided by the free operator overloads declared after this
// class.
class SSAShort
{
public:
// Default-constructed wrapper with no value attached (v == 0).
SSAShort();
// Wraps a 16-bit signed constant; explicit, so ints never convert silently.
explicit SSAShort(int constant);
// Float -> signed 16-bit conversion (fptosi). Not explicit, so an SSAFloat
// converts implicitly.
SSAShort(SSAFloat f);
// Wraps an existing LLVM value.
explicit SSAShort(llvm::Value *v);
// Named alternative to the explicit llvm::Value* constructor.
static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); }
// Returns the LLVM 16-bit integer type.
static llvm::Type *llvm_type();
// Widens to SSAInt by zero extension.
SSAInt zext_int();
// The wrapped LLVM value.
llvm::Value *v;
};
// Arithmetic on two 16-bit SSA values; / and % are signed (sdiv / srem).
SSAShort operator+(const SSAShort &a, const SSAShort &b);
SSAShort operator-(const SSAShort &a, const SSAShort &b);
SSAShort operator*(const SSAShort &a, const SSAShort &b);
SSAShort operator/(const SSAShort &a, const SSAShort &b);
SSAShort operator%(const SSAShort &a, const SSAShort &b);
// Arithmetic with a C++ int constant on the left (wrapped in an SSAShort).
SSAShort operator+(int a, const SSAShort &b);
SSAShort operator-(int a, const SSAShort &b);
SSAShort operator*(int a, const SSAShort &b);
SSAShort operator/(int a, const SSAShort &b);
SSAShort operator%(int a, const SSAShort &b);
// Arithmetic with a C++ int constant on the right (wrapped in an SSAShort).
SSAShort operator+(const SSAShort &a, int b);
SSAShort operator-(const SSAShort &a, int b);
SSAShort operator*(const SSAShort &a, int b);
SSAShort operator/(const SSAShort &a, int b);
SSAShort operator%(const SSAShort &a, int b);
// Shifts by a constant or SSAInt bit count; >> is a logical shift right.
SSAShort operator<<(const SSAShort &a, int bits);
SSAShort operator>>(const SSAShort &a, int bits);
SSAShort operator<<(const SSAShort &a, const SSAInt &bits);
SSAShort operator>>(const SSAShort &a, const SSAInt &bits);
// Bitwise and / or.
SSAShort operator&(const SSAShort &a, int b);
SSAShort operator&(const SSAShort &a, const SSAShort &b);
SSAShort operator|(const SSAShort &a, int b);
SSAShort operator|(const SSAShort &a, const SSAShort &b);

View file

@ -1,48 +0,0 @@
/*
** LLVM stack variable
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_scope.h"
// A mutable stack slot holding one SSAVariable. Construction reserves the
// slot through SSAScope::alloc_stack(); load() and store() read and write it
// at the builder's current position.
template<typename SSAVariable>
class SSAStack
{
public:
    // Reserve a stack slot of the variable's LLVM type.
    SSAStack()
        : v(SSAScope::alloc_stack(SSAVariable::llvm_type()))
    {
    }

    // Read the slot's current value.
    SSAVariable load() const
    {
        auto *loaded = SSAScope::builder().CreateLoad(v, SSAScope::hint());
        return SSAVariable::from_llvm(loaded);
    }

    // Overwrite the slot with new_value.
    void store(const SSAVariable &new_value)
    {
        SSAScope::builder().CreateStore(new_value.v, v);
    }

    // Pointer to the stack slot.
    llvm::Value *v;
};

View file

@ -1,40 +0,0 @@
/*
** LLVM struct
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_struct_type.h"
#include "ssa_scope.h"
// Append one member type; members keep the order in which they are added.
void SSAStructType::add_parameter(llvm::Type *type)
{
    elements.emplace_back(type);
}
// Build the (non-packed) LLVM struct type from the members added so far.
llvm::Type *SSAStructType::llvm_type()
{
    const bool packed = false;
    return llvm::StructType::get(SSAScope::context(), elements, packed);
}
// Build the packed LLVM struct type from the members added so far.
llvm::Type *SSAStructType::llvm_type_packed()
{
    const bool packed = true;
    return llvm::StructType::get(SSAScope::context(), elements, packed);
}

View file

@ -1,38 +0,0 @@
/*
** LLVM struct
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include <vector>
namespace llvm { class Type; }
// Incrementally describes an LLVM struct type: add member types in order with
// add_parameter(), then obtain the struct type in normal or packed layout.
class SSAStructType
{
public:
// Appends one member type.
void add_parameter(llvm::Type *type);
// Struct type with the members added so far, non-packed.
llvm::Type *llvm_type();
// Same members, packed layout.
llvm::Type *llvm_type_packed();
private:
// Member types in declaration order.
std::vector<llvm::Type *> elements;
};

View file

@ -1,122 +0,0 @@
/*
** SSA uint8
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_ubyte.h"
#include "ssa_int.h"
#include "ssa_scope.h"
// Default-construct a wrapper with no LLVM value attached.
SSAUByte::SSAUByte()
    : v(nullptr)
{
}

// Wrap an 8-bit unsigned integer constant.
SSAUByte::SSAUByte(unsigned char constant)
    : v(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false)))
{
}

// Wrap an existing LLVM value.
SSAUByte::SSAUByte(llvm::Value *v)
    : v(v)
{
}
// LLVM type of this wrapper: 8-bit integer in the active scope's context.
llvm::Type *SSAUByte::llvm_type()
{
    auto &ctx = SSAScope::context();
    return llvm::Type::getInt8Ty(ctx);
}
// Widen to SSAInt by zero extension.
SSAInt SSAUByte::zext_int()
{
    auto *widened = SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint());
    return SSAInt::from_llvm(widened);
}
// Arithmetic on two 8-bit SSA values (add, sub, mul).
SSAUByte operator+(const SSAUByte &a, const SSAUByte &b)
{
    auto *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
    return SSAUByte::from_llvm(sum);
}

SSAUByte operator-(const SSAUByte &a, const SSAUByte &b)
{
    auto *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
    return SSAUByte::from_llvm(difference);
}

SSAUByte operator*(const SSAUByte &a, const SSAUByte &b)
{
    auto *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
    return SSAUByte::from_llvm(product);
}
/*
SSAUByte operator/(const SSAUByte &a, const SSAUByte &b)
{
return SSAScope::builder().CreateDiv(a.v, b.v);
}
*/
// Arithmetic with a plain unsigned char on the left-hand side. Each overload
// wraps the constant in an SSAUByte and forwards to the SSAUByte/SSAUByte
// operator.
SSAUByte operator+(unsigned char a, const SSAUByte &b)
{
    const SSAUByte lhs(a);
    return lhs + b;
}

SSAUByte operator-(unsigned char a, const SSAUByte &b)
{
    const SSAUByte lhs(a);
    return lhs - b;
}

SSAUByte operator*(unsigned char a, const SSAUByte &b)
{
    const SSAUByte lhs(a);
    return lhs * b;
}
/*
SSAUByte operator/(unsigned char a, const SSAUByte &b)
{
return SSAUByte(a) / b;
}
*/
// Arithmetic with a plain unsigned char on the right-hand side. Each overload
// wraps the constant in an SSAUByte and forwards to the SSAUByte/SSAUByte
// operator.
SSAUByte operator+(const SSAUByte &a, unsigned char b)
{
    const SSAUByte rhs(b);
    return a + rhs;
}

SSAUByte operator-(const SSAUByte &a, unsigned char b)
{
    const SSAUByte rhs(b);
    return a - rhs;
}

SSAUByte operator*(const SSAUByte &a, unsigned char b)
{
    const SSAUByte rhs(b);
    return a * rhs;
}
/*
SSAUByte operator/(const SSAUByte &a, unsigned char b)
{
return a / SSAUByte(b);
}
*/
// Shift left by a constant bit count. The result is named with the current
// scope hint, like every other arithmetic operator on the SSA wrappers.
SSAUByte operator<<(const SSAUByte &a, unsigned char bits)
{
    return SSAUByte::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint()));
}

// Shift right by a constant bit count (CreateLShr, LLVM's logical shift). The
// result is named with the current scope hint.
SSAUByte operator>>(const SSAUByte &a, unsigned char bits)
{
    return SSAUByte::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint()));
}

View file

@ -1,60 +0,0 @@
/*
** SSA uint8
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAInt;
// Thin value-type wrapper around an llvm::Value* holding an 8-bit integer.
// Arithmetic is provided by the free operator overloads declared after this
// class.
class SSAUByte
{
public:
// Default-constructed wrapper with no value attached (v == 0).
SSAUByte();
// Wraps an 8-bit unsigned constant.
explicit SSAUByte(unsigned char constant);
// Wraps an existing LLVM value.
explicit SSAUByte(llvm::Value *v);
// Named alternative to the explicit llvm::Value* constructor.
static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); }
// Returns the LLVM 8-bit integer type.
static llvm::Type *llvm_type();
// Widens to SSAInt by zero extension.
SSAInt zext_int();
// The wrapped LLVM value.
llvm::Value *v;
};
// Arithmetic on 8-bit SSA values. Only +, - and * are provided; the division
// overloads are disabled (commented out) here and in ssa_ubyte.cpp.
SSAUByte operator+(const SSAUByte &a, const SSAUByte &b);
SSAUByte operator-(const SSAUByte &a, const SSAUByte &b);
SSAUByte operator*(const SSAUByte &a, const SSAUByte &b);
//SSAUByte operator/(const SSAUByte &a, const SSAUByte &b);
// Constant on the left (wrapped in an SSAUByte).
SSAUByte operator+(unsigned char a, const SSAUByte &b);
SSAUByte operator-(unsigned char a, const SSAUByte &b);
SSAUByte operator*(unsigned char a, const SSAUByte &b);
//SSAUByte operator/(unsigned char a, const SSAUByte &b);
// Constant on the right (wrapped in an SSAUByte).
SSAUByte operator+(const SSAUByte &a, unsigned char b);
SSAUByte operator-(const SSAUByte &a, unsigned char b);
SSAUByte operator*(const SSAUByte &a, unsigned char b);
//SSAUByte operator/(const SSAUByte &a, unsigned char b);
// Shifts by a constant bit count; >> is a logical shift right.
SSAUByte operator<<(const SSAUByte &a, unsigned char bits);
SSAUByte operator>>(const SSAUByte &a, unsigned char bits);

View file

@ -1,173 +0,0 @@
/*
** SSA uint8 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_ubyte_ptr.h"
#include "ssa_scope.h"
#include "ssa_bool.h"
// Default construction: the wrapper holds no llvm::Value.
SSAUBytePtr::SSAUBytePtr()
	: v(nullptr)
{
}

// Wraps the given llvm::Value pointer unchanged.
SSAUBytePtr::SSAUBytePtr(llvm::Value *v)
	: v(v)
{
}
// Returns the i8 pointer type for the context supplied by SSAScope.
llvm::Type *SSAUBytePtr::llvm_type()
{
	auto &&context = SSAScope::context();
	return llvm::Type::getInt8PtrTy(context);
}
// Emits a GEP from this pointer with the given index and returns the
// resulting pointer wrapped as a new SSAUBytePtr.
SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const
{
	llvm::Value *element = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
	return SSAUBytePtr::from_llvm(element);
}
// Emits a non-volatile load through this pointer and returns it as SSAUByte.
//
// constantScopeDomain: when true, the load is tagged with MD_alias_scope
// metadata taken from SSAScope::constant_scope_list().
SSAUByte SSAUBytePtr::load(bool constantScopeDomain) const
{
	auto byteLoad = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
	if (constantScopeDomain)
	{
		byteLoad->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
	}
	return SSAUByte::from_llvm(byteLoad);
}
// Reads through this pointer as a single i32 (via a pointer bitcast) and
// expands the loaded integer with SSAVec4i::unpack.
//
// constantScopeDomain: when true, the load is tagged with MD_alias_scope
// metadata taken from SSAScope::constant_scope_list().
SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const
{
	auto *int32PtrType = llvm::Type::getInt32PtrTy(SSAScope::context());
	llvm::Value *asInt32Ptr = SSAScope::builder().CreateBitCast(v, int32PtrType, SSAScope::hint());
	auto packedLoad = SSAScope::builder().CreateLoad(asInt32Ptr, false, SSAScope::hint());
	if (constantScopeDomain)
	{
		packedLoad->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
	}
	SSAInt packed = SSAInt::from_llvm(packedLoad);
	return SSAVec4i::unpack(packed);
}
// Loads a 16 x i8 vector through this pointer. The load is emitted with an
// alignment of 16, so the caller must guarantee that alignment.
//
// constantScopeDomain: when true, the load is tagged with MD_alias_scope
// metadata taken from SSAScope::constant_scope_list().
SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const
{
	llvm::Type *byteType = llvm::Type::getInt8Ty(SSAScope::context());
	llvm::PointerType *vecPtrType = llvm::VectorType::get(byteType, 16)->getPointerTo();
	llvm::Value *vecPtr = SSAScope::builder().CreateBitCast(v, vecPtrType, SSAScope::hint());
	auto vecLoad = SSAScope::builder().CreateAlignedLoad(vecPtr, 16, false, SSAScope::hint());
	if (constantScopeDomain)
	{
		vecLoad->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
	}
	return SSAVec16ub::from_llvm(vecLoad);
}
// Loads a 16 x i8 vector through this pointer. The load is emitted with an
// alignment of 1, i.e. no alignment is assumed for the source address.
//
// constantScopeDomain: when true, the load is tagged with MD_alias_scope
// metadata taken from SSAScope::constant_scope_list().
SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const
{
	llvm::Type *byteType = llvm::Type::getInt8Ty(SSAScope::context());
	llvm::PointerType *vecPtrType = llvm::VectorType::get(byteType, 16)->getPointerTo();
	llvm::Value *vecPtr = SSAScope::builder().CreateBitCast(v, vecPtrType, SSAScope::hint());
	auto vecLoad = SSAScope::builder().CreateAlignedLoad(vecPtr, 1, false, SSAScope::hint());
	if (constantScopeDomain)
	{
		vecLoad->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
	}
	return SSAVec16ub::from_llvm(vecLoad);
}
// Emits a non-volatile store of new_value through this pointer and tags the
// store with MD_noalias metadata from SSAScope::constant_scope_list().
void SSAUBytePtr::store(const SSAUByte &new_value)
{
	auto byteStore = SSAScope::builder().CreateStore(new_value.v, v, false);
	byteStore->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
{
// Store using saturate:
SSAVec8s v8s(new_value, new_value);
SSAVec16ub v16ub(v8s, v8s);
llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16);
llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo();
std::vector<llvm::Constant*> constants;
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3)));
llvm::Value *mask = llvm::ConstantVector::get(constants);
llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint());
llvm::StoreInst *inst = SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAUBytePtr::store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4])
{
// Store using saturate:
SSAVec8s v8s(new_value, new_value);
SSAVec16ub v16ub(v8s, v8s);
// Create mask vector
std::vector<llvm::Constant*> maskconstants;
maskconstants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false)));
llvm::Value *maskValue = llvm::ConstantVector::get(maskconstants);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
for (int i = 0; i < 4; i++)
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint());
#else
for (int i = 0; i < 4; i++)
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint());
#endif
llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16);
llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo();
std::vector<llvm::Constant*> constants;
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3)));
llvm::Value *shufflemask = llvm::ConstantVector::get(constants);
llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), shufflemask, SSAScope::hint());
llvm::CallInst *inst = SSAScope::builder().CreateMaskedStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), 1, maskValue);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
{
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
// The following generates _mm_stream_si128, maybe!
// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1));
// inst->setMetadata(SSAScope::module()->getMDKindID("nontemporal"), node);
}
void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value)
{
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAUBytePtr::store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4])
{
std::vector<llvm::Constant*> constants;
constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false)));
llvm::Value *maskValue = llvm::ConstantVector::get(constants);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
for (int i = 0; i < 16; i++)
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, SSAInt(i).v, SSAScope::hint());
#else
for (int i = 0; i < 16; i++)
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, (uint64_t)i, SSAScope::hint());
#endif
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::CallInst *inst = SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1, maskValue);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -1,57 +0,0 @@
/*
** SSA uint8 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_ubyte.h"
#include "ssa_int.h"
#include "ssa_vec4i.h"
#include "ssa_vec8s.h"
#include "ssa_vec16ub.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSABool;
class SSAUBytePtr
{
public:
SSAUBytePtr();
explicit SSAUBytePtr(llvm::Value *v);
static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); }
static llvm::Type *llvm_type();
SSAUBytePtr operator[](SSAInt index) const;
SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; }
SSAUByte load(bool constantScopeDomain) const;
SSAVec4i load_vec4ub(bool constantScopeDomain) const;
SSAVec16ub load_vec16ub(bool constantScopeDomain) const;
SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const;
void store(const SSAUByte &new_value);
void store_vec4ub(const SSAVec4i &new_value);
void store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4]);
void store_vec16ub(const SSAVec16ub &new_value);
void store_unaligned_vec16ub(const SSAVec16ub &new_value);
void store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4]);
llvm::Value *v;
};

View file

@ -1,81 +0,0 @@
/*
** SSA value
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_value.h"
#include "ssa_int.h"
#include "ssa_scope.h"
// Emits a non-volatile load through the wrapped value and returns the result
// as a new SSAValue.
//
// constantScopeDomain: when true, the load is tagged with MD_alias_scope
// metadata taken from SSAScope::constant_scope_list().
SSAValue SSAValue::load(bool constantScopeDomain)
{
	auto loaded = SSAScope::builder().CreateLoad(v, false);
	if (constantScopeDomain)
	{
		loaded->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
	}
	return SSAValue::from_llvm(loaded);
}
// Emits a non-volatile store of `value` through the wrapped value and tags
// the store with MD_noalias metadata from SSAScope::constant_scope_list().
void SSAValue::store(llvm::Value *value)
{
	auto stored = SSAScope::builder().CreateStore(value, v, false);
	stored->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
// Starts an index lookup on this value with a constant index. No instruction
// is emitted by this function itself; the index is only recorded.
SSAIndexLookup SSAValue::operator[](int index)
{
	SSAIndexLookup lookup;
	lookup.indexes.push_back(SSAInt(index).v);
	lookup.v = v;
	return lookup;
}
// Starts an index lookup on this value with an SSAInt index. The index value
// is only recorded in the returned SSAIndexLookup.
SSAIndexLookup SSAValue::operator[](SSAInt index)
{
	SSAIndexLookup lookup;
	lookup.indexes.push_back(index.v);
	lookup.v = v;
	return lookup;
}
/////////////////////////////////////////////////////////////////////////////
// Resolves the recorded lookup: emits a GEP on the base value with all
// collected indexes and wraps the result as an SSAValue.
SSAIndexLookup::operator SSAValue()
{
	llvm::Value *element = SSAScope::builder().CreateGEP(v, indexes);
	return SSAValue::from_llvm(element);
}
// Returns a copy of this lookup (same base value, same indexes) with one more
// constant index appended. This lookup itself is left unchanged.
SSAIndexLookup SSAIndexLookup::operator[](int index)
{
	SSAIndexLookup extended;
	extended.v = v;
	extended.indexes = indexes;
	llvm::Value *constantIndex = SSAInt(index).v;
	extended.indexes.push_back(constantIndex);
	return extended;
}
// Returns a copy of this lookup (same base value, same indexes) with one more
// SSAInt index appended. This lookup itself is left unchanged.
SSAIndexLookup SSAIndexLookup::operator[](SSAInt index)
{
	SSAIndexLookup extended;
	extended.v = v;
	extended.indexes = indexes;
	llvm::Value *dynamicIndex = index.v;
	extended.indexes.push_back(dynamicIndex);
	return extended;
}

View file

@ -1,74 +0,0 @@
/*
** SSA value
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include <vector>
namespace llvm { class Value; }
class SSAInt;
class SSAIndexLookup;
// Untyped wrapper around an llvm::Value. It can be loaded from / stored to,
// indexed (producing an SSAIndexLookup), and converted to any wrapper type
// that provides a static from_llvm(llvm::Value *).
class SSAValue
{
public:
	SSAValue() : v(nullptr) { }

	// Builds an SSAValue around an existing llvm::Value.
	static SSAValue from_llvm(llvm::Value *v)
	{
		SSAValue wrapped;
		wrapped.v = v;
		return wrapped;
	}

	SSAValue load(bool constantScopeDomain);
	void store(llvm::Value *v);

	// Conversion to a typed wrapper; hands the wrapped value to Type::from_llvm.
	template<typename Type>
	operator Type()
	{
		return Type::from_llvm(v);
	}

	SSAIndexLookup operator[](int index);
	SSAIndexLookup operator[](SSAInt index);

	// The wrapped value.
	llvm::Value *v;
};
class SSAIndexLookup
{
public:
SSAIndexLookup() : v(0) { }
llvm::Value *v;
std::vector<llvm::Value *> indexes;
SSAValue load(bool constantScopeDomain) { SSAValue value = *this; return value.load(constantScopeDomain); }
void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); }
template<typename Type>
operator Type()
{
return Type::from_llvm(v);
}
operator SSAValue();
SSAIndexLookup operator[](int index);
SSAIndexLookup operator[](SSAInt index);
};

View file

@ -1,188 +0,0 @@
/*
** SSA vec16 uint8
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_vec16ub.h"
#include "ssa_vec8s.h"
#include "ssa_vec4i.h"
#include "ssa_scope.h"
// Default construction: the wrapper holds no llvm::Value.
SSAVec16ub::SSAVec16ub()
	: v(nullptr)
{
}
// Builds a constant vector whose 16 lanes all hold the same 8-bit value.
SSAVec16ub::SSAVec16ub(unsigned char constant)
	: v(nullptr)
{
	llvm::Constant *lane = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false));
	std::vector<llvm::Constant*> lanes(16, lane);
	v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from 16 individual 8-bit values; constantN becomes
// lane N.
SSAVec16ub::SSAVec16ub(
	unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7,
	unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15)
	: v(nullptr)
{
	const unsigned char laneValues[16] =
	{
		constant0, constant1, constant2, constant3, constant4, constant5, constant6, constant7,
		constant8, constant9, constant10, constant11, constant12, constant13, constant14, constant15
	};

	std::vector<llvm::Constant*> lanes;
	lanes.reserve(16);
	for (unsigned char laneValue : laneValues)
	{
		lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, laneValue, false)));
	}
	v = llvm::ConstantVector::get(lanes);
}
// Wraps the given llvm::Value pointer unchanged.
SSAVec16ub::SSAVec16ub(llvm::Value *v)
	: v(v)
{
	// Nothing else to do: the wrapper only stores the pointer.
}
// Combines two SSAVec8s values into one 16 x i8 vector (s0 first, s1 second).
//
// With ARM_TARGET defined each input is truncated to 8 x i8 and the halves
// are joined with a shuffle; no clamping is performed on that path yet.
// Otherwise the x86 SSE2 packuswb intrinsic is called on the two inputs.
SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1)
	: v(nullptr)
{
#ifdef ARM_TARGET
	// To do: add some clamping here
	// (an arm_neon_vqmovnsu based version was tried here previously and is disabled)
	llvm::Value *lowHalf = SSAScope::builder().CreateTrunc(s0.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8));
	llvm::Value *highHalf = SSAScope::builder().CreateTrunc(s1.v, llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 8));
	SSAVec16ub joined = shuffle(from_llvm(lowHalf), from_llvm(highHalf), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
	v = joined.v;
#else
	llvm::Value *packArgs[2] = { s0.v, s1.v };
	v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), packArgs, SSAScope::hint());
#endif
}
// Returns the LLVM vector type of 16 i8 elements for the SSAScope context.
llvm::Type *SSAVec16ub::llvm_type()
{
	llvm::Type *byteType = llvm::Type::getInt8Ty(SSAScope::context());
	return llvm::VectorType::get(byteType, 16);
}
// Reinterprets the bits of a SSAVec4i as a 16 x i8 vector via an LLVM bitcast.
SSAVec16ub SSAVec16ub::bitcast(SSAVec4i i32)
{
	llvm::Value *reinterpreted = SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint());
	return SSAVec16ub::from_llvm(reinterpreted);
}
// Single-input shuffle: forwards to the two-input overload with an undef
// vector as the second operand.
SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15)
{
	SSAVec16ub undefOperand = from_llvm(llvm::UndefValue::get(llvm_type()));
	return shuffle(
		i0, undefOperand,
		index0, index1, index2, index3, index4, index5, index6, index7,
		index8, index9, index10, index11, index12, index13, index14, index15);
}
// Two-input shuffle. indexN selects the source lane for result lane N; the
// indexes are passed to LLVM's shufflevector as a constant i32 mask.
SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15)
{
	const int laneSources[16] =
	{
		index0, index1, index2, index3, index4, index5, index6, index7,
		index8, index9, index10, index11, index12, index13, index14, index15
	};

	std::vector<llvm::Constant*> maskLanes;
	maskLanes.reserve(16);
	for (int laneSource : laneSources)
	{
		maskLanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, laneSource)));
	}
	llvm::Value *shuffleMask = llvm::ConstantVector::get(maskLanes);

	llvm::Value *shuffled = SSAScope::builder().CreateShuffleVector(i0.v, i1.v, shuffleMask, SSAScope::hint());
	return SSAVec16ub::from_llvm(shuffled);
}
// Lane-wise integer addition (LLVM add).
SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b)
{
	llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
	return SSAVec16ub::from_llvm(sum);
}

// Lane-wise integer subtraction (LLVM sub).
SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b)
{
	llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
	return SSAVec16ub::from_llvm(difference);
}

// Lane-wise integer multiplication (LLVM mul).
SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b)
{
	llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
	return SSAVec16ub::from_llvm(product);
}
/*
SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b)
{
return SSAScope::builder().CreateDiv(a.v, b.v, SSAScope::hint());
}
*/
// Scalar-on-the-left overloads: the scalar is first expanded into a constant
// vector with SSAVec16ub(unsigned char), then the vector operator is applied.
SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b)
{
	const SSAVec16ub lhs(a);
	return lhs + b;
}

SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b)
{
	const SSAVec16ub lhs(a);
	return lhs - b;
}

SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b)
{
	const SSAVec16ub lhs(a);
	return lhs * b;
}
/*
SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b)
{
return SSAVec16ub(a) / b;
}
*/
// Scalar-on-the-right overloads: the scalar is first expanded into a constant
// vector with SSAVec16ub(unsigned char), then the vector operator is applied.
SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b)
{
	const SSAVec16ub rhs(b);
	return a + rhs;
}

SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b)
{
	const SSAVec16ub rhs(b);
	return a - rhs;
}

SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b)
{
	const SSAVec16ub rhs(b);
	return a * rhs;
}
/*
SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b)
{
return a / SSAVec16ub(b);
}
*/

View file

@ -1,63 +0,0 @@
/*
** SSA vec16 uint8
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec8s;
class SSAVec4i;
// Wrapper around an llvm::Value used as a vector of 16 unsigned bytes.
class SSAVec16ub
{
public:
	// Construction: empty, splat of one constant, 16 individual constants,
	// an existing llvm::Value, or two SSAVec8s values combined.
	SSAVec16ub();
	explicit SSAVec16ub(unsigned char constant);
	explicit SSAVec16ub(
		unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7,
		unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15);
	explicit SSAVec16ub(llvm::Value *v);
	SSAVec16ub(SSAVec8s s0, SSAVec8s s1);

	// Forwards to the explicit llvm::Value constructor.
	static SSAVec16ub from_llvm(llvm::Value *v)
	{
		SSAVec16ub wrapped(v);
		return wrapped;
	}
	static llvm::Type *llvm_type();

	// Reinterpretation and lane shuffles (one-input and two-input forms).
	static SSAVec16ub bitcast(SSAVec4i i32);
	static SSAVec16ub shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15);
	static SSAVec16ub shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15);

	// The wrapped value.
	llvm::Value *v;
};
// Free operators for SSAVec16ub. Each arithmetic operator is declared for the
// operand combinations (SSAVec16ub, SSAVec16ub), (unsigned char, SSAVec16ub)
// and (SSAVec16ub, unsigned char).
//
// The operator/ overloads are left commented out because their definitions in
// the implementation file are commented out as well; declaring them would
// only move the failure from compile time to link time.
SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b);
SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b);
SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b);
//SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b);
SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b);
SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b);
SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b);
//SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b);
SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b);
SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b);
SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b);
//SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b);

View file

@ -1,209 +0,0 @@
/*
** SSA vec4 float
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_vec4f.h"
#include "ssa_vec4i.h"
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_scope.h"
// Default construction: the wrapper holds no llvm::Value.
SSAVec4f::SSAVec4f()
	: v(nullptr)
{
}
// Builds a constant vector whose four lanes all hold the same float.
SSAVec4f::SSAVec4f(float constant)
	: v(nullptr)
{
	llvm::Constant *lane = llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant));
	std::vector<llvm::Constant*> lanes(4, lane);
	v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from four individual floats; constantN becomes
// lane N.
SSAVec4f::SSAVec4f(float constant0, float constant1, float constant2, float constant3)
	: v(nullptr)
{
	const float laneValues[4] = { constant0, constant1, constant2, constant3 };

	std::vector<llvm::Constant*> lanes;
	lanes.reserve(4);
	for (float laneValue : laneValues)
	{
		lanes.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(laneValue)));
	}
	v = llvm::ConstantVector::get(lanes);
}
// Broadcasts a single SSAFloat into all four lanes.
//
// The scalar is bitcast to a <1 x float> vector and then shuffled with an
// all-zero mask, so every result lane reads lane 0 of that vector.
SSAVec4f::SSAVec4f(SSAFloat f)
	: v(nullptr)
{
	llvm::Type *singleLaneType = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 1);

	// Four i32 zeros: each result lane selects source lane 0.
	std::vector<llvm::Constant*> zeroLanes;
	for (int lane = 0; lane < 4; lane++)
	{
		zeroLanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
	}
	llvm::Value *broadcastMask = llvm::ConstantVector::get(zeroLanes);

	llvm::Value *asSingleLane = SSAScope::builder().CreateBitCast(f.v, singleLaneType, SSAScope::hint());
	v = SSAScope::builder().CreateShuffleVector(asSingleLane, llvm::UndefValue::get(singleLaneType), broadcastMask, SSAScope::hint());
}
// Builds a vector from four SSAFloat values; fN becomes lane N. Starts from
// an undef vector and emits one insertelement per lane, in lane order.
SSAVec4f::SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3)
	: v(nullptr)
{
	llvm::Value *laneValues[4] = { f0.v, f1.v, f2.v, f3.v };

	llvm::Value *assembled = llvm::UndefValue::get(llvm_type());
	for (uint64_t lane = 0; lane < 4; lane++)
	{
		llvm::Value *laneIndex = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, lane));
		assembled = SSAScope::builder().CreateInsertElement(assembled, laneValues[lane], laneIndex);
	}
	v = assembled;
}
// Wraps the given llvm::Value pointer unchanged.
SSAVec4f::SSAVec4f(llvm::Value *v)
	: v(v)
{
	// Nothing else to do: the wrapper only stores the pointer.
}
// Converts a SSAVec4i to float lanes.
//
// With ARM_TARGET defined this emits a generic signed-int-to-float
// conversion; otherwise it calls the x86 SSE2 cvtdq2ps intrinsic.
SSAVec4f::SSAVec4f(SSAVec4i i32)
	: v(nullptr)
{
#ifdef ARM_TARGET
	llvm::Type *floatVecType = llvm_type();
	v = SSAScope::builder().CreateSIToFP(i32.v, floatVecType, SSAScope::hint());
#else
	// A plain CreateSIToFP on this path was tried previously and is disabled.
	auto convertIntrinsic = SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps);
	v = SSAScope::builder().CreateCall(convertIntrinsic, i32.v, SSAScope::hint());
#endif
}
// Returns the LLVM vector type of four float elements for the SSAScope context.
llvm::Type *SSAVec4f::llvm_type()
{
	llvm::Type *floatType = llvm::Type::getFloatTy(SSAScope::context());
	return llvm::VectorType::get(floatType, 4);
}
// Extracts the lane selected by `index` (emits an extractelement) and returns
// it as an SSAFloat.
SSAFloat SSAVec4f::operator[](SSAInt index) const
{
	llvm::Value *lane = SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint());
	return SSAFloat::from_llvm(lane);
}
// Constant-index convenience overload: wraps the index in an SSAInt and
// forwards to the SSAInt overload.
SSAFloat SSAVec4f::operator[](int index) const
{
	SSAInt wrappedIndex(index);
	return (*this)[wrappedIndex];
}
// Returns a new vector equal to vec4f with the lane at `index` replaced by
// `value` (emits an insertelement with a constant i32 lane index).
SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index)
{
	llvm::Value *laneIndex = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index));
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(vec4f.v, value.v, laneIndex);
	return from_llvm(updated);
}
// Reinterprets the bits of a SSAVec4i as a SSAVec4f via an LLVM bitcast.
//
// i32: the integer vector whose bits are reinterpreted.
// Returns the bitcast result wrapped as a SSAVec4f.
//
// The result must be wrapped with SSAVec4f::from_llvm. Wrapping it as a
// SSAVec4i would make the return statement go through the implicit
// SSAVec4f(SSAVec4i) converting constructor, which emits an int-to-float
// conversion on a value that is already a float vector.
SSAVec4f SSAVec4f::bitcast(SSAVec4i i32)
{
	llvm::Value *reinterpreted = SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint());
	return SSAVec4f::from_llvm(reinterpreted);
}
// Transposes the 4x4 matrix formed by the four rows, in place.
//
// Done in two rounds of two-input shuffles using packed masks in the
// _MM_SHUFFLE encoding (see the private shuffle(f0, f1, mask) overload).
void SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3)
{
	const int lanes01 = 0x44; // _MM_SHUFFLE(1,0,1,0)
	const int lanes23 = 0xEE; // _MM_SHUFFLE(3,2,3,2)
	const int lanes02 = 0x88; // _MM_SHUFFLE(2,0,2,0)
	const int lanes13 = 0xDD; // _MM_SHUFFLE(3,1,3,1)

	// Round 1: gather lane pairs from rows (0,1) and rows (2,3).
	SSAVec4f low01 = shuffle(row0, row1, lanes01);
	SSAVec4f high01 = shuffle(row0, row1, lanes23);
	SSAVec4f low23 = shuffle(row2, row3, lanes01);
	SSAVec4f high23 = shuffle(row2, row3, lanes23);

	// Round 2: pick even / odd lanes to form the transposed rows.
	row0 = shuffle(low01, low23, lanes02);
	row1 = shuffle(low01, low23, lanes13);
	row2 = shuffle(high01, high23, lanes02);
	row3 = shuffle(high01, high23, lanes13);
}
// Single-input shuffle: forwards to the two-input overload with an undef
// vector as the second operand.
SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3)
{
	SSAVec4f undefOperand = from_llvm(llvm::UndefValue::get(llvm_type()));
	return shuffle(f0, undefOperand, index0, index1, index2, index3);
}
// Two-input shuffle. indexN selects the source lane for result lane N; the
// indexes are passed to LLVM's shufflevector as a constant i32 mask.
SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3)
{
	const int laneSources[4] = { index0, index1, index2, index3 };

	std::vector<llvm::Constant*> maskLanes;
	maskLanes.reserve(4);
	for (int laneSource : laneSources)
	{
		maskLanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, laneSource)));
	}
	llvm::Value *shuffleMask = llvm::ConstantVector::get(maskLanes);

	llvm::Value *shuffled = SSAScope::builder().CreateShuffleVector(f0.v, f1.v, shuffleMask, SSAScope::hint());
	return SSAVec4f::from_llvm(shuffled);
}
// Two-input shuffle driven by a packed 8-bit mask: two bits per result lane.
// Result lanes 0 and 1 are taken from f0; lanes 2 and 3 are taken from f1
// (hence the +4 offset into the concatenated lane numbering).
SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask)
{
	const int lane0 = mask & 3;
	const int lane1 = (mask >> 2) & 3;
	const int lane2 = ((mask >> 4) & 3) + 4;
	const int lane3 = ((mask >> 6) & 3) + 4;
	return shuffle(f0, f1, lane0, lane1, lane2, lane3);
}
// Lane-wise float addition (LLVM fadd).
SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b)
{
	llvm::Value *sum = SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint());
	return SSAVec4f::from_llvm(sum);
}

// Lane-wise float subtraction (LLVM fsub).
SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b)
{
	llvm::Value *difference = SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint());
	return SSAVec4f::from_llvm(difference);
}

// Lane-wise float multiplication (LLVM fmul).
SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b)
{
	llvm::Value *product = SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint());
	return SSAVec4f::from_llvm(product);
}

// Lane-wise float division (LLVM fdiv).
SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b)
{
	llvm::Value *quotient = SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint());
	return SSAVec4f::from_llvm(quotient);
}
// Scalar-on-the-left overloads: the float is first expanded into a constant
// vector with SSAVec4f(float), then the vector operator is applied.
SSAVec4f operator+(float a, const SSAVec4f &b)
{
	const SSAVec4f lhs(a);
	return lhs + b;
}

SSAVec4f operator-(float a, const SSAVec4f &b)
{
	const SSAVec4f lhs(a);
	return lhs - b;
}

SSAVec4f operator*(float a, const SSAVec4f &b)
{
	const SSAVec4f lhs(a);
	return lhs * b;
}

SSAVec4f operator/(float a, const SSAVec4f &b)
{
	const SSAVec4f lhs(a);
	return lhs / b;
}
// Scalar-on-the-right overloads: the float is first expanded into a constant
// vector with SSAVec4f(float), then the vector operator is applied.
SSAVec4f operator+(const SSAVec4f &a, float b)
{
	const SSAVec4f rhs(b);
	return a + rhs;
}

SSAVec4f operator-(const SSAVec4f &a, float b)
{
	const SSAVec4f rhs(b);
	return a - rhs;
}

SSAVec4f operator*(const SSAVec4f &a, float b)
{
	const SSAVec4f rhs(b);
	return a * rhs;
}

SSAVec4f operator/(const SSAVec4f &a, float b)
{
	const SSAVec4f rhs(b);
	return a / rhs;
}

View file

@ -1,71 +0,0 @@
/*
** SSA vec4 float
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec4i;
class SSAFloat;
class SSAInt;
// Thin value wrapper around an llvm::Value* (member `v`) used when building
// "vec4 float" SSA code. Copying the wrapper copies only the pointer.
class SSAVec4f
{
public:
// Default construction; the definitions of these constructors live in the .cpp file.
SSAVec4f();
// Construction from compile-time float constants (explicit to avoid accidental conversions).
explicit SSAVec4f(float constant);
explicit SSAVec4f(float constant0, float constant1, float constant2, float constant3);
// Construction from runtime SSA floats; these are implicit conversions.
SSAVec4f(SSAFloat f);
SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3);
// Wraps an already-built LLVM value.
explicit SSAVec4f(llvm::Value *v);
// Implicit conversion from an int vector wrapper.
SSAVec4f(SSAVec4i i32);
// Element access by runtime or constant index.
SSAFloat operator[](SSAInt index) const;
SSAFloat operator[](int index) const;
static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index);
static SSAVec4f bitcast(SSAVec4i i32);
// Takes the four rows by reference (in/out parameters).
static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3);
// Shuffles with explicit per-lane indices, from one or two source vectors.
static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3);
static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3);
// Named constructor equivalent to the explicit llvm::Value* constructor.
static SSAVec4f from_llvm(llvm::Value *v) { return SSAVec4f(v); }
static llvm::Type *llvm_type();
// The wrapped LLVM value; public so other SSA wrappers can read it directly.
llvm::Value *v;
private:
// Shuffle driven by a packed selector (two bits per lane); internal helper.
static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask);
};
// Arithmetic between two SSAVec4f values.
SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b);
// Arithmetic with a float constant on the left-hand side.
SSAVec4f operator+(float a, const SSAVec4f &b);
SSAVec4f operator-(float a, const SSAVec4f &b);
SSAVec4f operator*(float a, const SSAVec4f &b);
SSAVec4f operator/(float a, const SSAVec4f &b);
// Arithmetic with a float constant on the right-hand side.
SSAVec4f operator+(const SSAVec4f &a, float b);
SSAVec4f operator-(const SSAVec4f &a, float b);
SSAVec4f operator*(const SSAVec4f &a, float b);
SSAVec4f operator/(const SSAVec4f &a, float b);

View file

@ -1,73 +0,0 @@
/*
** SSA vec4 float pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_vec4f_ptr.h"
#include "ssa_scope.h"
// Default constructor: the wrapper starts out holding no LLVM value.
SSAVec4fPtr::SSAVec4fPtr() : v(nullptr) {}
// Wraps an existing LLVM value; nothing is emitted.
SSAVec4fPtr::SSAVec4fPtr(llvm::Value *v) : v(v) {}
// Returns the LLVM type "pointer to <4 x float>" in the current SSAScope context.
llvm::Type *SSAVec4fPtr::llvm_type()
{
	auto *floatType = llvm::Type::getFloatTy(SSAScope::context());
	auto *vec4Type = llvm::VectorType::get(floatType, 4);
	return vec4Type->getPointerTo();
}
// Pointer arithmetic: emits a GEP from this pointer with the given runtime
// index and returns the resulting pointer wrapper.
SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const
{
	llvm::Value *element = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
	return SSAVec4fPtr::from_llvm(element);
}
// Emits a non-volatile load with a stated alignment of 16 bytes.
// When constantScopeDomain is true the load is tagged with alias-scope
// metadata taken from SSAScope::constant_scope_list().
SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const
{
	auto *inst = SSAScope::builder().CreateAlignedLoad(v, 16, false, SSAScope::hint());
	if (constantScopeDomain)
	{
		inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
	}
	return SSAVec4f::from_llvm(inst);
}
// Emits a non-volatile load with a stated alignment of 1 byte (no alignment
// assumption). When constantScopeDomain is true the load is tagged with
// alias-scope metadata taken from SSAScope::constant_scope_list().
SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const
{
	auto *inst = SSAScope::builder().CreateAlignedLoad(v, 1, false, SSAScope::hint());
	if (constantScopeDomain)
	{
		inst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
	}
	return SSAVec4f::from_llvm(inst);
}
// Emits a non-volatile store of new_value with a stated alignment of 16 bytes
// and tags it as noalias with respect to SSAScope::constant_scope_list().
void SSAVec4fPtr::store(const SSAVec4f &new_value)
{
	auto *storeInst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false);
	storeInst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
// Emits a non-volatile store of new_value with a stated alignment of 4 bytes
// and tags it as noalias with respect to SSAScope::constant_scope_list().
// NOTE(review): load_unaligned() states an alignment of 1 while this store
// states 4 - confirm that destinations are always at least 4-byte aligned.
void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value)
{
	auto *storeInst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
	storeInst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -1,45 +0,0 @@
/*
** SSA vec4 float pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_int.h"
#include "ssa_vec4f.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Thin wrapper around an llvm::Value* (member `v`) that represents a pointer
// to SSAVec4f data while building SSA code.
class SSAVec4fPtr
{
public:
SSAVec4fPtr();
// Wraps an already-built LLVM value.
explicit SSAVec4fPtr(llvm::Value *v);
// Named constructor equivalent to the explicit llvm::Value* constructor.
static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); }
static llvm::Type *llvm_type();
// Pointer indexing with a runtime index.
SSAVec4fPtr operator[](SSAInt index) const;
// Loads; constantScopeDomain selects whether alias-scope metadata is attached.
SSAVec4f load(bool constantScopeDomain) const;
SSAVec4f load_unaligned(bool constantScopeDomain) const;
// Stores of a vector value through this pointer.
void store(const SSAVec4f &new_value);
void store_unaligned(const SSAVec4f &new_value);
// The wrapped LLVM value.
llvm::Value *v;
};

View file

@ -1,275 +0,0 @@
/*
** SSA vec4 int32
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_vec4i.h"
#include "ssa_vec4f.h"
#include "ssa_vec8s.h"
#include "ssa_vec16ub.h"
#include "ssa_int.h"
#include "ssa_scope.h"
// Default constructor: the wrapper starts out holding no LLVM value.
SSAVec4i::SSAVec4i() : v(nullptr) {}
// Builds a constant vector whose four 32-bit signed elements all equal `constant`.
SSAVec4i::SSAVec4i(int constant)
: v(nullptr)
{
	std::vector<llvm::Constant*> lanes(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)));
	v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from four 32-bit signed constants, in lane order.
SSAVec4i::SSAVec4i(int constant0, int constant1, int constant2, int constant3)
: v(nullptr)
{
	const int values[4] = { constant0, constant1, constant2, constant3 };

	std::vector<llvm::Constant*> lanes;
	for (int value : values)
		lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, value, true)));

	v = llvm::ConstantVector::get(lanes);
}
// Wraps an existing LLVM value; nothing is emitted.
SSAVec4i::SSAVec4i(llvm::Value *v) : v(v) {}
// Splat constructor: bitcasts the scalar to a <1 x i32> vector and shuffles it
// with an all-zero mask, so every output lane selects element 0.
SSAVec4i::SSAVec4i(SSAInt i)
: v(nullptr)
{
	llvm::Type *singleLaneType = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 1);

	// Four mask entries, each selecting lane 0 of the first operand.
	std::vector<llvm::Constant*> maskElements;
	for (int lane = 0; lane < 4; lane++)
		maskElements.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
	llvm::Value *splatMask = llvm::ConstantVector::get(maskElements);

	llvm::Value *asVector = SSAScope::builder().CreateBitCast(i.v, singleLaneType, SSAScope::hint());
	v = SSAScope::builder().CreateShuffleVector(asVector, llvm::UndefValue::get(singleLaneType), splatMask, SSAScope::hint());
}
// Builds a vector from four runtime ints: starts from an all-zero constant
// vector and inserts i0..i3 into lanes 0..3, in that order.
SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
: v(nullptr)
{
	std::vector<llvm::Constant*> zeros(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
	v = llvm::ConstantVector::get(zeros);

	llvm::Value *elements[4] = { i0.v, i1.v, i2.v, i3.v };
	for (int lane = 0; lane < 4; lane++)
	{
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
		// Before LLVM 3.9 the lane index is passed as an llvm::Value.
		v = SSAScope::builder().CreateInsertElement(v, elements[lane], SSAInt(lane).v, SSAScope::hint());
#else
		// From LLVM 3.9 on the lane index is passed as a plain integer.
		v = SSAScope::builder().CreateInsertElement(v, elements[lane], (uint64_t)lane, SSAScope::hint());
#endif
	}
}
/*
SSAVec4i::SSAVec4i(SSAVec4f f32)
: v(0)
{
#ifdef ARM_TARGET
v = SSAScope::builder().CreateFPToSI(f32.v, llvm_type(), SSAScope::hint());
#else
v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint());
#endif
}
*/
// Extracts one element at a runtime index (LLVM extractelement).
SSAInt SSAVec4i::operator[](SSAInt index) const
{
	llvm::Value *element = SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint());
	return SSAInt::from_llvm(element);
}
// Extracts one element at a constant index by forwarding to the SSAInt overload.
SSAInt SSAVec4i::operator[](int index) const
{
	return operator[](SSAInt(index));
}
// Returns a new vector with `value` inserted at the runtime `index`
// (LLVM insertelement). This object's own `v` is only read.
SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value)
{
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint());
	return SSAVec4i::from_llvm(updated);
}
// Returns a new vector with `value` inserted at the constant `index`.
// This object's own `v` is only read.
SSAVec4i SSAVec4i::insert(int index, SSAInt value)
{
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
	// Before LLVM 3.9 the index is wrapped in an SSAInt and passed as a value.
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(v, value.v, SSAInt(index).v, SSAScope::hint());
#else
	// From LLVM 3.9 on the index is passed as a plain integer.
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint());
#endif
	return SSAVec4i::from_llvm(updated);
}
// Inserts a constant int at a constant index by wrapping the value in an
// SSAInt and forwarding to insert(int, SSAInt).
SSAVec4i SSAVec4i::insert(int index, int value)
{
	const SSAInt wrapped(value);
	return insert(index, wrapped);
}
// Returns the LLVM type <4 x i32> in the current SSAScope context.
llvm::Type *SSAVec4i::llvm_type()
{
	llvm::Type *int32Type = llvm::Type::getInt32Ty(SSAScope::context());
	return llvm::VectorType::get(int32Type, 4);
}
// Expands a packed 32-bit value into a vector of four ints.
// Steps: place i32 in lane 0 with lanes 1..3 zeroed, interleave the low eight
// bytes with zero bytes, then interleave the low four 16-bit lanes with zeros.
SSAVec4i SSAVec4i::unpack(SSAInt i32)
{
	// _mm_cvtsi32_si128 as implemented by clang:
	llvm::Value *packed = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint());
	for (int lane = 1; lane <= 3; lane++)
		packed = SSAScope::builder().CreateInsertElement(packed, SSAInt(0).v, SSAInt(lane).v, SSAScope::hint());
	SSAVec4i packedVec = SSAVec4i::from_llvm(packed);

	// _mm_unpacklo_epi8
	SSAVec8s words = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(packedVec), SSAVec16ub((unsigned char)0), 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7));

	// _mm_unpacklo_epi16
	return SSAVec4i::extendlo(words);
}
// Reinterprets the bits of a float vector as <4 x i32> (LLVM bitcast, no value conversion).
SSAVec4i SSAVec4i::bitcast(SSAVec4f f32)
{
	llvm::Value *reinterpreted = SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint());
	return SSAVec4i::from_llvm(reinterpreted);
}
// Reinterprets the bits of a 16-bit lane vector as <4 x i32> (LLVM bitcast, no value conversion).
SSAVec4i SSAVec4i::bitcast(SSAVec8s i16)
{
	llvm::Value *reinterpreted = SSAScope::builder().CreateBitCast(i16.v, llvm_type(), SSAScope::hint());
	return SSAVec4i::from_llvm(reinterpreted);
}
// Single-source shuffle: forwards to the two-source overload with an undef
// vector as the second operand.
SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, int index0, int index1, int index2, int index3)
{
	const SSAVec4i undefOperand = from_llvm(llvm::UndefValue::get(llvm_type()));
	return shuffle(i0, undefOperand, index0, index1, index2, index3);
}
// Emits an LLVM shufflevector over i0 and i1. Each index becomes one 32-bit
// constant element of the shuffle mask, in the order given.
SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, const SSAVec4i &i1, int index0, int index1, int index2, int index3)
{
	const int lanes[4] = { index0, index1, index2, index3 };

	std::vector<llvm::Constant*> maskElements;
	for (int lane : lanes)
		maskElements.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, lane)));

	llvm::Value *shuffleMask = llvm::ConstantVector::get(maskElements);
	llvm::Value *result = SSAScope::builder().CreateShuffleVector(i0.v, i1.v, shuffleMask, SSAScope::hint());
	return SSAVec4i::from_llvm(result);
}
// Widens a 16-byte vector into four int vectors in two stages: bytes to 16-bit
// lanes (low and high halves), then each half to 32-bit lanes.
// out0..out3 receive the results in ascending lane order.
void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3)
{
	SSAVec8s lowerWords = SSAVec8s::extendlo(a);
	SSAVec8s upperWords = SSAVec8s::extendhi(a);

	out0 = extendlo(lowerWords);
	out1 = extendhi(lowerWords);
	out2 = extendlo(upperWords);
	out3 = extendhi(upperWords);
}
// Interleaves the upper four 16-bit lanes of i16 with zero lanes and
// reinterprets the result as four ints (_mm_unpackhi_epi16 with zero).
SSAVec4i SSAVec4i::extendhi(SSAVec8s i16)
{
	SSAVec8s interleaved = SSAVec8s::shuffle(i16, SSAVec8s((short)0), 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
	return SSAVec4i::bitcast(interleaved);
}
// Interleaves the lower four 16-bit lanes of i16 with zero lanes and
// reinterprets the result as four ints (_mm_unpacklo_epi16 with zero).
SSAVec4i SSAVec4i::extendlo(SSAVec8s i16)
{
	SSAVec8s interleaved = SSAVec8s::shuffle(i16, SSAVec8s((short)0), 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
	return SSAVec4i::bitcast(interleaved);
}
// Interleaves the upper four 16-bit lanes of a and b and reinterprets the
// result as four ints (_mm_unpackhi_epi16).
SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b)
{
	SSAVec8s interleaved = SSAVec8s::shuffle(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
	return SSAVec4i::bitcast(interleaved);
}
// Interleaves the lower four 16-bit lanes of a and b and reinterprets the
// result as four ints (_mm_unpacklo_epi16).
SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b)
{
	SSAVec8s interleaved = SSAVec8s::shuffle(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
	return SSAVec4i::bitcast(interleaved);
}
// Emits an LLVM integer Add on the two wrapped values and wraps the result.
SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(sum);
}
// Emits an LLVM integer Sub (a - b) on the two wrapped values and wraps the result.
SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(difference);
}
// Emits an LLVM integer Mul on the two wrapped values and wraps the result.
SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(product);
}
// Emits an LLVM signed division (SDiv, a / b) on the two wrapped values and wraps the result.
SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *quotient = SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(quotient);
}
// int + vector: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator+(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs + b;
}
// int - vector: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator-(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs - b;
}
// int * vector: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator*(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs * b;
}
// int / vector: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator/(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs / b;
}
// vector + int: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator+(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a + rhs;
}
// vector - int: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator-(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a - rhs;
}
// vector * int: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator*(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a * rhs;
}
// vector / int: wraps the int with the single-int SSAVec4i constructor,
// then delegates to the vector/vector operator.
SSAVec4i operator/(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a / rhs;
}
// Shifts the wrapped value left by the constant `bits` (LLVM Shl) and wraps
// the result as an SSAVec4i.
//
// The result is wrapped with SSAVec4i::from_llvm on purpose. Wrapping it as an
// SSAInt would make the return statement go through the implicit
// SSAVec4i(SSAInt) constructor, which bitcasts its argument to <1 x i32> and
// shuffles it; that is only meaningful for a scalar, not for the vector
// produced here.
SSAVec4i operator<<(const SSAVec4i &a, int bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint());
	return SSAVec4i::from_llvm(shifted);
}
// Shifts the wrapped value right by the constant `bits` using LLVM LShr
// (logical shift right) and wraps the result.
SSAVec4i operator>>(const SSAVec4i &a, int bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint());
	return SSAVec4i::from_llvm(shifted);
}

View file

@ -1,80 +0,0 @@
/*
** SSA vec4 int32
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec4f;
class SSAVec8s;
class SSAVec16ub;
class SSAInt;
// Thin value wrapper around an llvm::Value* (member `v`) used when building
// "vec4 int32" SSA code. Copying the wrapper copies only the pointer.
class SSAVec4i
{
public:
SSAVec4i();
// Construction from compile-time int constants (explicit to avoid accidental conversions).
explicit SSAVec4i(int constant);
explicit SSAVec4i(int constant0, int constant1, int constant2, int constant3);
// Construction from runtime SSA ints; these are implicit conversions.
SSAVec4i(SSAInt i);
SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3);
// Wraps an already-built LLVM value.
explicit SSAVec4i(llvm::Value *v);
// NOTE(review): the matching definition in ssa_vec4i.cpp appears to be commented out,
// so using this constructor would presumably fail at link time - confirm.
SSAVec4i(SSAVec4f f32);
// Element access by runtime or constant index.
SSAInt operator[](SSAInt index) const;
SSAInt operator[](int index) const;
// Each insert returns a new vector value with one element replaced.
SSAVec4i insert(SSAInt index, SSAInt value);
SSAVec4i insert(int index, SSAInt value);
SSAVec4i insert(int index, int value);
static SSAVec4i unpack(SSAInt value);
// Bit reinterpretation from other vector wrappers.
static SSAVec4i bitcast(SSAVec4f f32);
static SSAVec4i bitcast(SSAVec8s i16);
// Shuffles with explicit per-lane indices, from one or two source vectors.
static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3);
static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3);
// Widening / interleaving helpers that take 16-bit lane vectors (or a byte vector for extend).
static SSAVec4i extendhi(SSAVec8s i16);
static SSAVec4i extendlo(SSAVec8s i16);
static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3);
static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1);
static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1);
// Named constructor equivalent to the explicit llvm::Value* constructor.
static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); }
static llvm::Type *llvm_type();
// The wrapped LLVM value; public so other SSA wrappers can read it directly.
llvm::Value *v;
};
// Arithmetic between two SSAVec4i values.
SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b);
// Arithmetic with an int constant on the left-hand side.
SSAVec4i operator+(int a, const SSAVec4i &b);
SSAVec4i operator-(int a, const SSAVec4i &b);
SSAVec4i operator*(int a, const SSAVec4i &b);
SSAVec4i operator/(int a, const SSAVec4i &b);
// Arithmetic with an int constant on the right-hand side.
SSAVec4i operator+(const SSAVec4i &a, int b);
SSAVec4i operator-(const SSAVec4i &a, int b);
SSAVec4i operator*(const SSAVec4i &a, int b);
SSAVec4i operator/(const SSAVec4i &a, int b);
// Shifts by a constant bit count.
SSAVec4i operator<<(const SSAVec4i &a, int bits);
SSAVec4i operator>>(const SSAVec4i &a, int bits);

View file

@ -1,65 +0,0 @@
/*
** SSA vec4 int32 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_vec4i_ptr.h"
#include "ssa_scope.h"
// Default constructor: the wrapper starts out holding no LLVM value.
SSAVec4iPtr::SSAVec4iPtr() : v(nullptr) {}
// Wraps an existing LLVM value; nothing is emitted.
SSAVec4iPtr::SSAVec4iPtr(llvm::Value *v) : v(v) {}
// Returns the LLVM type "pointer to <4 x i32>" in the current SSAScope context.
llvm::Type *SSAVec4iPtr::llvm_type()
{
	auto *int32Type = llvm::Type::getInt32Ty(SSAScope::context());
	auto *vec4Type = llvm::VectorType::get(int32Type, 4);
	return vec4Type->getPointerTo();
}
// Pointer arithmetic: emits a GEP from this pointer with the given runtime
// index and returns the resulting pointer wrapper.
SSAVec4iPtr SSAVec4iPtr::operator[](SSAInt index) const
{
	llvm::Value *element = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
	return SSAVec4iPtr::from_llvm(element);
}
// Emits a non-volatile load through this pointer; no explicit alignment is given.
SSAVec4i SSAVec4iPtr::load() const
{
	llvm::Value *loaded = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
	return SSAVec4i::from_llvm(loaded);
}
// Creates a non-volatile LoadInst with a stated alignment of 4 bytes, inserts
// it through the current builder and wraps the result.
SSAVec4i SSAVec4iPtr::load_unaligned() const
{
	llvm::LoadInst *loadInst = new llvm::LoadInst(v, SSAScope::hint(), false, 4);
	return SSAVec4i::from_llvm(SSAScope::builder().Insert(loadInst));
}
// Emits a non-volatile store of new_value through this pointer; no explicit
// alignment is given.
void SSAVec4iPtr::store(const SSAVec4i &new_value)
{
	const bool isVolatile = false;
	SSAScope::builder().CreateStore(new_value.v, v, isVolatile);
}
// Emits a non-volatile store of new_value with a stated alignment of 4 bytes.
void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value)
{
	const unsigned alignment = 4;
	const bool isVolatile = false;
	SSAScope::builder().CreateAlignedStore(new_value.v, v, alignment, isVolatile);
}

View file

@ -1,46 +0,0 @@
/*
** SSA vec4 int32 pointer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "ssa_int.h"
#include "ssa_vec4i.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Thin wrapper around an llvm::Value* (member `v`) that represents a pointer
// to SSAVec4i data while building SSA code.
class SSAVec4iPtr
{
public:
SSAVec4iPtr();
// Wraps an already-built LLVM value.
explicit SSAVec4iPtr(llvm::Value *v);
// Named constructor equivalent to the explicit llvm::Value* constructor.
static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); }
static llvm::Type *llvm_type();
// Pointer indexing with a runtime index.
SSAVec4iPtr operator[](SSAInt index) const;
// Constant-index convenience overload; wraps the int in an SSAInt and forwards.
SSAVec4iPtr operator[](int index) const { return (*this)[SSAInt(index)]; }
// Loads and stores of a vector value through this pointer.
SSAVec4i load() const;
SSAVec4i load_unaligned() const;
void store(const SSAVec4i &new_value);
void store_unaligned(const SSAVec4i &new_value);
// The wrapped LLVM value.
llvm::Value *v;
};

View file

@ -1,189 +0,0 @@
/*
** SSA vec8 int16
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "precomp.h"
#include "ssa_vec8s.h"
#include "ssa_vec4i.h"
#include "ssa_vec16ub.h"
#include "ssa_scope.h"
// Default constructor: the wrapper starts out holding no LLVM value.
SSAVec8s::SSAVec8s() : v(nullptr) {}
// Builds a constant vector whose eight 16-bit signed elements all equal `constant`.
SSAVec8s::SSAVec8s(short constant)
: v(nullptr)
{
	std::vector<llvm::Constant*> lanes(8, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true)));
	v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from eight 16-bit signed constants, in lane order.
SSAVec8s::SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7)
: v(nullptr)
{
	const short values[8] = { constant0, constant1, constant2, constant3, constant4, constant5, constant6, constant7 };

	std::vector<llvm::Constant*> lanes;
	for (short value : values)
		lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, value, true)));

	v = llvm::ConstantVector::get(lanes);
}
// Wraps an existing LLVM value; nothing is emitted.
SSAVec8s::SSAVec8s(llvm::Value *v) : v(v) {}
// Packs two int vectors into one vector of eight 16-bit lanes (i0 first, then i1).
// On x86 this calls the SSE2 packssdw intrinsic; with ARM_TARGET defined each
// input is truncated to 16-bit lanes and the halves are concatenated by a shuffle.
SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1)
: v(nullptr)
{
#ifdef ARM_TARGET
	// To do: add some clamping here
	// (an earlier variant based on the arm_neon_vqmovns intrinsic was left disabled)
	llvm::Value *narrow0 = SSAScope::builder().CreateTrunc(i0.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4));
	llvm::Value *narrow1 = SSAScope::builder().CreateTrunc(i1.v, llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 4));
	v = shuffle(from_llvm(narrow0), from_llvm(narrow1), 0, 1, 2, 3, 4, 5, 6, 7).v;
#else
	llvm::Value *operands[2] = { i0.v, i1.v };
	v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), operands, SSAScope::hint());
#endif
}
// Returns the LLVM type <8 x i16> in the current SSAScope context.
llvm::Type *SSAVec8s::llvm_type()
{
	llvm::Type *int16Type = llvm::Type::getInt16Ty(SSAScope::context());
	return llvm::VectorType::get(int16Type, 8);
}
// Reinterprets the bits of a byte vector as <8 x i16> (LLVM bitcast, no value conversion).
SSAVec8s SSAVec8s::bitcast(SSAVec16ub i8)
{
	llvm::Value *reinterpreted = SSAScope::builder().CreateBitCast(i8.v, llvm_type(), SSAScope::hint());
	return SSAVec8s::from_llvm(reinterpreted);
}
// Single-source shuffle: forwards to the two-source overload with an undef
// vector as the second operand.
SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7)
{
	const SSAVec8s undefOperand = from_llvm(llvm::UndefValue::get(llvm_type()));
	return shuffle(i0, undefOperand, index0, index1, index2, index3, index4, index5, index6, index7);
}
// Emits an LLVM shufflevector over i0 and i1. Each index becomes one 32-bit
// constant element of the shuffle mask, in the order given.
SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7)
{
	const int lanes[8] = { index0, index1, index2, index3, index4, index5, index6, index7 };

	std::vector<llvm::Constant*> maskElements;
	for (int lane : lanes)
		maskElements.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, lane)));

	llvm::Value *shuffleMask = llvm::ConstantVector::get(maskElements);
	llvm::Value *result = SSAScope::builder().CreateShuffleVector(i0.v, i1.v, shuffleMask, SSAScope::hint());
	return SSAVec8s::from_llvm(result);
}
// Interleaves the upper eight bytes of `a` with zero bytes and reinterprets
// the result as eight 16-bit lanes (_mm_unpackhi_epi8 with zero).
SSAVec8s SSAVec8s::extendhi(SSAVec16ub a)
{
	SSAVec16ub interleaved = SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
	return SSAVec8s::bitcast(interleaved);
}
// Interleaves the lower eight bytes of `a` with zero bytes and reinterprets
// the result as eight 16-bit lanes (_mm_unpacklo_epi8 with zero).
SSAVec8s SSAVec8s::extendlo(SSAVec16ub a)
{
	SSAVec16ub interleaved = SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
	return SSAVec8s::bitcast(interleaved);
}
// Emits an LLVM integer Add on the two wrapped values and wraps the result.
SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(sum);
}
// Emits an LLVM integer Sub (a - b) on the two wrapped values and wraps the result.
SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(difference);
}
// Emits an LLVM integer Mul on the two wrapped values and wraps the result.
SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(product);
}
// Emits an LLVM signed division (SDiv, a / b) on the two wrapped values and wraps the result.
SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *quotient = SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(quotient);
}
// short + vector: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator+(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs + b;
}
// short - vector: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator-(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs - b;
}
// short * vector: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator*(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs * b;
}
// short / vector: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator/(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs / b;
}
// vector + short: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator+(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a + rhs;
}
// vector - short: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator-(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a - rhs;
}
// vector * short: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator*(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a * rhs;
}
// vector / short: wraps the short with the single-short SSAVec8s constructor,
// then delegates to the vector/vector operator.
SSAVec8s operator/(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a / rhs;
}
// Shifts the wrapped value left by the constant `bits` (LLVM Shl) and wraps the result.
SSAVec8s operator<<(const SSAVec8s &a, int bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint());
	return SSAVec8s::from_llvm(shifted);
}
// Shifts the wrapped value right by the constant `bits` using LLVM LShr
// (logical shift right) and wraps the result.
SSAVec8s operator>>(const SSAVec8s &a, int bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint());
	return SSAVec8s::from_llvm(shifted);
}

View file

@ -1,67 +0,0 @@
/*
** SSA vec8 int16
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec4i;
class SSAVec16ub;
// Thin value wrapper around an llvm::Value* (member `v`) used when building
// "vec8 int16" SSA code. Copying the wrapper copies only the pointer.
class SSAVec8s
{
public:
SSAVec8s();
// Construction from compile-time short constants (explicit to avoid accidental conversions).
explicit SSAVec8s(short constant);
explicit SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7);
// Wraps an already-built LLVM value.
explicit SSAVec8s(llvm::Value *v);
// Packs two int vectors into one vector of eight 16-bit lanes.
SSAVec8s(SSAVec4i i0, SSAVec4i i1);
static SSAVec8s bitcast(SSAVec16ub i8);
// Shuffles with explicit per-lane indices, from one or two source vectors.
static SSAVec8s shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);
static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);
// Widen the upper / lower half of a byte vector to 16-bit lanes.
static SSAVec8s extendhi(SSAVec16ub a);
static SSAVec8s extendlo(SSAVec16ub a);
// NOTE(review): no definition of mulhi was found in ssa_vec8s.cpp - confirm it is defined before use.
static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b);
// Named constructor equivalent to the explicit llvm::Value* constructor.
static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); }
static llvm::Type *llvm_type();
// The wrapped LLVM value; public so other SSA wrappers can read it directly.
llvm::Value *v;
};
// Arithmetic between two SSAVec8s values.
SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b);
// Arithmetic with a short constant on the left-hand side.
SSAVec8s operator+(short a, const SSAVec8s &b);
SSAVec8s operator-(short a, const SSAVec8s &b);
SSAVec8s operator*(short a, const SSAVec8s &b);
SSAVec8s operator/(short a, const SSAVec8s &b);
// Arithmetic with a short constant on the right-hand side.
SSAVec8s operator+(const SSAVec8s &a, short b);
SSAVec8s operator-(const SSAVec8s &a, short b);
SSAVec8s operator*(const SSAVec8s &a, short b);
SSAVec8s operator/(const SSAVec8s &a, short b);
// Shifts by a constant bit count.
SSAVec8s operator<<(const SSAVec8s &a, int bits);
SSAVec8s operator>>(const SSAVec8s &a, int bits);

View file

@ -1,12 +0,0 @@
#pragma once
// Declared here and defined elsewhere; called once per including translation unit (see below).
void AddSourceFileTimestamp(const char *timestamp);
// Anonymous namespace: every translation unit that includes this header gets
// its own private `timestamp` object.
namespace
{
struct TimestampSourceFile
{
// The constructor runs when the object below is initialized and passes __TIME__,
// i.e. the compile time of the including translation unit, to AddSourceFileTimestamp.
TimestampSourceFile() { AddSourceFileTimestamp(__TIME__); }
} timestamp;
}

View file

@ -1,6 +0,0 @@
// This resource script is for compiling with MinGW only. Visual C++
// compilations use the manifest tool to insert the manifest instead.
#include <WinUser.h>
// Embeds trustinfo.txt as resource ID 1 of type RT_MANIFEST.
1 RT_MANIFEST "trustinfo.txt"

View file

@ -1,16 +0,0 @@
<!-- Ignore any warnings about Unrecognized Element "trustInfo" -->
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
<assemblyIdentity version="1.0.0.0"
processorArchitecture="X86"
name="UpdateRevision"
type="win32" />
<description>Drawergen for the ZDoom source build process.</description>
<ms_asmv3:trustInfo xmlns:ms_asmv3="urn:schemas-microsoft-com:asm.v3">
<ms_asmv3:security>
<ms_asmv3:requestedPrivileges>
<ms_asmv3:requestedExecutionLevel level="asInvoker" uiAccess="false" />
</ms_asmv3:requestedPrivileges>
</ms_asmv3:security>
</ms_asmv3:trustInfo>
</assembly>