From e9efb64a0b9d637cf192b184798ac91d9fa0f289 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 23 Feb 2017 03:50:24 +0100 Subject: [PATCH] Fix light offset --- src/swrenderer/drawers/r_draw_wall32.h | 160 +++++++++-------------- src/swrenderer/drawers/r_draw_wall32.php | 8 +- 2 files changed, 63 insertions(+), 105 deletions(-) diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index d4ba5d51e2..7e6dd931dd 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -67,11 +67,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -245,11 +243,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -492,11 +488,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -705,11 +699,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -993,11 +985,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1189,11 +1179,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1454,11 +1442,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1685,11 +1671,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -1991,11 +1975,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -2229,11 +2211,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -2536,11 +2516,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -2809,11 +2787,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3157,11 +3133,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3395,11 +3369,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3702,11 +3674,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -3975,11 +3945,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -4323,11 +4291,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -4561,11 +4527,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -4868,11 +4832,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); @@ -5141,11 +5103,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count); diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index 2944fa407a..bc003c9a5f 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -112,11 +112,9 @@ namespace swrenderer auto lights = args.dc_lights; auto num_lights = args.dc_num_lights; - float vpz = args.dc_viewpos.Z; - float stepvpz = args.dc_viewpos_step.Z; - vpz += thread->skipped_by_thread(dest_y) * stepvpz; - stepvpz *= thread->num_cores; - __m128 viewpos_z = _mm_set_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); count = thread->count_for_thread(dest_y, count);