From 04f49d1ca41f3706bf74f867c775797cf3bd2037 Mon Sep 17 00:00:00 2001 From: Bill Currie Date: Sun, 1 Oct 2023 21:30:41 +0900 Subject: [PATCH] [qfcc] Commit common scale terms While it works, and does improve the code slightly, it could do better by favoring constants over variables for the common factor. --- tools/qfcc/include/expr.h | 5 + tools/qfcc/source/expr_algebra.c | 30 +++--- tools/qfcc/source/expr_optimize.c | 159 ++++++++++++++++++++++++++++-- tools/qfcc/test/motor-point.r | 2 +- 4 files changed, 173 insertions(+), 23 deletions(-) diff --git a/tools/qfcc/include/expr.h b/tools/qfcc/include/expr.h index 4a8a8ccd6..8a5b042ba 100644 --- a/tools/qfcc/include/expr.h +++ b/tools/qfcc/include/expr.h @@ -924,6 +924,7 @@ const expr_t *edag_add_expr (const expr_t *e); bool is_scale (const expr_t *expr) __attribute__((pure)); bool is_cross (const expr_t *expr) __attribute__((pure)); bool is_sum (const expr_t *expr) __attribute__((pure)); +bool is_mult (const expr_t *expr) __attribute__((pure)); bool is_neg (const expr_t *expr) __attribute__((pure)); const expr_t *neg_expr (const expr_t *e); @@ -941,6 +942,10 @@ void scatter_terms (const expr_t *sum, const expr_t **adds, const expr_t **subs); const expr_t *gather_terms (type_t *type, const expr_t **adds, const expr_t **subs); +int count_factors (const expr_t *expr) __attribute__((pure)); +void scatter_factors (const expr_t *prod, const expr_t **factors); +const expr_t *gather_factors (type_t *type, int op, const expr_t **factors, + int count); ///@} diff --git a/tools/qfcc/source/expr_algebra.c b/tools/qfcc/source/expr_algebra.c index 1fb6be8e1..c0c3419db 100644 --- a/tools/qfcc/source/expr_algebra.c +++ b/tools/qfcc/source/expr_algebra.c @@ -425,7 +425,7 @@ is_sum (const expr_t *expr) && (expr->expr.op == '+' || expr->expr.op == '-')); } -static bool __attribute__((pure)) +bool is_mult (const expr_t *expr) { return (expr && expr->type == ex_expr @@ -450,7 +450,7 @@ count_terms (const expr_t *expr) return terms; } -static int __attribute__((pure)) +int __attribute__((pure)) count_factors (const expr_t *expr) { if (!is_mult (expr)) { @@ -475,34 +475,34 @@ is_cross (const expr_t *expr) } static void -distribute_factors_core (const expr_t *prod, const expr_t **factors, int *ind) +scatter_factors_core (const expr_t *prod, const expr_t **factors, int *ind) { auto e1 = prod->expr.e1; auto e2 = prod->expr.e2; if (is_mult (e1)) { - distribute_factors_core (e1, factors, ind); + scatter_factors_core (e1, factors, ind); } else { factors[(*ind)++] = e1; } if (is_mult (e2)) { - distribute_factors_core (e2, factors, ind); + scatter_factors_core (e2, factors, ind); } else { factors[(*ind)++] = e2; } } -static void -distribute_factors (const expr_t *prod, const expr_t **factors) +void +scatter_factors (const expr_t *prod, const expr_t **factors) { if (!is_mult (prod)) { - internal_error (prod, "distribute_factors with no product"); + internal_error (prod, "scatter_factors with no product"); } int ind = 0; - distribute_factors_core (prod, factors, &ind); + scatter_factors_core (prod, factors, &ind); } -static const expr_t * -collect_factors (type_t *type, int op, const expr_t **factors, int count) +const expr_t * +gather_factors (type_t *type, int op, const expr_t **factors, int count) { if (!count) { internal_error (0, "no factors to collect"); @@ -516,8 +516,8 @@ collect_factors (type_t *type, int op, const expr_t **factors, int count) b = factors[1]; } else { int mid = (count + 1) / 2; - a = collect_factors (type, op, factors, mid); - b = collect_factors (type, op, factors + mid, count - mid); + a = gather_factors (type, op, factors, mid); + b = gather_factors (type, op, factors + mid, count - mid); } auto prod = typed_binary_expr (type, op, a, b); return edag_add_expr (prod); @@ -551,9 +551,9 @@ sort_factors (type_t *type, const expr_t *e) int count = count_factors (e); const expr_t *factors[count + 1] = {}; - distribute_factors (e, factors); + scatter_factors (e, factors); heapsort (factors, count, sizeof (factors[0]), expr_ptr_cmp); - auto mult = collect_factors (type, '*', factors, count); + auto mult = gather_factors (type, '*', factors, count); return mult; } diff --git a/tools/qfcc/source/expr_optimize.c b/tools/qfcc/source/expr_optimize.c index 315ca17a4..ed941ecf3 100644 --- a/tools/qfcc/source/expr_optimize.c +++ b/tools/qfcc/source/expr_optimize.c @@ -45,6 +45,18 @@ static const expr_t *optimize_core (const expr_t *expr); static const expr_t skip; +static void +clean_skips (const expr_t **expr_list) +{ + auto dst = expr_list; + for (auto src = dst; *src; src++) { + if (*src != &skip) { + *dst++ = *src; + } + } + *dst = 0; +} + static const expr_t * rescale (const expr_t *expr, const expr_t *target, const expr_t *remove) { @@ -62,6 +74,37 @@ rescale (const expr_t *expr, const expr_t *target, const expr_t *remove) return scale_expr (type, rescale (expr->expr.e1, target, remove), scale); } +static const expr_t * +remult (const expr_t *expr, const expr_t *remove) +{ + if (!is_mult (expr)) { + internal_error (expr, "not a mult expression"); + } + auto type = get_type (expr); + int count = count_factors (expr); + const expr_t *factors[count + 1] = {}; + scatter_factors (expr, factors); + for (auto f = factors; *f; f++) { + if (*f == remove) { + *f = &skip; + break; + } + } + clean_skips (factors); + auto new = gather_factors (type, expr->expr.op, factors, count - 1); + return new; +} + +static const expr_t * +remult_scale (const expr_t *expr, const expr_t *remove) +{ + auto mult = remult (expr->expr.e2, remove); + auto scalee = expr->expr.e1; + auto type = get_type (expr); + auto new = typed_binary_expr (type, SCALE, scalee, mult); + return edag_add_expr (new); +} + static const expr_t * optimize_cross (const expr_t *expr, const expr_t **adds, const expr_t **subs) { @@ -170,16 +213,95 @@ optimize_cross (const expr_t *expr, const expr_t **adds, const expr_t **subs) return cross; } -static void -clean_skips (const expr_t **expr_list) +static bool __attribute__((pure)) +mult_has_factor (const expr_t *mult, const expr_t *factor) { - auto dst = expr_list; - for (auto src = dst; *src; src++) { - if (*src != &skip) { - *dst++ = *src; + if (!is_mult (mult)) { + return false; + } + if (mult->expr.e1 == factor || mult->expr.e2 == factor) { + return true; + } + bool has_factor = mult_has_factor (mult->expr.e1, factor); + if (!has_factor) { + has_factor = mult_has_factor (mult->expr.e2, factor); + } + return has_factor; +} + +static const expr_t * +optimize_scale (const expr_t *expr, const expr_t **adds, const expr_t **subs) +{ + auto mult = expr->expr.e2; + int num_factors = count_factors (mult); + int total = 0; + int fac_counts[num_factors + 1] = {}; + const expr_t *factors[num_factors + 2] = {}; + if (is_mult (mult)) { + scatter_factors (mult, factors); + } else { + factors[0] = mult; + } + + for (auto search = adds; *search; search++) { + if (is_scale (*search)) { + for (auto f = factors; *f; f++) { + if (mult_has_factor ((*search)->expr.e2, *f)) { + fac_counts[f - factors]++; + total++; + } + } } } - *dst = 0; + for (auto search = subs; *search; search++) { + if (is_scale (*search)) { + for (auto f = factors; *f; f++) { + if (mult_has_factor ((*search)->expr.e2, *f)) { + fac_counts[f - factors]++; + total++; + } + } + } + } + if (!total) { + return expr; + } + + const expr_t *common = 0; + int count = 0; + for (auto f = factors; *f; f++) { + if (fac_counts[f - factors] > count) { + common = *f; + count = fac_counts[f - factors]; + } + } + + const expr_t *com_adds[count + 2] = {}; + const expr_t *com_subs[count + 2] = {}; + auto dst = com_adds; + *dst++ = remult_scale (expr, common); + for (auto src = adds; *src; src++) { + if (is_scale (*src) && mult_has_factor ((*src)->expr.e2, common)) { + *dst++ = remult_scale (*src, common); + *src = &skip; + } + } + dst = com_subs; + for (auto src = subs; *src; src++) { + if (is_scale (*src) && mult_has_factor ((*src)->expr.e2, common)) { + *dst++ = remult_scale (*src, common); + *src = &skip; + } + } + + auto type = get_type (expr); + auto scale = expr->expr.e1; + auto col = gather_terms (type, com_adds, com_subs); + col = optimize_core (col); + + scale = typed_binary_expr (type, SCALE, col, common); + scale = edag_add_expr (scale); + return scale; } static void @@ -216,6 +338,28 @@ optimize_cross_products (const expr_t **adds, const expr_t **subs) clean_skips (subs); } +static void +optimize_scale_products (const expr_t **adds, const expr_t **subs) +{ + for (auto scan = adds; *scan; scan++) { + if (is_scale (*scan)) { + auto e = *scan; + *scan = &skip; + *scan = optimize_scale (e, adds, subs); + } + } + for (auto scan = subs; *scan; scan++) { + if (is_scale (*scan)) { + auto e = *scan; + *scan = &skip; + *scan = optimize_scale (e, subs, adds); + } + } + + clean_skips (adds); + clean_skips (subs); +} + static int expr_ptr_cmp (const void *_a, const void *_b) { @@ -269,6 +413,7 @@ optimize_core (const expr_t *expr) optimize_adds (subs); optimize_cross_products (adds, subs); + optimize_scale_products (adds, subs); expr = gather_terms (type, adds, subs); } diff --git a/tools/qfcc/test/motor-point.r b/tools/qfcc/test/motor-point.r index a2a3602b7..56bfbef58 100644 --- a/tools/qfcc/test/motor-point.r +++ b/tools/qfcc/test/motor-point.r @@ -23,7 +23,7 @@ main (void) point_t p = (point_t)'10 4 -1.5 1'f; point_t n = apply_motor (m, p); printf ("n: %.9q\n", n); - if ((vec4)n != '9.99999905 -3.99999952 -1.49999988 0.99999994'f) { + if ((vec4)n != '10 -3.99999952 -1.49999988 0.99999994'f) { return 1; } return 0;