mirror of
https://github.com/void-linux/void-packages.git
synced 2025-10-04 01:25:13 +02:00
62 lines
2.5 KiB
Diff
62 lines
2.5 KiB
Diff
From af21bcd1ea595debd71a77e1e75ed185a31bf26c Mon Sep 17 00:00:00 2001
|
|
From: Hans Petter Jansson <hpj@hpjansson.org>
|
|
Date: Wed, 19 Jun 2024 23:26:23 +0200
|
|
Subject: [PATCH] avx2: Fix compilation on 32-bit x86
|
|
|
|
Fixes #203 (GitHub).
|
|
---
|
|
chafa/internal/chafa-avx2.c | 22 +++++++++++++++++-----
|
|
1 file changed, 17 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/chafa/internal/chafa-avx2.c b/chafa/internal/chafa-avx2.c
|
|
index ae20830..7f11687 100644
|
|
--- a/chafa/internal/chafa-avx2.c
|
|
+++ b/chafa/internal/chafa-avx2.c
|
|
@@ -21,9 +21,21 @@
|
|
|
|
#include <emmintrin.h>
|
|
#include <immintrin.h>
|
|
+#include <smmintrin.h>
|
|
#include "chafa.h"
|
|
#include "internal/chafa-private.h"
|
|
|
|
+/* _mm_extract_epi64() (pextrq) is not available in 32-bit mode. Work around
|
|
+ * it. This needs to be a macro, as the compiler expects an integer constant
|
|
+ * for n. */
|
|
+#if defined __x86_64__ && !defined __ILP32__
|
|
+# define extract_128_epi64(i, n) _mm_extract_epi64 ((i), (n))
|
|
+#else
|
|
+# define extract_128_epi64(i, n) \
|
|
+ ((((guint64) _mm_extract_epi32 ((i), (n) * 2 + 1)) << 32) \
|
|
+ | (guint32) _mm_extract_epi32 ((i), (n) * 2)) /* guint32 cast: zero-extend the low word; a bare int would sign-extend into the high half */
|
|
+#endif
|
|
+
|
|
gint
|
|
calc_error_avx2 (const ChafaPixel *pixels, const ChafaColorPair *color_pair,
|
|
const guint32 *sym_mask_u32)
|
|
@@ -96,14 +108,14 @@ calc_colors_avx2 (const ChafaPixel *pixels, ChafaColorAccum *accums_out,
|
|
accum_bg_128 = _mm_add_epi16 (_mm256_extracti128_si256 (accum_bg, 0),
|
|
_mm256_extracti128_si256 (accum_bg, 1));
|
|
((guint64 *) accums_out) [0] =
|
|
- (guint64) _mm_extract_epi64 (accum_bg_128, 0)
|
|
- + (guint64) _mm_extract_epi64 (accum_bg_128, 1);
|
|
+ extract_128_epi64 (accum_bg_128, 0)
|
|
+ + extract_128_epi64 (accum_bg_128, 1);
|
|
|
|
accum_fg_128 = _mm_add_epi16 (_mm256_extracti128_si256 (accum_fg, 0),
|
|
_mm256_extracti128_si256 (accum_fg, 1));
|
|
((guint64 *) accums_out) [1] =
|
|
- (guint64) _mm_extract_epi64 (accum_fg_128, 0)
|
|
- + (guint64) _mm_extract_epi64 (accum_fg_128, 1);
|
|
+ extract_128_epi64 (accum_fg_128, 0)
|
|
+ + extract_128_epi64 (accum_fg_128, 1);
|
|
}
|
|
|
|
/* 32768 divided by index. Divide by zero is defined as zero. */
|
|
@@ -143,5 +155,5 @@ chafa_color_accum_div_scalar_avx2 (ChafaColorAccum *accum, guint16 divisor)
|
|
accum_128 = _mm_loadl_epi64 ((const __m128i *) accum);
|
|
divisor_128 = _mm_set1_epi16 (invdiv16 [divisor]);
|
|
accum_128 = _mm_mulhrs_epi16 (accum_128, divisor_128);
|
|
- *((guint64 *) accum) = _mm_extract_epi64 (accum_128, 0);
|
|
+ *((guint64 *) accum) = extract_128_epi64 (accum_128, 0);
|
|
}
|