Line data Source code
1 : /*
2 : * SPDX-License-Identifier: GPL-3.0-or-later
3 : *
4 : * This file is part of colopresso
5 : *
6 : * Copyright (C) 2025-2026 COLOPL, Inc.
7 : *
8 : * Author: Go Kudo <g-kudo@colopl.co.jp>
9 : * Developed with AI (LLM) code assistance. See `NOTICE` for details.
10 : */
11 :
12 : #include "internal/simd.h"
13 :
14 : #if defined(CPRES_SIMD_SSE41) || defined(CPRES_SIMD_SSE2)
15 :
/*
 * Squared Euclidean distance between two 32-bit values, each treated as four
 * unsigned 8-bit components. Uses SSE2-level intrinsics only.
 */
static inline uint32_t color_distance_sq_sse(uint32_t lhs, uint32_t rhs) {
    const __m128i zero = _mm_setzero_si128();

    /* Zero-extend every byte into its own 16-bit lane so differences may be negative. */
    const __m128i lhs_wide = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)lhs), zero);
    const __m128i rhs_wide = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)rhs), zero);
    const __m128i delta = _mm_sub_epi16(lhs_wide, rhs_wide);

    /* Multiply 16-bit lanes with themselves; adjacent products are summed into 32-bit lanes. */
    __m128i acc = _mm_madd_epi16(delta, delta);

    /* Horizontal reduction: fold the upper 64 bits onto the lower, then lane 1 onto lane 0. */
    acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 8));
    acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 4));

    return (uint32_t)_mm_cvtsi128_si32(acc);
}
35 :
36 : #elif defined(CPRES_SIMD_NEON)
37 :
/*
 * Squared Euclidean distance between two 32-bit values, each treated as four
 * unsigned 8-bit components, computed with NEON intrinsics.
 */
static inline uint32_t color_distance_sq_neon(uint32_t lhs, uint32_t rhs) {
    /* Broadcast each argument and view it as bytes; lanes 0..3 carry its four bytes. */
    const uint8x8_t lhs_bytes = vreinterpret_u8_u32(vdup_n_u32(lhs));
    const uint8x8_t rhs_bytes = vreinterpret_u8_u32(vdup_n_u32(rhs));

    /* Widen to 16 bits and keep the low four lanes so the subtraction may go negative. */
    const int16x4_t lhs_wide = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(lhs_bytes)));
    const int16x4_t rhs_wide = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(rhs_bytes)));
    const int16x4_t delta = vsub_s16(lhs_wide, rhs_wide);

    /* Widening multiply yields one 32-bit square per component. */
    const int32x4_t squares = vmull_s16(delta, delta);

    int32_t total = vgetq_lane_s32(squares, 0);
    total += vgetq_lane_s32(squares, 1);
    total += vgetq_lane_s32(squares, 2);
    total += vgetq_lane_s32(squares, 3);

    return (uint32_t)total;
}
46 :
47 : #elif defined(CPRES_SIMD_WASM128)
48 :
/*
 * Squared Euclidean distance between two 32-bit values, each treated as four
 * unsigned 8-bit components, computed with WebAssembly SIMD128 intrinsics.
 */
static inline uint32_t color_distance_sq_wasm(uint32_t lhs, uint32_t rhs) {
    /* Zero-extend the low eight bytes of each splat; lanes 0..3 carry the argument's four bytes. */
    const v128_t lhs_wide = wasm_u16x8_extend_low_u8x16(wasm_i32x4_splat((int32_t)lhs));
    const v128_t rhs_wide = wasm_u16x8_extend_low_u8x16(wasm_i32x4_splat((int32_t)rhs));
    const v128_t delta = wasm_i16x8_sub(lhs_wide, rhs_wide);

    /* Widening multiply of the low four 16-bit lanes: one 32-bit square per component. */
    const v128_t squares = wasm_i32x4_extmul_low_i16x8(delta, delta);

    int32_t total = wasm_i32x4_extract_lane(squares, 0);
    total += wasm_i32x4_extract_lane(squares, 1);
    total += wasm_i32x4_extract_lane(squares, 2);
    total += wasm_i32x4_extract_lane(squares, 3);

    return (uint32_t)total;
}
56 :
57 : #else
58 :
/*
 * Portable fallback: squared Euclidean distance between two 32-bit values,
 * each treated as four unsigned 8-bit components.
 */
static inline uint32_t color_distance_sq_scalar(uint32_t lhs, uint32_t rhs) {
    int32_t total = 0;

    /* Walk the four byte positions from least to most significant. */
    for (unsigned shift = 0; shift < 32; shift += 8) {
        const int32_t lhs_component = (int32_t)((lhs >> shift) & 0xFF);
        const int32_t rhs_component = (int32_t)((rhs >> shift) & 0xFF);
        const int32_t delta = lhs_component - rhs_component;

        total += delta * delta;
    }

    return (uint32_t)total;
}
65 :
66 : #endif
67 :
/*
 * Returns the squared distance between lhs and rhs by forwarding to the
 * helper compiled for the active SIMD configuration.
 *
 * The branch order deliberately mirrors the #if chain that defines the
 * helpers earlier in this file (SSE, then NEON, then WASM128, then scalar).
 * Keeping both chains in the same order guarantees that the helper selected
 * here is the one that was actually compiled, even if more than one
 * CPRES_SIMD_* macro happens to be defined at once.
 * NOTE(review): whether internal/simd.h makes these macros mutually exclusive
 * is not visible from this file; if it does, this ordering changes nothing.
 */
uint32_t simd_color_distance_sq_u32(uint32_t lhs, uint32_t rhs) {
#if defined(CPRES_SIMD_SSE41) || defined(CPRES_SIMD_SSE2)
    return color_distance_sq_sse(lhs, rhs);
#elif defined(CPRES_SIMD_NEON)
    return color_distance_sq_neon(lhs, rhs);
#elif defined(CPRES_SIMD_WASM128)
    return color_distance_sq_wasm(lhs, rhs);
#else
    return color_distance_sq_scalar(lhs, rhs);
#endif
}
|