LCOV - code coverage report
Current view: top level - src - simd.c (source / functions) Coverage Total Hit
Test: colopresso Coverage Report Lines: 100.0 % 13 13
Test Date: 2026-02-16 05:23:27 Functions: 100.0 % 2 2
Legend: Lines: hit not hit

            Line data    Source code
       1              : /*
       2              :  * SPDX-License-Identifier: GPL-3.0-or-later
       3              :  *
       4              :  * This file is part of colopresso
       5              :  *
       6              :  * Copyright (C) 2025-2026 COLOPL, Inc.
       7              :  *
       8              :  * Author: Go Kudo <g-kudo@colopl.co.jp>
       9              :  * Developed with AI (LLM) code assistance. See `NOTICE` for details.
      10              :  */
      11              : 
      12              : #include "internal/simd.h"
      13              : 
      14              : #if defined(CPRES_SIMD_SSE41) || defined(CPRES_SIMD_SSE2)
      15              : 
      16     47676545 : static inline uint32_t color_distance_sq_sse(uint32_t lhs, uint32_t rhs) {
      17     47676545 :   __m128i a = _mm_cvtsi32_si128((int)lhs);
      18     95353090 :   __m128i b = _mm_cvtsi32_si128((int)rhs);
      19              :   __m128i a16, b16, diff, diff_sq;
      20              :   int result;
      21              : 
      22     95353090 :   a16 = _mm_unpacklo_epi8(a, _mm_setzero_si128());
      23     95353090 :   b16 = _mm_unpacklo_epi8(b, _mm_setzero_si128());
      24              : 
      25     47676545 :   diff = _mm_sub_epi16(a16, b16);
      26              : 
      27     47676545 :   diff_sq = _mm_madd_epi16(diff, diff);
      28              : 
      29     47676545 :   diff_sq = _mm_add_epi32(diff_sq, _mm_srli_si128(diff_sq, 8));
      30     95353090 :   diff_sq = _mm_add_epi32(diff_sq, _mm_srli_si128(diff_sq, 4));
      31              : 
      32     47676545 :   result = _mm_cvtsi128_si32(diff_sq);
      33     47676545 :   return (uint32_t)result;
      34              : }
      35              : 
      36              : #elif defined(CPRES_SIMD_NEON)
      37              : 
      38              : static inline uint32_t color_distance_sq_neon(uint32_t lhs, uint32_t rhs) {
      39              :   uint8x8_t a = vreinterpret_u8_u32(vdup_n_u32(lhs)), b = vreinterpret_u8_u32(vdup_n_u32(rhs));
      40              :   int16x4_t a16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(a))), b16 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(b))), diff = vsub_s16(a16, b16);
      41              :   int32x4_t diff_sq = vmull_s16(diff, diff);
      42              :   int32_t result = vgetq_lane_s32(diff_sq, 0) + vgetq_lane_s32(diff_sq, 1) + vgetq_lane_s32(diff_sq, 2) + vgetq_lane_s32(diff_sq, 3);
      43              : 
      44              :   return (uint32_t)result;
      45              : }
      46              : 
      47              : #elif defined(CPRES_SIMD_WASM128)
      48              : 
      49              : static inline uint32_t color_distance_sq_wasm(uint32_t lhs, uint32_t rhs) {
      50              :   v128_t a = wasm_i32x4_splat((int32_t)lhs), b = wasm_i32x4_splat((int32_t)rhs), a16 = wasm_u16x8_extend_low_u8x16(a), b16 = wasm_u16x8_extend_low_u8x16(b), diff = wasm_i16x8_sub(a16, b16),
      51              :          diff_sq = wasm_i32x4_extmul_low_i16x8(diff, diff);
      52              :   int32_t result = wasm_i32x4_extract_lane(diff_sq, 0) + wasm_i32x4_extract_lane(diff_sq, 1) + wasm_i32x4_extract_lane(diff_sq, 2) + wasm_i32x4_extract_lane(diff_sq, 3);
      53              : 
      54              :   return (uint32_t)result;
      55              : }
      56              : 
      57              : #else
      58              : 
      59              : static inline uint32_t color_distance_sq_scalar(uint32_t lhs, uint32_t rhs) {
      60              :   int32_t r1 = (int32_t)(lhs & 0xFF), g1 = (int32_t)((lhs >> 8) & 0xFF), b1 = (int32_t)((lhs >> 16) & 0xFF), a1 = (int32_t)((lhs >> 24) & 0xFF), r2 = (int32_t)(rhs & 0xFF),
      61              :           g2 = (int32_t)((rhs >> 8) & 0xFF), b2 = (int32_t)((rhs >> 16) & 0xFF), a2 = (int32_t)((rhs >> 24) & 0xFF), dr = r1 - r2, dg = g1 - g2, db = b1 - b2, da = a1 - a2;
      62              : 
      63              :   return (uint32_t)(dr * dr + dg * dg + db * db + da * da);
      64              : }
      65              : 
      66              : #endif
      67              : 
      68     47676545 : uint32_t simd_color_distance_sq_u32(uint32_t lhs, uint32_t rhs) {
      69              : #if defined(CPRES_SIMD_WASM128)
      70              :   return color_distance_sq_wasm(lhs, rhs);
      71              : #elif defined(CPRES_SIMD_SSE41) || defined(CPRES_SIMD_SSE2)
      72     47676545 :   return color_distance_sq_sse(lhs, rhs);
      73              : #elif defined(CPRES_SIMD_NEON)
      74              :   return color_distance_sq_neon(lhs, rhs);
      75              : #else
      76              :   return color_distance_sq_scalar(lhs, rhs);
      77              : #endif
      78              : }
        

Generated by: LCOV version 2.0-1