Do not use masks in AVX2 double compressstore · intel/x86-simd-sort@23b6d32 · GitHub
[go: up one dir, main page]

Skip to content

Commit 23b6d32

Browse files
committed
Do not use masks in AVX2 double compressstore
1 parent cb4358f commit 23b6d32

File tree

1 file changed

+4
-10
lines changed

1 file changed

+4
-10
lines changed

src/avx2-emu-funcs.hpp

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -224,21 +224,18 @@ int avx2_double_compressstore32(void *left_addr,
224224
typename avx2_vector<T>::reg_t reg)
225225
{
226226
using vtype = avx2_vector<T>;
227-
const __m256i oxff = _mm256_set1_epi32(0xFFFFFFFF);
228227

229228
T *leftStore = (T *)left_addr;
230229
T *rightStore = (T *)right_addr;
231230

232231
int32_t shortMask = convert_avx2_mask_to_int(k);
233232
const __m256i &perm = _mm256_loadu_si256(
234233
(const __m256i *)avx2_compressstore_lut32_perm[shortMask].data());
235-
const __m256i &left = _mm256_loadu_si256(
236-
(const __m256i *)avx2_compressstore_lut32_left[shortMask].data());
237234

238235
typename vtype::reg_t temp = vtype::permutevar(reg, perm);
239236

240-
vtype::mask_storeu(leftStore, left, temp);
241-
vtype::mask_storeu(rightStore, _mm256_xor_si256(oxff, left), temp);
237+
vtype::storeu(leftStore, temp);
238+
vtype::storeu(rightStore, temp);
242239

243240
return _mm_popcnt_u32(shortMask);
244241
}
@@ -250,22 +247,19 @@ int32_t avx2_double_compressstore64(void *left_addr,
250247
typename avx2_vector<T>::reg_t reg)
251248
{
252249
using vtype = avx2_vector<T>;
253-
const __m256i oxff = _mm256_set1_epi32(0xFFFFFFFF);
254250

255251
T *leftStore = (T *)left_addr;
256252
T *rightStore = (T *)right_addr;
257253

258254
int32_t shortMask = convert_avx2_mask_to_int_64bit(k);
259255
const __m256i &perm = _mm256_loadu_si256(
260256
(const __m256i *)avx2_compressstore_lut64_perm[shortMask].data());
261-
const __m256i &left = _mm256_loadu_si256(
262-
(const __m256i *)avx2_compressstore_lut64_left[shortMask].data());
263257

264258
typename vtype::reg_t temp = vtype::cast_from(
265259
_mm256_permutevar8x32_epi32(vtype::cast_to(reg), perm));
266260

267-
vtype::mask_storeu(leftStore, left, temp);
268-
vtype::mask_storeu(rightStore, _mm256_xor_si256(oxff, left), temp);
261+
vtype::storeu(leftStore, temp);
262+
vtype::storeu(rightStore, temp);
269263

270264
return _mm_popcnt_u32(shortMask);
271265
}

0 commit comments

Comments
 (0)
0