64 bit: https://rust.godbolt.org/z/rsxh8P8Er
32 bit: https://rust.godbolt.org/z/3P3ejsnh1
I have a little experience with Rust and Assembly but I added some tests.
#[cfg(target_feature = "avx2")]
pub mod avx2 {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
fn first_nonzero_tiny(arr: &[u32]) -> Option<usize> {
arr.iter().position(|&x| x != 0)
}
fn find_u32_zeros_8elems(arr: &[u32], offset: isize) -> i32 {
unsafe {
let ymm0 = _mm256_setzero_si256();
let mut ymm1 = _mm256_loadu_si256(arr.as_ptr().offset(offset) as *const __m256i);
ymm1 = _mm256_cmpeq_epi32(ymm1, ymm0);
let ymm2 = _mm256_castsi256_ps(ymm1);
_mm256_movemask_ps(ymm2)
}
}
pub fn first_nonzero(arr: &[u32]) -> Option<usize> {
let size = arr.len();
if size < 8 {
return first_nonzero_tiny(arr);
}
let mut i: usize = 0;
let simd_size = size / 8 * 8;
while i < simd_size {
let mask: i32 = find_u32_zeros_8elems(&arr, i as isize);
//println!("mask = {}", mask);
if mask != 255 {
return Some((mask.trailing_ones() as usize) + i);
}
i += 8;
//println!("i = {}", i);
}
let last_chunk = size - 8;
let mask: i32 = find_u32_zeros_8elems(&arr, last_chunk as isize);
if mask != 255 {
return Some((mask.trailing_ones() as usize) + last_chunk);
}
None
}
}
use avx2::first_nonzero;
pub fn main() {
let v = [0];
let test1 = first_nonzero(&v);
assert_eq!(test1, None);
let v = [2];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(0));
let v = [1, 0, 0, 0, 0, 0, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(0));
let v = [0, 1, 0, 0, 0, 0, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(1));
let v = [0, 0, 1, 0, 0, 0, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(2));
let v = [0, 0, 0, 1, 0, 0, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(3));
let v = [0, 0, 0, 0, 1, 0, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(4));
let v = [0, 0, 0, 0, 0, 1, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(5));
let v = [0, 0, 0, 0, 0, 1, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(5));
let v = [0, 0, 0, 0, 0, 0, 1, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(6));
let v = [0, 0, 0, 0, 0, 0, 0, 1, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(7));
let v = [0, 0, 0, 0, 0, 0, 0, 0, 1];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(8));
let v = [0, 0, 0, 0, 0, 0, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, None);
let v = [0, 0, 0, 0, 0, 0, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, None);
let v = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(16));
let v = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(15));
let v = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 3, 4, 5];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(14));
let v = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(17));
let v = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49];
let test1 = first_nonzero(&v);
assert_eq!(test1, Some(18));
}