silvio
2014-09-20 21:20:50 UTC
Hello
I've been trying to replicate an SSE trick from a c program in rust and
come up with some strange behavior. It seems work for --opt-level 0,1
but gives the wrong result on 2,3. I used the !asm instruction to
execute assembly commands.
I'm not sure if this is a bug or just me not using asm! correctly.
The most condensed version that still produces the bug is appended.
on opt 0,1 I get the correct answer 2
on opt 2,3 I get the wrong answer 0
Silvio
-------------- next part --------------
#![feature(asm)]
use std::simd::f32x4;
use std::simd::f64x2;
fn main() {
let one2 = f64x2(1f64,1f64);
println!("one2 1: {}", one2);
let shift = mm_convert_f32x4_f64x2(mm_convert_f64x2_f32x4(one2))+one2;
let f64x2(res1, _) = shift;
println!("res: {}", res1);
}
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86_64")]
fn mm_convert_f32x4_f64x2(a: f32x4) -> f64x2 {
let mut ret : f64x2 = unsafe {std::mem::uninitialized()};
unsafe {
asm!("cvtps2pd $0, $1"
: "=x"(ret)
: "x"(a)
);
}
ret
}
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86_64")]
fn mm_convert_f64x2_f32x4(a: f64x2) -> f32x4 {
let mut ret : f32x4 = unsafe {std::mem::uninitialized()};
unsafe {
asm!("cvtpd2ps $0, $1"
: "=x"(ret)
: "x"(a)
);
}
ret
}
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86_64")]
fn mm_sqrt_reciprocal_f32x4(a: f32x4) -> f32x4 {
let mut ret : f32x4 = unsafe {std::mem::uninitialized()};
unsafe {
asm!("rsqrtps $0, $1"
: "=x"(ret)
: "x"(a)
);
}
ret
}
I've been trying to replicate an SSE trick from a c program in rust and
come up with some strange behavior. It seems work for --opt-level 0,1
but gives the wrong result on 2,3. I used the !asm instruction to
execute assembly commands.
I'm not sure if this is a bug or just me not using asm! correctly.
The most condensed version that still produces the bug is appended.
on opt 0,1 I get the correct answer 2
on opt 2,3 I get the wrong answer 0
Silvio
-------------- next part --------------
#![feature(asm)]
use std::simd::f32x4;
use std::simd::f64x2;
fn main() {
let one2 = f64x2(1f64,1f64);
println!("one2 1: {}", one2);
let shift = mm_convert_f32x4_f64x2(mm_convert_f64x2_f32x4(one2))+one2;
let f64x2(res1, _) = shift;
println!("res: {}", res1);
}
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86_64")]
fn mm_convert_f32x4_f64x2(a: f32x4) -> f64x2 {
let mut ret : f64x2 = unsafe {std::mem::uninitialized()};
unsafe {
asm!("cvtps2pd $0, $1"
: "=x"(ret)
: "x"(a)
);
}
ret
}
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86_64")]
fn mm_convert_f64x2_f32x4(a: f64x2) -> f32x4 {
let mut ret : f32x4 = unsafe {std::mem::uninitialized()};
unsafe {
asm!("cvtpd2ps $0, $1"
: "=x"(ret)
: "x"(a)
);
}
ret
}
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86_64")]
fn mm_sqrt_reciprocal_f32x4(a: f32x4) -> f32x4 {
let mut ret : f32x4 = unsafe {std::mem::uninitialized()};
unsafe {
asm!("rsqrtps $0, $1"
: "=x"(ret)
: "x"(a)
);
}
ret
}