SSE vector class for better performance in my CPU-based ray tracer [on hold]

up vote
-3
down vote

favorite

I am writing a vector class using SSE intrinsics for my CPU-based ray tracer. I expected that simply replacing the regular scalar vector operations with SSE ones would give a good performance gain. I compiled it with the Visual Studio 2017 compiler in both x86 and x64 release mode. The x86 build crashes; the x64 build works fine, but shows no performance gain.

#ifndef INC_VEC4_H

#define INC_VEC4_H



    #include<nmmintrin.h>



    /**
     * Four-component single-precision vector held in one SSE register.
     *
     * The lanes can be accessed as x/y/z/w, as r/g/b/a, or all at once
     * through `vec`.  Every dot-product based helper (dot, length,
     * squared_length, normalize, make_it_unit, check_ifzero) looks only at
     * x, y and z; the element-wise operators act on all four lanes.
     *
     * The constructors, copy operations, destructor and the static
     * direction constants are only declared here; their definitions are
     * not part of this header.
     *
     * NOTE(review): objects must be 16-byte aligned.  Heap allocations in
     * 32-bit builds may not guarantee that - worth checking as a possible
     * cause of the reported x86 crash.
     * NOTE(review): the user-declared copy operations and destructor make
     * the type non-trivial; if they do nothing special, defaulting them may
     * help the optimiser - confirm against their definitions.
     */
    struct alignas(16) vec4
    {
        union {
            __m128 vec;                      // all four lanes at once
            struct { float x, y, z, w; };    // lanes 0..3
            struct { float r, g, b, a; };    // alternate names for the same lanes
        };

        //normalised directions
        static vec4 UP;
        static vec4 DOWN;
        static vec4 ZERO;
        static vec4 LEFT;
        static vec4 RIGHT;
        static vec4 FORWARD;
        static vec4 BACKWARD;

        //construction
        vec4();
        vec4(float x, float y, float z, float w = 1.0f);
        explicit vec4(float n);
        vec4(const vec4& other);
        vec4& operator = (const vec4& other);
        //destruction
        ~vec4();

        //internal helpers

        /// x/y/z dot product of two registers, broadcast to every lane.
        /// Mask 0x7F: high nibble 0111 multiplies lanes 0-2 only, low
        /// nibble 1111 writes the sum into all four result lanes.
        static __m128 dot3(__m128 lhs, __m128 rhs)
        {
            return _mm_dp_ps(lhs, rhs, 0x7F);
        }

        /// Builds a vec4 whose register is `lanes`.
        static vec4 wrap(__m128 lanes)
        {
            vec4 out;
            out.vec = lanes;
            return out;
        }

        /// Dot product of the x, y, z components (w is ignored).
        float dot(const vec4& other) const
        {
            return _mm_cvtss_f32(dot3(vec, other.vec));
        }

        /// Scales this vector in place by the approximate reciprocal of its
        /// x/y/z length.  All four lanes (w included) are multiplied.
        /// Uses _mm_rsqrt_ps, which is an approximation, and has no guard
        /// for a zero-length input.
        void make_it_unit()
        {
            vec = _mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec)));
        }

        /// Returns a copy scaled like make_it_unit(); *this is unchanged.
        vec4 normalize() const
        {
            return wrap(_mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec))));
        }

        /// Euclidean length of the x, y, z components.
        float length() const
        {
            return _mm_cvtss_f32(_mm_sqrt_ss(dot3(vec, vec)));
        }

        //vec4-vec4 arithmetic operations (element-wise, all four lanes)
        vec4 operator + (const vec4& other) const {
            return wrap(_mm_add_ps(vec, other.vec));
        }
        vec4 operator - (const vec4& other) const {
            return wrap(_mm_sub_ps(vec, other.vec));
        }
        vec4 operator * (const vec4& other) const {
            return wrap(_mm_mul_ps(vec, other.vec));
        }
        vec4 operator / (const vec4& other) const {
            return wrap(_mm_div_ps(vec, other.vec));
        }

        //vec4-vec4 compound assignment; each returns *this so calls can chain
        vec4& operator += (const vec4& other) {
            vec = _mm_add_ps(vec, other.vec);
            return *this;
        }
        vec4& operator *= (const vec4& other) {
            vec = _mm_mul_ps(vec, other.vec);
            return *this;
        }
        vec4& operator /= (const vec4& other) {
            vec = _mm_div_ps(vec, other.vec);
            return *this;
        }

        //vec4-scalar * & / (the scalar is broadcast to all four lanes)
        vec4 operator / (float scalar) const {
            return wrap(_mm_div_ps(vec, _mm_set1_ps(scalar)));
        }
        vec4& operator *= (float scalar) {
            vec = _mm_mul_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }
        vec4& operator /= (float scalar) {
            vec = _mm_div_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }

        /// Squared length of the x, y, z components.
        float squared_length() const {
            return _mm_cvtss_f32(dot3(vec, vec));
        }

        /// Sets all four lanes (w included) to 0 and returns *this.
        vec4& make_itzero() {
            vec = _mm_setzero_ps();
            return *this;
        }

        /// Cross product of the x, y, z components; the result's w is
        /// w*other.w - w*other.w.
        vec4 cross(const vec4& other) const {
            // _MM_SHUFFLE(3, 0, 2, 1) yields (y, z, x, w);
            // _MM_SHUFFLE(3, 1, 0, 2) yields (z, x, y, w).
            const __m128 a_yzx = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 a_zxy = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 1, 0, 2));
            const __m128 b_yzx = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 b_zxy = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 1, 0, 2));
            return wrap(_mm_sub_ps(_mm_mul_ps(a_yzx, b_zxy), _mm_mul_ps(a_zxy, b_yzx)));
        }

        //checks

        /// True when x, y and z all compare equal to 0 (so -0.0f counts as
        /// zero and NaN does not).
        /// NOTE(review): w is ignored to match dot()/length(); confirm this
        /// is the intended meaning of "zero".
        bool check_ifzero() const {
            const int equalLanes = _mm_movemask_ps(_mm_cmpeq_ps(vec, _mm_setzero_ps()));
            return (equalLanes & 0x7) == 0x7;
        }
    };



    //non-member inline operators

    /// Returns `v` with every lane (w included) multiplied by `scalar`.
    inline vec4 operator * (const vec4& v, float scalar) {
        vec4 scaled;
        // _mm_set1_ps broadcasts the scalar into all four lanes.
        scaled.vec = _mm_mul_ps(v.vec, _mm_set1_ps(scalar));
        return scaled;
    }



    /// Scalar-on-the-left form of vector scaling; forwards to
    /// operator*(const vec4&, float), which computes the same product.
    inline vec4 operator * (float scalar, const vec4& v) {
        return v * scalar;
    }



    /// Dot product of the x, y, z components of `v1` and `v2`.
    inline float dot(const vec4& v1, const vec4& v2) {
        // Mask 0x7F: multiply lanes 0-2 only, write the sum to every lane;
        // lane 0 is then read back as a float.
        const __m128 summed = _mm_dp_ps(v1.vec, v2.vec, 0x7F);
        return _mm_cvtss_f32(summed);
    }



#endif

Can someone explain what's wrong here?

edited Nov 14 at 13:27

200_success

127k15148410

asked Nov 14 at 10:07

Ankit Singh

New contributor

put on hold as unclear what you're asking by Toby Speight, πάντα ῥεῖ, Graipher, Edward, Mast Nov 14 at 13:29

Please clarify your specific problem or add additional details to highlight exactly what you need. As it's currently written, it’s hard to tell exactly what you're asking. See the How to Ask page for help clarifying this question. If this question can be reworded to fit the rules in the help center, please edit the question.

What happened to check_ifzero()?
– 200_success
Nov 14 at 13:28

add a comment |

up vote
-3
down vote

favorite

#ifndef INC_VEC4_H

#define INC_VEC4_H



    #include<nmmintrin.h>



    /**
     * Four-component single-precision vector held in one SSE register.
     *
     * The lanes can be accessed as x/y/z/w, as r/g/b/a, or all at once
     * through `vec`.  Every dot-product based helper (dot, length,
     * squared_length, normalize, make_it_unit, check_ifzero) looks only at
     * x, y and z; the element-wise operators act on all four lanes.
     *
     * The constructors, copy operations, destructor and the static
     * direction constants are only declared here; their definitions are
     * not part of this header.
     *
     * NOTE(review): objects must be 16-byte aligned.  Heap allocations in
     * 32-bit builds may not guarantee that - worth checking as a possible
     * cause of the reported x86 crash.
     * NOTE(review): the user-declared copy operations and destructor make
     * the type non-trivial; if they do nothing special, defaulting them may
     * help the optimiser - confirm against their definitions.
     */
    struct alignas(16) vec4
    {
        union {
            __m128 vec;                      // all four lanes at once
            struct { float x, y, z, w; };    // lanes 0..3
            struct { float r, g, b, a; };    // alternate names for the same lanes
        };

        //normalised directions
        static vec4 UP;
        static vec4 DOWN;
        static vec4 ZERO;
        static vec4 LEFT;
        static vec4 RIGHT;
        static vec4 FORWARD;
        static vec4 BACKWARD;

        //construction
        vec4();
        vec4(float x, float y, float z, float w = 1.0f);
        explicit vec4(float n);
        vec4(const vec4& other);
        vec4& operator = (const vec4& other);
        //destruction
        ~vec4();

        //internal helpers

        /// x/y/z dot product of two registers, broadcast to every lane.
        /// Mask 0x7F: high nibble 0111 multiplies lanes 0-2 only, low
        /// nibble 1111 writes the sum into all four result lanes.
        static __m128 dot3(__m128 lhs, __m128 rhs)
        {
            return _mm_dp_ps(lhs, rhs, 0x7F);
        }

        /// Builds a vec4 whose register is `lanes`.
        static vec4 wrap(__m128 lanes)
        {
            vec4 out;
            out.vec = lanes;
            return out;
        }

        /// Dot product of the x, y, z components (w is ignored).
        float dot(const vec4& other) const
        {
            return _mm_cvtss_f32(dot3(vec, other.vec));
        }

        /// Scales this vector in place by the approximate reciprocal of its
        /// x/y/z length.  All four lanes (w included) are multiplied.
        /// Uses _mm_rsqrt_ps, which is an approximation, and has no guard
        /// for a zero-length input.
        void make_it_unit()
        {
            vec = _mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec)));
        }

        /// Returns a copy scaled like make_it_unit(); *this is unchanged.
        vec4 normalize() const
        {
            return wrap(_mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec))));
        }

        /// Euclidean length of the x, y, z components.
        float length() const
        {
            return _mm_cvtss_f32(_mm_sqrt_ss(dot3(vec, vec)));
        }

        //vec4-vec4 arithmetic operations (element-wise, all four lanes)
        vec4 operator + (const vec4& other) const {
            return wrap(_mm_add_ps(vec, other.vec));
        }
        vec4 operator - (const vec4& other) const {
            return wrap(_mm_sub_ps(vec, other.vec));
        }
        vec4 operator * (const vec4& other) const {
            return wrap(_mm_mul_ps(vec, other.vec));
        }
        vec4 operator / (const vec4& other) const {
            return wrap(_mm_div_ps(vec, other.vec));
        }

        //vec4-vec4 compound assignment; each returns *this so calls can chain
        vec4& operator += (const vec4& other) {
            vec = _mm_add_ps(vec, other.vec);
            return *this;
        }
        vec4& operator *= (const vec4& other) {
            vec = _mm_mul_ps(vec, other.vec);
            return *this;
        }
        vec4& operator /= (const vec4& other) {
            vec = _mm_div_ps(vec, other.vec);
            return *this;
        }

        //vec4-scalar * & / (the scalar is broadcast to all four lanes)
        vec4 operator / (float scalar) const {
            return wrap(_mm_div_ps(vec, _mm_set1_ps(scalar)));
        }
        vec4& operator *= (float scalar) {
            vec = _mm_mul_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }
        vec4& operator /= (float scalar) {
            vec = _mm_div_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }

        /// Squared length of the x, y, z components.
        float squared_length() const {
            return _mm_cvtss_f32(dot3(vec, vec));
        }

        /// Sets all four lanes (w included) to 0 and returns *this.
        vec4& make_itzero() {
            vec = _mm_setzero_ps();
            return *this;
        }

        /// Cross product of the x, y, z components; the result's w is
        /// w*other.w - w*other.w.
        vec4 cross(const vec4& other) const {
            // _MM_SHUFFLE(3, 0, 2, 1) yields (y, z, x, w);
            // _MM_SHUFFLE(3, 1, 0, 2) yields (z, x, y, w).
            const __m128 a_yzx = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 a_zxy = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 1, 0, 2));
            const __m128 b_yzx = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 b_zxy = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 1, 0, 2));
            return wrap(_mm_sub_ps(_mm_mul_ps(a_yzx, b_zxy), _mm_mul_ps(a_zxy, b_yzx)));
        }

        //checks

        /// True when x, y and z all compare equal to 0 (so -0.0f counts as
        /// zero and NaN does not).
        /// NOTE(review): w is ignored to match dot()/length(); confirm this
        /// is the intended meaning of "zero".
        bool check_ifzero() const {
            const int equalLanes = _mm_movemask_ps(_mm_cmpeq_ps(vec, _mm_setzero_ps()));
            return (equalLanes & 0x7) == 0x7;
        }
    };



    //non-member inline operators

    /// Returns `v` with every lane (w included) multiplied by `scalar`.
    inline vec4 operator * (const vec4& v, float scalar) {
        vec4 scaled;
        // _mm_set1_ps broadcasts the scalar into all four lanes.
        scaled.vec = _mm_mul_ps(v.vec, _mm_set1_ps(scalar));
        return scaled;
    }



    /// Scalar-on-the-left form of vector scaling; forwards to
    /// operator*(const vec4&, float), which computes the same product.
    inline vec4 operator * (float scalar, const vec4& v) {
        return v * scalar;
    }



    /// Dot product of the x, y, z components of `v1` and `v2`.
    inline float dot(const vec4& v1, const vec4& v2) {
        // Mask 0x7F: multiply lanes 0-2 only, write the sum to every lane;
        // lane 0 is then read back as a float.
        const __m128 summed = _mm_dp_ps(v1.vec, v2.vec, 0x7F);
        return _mm_cvtss_f32(summed);
    }



#endif

Can someone explain what's wrong here?

edited Nov 14 at 13:27

200_success

127k15148410

asked Nov 14 at 10:07

Ankit Singh

New contributor

put on hold as unclear what you're asking by Toby Speight, πάντα ῥεῖ, Graipher, Edward, Mast Nov 14 at 13:29

What happened to check_ifzero()?
– 200_success
Nov 14 at 13:28

add a comment |

up vote
-3
down vote

favorite

#ifndef INC_VEC4_H

#define INC_VEC4_H



    #include<nmmintrin.h>



    /**
     * Four-component single-precision vector held in one SSE register.
     *
     * The lanes can be accessed as x/y/z/w, as r/g/b/a, or all at once
     * through `vec`.  Every dot-product based helper (dot, length,
     * squared_length, normalize, make_it_unit, check_ifzero) looks only at
     * x, y and z; the element-wise operators act on all four lanes.
     *
     * The constructors, copy operations, destructor and the static
     * direction constants are only declared here; their definitions are
     * not part of this header.
     *
     * NOTE(review): objects must be 16-byte aligned.  Heap allocations in
     * 32-bit builds may not guarantee that - worth checking as a possible
     * cause of the reported x86 crash.
     * NOTE(review): the user-declared copy operations and destructor make
     * the type non-trivial; if they do nothing special, defaulting them may
     * help the optimiser - confirm against their definitions.
     */
    struct alignas(16) vec4
    {
        union {
            __m128 vec;                      // all four lanes at once
            struct { float x, y, z, w; };    // lanes 0..3
            struct { float r, g, b, a; };    // alternate names for the same lanes
        };

        //normalised directions
        static vec4 UP;
        static vec4 DOWN;
        static vec4 ZERO;
        static vec4 LEFT;
        static vec4 RIGHT;
        static vec4 FORWARD;
        static vec4 BACKWARD;

        //construction
        vec4();
        vec4(float x, float y, float z, float w = 1.0f);
        explicit vec4(float n);
        vec4(const vec4& other);
        vec4& operator = (const vec4& other);
        //destruction
        ~vec4();

        //internal helpers

        /// x/y/z dot product of two registers, broadcast to every lane.
        /// Mask 0x7F: high nibble 0111 multiplies lanes 0-2 only, low
        /// nibble 1111 writes the sum into all four result lanes.
        static __m128 dot3(__m128 lhs, __m128 rhs)
        {
            return _mm_dp_ps(lhs, rhs, 0x7F);
        }

        /// Builds a vec4 whose register is `lanes`.
        static vec4 wrap(__m128 lanes)
        {
            vec4 out;
            out.vec = lanes;
            return out;
        }

        /// Dot product of the x, y, z components (w is ignored).
        float dot(const vec4& other) const
        {
            return _mm_cvtss_f32(dot3(vec, other.vec));
        }

        /// Scales this vector in place by the approximate reciprocal of its
        /// x/y/z length.  All four lanes (w included) are multiplied.
        /// Uses _mm_rsqrt_ps, which is an approximation, and has no guard
        /// for a zero-length input.
        void make_it_unit()
        {
            vec = _mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec)));
        }

        /// Returns a copy scaled like make_it_unit(); *this is unchanged.
        vec4 normalize() const
        {
            return wrap(_mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec))));
        }

        /// Euclidean length of the x, y, z components.
        float length() const
        {
            return _mm_cvtss_f32(_mm_sqrt_ss(dot3(vec, vec)));
        }

        //vec4-vec4 arithmetic operations (element-wise, all four lanes)
        vec4 operator + (const vec4& other) const {
            return wrap(_mm_add_ps(vec, other.vec));
        }
        vec4 operator - (const vec4& other) const {
            return wrap(_mm_sub_ps(vec, other.vec));
        }
        vec4 operator * (const vec4& other) const {
            return wrap(_mm_mul_ps(vec, other.vec));
        }
        vec4 operator / (const vec4& other) const {
            return wrap(_mm_div_ps(vec, other.vec));
        }

        //vec4-vec4 compound assignment; each returns *this so calls can chain
        vec4& operator += (const vec4& other) {
            vec = _mm_add_ps(vec, other.vec);
            return *this;
        }
        vec4& operator *= (const vec4& other) {
            vec = _mm_mul_ps(vec, other.vec);
            return *this;
        }
        vec4& operator /= (const vec4& other) {
            vec = _mm_div_ps(vec, other.vec);
            return *this;
        }

        //vec4-scalar * & / (the scalar is broadcast to all four lanes)
        vec4 operator / (float scalar) const {
            return wrap(_mm_div_ps(vec, _mm_set1_ps(scalar)));
        }
        vec4& operator *= (float scalar) {
            vec = _mm_mul_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }
        vec4& operator /= (float scalar) {
            vec = _mm_div_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }

        /// Squared length of the x, y, z components.
        float squared_length() const {
            return _mm_cvtss_f32(dot3(vec, vec));
        }

        /// Sets all four lanes (w included) to 0 and returns *this.
        vec4& make_itzero() {
            vec = _mm_setzero_ps();
            return *this;
        }

        /// Cross product of the x, y, z components; the result's w is
        /// w*other.w - w*other.w.
        vec4 cross(const vec4& other) const {
            // _MM_SHUFFLE(3, 0, 2, 1) yields (y, z, x, w);
            // _MM_SHUFFLE(3, 1, 0, 2) yields (z, x, y, w).
            const __m128 a_yzx = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 a_zxy = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 1, 0, 2));
            const __m128 b_yzx = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 b_zxy = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 1, 0, 2));
            return wrap(_mm_sub_ps(_mm_mul_ps(a_yzx, b_zxy), _mm_mul_ps(a_zxy, b_yzx)));
        }

        //checks

        /// True when x, y and z all compare equal to 0 (so -0.0f counts as
        /// zero and NaN does not).
        /// NOTE(review): w is ignored to match dot()/length(); confirm this
        /// is the intended meaning of "zero".
        bool check_ifzero() const {
            const int equalLanes = _mm_movemask_ps(_mm_cmpeq_ps(vec, _mm_setzero_ps()));
            return (equalLanes & 0x7) == 0x7;
        }
    };



    //non-member inline operators

    /// Returns `v` with every lane (w included) multiplied by `scalar`.
    inline vec4 operator * (const vec4& v, float scalar) {
        vec4 scaled;
        // _mm_set1_ps broadcasts the scalar into all four lanes.
        scaled.vec = _mm_mul_ps(v.vec, _mm_set1_ps(scalar));
        return scaled;
    }



    /// Scalar-on-the-left form of vector scaling; forwards to
    /// operator*(const vec4&, float), which computes the same product.
    inline vec4 operator * (float scalar, const vec4& v) {
        return v * scalar;
    }



    /// Dot product of the x, y, z components of `v1` and `v2`.
    inline float dot(const vec4& v1, const vec4& v2) {
        // Mask 0x7F: multiply lanes 0-2 only, write the sum to every lane;
        // lane 0 is then read back as a float.
        const __m128 summed = _mm_dp_ps(v1.vec, v2.vec, 0x7F);
        return _mm_cvtss_f32(summed);
    }



#endif

Can someone explain what's wrong here?

edited Nov 14 at 13:27

200_success

127k15148410

asked Nov 14 at 10:07

Ankit Singh

New contributor

#ifndef INC_VEC4_H

#define INC_VEC4_H



    #include<nmmintrin.h>



    /**
     * Four-component single-precision vector held in one SSE register.
     *
     * The lanes can be accessed as x/y/z/w, as r/g/b/a, or all at once
     * through `vec`.  Every dot-product based helper (dot, length,
     * squared_length, normalize, make_it_unit, check_ifzero) looks only at
     * x, y and z; the element-wise operators act on all four lanes.
     *
     * The constructors, copy operations, destructor and the static
     * direction constants are only declared here; their definitions are
     * not part of this header.
     *
     * NOTE(review): objects must be 16-byte aligned.  Heap allocations in
     * 32-bit builds may not guarantee that - worth checking as a possible
     * cause of the reported x86 crash.
     * NOTE(review): the user-declared copy operations and destructor make
     * the type non-trivial; if they do nothing special, defaulting them may
     * help the optimiser - confirm against their definitions.
     */
    struct alignas(16) vec4
    {
        union {
            __m128 vec;                      // all four lanes at once
            struct { float x, y, z, w; };    // lanes 0..3
            struct { float r, g, b, a; };    // alternate names for the same lanes
        };

        //normalised directions
        static vec4 UP;
        static vec4 DOWN;
        static vec4 ZERO;
        static vec4 LEFT;
        static vec4 RIGHT;
        static vec4 FORWARD;
        static vec4 BACKWARD;

        //construction
        vec4();
        vec4(float x, float y, float z, float w = 1.0f);
        explicit vec4(float n);
        vec4(const vec4& other);
        vec4& operator = (const vec4& other);
        //destruction
        ~vec4();

        //internal helpers

        /// x/y/z dot product of two registers, broadcast to every lane.
        /// Mask 0x7F: high nibble 0111 multiplies lanes 0-2 only, low
        /// nibble 1111 writes the sum into all four result lanes.
        static __m128 dot3(__m128 lhs, __m128 rhs)
        {
            return _mm_dp_ps(lhs, rhs, 0x7F);
        }

        /// Builds a vec4 whose register is `lanes`.
        static vec4 wrap(__m128 lanes)
        {
            vec4 out;
            out.vec = lanes;
            return out;
        }

        /// Dot product of the x, y, z components (w is ignored).
        float dot(const vec4& other) const
        {
            return _mm_cvtss_f32(dot3(vec, other.vec));
        }

        /// Scales this vector in place by the approximate reciprocal of its
        /// x/y/z length.  All four lanes (w included) are multiplied.
        /// Uses _mm_rsqrt_ps, which is an approximation, and has no guard
        /// for a zero-length input.
        void make_it_unit()
        {
            vec = _mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec)));
        }

        /// Returns a copy scaled like make_it_unit(); *this is unchanged.
        vec4 normalize() const
        {
            return wrap(_mm_mul_ps(vec, _mm_rsqrt_ps(dot3(vec, vec))));
        }

        /// Euclidean length of the x, y, z components.
        float length() const
        {
            return _mm_cvtss_f32(_mm_sqrt_ss(dot3(vec, vec)));
        }

        //vec4-vec4 arithmetic operations (element-wise, all four lanes)
        vec4 operator + (const vec4& other) const {
            return wrap(_mm_add_ps(vec, other.vec));
        }
        vec4 operator - (const vec4& other) const {
            return wrap(_mm_sub_ps(vec, other.vec));
        }
        vec4 operator * (const vec4& other) const {
            return wrap(_mm_mul_ps(vec, other.vec));
        }
        vec4 operator / (const vec4& other) const {
            return wrap(_mm_div_ps(vec, other.vec));
        }

        //vec4-vec4 compound assignment; each returns *this so calls can chain
        vec4& operator += (const vec4& other) {
            vec = _mm_add_ps(vec, other.vec);
            return *this;
        }
        vec4& operator *= (const vec4& other) {
            vec = _mm_mul_ps(vec, other.vec);
            return *this;
        }
        vec4& operator /= (const vec4& other) {
            vec = _mm_div_ps(vec, other.vec);
            return *this;
        }

        //vec4-scalar * & / (the scalar is broadcast to all four lanes)
        vec4 operator / (float scalar) const {
            return wrap(_mm_div_ps(vec, _mm_set1_ps(scalar)));
        }
        vec4& operator *= (float scalar) {
            vec = _mm_mul_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }
        vec4& operator /= (float scalar) {
            vec = _mm_div_ps(vec, _mm_set1_ps(scalar));
            return *this;
        }

        /// Squared length of the x, y, z components.
        float squared_length() const {
            return _mm_cvtss_f32(dot3(vec, vec));
        }

        /// Sets all four lanes (w included) to 0 and returns *this.
        vec4& make_itzero() {
            vec = _mm_setzero_ps();
            return *this;
        }

        /// Cross product of the x, y, z components; the result's w is
        /// w*other.w - w*other.w.
        vec4 cross(const vec4& other) const {
            // _MM_SHUFFLE(3, 0, 2, 1) yields (y, z, x, w);
            // _MM_SHUFFLE(3, 1, 0, 2) yields (z, x, y, w).
            const __m128 a_yzx = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 a_zxy = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 1, 0, 2));
            const __m128 b_yzx = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 0, 2, 1));
            const __m128 b_zxy = _mm_shuffle_ps(other.vec, other.vec, _MM_SHUFFLE(3, 1, 0, 2));
            return wrap(_mm_sub_ps(_mm_mul_ps(a_yzx, b_zxy), _mm_mul_ps(a_zxy, b_yzx)));
        }

        //checks

        /// True when x, y and z all compare equal to 0 (so -0.0f counts as
        /// zero and NaN does not).
        /// NOTE(review): w is ignored to match dot()/length(); confirm this
        /// is the intended meaning of "zero".
        bool check_ifzero() const {
            const int equalLanes = _mm_movemask_ps(_mm_cmpeq_ps(vec, _mm_setzero_ps()));
            return (equalLanes & 0x7) == 0x7;
        }
    };



    //non-member inline operators

    /// Returns `v` with every lane (w included) multiplied by `scalar`.
    inline vec4 operator * (const vec4& v, float scalar) {
        vec4 scaled;
        // _mm_set1_ps broadcasts the scalar into all four lanes.
        scaled.vec = _mm_mul_ps(v.vec, _mm_set1_ps(scalar));
        return scaled;
    }



    /// Scalar-on-the-left form of vector scaling; forwards to
    /// operator*(const vec4&, float), which computes the same product.
    inline vec4 operator * (float scalar, const vec4& v) {
        return v * scalar;
    }



    /// Dot product of the x, y, z components of `v1` and `v2`.
    inline float dot(const vec4& v1, const vec4& v2) {
        // Mask 0x7F: multiply lanes 0-2 only, write the sum to every lane;
        // lane 0 is then read back as a float.
        const __m128 summed = _mm_dp_ps(v1.vec, v2.vec, 0x7F);
        return _mm_cvtss_f32(summed);
    }



#endif

Can someone explain what's wrong here?

c++ performance coordinate-system sse raytracing

edited Nov 14 at 13:27

200_success

127k15148410

asked Nov 14 at 10:07

Ankit Singh

New contributor

edited Nov 14 at 13:27

200_success

127k15148410

asked Nov 14 at 10:07

Ankit Singh

New contributor

edited Nov 14 at 13:27

200_success

127k15148410

edited Nov 14 at 13:27

200_success

127k15148410

edited Nov 14 at 13:27

200_success

127k15148410

asked Nov 14 at 10:07

Ankit Singh

New contributor

asked Nov 14 at 10:07

Ankit Singh

asked Nov 14 at 10:07

Ankit Singh

New contributor

Ankit Singh is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.

put on hold as unclear what you're asking by Toby Speight, πάντα ῥεῖ, Graipher, Edward, Mast Nov 14 at 13:29

What happened to check_ifzero()?
– 200_success
Nov 14 at 13:28

add a comment |

What happened to check_ifzero()?
– 200_success
Nov 14 at 13:28

What happened to check_ifzero()?
– 200_success
Nov 14 at 13:28

add a comment |

active

oldest

votes

This page is for reference only. If you need detailed information, please check here

搜尋此網誌

Cfrtjryk