#pragma pack(16) // 16 byte data align #define SIMD #include "vector.h" vector4::vector4() { ; } // un-initialised constructor // initialises a vector from 3 or 4 floats vector4::vector4(float x0, float y0, float z0, float w0=1.0f) { x = x0; y = y0; z = z0; w = w0; } // set value from 3 or 4 floats void vector4::vec(float x0, float y0, float z0, float w0=1.0f) { x = x0; y = y0; z = z0; w = w0; } // clears vector to (0, 0, 0, 1) void vector4::null(void) { x = y = z = 0.0f; w = 1.0f; } // indexes the i-th component float& vector4::operator[](int i) { return (&x)[i]; } // evaluates the magnitude of the vector float vector4::length(void) { return (float)sqrt(x*x+y*y+z*z); } // inverts the vector void vector4::negate(void) { x = -x; y = -y; z = -z; } // normalises the vector void vector4::normalize(void) { float len = (float)sqrt(x*x+y*y+z*z); if (len == 0.0f) return; x /= len; y /= len; z /= len; } // evaluate the cross-product void vector4::cross(vector4& v1, vector4& v2) { #ifdef SIMD __asm { mov esi, v1 mov edi, v2 movaps xmm0, [esi] movaps xmm1. [edi] movaps xmm2, xmm0 movaps xmm3, xmm1 shufps xmm0, xmm0, 0xc9 shufps xmm1, xmm1, 0xd2 mulps xmm2, xmm3 shufps xmm2, xmm2, 0xd2 shufps xmm3, xmm3, 0xc9 mulps xmm2, xmm3 subps xmm0, xmm2 mov esi, this movaps [esi], xmm0 } #else x = v1.y*v2.z - v1.z*v2.y; y = v1.z*v2.x - v1.x*v2.z; z = v1.x*v2.y - v1.y*v2.x; w = 1.0f; #endif } // define an operator: vector4+=vector4 for vector addition void vector4::operator+=(vector4& v) { #ifdef SIMD __asm { mov esi, this mov edi, v movaps xmm0, [esi] addps xmm0, [edi] movaps [esi], xmm0 } #else x+= v.x; y+= v.y; z+= v.z; w+= v.w; #endif } // define an operator: vector4-=vector4 for vector // subtraction void vector4::operator-=(vector4& v) { #ifdef SIMD __asm { mov esi, this mov edi, v movaps xmm0, [esi] subps xmm0, [edi] movaps [esi], xmm0 } #else x-= v.x; y-= v.y; z-= v.z; w-= v.w; #endif } // define an operator: vector4*=vector4 for the dot product void vector4::operator*=(vector4& v) { #ifdef SIMD __asm { mov esi, this mov edi, v movaps xmm0, [esi] mulps xmm0, [edi] movaps [esi], xmm0 } #else x*= v.x; y*= v.y; z*= v.z; w*= v.w; #endif } // define an operator: vector4+vector4 for vector addition vector4 vector4::operator+(vector4& v) { __declspec(align(16)) vector4 ret; #ifdef SIMD __asm { mov esi, this mov edi, v movaps xmm0, [esi] addps xmm0, [edi] movaps ret, xmm0 } #else ret.x = x+v.x; ret.y = y+v.y; ret.z = z+v.z; ret.w = w+v.w; #endif return ret; } // define an operator: vector4-vector4 for vector // subtraction vector4 vector4::operator-(vector4& v) { __declspec(align(16)) vector4 ret; #ifdef SIMD __asm { mov esi, this mov edi, v movaps xmm0, [esi] subps xmm0, [edi] movaps ret, xmm0 } #else ret.x = x-v.x; ret.y = y-v.y; ret.z = z-v.z; ret.w = w-v.w; #endif return ret; } // define an operator: vector4*vector4 for dot product vector4 vector4::operator*(vector4& v) { __declspec(align(16)) vector4 ret; #ifdef SIMD __asm { mov esi, this mov edi, v movaps xmm0, [esi] mulps xmm0, [edi] movaps ret, xmm0 } #else ret.x = x*v.x; ret.y = y*v.y; ret.z = z*v.z; ret.w = w*v.w; #endif return ret; } // define an operator: vector4*mat4x4 to multiply a vector // and a matrix vector4 vector4::operator*(mat4x4& m) { __declspec(align(16)) vector4 a; #ifdef SIMD __asm { mov esi, this mov edi, m movaps xmm0, [esi] movaps xmm1, xmm0 movaps xmm2, xmm0 movaps xmm3, xmm0 shufps xmm0, xmm2, 0x00 shufps xmm1, xmm2, 0x55 shufps xmm2, xmm2, 0xAA shufps xmm3, xmm2, 0xFF mulps xmm0, [edi] mulps xmm1, [edi+16] mulps xmm2, [edi+32] mulps xmm3, [edi+48] addps xmm0, xmm1 addps xmm0, xmm2 addps xmm0, xmm3 movaps a, xmm0 } #else float *f = (float *)this; a.x = x*f[0] + y*f[4] + z*f[8] + w*f[12]; a.y = x*f[1] + y*f[5] + z*f[9] + w*f[13]; a.z = x*f[2] + y*f[6] + z*f[10] + w*f[14]; a.w = x*f[3] + y*f[7] + z*f[11] + w*f[15]; #endif return a; } //clears all matrix elements to zero void mat4x4::null(void) { memset(&m, 0, sizeof(m)); } // sets the matrix to an identity matrix void mat4x4::load_identity(void) { memset(m, 0, sizeof(m)); m[0][0] = m[1][1] = m[2][2] = m[3][3] = 1.0; } // sets up a matrix as a rotation or rad around vector vec void mat4x4::set_rotation(vector4& dir, float rad); // operator mat4x4*mat4x4 mat4x4 mat4x4::operator*(mat4x4& m1) { __declspec(align(16)) mat4x4 m2; #ifdef SIMD __asm { mov edi, m1 movaps xmm4, [edi] movaps xmm5, [edi+16] movaps xmm6, [edi+32] movaps xmm7, [edi+48] mov esi, this mov eax, 0 LOOP: movaps xmm0, [esi+eax] movaps xmm1, xmm0 movaps xmm2, xmm0 movaps xmm3, xmm0 shufps xmm0, xmm2, 0x00 shufps xmm1, xmm2, 0x55 shufps xmm2, xmm2, 0xAA shufps xmm3, xmm3, 0xFF mulps xmm0, [edi] mulps xmm1, [edi+16] mulps xmm2, [edi+32] mulps xmm3, [edi+48] addps xmm0, xmm1 addps xmm0, xmm2 addps xmm0, xmm3 movaps m2+eax, xmm0 add eax, 16 cmp eax, 48 jle LOOP } #else int i, j, k; float ab; for (i=0; i<4; i++) for (j=0; j<4; j++) { ab = 0.0f; for(k=0; k<4; k++) ab += m[i][k]*m1.m[k][j]; m2.m[i][j] = ab; } #endif return m2; } // define an operator: mat4x4*vector4 to multiply a matrix // and a vector vector4 mat4x4::operator*(vector4& v) { __declspec(align(16)) vector4 a; #ifdef SIMD __asm { mov esi, v mov edi, this movaps xmm0, [esi] movaps xmm1, xmm0 movaps xmm2, xmm0 movaps xmm3, xmm0 shufps xmm0, xmm2, 0x00 shufps xmm1, xmm2, 0x55 shufps xmm2, xmm2, 0xAA shufps xmm3, xmm2, 0xFF mulps xmm0, [edi] mulps xmm1, [edi+16] mulps xmm2, [edi+32] mulps xmm3, [edi+48] addps xmm0, xmm1 addps xmm0, xmm2 addps xmm0, xmm3 movaps a, xmm0 } #else float *f = (float *)this; a.x = v.x*f[0] + v.y*f[4] + v.z*f[8] + v.w*f[12]; a.y = v.x*f[1] + v.y*f[5] + v.z*f[9] + v.w*f[13]; a.z = v.x*f[2] + v.y*f[6] + v.z*f[10] + v.w*f[14]; a.w = v.x*f[3] + v.y*f[7] + v.z*f[11] + v.w*f[15]; #endif return a; }