370 __m128 row0, row1, row2, row3; 371 __m128 tmp0, tmp1, tmp2, tmp3; 372 373 374 row0 = _mm_load_ps( mat2[0] ); 375 row1 = _mm_load_ps( mat2[1] ); 376 row2 = _mm_load_ps( mat2[2] ); 377 row3 = _mm_load_ps( mat2[3] ); 378 379 380 381 tmp0 = _mm_unpacklo_ps( row0, row1 ); 382 tmp2 = _mm_unpacklo_ps( row2, row3 ); 383 tmp1 = _mm_unpackhi_ps( row0, row1 ); 384 tmp3 = _mm_unpackhi_ps( row2, row3 ); 385 386 387 row0 = _mm_movelh_ps( tmp0, tmp2 ); 388 row1 = _mm_movehl_ps( tmp2, tmp0 ); 389 row2 = _mm_movelh_ps( tmp1, tmp3 ); 390 row3 = _mm_movehl_ps( tmp3, tmp1 ); 391 392 393 _mm_store_ps( mat2[0], row0 ); 394 _mm_store_ps( mat2[1], row1 ); 395 _mm_store_ps( mat2[2], row2 ); 396 _mm_store_ps( mat2[3], row3 ); 397 398 399 400 ret[0][0] = mul_asm(mat1[0], mat2[0]); 401 ret[0][1] = mul_asm(mat1[0], mat2[1]); 402 ret[0][2] = mul_asm(mat1[0], mat2[2]); 403 ret[0][3] = mul_asm(mat1[0], mat2[3]); 404 ret[1][0] = mul_asm(mat1[1], mat2[0]); 405 ret[1][1] = mul_asm(mat1[1], mat2[1]); 406 ret[1][2] = mul_asm(mat1[1], mat2[2]); 407 ret[1][3] = mul_asm(mat1[1], mat2[3]); 408 ret[2][0] = mul_asm(mat1[2], mat2[0]); 409 ret[2][1] = mul_asm(mat1[2], mat2[1]); 410 ret[2][2] = mul_asm(mat1[2], mat2[2]); 411 ret[2][3] = mul_asm(mat1[2], mat2[3]); 412 ret[3][0] = mul_asm(mat1[3], mat2[0]); 413 ret[3][1] = mul_asm(mat1[3], mat2[1]); 414 ret[3][2] = mul_asm(mat1[3], mat2[2]); 415 ret[3][3] = mul_asm(mat1[3], mat2[3]); 416
Yeah, this is for a class. Guess which one? (6.172)
I'm guessing comp sci, because that's what your label says.
Reply | Delete