first commit
This commit is contained in:
CMakeLists.txtLICENSEREADME.md
include/cglm
affine-mat.haffine-post.haffine-pre.haffine.haffine2d.happlesimd.hbezier.hbox.hcall.h
call
affine.haffine2d.hbezier.hbox.hcam.h
cam.hcglm.hclipspace
ortho_lh_no.hortho_lh_zo.hortho_rh_no.hortho_rh_zo.hpersp_lh_no.hpersp_lh_zo.hpersp_rh_no.hpersp_rh_zo.hproject_no.hproject_zo.hview_lh_no.hview_lh_zo.hview_rh_no.hview_rh_zo.h
curve.hease.heuler.hfrustum.hio.hivec2.hivec3.hivec4.hmat2.hmat3.hmat4.hplane.hproject.hquat.hray.hsphere.hvec2.hvec3.hvec4.hclipspace
ortho_lh_no.hortho_lh_zo.hortho_rh_no.hortho_rh_zo.hpersp.hpersp_lh_no.hpersp_lh_zo.hpersp_rh_no.hpersp_rh_zo.hproject_no.hproject_zo.hview_lh.hview_lh_no.hview_lh_zo.hview_rh.hview_rh_no.hview_rh_zo.h
color.hcommon.hcurve.hease.heuler.hfrustum.hio.hivec2.hivec3.hivec4.hmat2.hmat3.hmat4.hplane.hproject.hquat.hray.hsimd
sphere.hstruct.hstruct
affine-post.haffine-pre.haffine.haffine2d.hbox.hcam.h
types-struct.htypes.hutil.hvec2-ext.hvec2.hvec3-ext.hvec3.hvec4-ext.hvec4.hversion.hclipspace
ortho_lh_no.hortho_lh_zo.hortho_rh_no.hortho_rh_zo.hpersp_lh_no.hpersp_lh_zo.hpersp_rh_no.hpersp_rh_zo.hproject_no.hproject_zo.hview_lh_no.hview_lh_zo.hview_rh_no.hview_rh_zo.h
color.hcurve.heuler.hfrustum.hio.hmat2.hmat3.hmat4.hplane.hproject.hquat.hsphere.hvec2-ext.hvec2.hvec3-ext.hvec3.hvec4-ext.hvec4.hsrc
GAME.hmain.cmain.h
engine
atlas.catlas.hcamera.ccamera.hevent.cevent.hfont.cfont.hglew.cglew.hglyph.cglyph.hio.cio.hkeyboard.ckeyboard.hmouse.cmouse.hmusic.cmusic.hrenderer.crenderer.hsdl.csdl.hshader.cshader.hsound.csound.hsurface.csurface.htexture.ctexture.htick.ctick.hvao.cvao.hvbo.cvbo.hvertexattribute.cvertexattribute.hwindow.cwindow.h
game
ecs
ECS_TYPES.hc_chase.cc_chase.hc_circle.cc_circle.hc_circle_collide.cc_circle_collide.hc_color.cc_color.hc_color_change.cc_color_change.hc_color_collide_delete.cc_color_collide_delete.hc_contact_damage.cc_contact_damage.hc_control.cc_control.hc_damage.cc_damage.hc_flash.cc_flash.hc_game_object.cc_game_object.hc_health.cc_health.hc_move.cc_move.hc_orbit.cc_orbit.hc_particle_spawn.cc_particle_spawn.hc_physics.cc_physics.hc_player_atlas_health.cc_player_atlas_health.hc_player_death.cc_player_death.hc_pulsate.cc_pulsate.hc_rotation.cc_rotation.hc_scale.cc_scale.hc_spin.cc_spin.hc_sprite.cc_sprite.hc_stun.cc_stun.hc_text.cc_text.hc_time_delete.cc_time_delete.hc_ui_button.cc_ui_button.he_enemy.ce_enemy.he_follower.ce_follower.he_logo.ce_logo.he_particle.ce_particle.he_player.ce_player.he_text.ce_text.he_ui_button.ce_ui_button.he_warning.ce_warning.hecs.cecs.hecs_register.cecs_register.hentity.centity.h
game.cgame.hgame_font.cgame_font.hgame_music.cgame_music.hgame_shader.cgame_shader.hgame_sound.cgame_sound.hgame_state.cgame_state.hgame_texture.cgame_texture.hshader
state
util
66
include/cglm/simd/avx/affine.h
Normal file
66
include/cglm/simd/avx/affine.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c), Recep Aslantas.
|
||||
*
|
||||
* MIT License (MIT), http://opensource.org/licenses/MIT
|
||||
* Full license can be found in the LICENSE file
|
||||
*/
|
||||
|
||||
#ifndef cglm_affine_mat_avx_h
|
||||
#define cglm_affine_mat_avx_h
|
||||
#ifdef __AVX__
|
||||
|
||||
#include "../../common.h"
|
||||
#include "../intrin.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
/* D = R * L (Column-Major) */
|
||||
|
||||
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
|
||||
|
||||
y0 = glmm_load256(m2[0]); /* h g f e d c b a */
|
||||
y1 = glmm_load256(m2[2]); /* p o n m l k j i */
|
||||
|
||||
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
|
||||
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
|
||||
|
||||
/* 0x03: 0b00000011 */
|
||||
y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */
|
||||
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */
|
||||
|
||||
/* f f f f a a a a */
|
||||
/* h h h h c c c c */
|
||||
/* e e e e b b b b */
|
||||
/* g g g g d d d d */
|
||||
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
|
||||
y7 = _mm256_permutevar_ps(y0, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
|
||||
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
|
||||
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
|
||||
|
||||
glmm_store256(dest[0],
|
||||
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
|
||||
_mm256_mul_ps(y3, y7)),
|
||||
_mm256_add_ps(_mm256_mul_ps(y4, y8),
|
||||
_mm256_mul_ps(y5, y9))));
|
||||
|
||||
/* n n n n i i i i */
|
||||
/* p p p p k k k k */
|
||||
/* m m m m j j j j */
|
||||
/* o o o o l l l l */
|
||||
y6 = _mm256_permutevar_ps(y1, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
|
||||
y7 = _mm256_permutevar_ps(y1, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
|
||||
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
|
||||
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
|
||||
|
||||
glmm_store256(dest[2],
|
||||
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
|
||||
_mm256_mul_ps(y3, y7)),
|
||||
_mm256_add_ps(_mm256_mul_ps(y4, y8),
|
||||
_mm256_mul_ps(y5, y9))));
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* cglm_affine_mat_avx_h */
|
76
include/cglm/simd/avx/mat4.h
Normal file
76
include/cglm/simd/avx/mat4.h
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (c), Recep Aslantas.
|
||||
*
|
||||
* MIT License (MIT), http://opensource.org/licenses/MIT
|
||||
* Full license can be found in the LICENSE file
|
||||
*/
|
||||
|
||||
#ifndef cglm_mat_simd_avx_h
|
||||
#define cglm_mat_simd_avx_h
|
||||
#ifdef __AVX__
|
||||
|
||||
#include "../../common.h"
|
||||
#include "../intrin.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_scale_avx(mat4 m, float s) {
|
||||
__m256 y0;
|
||||
y0 = _mm256_set1_ps(s);
|
||||
|
||||
glmm_store256(m[0], _mm256_mul_ps(y0, glmm_load256(m[0])));
|
||||
glmm_store256(m[2], _mm256_mul_ps(y0, glmm_load256(m[2])));
|
||||
}
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
/* D = R * L (Column-Major) */
|
||||
|
||||
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
|
||||
|
||||
y0 = glmm_load256(m2[0]); /* h g f e d c b a */
|
||||
y1 = glmm_load256(m2[2]); /* p o n m l k j i */
|
||||
|
||||
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
|
||||
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
|
||||
|
||||
/* 0x03: 0b00000011 */
|
||||
y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */
|
||||
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */
|
||||
|
||||
/* f f f f a a a a */
|
||||
/* h h h h c c c c */
|
||||
/* e e e e b b b b */
|
||||
/* g g g g d d d d */
|
||||
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
|
||||
y7 = _mm256_permutevar_ps(y0, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
|
||||
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
|
||||
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
|
||||
|
||||
glmm_store256(dest[0],
|
||||
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
|
||||
_mm256_mul_ps(y3, y7)),
|
||||
_mm256_add_ps(_mm256_mul_ps(y4, y8),
|
||||
_mm256_mul_ps(y5, y9))));
|
||||
|
||||
/* n n n n i i i i */
|
||||
/* p p p p k k k k */
|
||||
/* m m m m j j j j */
|
||||
/* o o o o l l l l */
|
||||
y6 = _mm256_permutevar_ps(y1, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
|
||||
y7 = _mm256_permutevar_ps(y1, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
|
||||
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
|
||||
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
|
||||
|
||||
glmm_store256(dest[2],
|
||||
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
|
||||
_mm256_mul_ps(y3, y7)),
|
||||
_mm256_add_ps(_mm256_mul_ps(y4, y8),
|
||||
_mm256_mul_ps(y5, y9))));
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* cglm_mat_simd_avx_h */
|
Reference in New Issue
Block a user