sm64

A Super Mario 64 decompilation
Log | Files | Refs | README | LICENSE

math_util.c (26792B)


      1 #include <ultra64.h>
      2 
      3 #include "sm64.h"
      4 #include "engine/graph_node.h"
      5 #include "math_util.h"
      6 #include "surface_collision.h"
      7 
      8 #include "trig_tables.inc.c"
      9 
     10 // Variables for a spline curve animation (used for the flight path in the grand star cutscene)
     11 Vec4s *gSplineKeyframe;
     12 float gSplineKeyframeFraction;
     13 int gSplineState;
     14 
     15 // These functions have bogus return values.
     16 // Disable the compiler warning.
     17 #pragma GCC diagnostic push
     18 
     19 #ifdef __GNUC__
     20 #if defined(__clang__)
     21   #pragma GCC diagnostic ignored "-Wreturn-stack-address"
     22 #else
     23   #pragma GCC diagnostic ignored "-Wreturn-local-addr"
     24 #endif
     25 #endif
     26 
     27 /// Copy vector 'src' to 'dest'
     28 void *vec3f_copy(Vec3f dest, Vec3f src) {
     29     dest[0] = src[0];
     30     dest[1] = src[1];
     31     dest[2] = src[2];
     32     return &dest; //! warning: function returns address of local variable
     33 }
     34 
     35 /// Set vector 'dest' to (x, y, z)
     36 void *vec3f_set(Vec3f dest, f32 x, f32 y, f32 z) {
     37     dest[0] = x;
     38     dest[1] = y;
     39     dest[2] = z;
     40     return &dest; //! warning: function returns address of local variable
     41 }
     42 
     43 /// Add vector 'a' to 'dest'
     44 void *vec3f_add(Vec3f dest, Vec3f a) {
     45     dest[0] += a[0];
     46     dest[1] += a[1];
     47     dest[2] += a[2];
     48     return &dest; //! warning: function returns address of local variable
     49 }
     50 
     51 /// Make 'dest' the sum of vectors a and b.
     52 void *vec3f_sum(Vec3f dest, Vec3f a, Vec3f b) {
     53     dest[0] = a[0] + b[0];
     54     dest[1] = a[1] + b[1];
     55     dest[2] = a[2] + b[2];
     56     return &dest; //! warning: function returns address of local variable
     57 }
     58 
     59 /// Copy vector src to dest
     60 void *vec3s_copy(Vec3s dest, Vec3s src) {
     61     dest[0] = src[0];
     62     dest[1] = src[1];
     63     dest[2] = src[2];
     64     return &dest; //! warning: function returns address of local variable
     65 }
     66 
     67 /// Set vector 'dest' to (x, y, z)
     68 void *vec3s_set(Vec3s dest, s16 x, s16 y, s16 z) {
     69     dest[0] = x;
     70     dest[1] = y;
     71     dest[2] = z;
     72     return &dest; //! warning: function returns address of local variable
     73 }
     74 
     75 /// Add vector a to 'dest'
     76 void *vec3s_add(Vec3s dest, Vec3s a) {
     77     dest[0] += a[0];
     78     dest[1] += a[1];
     79     dest[2] += a[2];
     80     return &dest; //! warning: function returns address of local variable
     81 }
     82 
     83 /// Make 'dest' the sum of vectors a and b.
     84 void *vec3s_sum(Vec3s dest, Vec3s a, Vec3s b) {
     85     dest[0] = a[0] + b[0];
     86     dest[1] = a[1] + b[1];
     87     dest[2] = a[2] + b[2];
     88     return &dest; //! warning: function returns address of local variable
     89 }
     90 
     91 /// Subtract vector a from 'dest'
     92 void *vec3s_sub(Vec3s dest, Vec3s a) {
     93     dest[0] -= a[0];
     94     dest[1] -= a[1];
     95     dest[2] -= a[2];
     96     return &dest; //! warning: function returns address of local variable
     97 }
     98 
     99 /// Convert short vector a to float vector 'dest'
    100 void *vec3s_to_vec3f(Vec3f dest, Vec3s a) {
    101     dest[0] = a[0];
    102     dest[1] = a[1];
    103     dest[2] = a[2];
    104     return &dest; //! warning: function returns address of local variable
    105 }
    106 
    107 /**
    108  * Convert float vector a to a short vector 'dest' by rounding the components
    109  * to the nearest integer.
    110  */
    111 void *vec3f_to_vec3s(Vec3s dest, Vec3f a) {
    112     // add/subtract 0.5 in order to round to the nearest s32 instead of truncating
    113     dest[0] = a[0] + ((a[0] > 0) ? 0.5f : -0.5f);
    114     dest[1] = a[1] + ((a[1] > 0) ? 0.5f : -0.5f);
    115     dest[2] = a[2] + ((a[2] > 0) ? 0.5f : -0.5f);
    116     return &dest; //! warning: function returns address of local variable
    117 }
    118 
    119 /**
    120  * Set 'dest' the normal vector of a triangle with vertices a, b and c.
    121  * It is similar to vec3f_cross, but it calculates the vectors (c-b) and (b-a)
    122  * at the same time.
    123  */
    124 void *find_vector_perpendicular_to_plane(Vec3f dest, Vec3f a, Vec3f b, Vec3f c) {
    125     dest[0] = (b[1] - a[1]) * (c[2] - b[2]) - (c[1] - b[1]) * (b[2] - a[2]);
    126     dest[1] = (b[2] - a[2]) * (c[0] - b[0]) - (c[2] - b[2]) * (b[0] - a[0]);
    127     dest[2] = (b[0] - a[0]) * (c[1] - b[1]) - (c[0] - b[0]) * (b[1] - a[1]);
    128     return &dest; //! warning: function returns address of local variable
    129 }
    130 
    131 /// Make vector 'dest' the cross product of vectors a and b.
    132 void *vec3f_cross(Vec3f dest, Vec3f a, Vec3f b) {
    133     dest[0] = a[1] * b[2] - b[1] * a[2];
    134     dest[1] = a[2] * b[0] - b[2] * a[0];
    135     dest[2] = a[0] * b[1] - b[0] * a[1];
    136     return &dest; //! warning: function returns address of local variable
    137 }
    138 
    139 /// Scale vector 'dest' so it has length 1
    140 void *vec3f_normalize(Vec3f dest) {
    141     //! Possible division by zero
    142     f32 invsqrt = 1.0f / sqrtf(dest[0] * dest[0] + dest[1] * dest[1] + dest[2] * dest[2]);
    143 
    144     dest[0] *= invsqrt;
    145     dest[1] *= invsqrt;
    146     dest[2] *= invsqrt;
    147     return &dest; //! warning: function returns address of local variable
    148 }
    149 
    150 #pragma GCC diagnostic pop
    151 
    152 /// Copy matrix 'src' to 'dest'
    153 void mtxf_copy(Mat4 dest, Mat4 src) {
    154     register s32 i;
    155     register u32 *d = (u32 *) dest;
    156     register u32 *s = (u32 *) src;
    157 
    158     for (i = 0; i < 16; i++) {
    159         *d++ = *s++;
    160     }
    161 }
    162 
    163 /**
    164  * Set mtx to the identity matrix
    165  */
    166 void mtxf_identity(Mat4 mtx) {
    167     register s32 i;
    168     register f32 *dest;
    169     // These loops must be one line to match on -O2
    170 
    171     // initialize everything except the first and last cells to 0
    172     for (dest = (f32 *) mtx + 1, i = 0; i < 14; dest++, i++) *dest = 0;
    173 
    174     // initialize the diagonal cells to 1
    175     for (dest = (f32 *) mtx, i = 0; i < 4; dest += 5, i++) *dest = 1;
    176 }
    177 
    178 /**
    179  * Set dest to a translation matrix of vector b
    180  */
    181 void mtxf_translate(Mat4 dest, Vec3f b) {
    182     mtxf_identity(dest);
    183     dest[3][0] = b[0];
    184     dest[3][1] = b[1];
    185     dest[3][2] = b[2];
    186 }
    187 
    188 /**
    189  * Set mtx to a look-at matrix for the camera. The resulting transformation
    190  * transforms the world as if there exists a camera at position 'from' pointed
    191  * at the position 'to'. The up-vector is assumed to be (0, 1, 0), but the 'roll'
    192  * angle allows a bank rotation of the camera.
    193  */
    194 void mtxf_lookat(Mat4 mtx, Vec3f from, Vec3f to, s16 roll) {
    195     register f32 invLength;
    196     f32 dx;
    197     f32 dz;
    198     f32 xColY;
    199     f32 yColY;
    200     f32 zColY;
    201     f32 xColZ;
    202     f32 yColZ;
    203     f32 zColZ;
    204     f32 xColX;
    205     f32 yColX;
    206     f32 zColX;
    207 
    208     dx = to[0] - from[0];
    209     dz = to[2] - from[2];
    210 
    211     invLength = -1.0 / sqrtf(dx * dx + dz * dz);
    212     dx *= invLength;
    213     dz *= invLength;
    214 
    215     yColY = coss(roll);
    216     xColY = sins(roll) * dz;
    217     zColY = -sins(roll) * dx;
    218 
    219     xColZ = to[0] - from[0];
    220     yColZ = to[1] - from[1];
    221     zColZ = to[2] - from[2];
    222 
    223     invLength = -1.0 / sqrtf(xColZ * xColZ + yColZ * yColZ + zColZ * zColZ);
    224     xColZ *= invLength;
    225     yColZ *= invLength;
    226     zColZ *= invLength;
    227 
    228     xColX = yColY * zColZ - zColY * yColZ;
    229     yColX = zColY * xColZ - xColY * zColZ;
    230     zColX = xColY * yColZ - yColY * xColZ;
    231 
    232     invLength = 1.0 / sqrtf(xColX * xColX + yColX * yColX + zColX * zColX);
    233 
    234     xColX *= invLength;
    235     yColX *= invLength;
    236     zColX *= invLength;
    237 
    238     xColY = yColZ * zColX - zColZ * yColX;
    239     yColY = zColZ * xColX - xColZ * zColX;
    240     zColY = xColZ * yColX - yColZ * xColX;
    241 
    242     invLength = 1.0 / sqrtf(xColY * xColY + yColY * yColY + zColY * zColY);
    243     xColY *= invLength;
    244     yColY *= invLength;
    245     zColY *= invLength;
    246 
    247     mtx[0][0] = xColX;
    248     mtx[1][0] = yColX;
    249     mtx[2][0] = zColX;
    250     mtx[3][0] = -(from[0] * xColX + from[1] * yColX + from[2] * zColX);
    251 
    252     mtx[0][1] = xColY;
    253     mtx[1][1] = yColY;
    254     mtx[2][1] = zColY;
    255     mtx[3][1] = -(from[0] * xColY + from[1] * yColY + from[2] * zColY);
    256 
    257     mtx[0][2] = xColZ;
    258     mtx[1][2] = yColZ;
    259     mtx[2][2] = zColZ;
    260     mtx[3][2] = -(from[0] * xColZ + from[1] * yColZ + from[2] * zColZ);
    261 
    262     mtx[0][3] = 0;
    263     mtx[1][3] = 0;
    264     mtx[2][3] = 0;
    265     mtx[3][3] = 1;
    266 }
    267 
    268 /**
    269  * Build a matrix that rotates around the z axis, then the x axis, then the y
    270  * axis, and then translates.
    271  */
    272 void mtxf_rotate_zxy_and_translate(Mat4 dest, Vec3f translate, Vec3s rotate) {
    273     register f32 sx = sins(rotate[0]);
    274     register f32 cx = coss(rotate[0]);
    275 
    276     register f32 sy = sins(rotate[1]);
    277     register f32 cy = coss(rotate[1]);
    278 
    279     register f32 sz = sins(rotate[2]);
    280     register f32 cz = coss(rotate[2]);
    281 
    282     dest[0][0] = cy * cz + sx * sy * sz;
    283     dest[1][0] = -cy * sz + sx * sy * cz;
    284     dest[2][0] = cx * sy;
    285     dest[3][0] = translate[0];
    286 
    287     dest[0][1] = cx * sz;
    288     dest[1][1] = cx * cz;
    289     dest[2][1] = -sx;
    290     dest[3][1] = translate[1];
    291 
    292     dest[0][2] = -sy * cz + sx * cy * sz;
    293     dest[1][2] = sy * sz + sx * cy * cz;
    294     dest[2][2] = cx * cy;
    295     dest[3][2] = translate[2];
    296 
    297     dest[0][3] = dest[1][3] = dest[2][3] = 0.0f;
    298     dest[3][3] = 1.0f;
    299 }
    300 
    301 /**
    302  * Build a matrix that rotates around the x axis, then the y axis, then the z
    303  * axis, and then translates.
    304  */
    305 void mtxf_rotate_xyz_and_translate(Mat4 dest, Vec3f b, Vec3s c) {
    306     register f32 sx = sins(c[0]);
    307     register f32 cx = coss(c[0]);
    308 
    309     register f32 sy = sins(c[1]);
    310     register f32 cy = coss(c[1]);
    311 
    312     register f32 sz = sins(c[2]);
    313     register f32 cz = coss(c[2]);
    314 
    315     dest[0][0] = cy * cz;
    316     dest[0][1] = cy * sz;
    317     dest[0][2] = -sy;
    318     dest[0][3] = 0;
    319 
    320     dest[1][0] = sx * sy * cz - cx * sz;
    321     dest[1][1] = sx * sy * sz + cx * cz;
    322     dest[1][2] = sx * cy;
    323     dest[1][3] = 0;
    324 
    325     dest[2][0] = cx * sy * cz + sx * sz;
    326     dest[2][1] = cx * sy * sz - sx * cz;
    327     dest[2][2] = cx * cy;
    328     dest[2][3] = 0;
    329 
    330     dest[3][0] = b[0];
    331     dest[3][1] = b[1];
    332     dest[3][2] = b[2];
    333     dest[3][3] = 1;
    334 }
    335 
    336 /**
    337  * Set 'dest' to a transformation matrix that turns an object to face the camera.
    338  * 'mtx' is the look-at matrix from the camera
    339  * 'position' is the position of the object in the world
    340  * 'angle' rotates the object while still facing the camera.
    341  */
    342 void mtxf_billboard(Mat4 dest, Mat4 mtx, Vec3f position, s16 angle) {
    343     dest[0][0] = coss(angle);
    344     dest[0][1] = sins(angle);
    345     dest[0][2] = 0;
    346     dest[0][3] = 0;
    347 
    348     dest[1][0] = -dest[0][1];
    349     dest[1][1] = dest[0][0];
    350     dest[1][2] = 0;
    351     dest[1][3] = 0;
    352 
    353     dest[2][0] = 0;
    354     dest[2][1] = 0;
    355     dest[2][2] = 1;
    356     dest[2][3] = 0;
    357 
    358     dest[3][0] =
    359         mtx[0][0] * position[0] + mtx[1][0] * position[1] + mtx[2][0] * position[2] + mtx[3][0];
    360     dest[3][1] =
    361         mtx[0][1] * position[0] + mtx[1][1] * position[1] + mtx[2][1] * position[2] + mtx[3][1];
    362     dest[3][2] =
    363         mtx[0][2] * position[0] + mtx[1][2] * position[1] + mtx[2][2] * position[2] + mtx[3][2];
    364     dest[3][3] = 1;
    365 }
    366 
    367 /**
    368  * Set 'dest' to a transformation matrix that aligns an object with the terrain
    369  * based on the normal. Used for enemies.
    370  * 'upDir' is the terrain normal
    371  * 'yaw' is the angle which it should face
    372  * 'pos' is the object's position in the world
    373  */
    374 void mtxf_align_terrain_normal(Mat4 dest, Vec3f upDir, Vec3f pos, s16 yaw) {
    375     Vec3f lateralDir;
    376     Vec3f leftDir;
    377     Vec3f forwardDir;
    378 
    379     vec3f_set(lateralDir, sins(yaw), 0, coss(yaw));
    380     vec3f_normalize(upDir);
    381 
    382     vec3f_cross(leftDir, upDir, lateralDir);
    383     vec3f_normalize(leftDir);
    384 
    385     vec3f_cross(forwardDir, leftDir, upDir);
    386     vec3f_normalize(forwardDir);
    387 
    388     dest[0][0] = leftDir[0];
    389     dest[0][1] = leftDir[1];
    390     dest[0][2] = leftDir[2];
    391     dest[3][0] = pos[0];
    392 
    393     dest[1][0] = upDir[0];
    394     dest[1][1] = upDir[1];
    395     dest[1][2] = upDir[2];
    396     dest[3][1] = pos[1];
    397 
    398     dest[2][0] = forwardDir[0];
    399     dest[2][1] = forwardDir[1];
    400     dest[2][2] = forwardDir[2];
    401     dest[3][2] = pos[2];
    402 
    403     dest[0][3] = 0.0f;
    404     dest[1][3] = 0.0f;
    405     dest[2][3] = 0.0f;
    406     dest[3][3] = 1.0f;
    407 }
    408 
    409 /**
    410  * Set 'mtx' to a transformation matrix that aligns an object with the terrain
    411  * based on 3 height samples in an equilateral triangle around the object.
    412  * Used for Mario when crawling or sliding.
    413  * 'yaw' is the angle which it should face
    414  * 'pos' is the object's position in the world
    415  * 'radius' is the distance from each triangle vertex to the center
    416  */
    417 void mtxf_align_terrain_triangle(Mat4 mtx, Vec3f pos, s16 yaw, f32 radius) {
    418     struct Surface *sp74;
    419     Vec3f point0;
    420     Vec3f point1;
    421     Vec3f point2;
    422     Vec3f forward;
    423     Vec3f xColumn;
    424     Vec3f yColumn;
    425     Vec3f zColumn;
    426     f32 avgY;
    427     f32 minY = -radius * 3;
    428 
    429     point0[0] = pos[0] + radius * sins(yaw + 0x2AAA);
    430     point0[2] = pos[2] + radius * coss(yaw + 0x2AAA);
    431     point1[0] = pos[0] + radius * sins(yaw + 0x8000);
    432     point1[2] = pos[2] + radius * coss(yaw + 0x8000);
    433     point2[0] = pos[0] + radius * sins(yaw + 0xD555);
    434     point2[2] = pos[2] + radius * coss(yaw + 0xD555);
    435 
    436     point0[1] = find_floor(point0[0], pos[1] + 150, point0[2], &sp74);
    437     point1[1] = find_floor(point1[0], pos[1] + 150, point1[2], &sp74);
    438     point2[1] = find_floor(point2[0], pos[1] + 150, point2[2], &sp74);
    439 
    440     if (point0[1] - pos[1] < minY) {
    441         point0[1] = pos[1];
    442     }
    443 
    444     if (point1[1] - pos[1] < minY) {
    445         point1[1] = pos[1];
    446     }
    447 
    448     if (point2[1] - pos[1] < minY) {
    449         point2[1] = pos[1];
    450     }
    451 
    452     avgY = (point0[1] + point1[1] + point2[1]) / 3;
    453 
    454     vec3f_set(forward, sins(yaw), 0, coss(yaw));
    455     find_vector_perpendicular_to_plane(yColumn, point0, point1, point2);
    456     vec3f_normalize(yColumn);
    457     vec3f_cross(xColumn, yColumn, forward);
    458     vec3f_normalize(xColumn);
    459     vec3f_cross(zColumn, xColumn, yColumn);
    460     vec3f_normalize(zColumn);
    461 
    462     mtx[0][0] = xColumn[0];
    463     mtx[0][1] = xColumn[1];
    464     mtx[0][2] = xColumn[2];
    465     mtx[3][0] = pos[0];
    466 
    467     mtx[1][0] = yColumn[0];
    468     mtx[1][1] = yColumn[1];
    469     mtx[1][2] = yColumn[2];
    470     mtx[3][1] = (avgY < pos[1]) ? pos[1] : avgY;
    471 
    472     mtx[2][0] = zColumn[0];
    473     mtx[2][1] = zColumn[1];
    474     mtx[2][2] = zColumn[2];
    475     mtx[3][2] = pos[2];
    476 
    477     mtx[0][3] = 0;
    478     mtx[1][3] = 0;
    479     mtx[2][3] = 0;
    480     mtx[3][3] = 1;
    481 }
    482 
    483 /**
    484  * Sets matrix 'dest' to the matrix product b * a assuming they are both
    485  * transformation matrices with a w-component of 1. Since the bottom row
    486  * is assumed to equal [0, 0, 0, 1], it saves some multiplications and
    487  * addition.
    488  * The resulting matrix represents first applying transformation b and
    489  * then a.
    490  */
    491 void mtxf_mul(Mat4 dest, Mat4 a, Mat4 b) {
    492     Mat4 temp;
    493     register f32 entry0;
    494     register f32 entry1;
    495     register f32 entry2;
    496 
    497     // column 0
    498     entry0 = a[0][0];
    499     entry1 = a[0][1];
    500     entry2 = a[0][2];
    501     temp[0][0] = entry0 * b[0][0] + entry1 * b[1][0] + entry2 * b[2][0];
    502     temp[0][1] = entry0 * b[0][1] + entry1 * b[1][1] + entry2 * b[2][1];
    503     temp[0][2] = entry0 * b[0][2] + entry1 * b[1][2] + entry2 * b[2][2];
    504 
    505     // column 1
    506     entry0 = a[1][0];
    507     entry1 = a[1][1];
    508     entry2 = a[1][2];
    509     temp[1][0] = entry0 * b[0][0] + entry1 * b[1][0] + entry2 * b[2][0];
    510     temp[1][1] = entry0 * b[0][1] + entry1 * b[1][1] + entry2 * b[2][1];
    511     temp[1][2] = entry0 * b[0][2] + entry1 * b[1][2] + entry2 * b[2][2];
    512 
    513     // column 2
    514     entry0 = a[2][0];
    515     entry1 = a[2][1];
    516     entry2 = a[2][2];
    517     temp[2][0] = entry0 * b[0][0] + entry1 * b[1][0] + entry2 * b[2][0];
    518     temp[2][1] = entry0 * b[0][1] + entry1 * b[1][1] + entry2 * b[2][1];
    519     temp[2][2] = entry0 * b[0][2] + entry1 * b[1][2] + entry2 * b[2][2];
    520 
    521     // column 3
    522     entry0 = a[3][0];
    523     entry1 = a[3][1];
    524     entry2 = a[3][2];
    525     temp[3][0] = entry0 * b[0][0] + entry1 * b[1][0] + entry2 * b[2][0] + b[3][0];
    526     temp[3][1] = entry0 * b[0][1] + entry1 * b[1][1] + entry2 * b[2][1] + b[3][1];
    527     temp[3][2] = entry0 * b[0][2] + entry1 * b[1][2] + entry2 * b[2][2] + b[3][2];
    528 
    529     temp[0][3] = temp[1][3] = temp[2][3] = 0;
    530     temp[3][3] = 1;
    531 
    532     mtxf_copy(dest, temp);
    533 }
    534 
    535 /**
    536  * Set matrix 'dest' to 'mtx' scaled by vector s
    537  */
    538 void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, Vec3f s) {
    539     register s32 i;
    540 
    541     for (i = 0; i < 4; i++) {
    542         dest[0][i] = mtx[0][i] * s[0];
    543         dest[1][i] = mtx[1][i] * s[1];
    544         dest[2][i] = mtx[2][i] * s[2];
    545         dest[3][i] = mtx[3][i];
    546     }
    547 }
    548 
    549 /**
    550  * Multiply a vector with a transformation matrix, which applies the transformation
    551  * to the point. Note that the bottom row is assumed to be [0, 0, 0, 1], which is
    552  * true for transformation matrices if the translation has a w component of 1.
    553  */
    554 void mtxf_mul_vec3s(Mat4 mtx, Vec3s b) {
    555     register f32 x = b[0];
    556     register f32 y = b[1];
    557     register f32 z = b[2];
    558 
    559     b[0] = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0];
    560     b[1] = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1];
    561     b[2] = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2];
    562 }
    563 
    564 /**
    565  * Convert float matrix 'src' to fixed point matrix 'dest'.
    566  * The float matrix may not contain entries larger than 65536 or the console
    567  * crashes. The fixed point matrix has entries with a 16-bit integer part, so
    568  * the floating point numbers are multiplied by 2^16 before being cast to a s32
    569  * integer. If this doesn't fit, the N64 and iQue consoles will throw an
    570  * exception. On Wii and Wii U Virtual Console the value will simply be clamped
    571  * and no crashes occur.
    572  */
    573 void mtxf_to_mtx(Mtx *dest, Mat4 src) {
    574 #ifdef AVOID_UB
    575     // Avoid type-casting which is technically UB by calling the equivalent
    576     // guMtxF2L function. This helps little-endian systems, as well.
    577     guMtxF2L(src, dest);
    578 #else
    579     s32 asFixedPoint;
    580     register s32 i;
    581     register s16 *a3 = (s16 *) dest;      // all integer parts stored in first 16 bytes
    582     register s16 *t0 = (s16 *) dest + 16; // all fraction parts stored in last 16 bytes
    583     register f32 *t1 = (f32 *) src;
    584 
    585     for (i = 0; i < 16; i++) {
    586         asFixedPoint = *t1++ * (1 << 16); //! float-to-integer conversion responsible for PU crashes
    587         *a3++ = GET_HIGH_S16_OF_32(asFixedPoint); // integer part
    588         *t0++ = GET_LOW_S16_OF_32(asFixedPoint);  // fraction part
    589     }
    590 #endif
    591 }
    592 
    593 /**
    594  * Set 'mtx' to a transformation matrix that rotates around the z axis.
    595  */
    596 void mtxf_rotate_xy(Mtx *mtx, s16 angle) {
    597     Mat4 temp;
    598 
    599     mtxf_identity(temp);
    600     temp[0][0] = coss(angle);
    601     temp[0][1] = sins(angle);
    602     temp[1][0] = -temp[0][1];
    603     temp[1][1] = temp[0][0];
    604     mtxf_to_mtx(mtx, temp);
    605 }
    606 
    607 /**
    608  * Extract a position given an object's transformation matrix and a camera matrix.
    609  * This is used for determining the world position of the held object: since objMtx
    610  * inherits the transformation from both the camera and Mario, it calculates this
    611  * by taking the camera matrix and inverting its transformation by first rotating
    612  * objMtx back from screen orientation to world orientation, and then subtracting
    613  * the camera position.
    614  */
    615 void get_pos_from_transform_mtx(Vec3f dest, Mat4 objMtx, Mat4 camMtx) {
    616     f32 camX = camMtx[3][0] * camMtx[0][0] + camMtx[3][1] * camMtx[0][1] + camMtx[3][2] * camMtx[0][2];
    617     f32 camY = camMtx[3][0] * camMtx[1][0] + camMtx[3][1] * camMtx[1][1] + camMtx[3][2] * camMtx[1][2];
    618     f32 camZ = camMtx[3][0] * camMtx[2][0] + camMtx[3][1] * camMtx[2][1] + camMtx[3][2] * camMtx[2][2];
    619 
    620     dest[0] =
    621         objMtx[3][0] * camMtx[0][0] + objMtx[3][1] * camMtx[0][1] + objMtx[3][2] * camMtx[0][2] - camX;
    622     dest[1] =
    623         objMtx[3][0] * camMtx[1][0] + objMtx[3][1] * camMtx[1][1] + objMtx[3][2] * camMtx[1][2] - camY;
    624     dest[2] =
    625         objMtx[3][0] * camMtx[2][0] + objMtx[3][1] * camMtx[2][1] + objMtx[3][2] * camMtx[2][2] - camZ;
    626 }
    627 
    628 /**
    629  * Take the vector starting at 'from' pointed at 'to' an retrieve the length
    630  * of that vector, as well as the yaw and pitch angles.
    631  * Basically it converts the direction to spherical coordinates.
    632  */
    633 void vec3f_get_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, s16 *pitch, s16 *yaw) {
    634     register f32 x = to[0] - from[0];
    635     register f32 y = to[1] - from[1];
    636     register f32 z = to[2] - from[2];
    637 
    638     *dist = sqrtf(x * x + y * y + z * z);
    639     *pitch = atan2s(sqrtf(x * x + z * z), y);
    640     *yaw = atan2s(z, x);
    641 }
    642 
    643 /**
    644  * Construct the 'to' point which is distance 'dist' away from the 'from' position,
    645  * and has the angles pitch and yaw.
    646  */
    647 void vec3f_set_dist_and_angle(Vec3f from, Vec3f to, f32 dist, s16 pitch, s16 yaw) {
    648     to[0] = from[0] + dist * coss(pitch) * sins(yaw);
    649     to[1] = from[1] + dist * sins(pitch);
    650     to[2] = from[2] + dist * coss(pitch) * coss(yaw);
    651 }
    652 
    653 /**
    654  * Return the value 'current' after it tries to approach target, going up at
    655  * most 'inc' and going down at most 'dec'.
    656  */
    657 s32 approach_s32(s32 current, s32 target, s32 inc, s32 dec) {
    658     //! If target is close to the max or min s32, then it's possible to overflow
    659     // past it without stopping.
    660 
    661     if (current < target) {
    662         current += inc;
    663         if (current > target) {
    664             current = target;
    665         }
    666     } else {
    667         current -= dec;
    668         if (current < target) {
    669             current = target;
    670         }
    671     }
    672     return current;
    673 }
    674 
    675 /**
    676  * Return the value 'current' after it tries to approach target, going up at
    677  * most 'inc' and going down at most 'dec'.
    678  */
    679 f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec) {
    680     if (current < target) {
    681         current += inc;
    682         if (current > target) {
    683             current = target;
    684         }
    685     } else {
    686         current -= dec;
    687         if (current < target) {
    688             current = target;
    689         }
    690     }
    691     return current;
    692 }
    693 
    694 /**
    695  * Helper function for atan2s. Does a look up of the arctangent of y/x assuming
    696  * the resulting angle is in range [0, 0x2000] (1/8 of a circle).
    697  */
    698 static u16 atan2_lookup(f32 y, f32 x) {
    699     u16 ret;
    700 
    701     if (x == 0) {
    702         ret = gArctanTable[0];
    703     } else {
    704         ret = gArctanTable[(s32)(y / x * 1024 + 0.5f)];
    705     }
    706     return ret;
    707 }
    708 
    709 /**
    710  * Compute the angle from (0, 0) to (x, y) as a s16. Given that terrain is in
    711  * the xz-plane, this is commonly called with (z, x) to get a yaw angle.
    712  */
    713 s16 atan2s(f32 y, f32 x) {
    714     u16 ret;
    715 
    716     if (x >= 0) {
    717         if (y >= 0) {
    718             if (y >= x) {
    719                 ret = atan2_lookup(x, y);
    720             } else {
    721                 ret = 0x4000 - atan2_lookup(y, x);
    722             }
    723         } else {
    724             y = -y;
    725             if (y < x) {
    726                 ret = 0x4000 + atan2_lookup(y, x);
    727             } else {
    728                 ret = 0x8000 - atan2_lookup(x, y);
    729             }
    730         }
    731     } else {
    732         x = -x;
    733         if (y < 0) {
    734             y = -y;
    735             if (y >= x) {
    736                 ret = 0x8000 + atan2_lookup(x, y);
    737             } else {
    738                 ret = 0xC000 - atan2_lookup(y, x);
    739             }
    740         } else {
    741             if (y < x) {
    742                 ret = 0xC000 + atan2_lookup(y, x);
    743             } else {
    744                 ret = -atan2_lookup(x, y);
    745             }
    746         }
    747     }
    748     return ret;
    749 }
    750 
    751 /**
    752  * Compute the atan2 in radians by calling atan2s and converting the result.
    753  */
    754 f32 atan2f(f32 y, f32 x) {
    755     return (f32) atan2s(y, x) * M_PI / 0x8000;
    756 }
    757 
    758 #define CURVE_BEGIN_1 1
    759 #define CURVE_BEGIN_2 2
    760 #define CURVE_MIDDLE 3
    761 #define CURVE_END_1 4
    762 #define CURVE_END_2 5
    763 
    764 /**
    765  * Set 'result' to a 4-vector with weights corresponding to interpolation
    766  * value t in [0, 1] and gSplineState. Given the current control point P, these
    767  * weights are for P[0], P[1], P[2] and P[3] to obtain an interpolated point.
    768  * The weights naturally sum to 1, and they are also always in range [0, 1] so
    769  * the interpolated point will never overshoot. The curve is guaranteed to go
    770  * through the first and last point, but not through intermediate points.
    771  *
    772  * gSplineState ensures that the curve is clamped: the first two points
    773  * and last two points have different weight formulas. These are the weights
    774  * just before gSplineState transitions:
    775  * 1: [1, 0, 0, 0]
    776  * 1->2: [0, 3/12, 7/12, 2/12]
    777  * 2->3: [0, 1/6, 4/6, 1/6]
    778  * 3->3: [0, 1/6, 4/6, 1/6] (repeats)
    779  * 3->4: [0, 1/6, 4/6, 1/6]
    780  * 4->5: [0, 2/12, 7/12, 3/12]
    781  * 5: [0, 0, 0, 1]
    782  *
    783  * I suspect that the weight formulas will give a 3rd degree B-spline with the
    784  * common uniform clamped knot vector, e.g. for n points:
    785  * [0, 0, 0, 0, 1, 2, ... n-1, n, n, n, n]
    786  * TODO: verify the classification of the spline / figure out how polynomials were computed
    787  */
    788 void spline_get_weights(Vec4f result, f32 t, UNUSED s32 c) {
    789     f32 tinv = 1 - t;
    790     f32 tinv2 = tinv * tinv;
    791     f32 tinv3 = tinv2 * tinv;
    792     f32 t2 = t * t;
    793     f32 t3 = t2 * t;
    794 
    795     switch (gSplineState) {
    796         case CURVE_BEGIN_1:
    797             result[0] = tinv3;
    798             result[1] = t3 * 1.75f - t2 * 4.5f + t * 3.0f;
    799             result[2] = -t3 * (11 / 12.0f) + t2 * 1.5f;
    800             result[3] = t3 * (1 / 6.0f);
    801             break;
    802         case CURVE_BEGIN_2:
    803             result[0] = tinv3 * 0.25f;
    804             result[1] = t3 * (7 / 12.0f) - t2 * 1.25f + t * 0.25f + (7 / 12.0f);
    805             result[2] = -t3 * 0.5f + t2 * 0.5f + t * 0.5f + (1 / 6.0f);
    806             result[3] = t3 * (1 / 6.0f);
    807             break;
    808         case CURVE_MIDDLE:
    809             result[0] = tinv3 * (1 / 6.0f);
    810             result[1] = t3 * 0.5f - t2 + (4 / 6.0f);
    811             result[2] = -t3 * 0.5f + t2 * 0.5f + t * 0.5f + (1 / 6.0f);
    812             result[3] = t3 * (1 / 6.0f);
    813             break;
    814         case CURVE_END_1:
    815             result[0] = tinv3 * (1 / 6.0f);
    816             result[1] = -tinv3 * 0.5f + tinv2 * 0.5f + tinv * 0.5f + (1 / 6.0f);
    817             result[2] = tinv3 * (7 / 12.0f) - tinv2 * 1.25f + tinv * 0.25f + (7 / 12.0f);
    818             result[3] = t3 * 0.25f;
    819             break;
    820         case CURVE_END_2:
    821             result[0] = tinv3 * (1 / 6.0f);
    822             result[1] = -tinv3 * (11 / 12.0f) + tinv2 * 1.5f;
    823             result[2] = tinv3 * 1.75f - tinv2 * 4.5f + tinv * 3.0f;
    824             result[3] = t3;
    825             break;
    826     }
    827 }
    828 
    829 /**
    830  * Initialize a spline animation.
    831  * 'keyFrames' should be an array of (s, x, y, z) vectors
    832  *  s: the speed of the keyframe in 1000/frames, e.g. s=100 means the keyframe lasts 10 frames
    833  *  (x, y, z): point in 3D space on the curve
    834  * The array should end with three entries with s=0 (infinite keyframe duration).
    835  * That's because the spline has a 3rd degree polynomial, so it looks 3 points ahead.
    836  */
    837 void anim_spline_init(Vec4s *keyFrames) {
    838     gSplineKeyframe = keyFrames;
    839     gSplineKeyframeFraction = 0;
    840     gSplineState = 1;
    841 }
    842 
    843 /**
    844  * Poll the next point from a spline animation.
    845  * anim_spline_init should be called before polling for vectors.
    846  * Returns TRUE when the last point is reached, FALSE otherwise.
    847  */
    848 s32 anim_spline_poll(Vec3f result) {
    849     Vec4f weights;
    850     s32 i;
    851     s32 hasEnded = FALSE;
    852 
    853     vec3f_copy(result, gVec3fZero);
    854     spline_get_weights(weights, gSplineKeyframeFraction, gSplineState);
    855     for (i = 0; i < 4; i++) {
    856         result[0] += weights[i] * gSplineKeyframe[i][1];
    857         result[1] += weights[i] * gSplineKeyframe[i][2];
    858         result[2] += weights[i] * gSplineKeyframe[i][3];
    859     }
    860 
    861     if ((gSplineKeyframeFraction += gSplineKeyframe[0][0] / 1000.0f) >= 1) {
    862         gSplineKeyframe++;
    863         gSplineKeyframeFraction--;
    864         switch (gSplineState) {
    865             case CURVE_END_2:
    866                 hasEnded = TRUE;
    867                 break;
    868             case CURVE_MIDDLE:
    869                 if (gSplineKeyframe[2][0] == 0) {
    870                     gSplineState = CURVE_END_1;
    871                 }
    872                 break;
    873             default:
    874                 gSplineState++;
    875                 break;
    876         }
    877     }
    878 
    879     return hasEnded;
    880 }