Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_mulmat.c
1/*
2 * Copyright 2011-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : math/NE10_mulmat.c
30 */
31
32#include "NE10_types.h"
33#include "macros.h"
34
35#include <assert.h>
36
37ne10_result_t ne10_mulmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count)
38{
39#define A1 src1[ itr ].c1.r1
40#define A2 src2[ itr ].c1.r1
41#define B1 src1[ itr ].c1.r2
42#define B2 src2[ itr ].c1.r2
43#define C1 src1[ itr ].c2.r1
44#define C2 src2[ itr ].c2.r1
45#define D1 src1[ itr ].c2.r2
46#define D2 src2[ itr ].c2.r2
47
48 NE10_X_OPERATION_FLOAT_C
49 (
50 dst[ itr ].c1.r1 = (A1 * A2) + (C1 * B2);
51 dst[ itr ].c1.r2 = (B1 * A2) + (D1 * B2);
52
53 dst[ itr ].c2.r1 = (A1 * C2) + (C1 * D2);
54 dst[ itr ].c2.r2 = (B1 * C2) + (D1 * D2);
55 );
56
57#undef A1
58#undef A2
59#undef B1
60#undef B2
61#undef C1
62#undef C2
63#undef D1
64#undef D2
65}
66
67ne10_result_t ne10_mulmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count)
68{
69#define A1 src1[ itr ].c1.r1
70#define A2 src2[ itr ].c1.r1
71#define B1 src1[ itr ].c1.r2
72#define B2 src2[ itr ].c1.r2
73#define C1 src1[ itr ].c1.r3
74#define C2 src2[ itr ].c1.r3
75#define D1 src1[ itr ].c2.r1
76#define D2 src2[ itr ].c2.r1
77#define E1 src1[ itr ].c2.r2
78#define E2 src2[ itr ].c2.r2
79#define F1 src1[ itr ].c2.r3
80#define F2 src2[ itr ].c2.r3
81#define G1 src1[ itr ].c3.r1
82#define G2 src2[ itr ].c3.r1
83#define H1 src1[ itr ].c3.r2
84#define H2 src2[ itr ].c3.r2
85#define I1 src1[ itr ].c3.r3
86#define I2 src2[ itr ].c3.r3
87
88 NE10_X_OPERATION_FLOAT_C
89 (
90 dst[ itr ].c1.r1 = (A1 * A2) + (D1 * B2) + (G1 * C2);
91 dst[ itr ].c1.r2 = (B1 * A2) + (E1 * B2) + (H1 * C2);
92 dst[ itr ].c1.r3 = (C1 * A2) + (F1 * B2) + (I1 * C2);
93
94 dst[ itr ].c2.r1 = (A1 * D2) + (D1 * E2) + (G1 * F2);
95 dst[ itr ].c2.r2 = (B1 * D2) + (E1 * E2) + (H1 * F2);
96 dst[ itr ].c2.r3 = (C1 * D2) + (F1 * E2) + (I1 * F2);
97
98 dst[ itr ].c3.r1 = (A1 * G2) + (D1 * H2) + (G1 * I2);
99 dst[ itr ].c3.r2 = (B1 * G2) + (E1 * H2) + (H1 * I2);
100 dst[ itr ].c3.r3 = (C1 * G2) + (F1 * H2) + (I1 * I2);
101 );
102
103#undef A1
104#undef A2
105#undef B1
106#undef B2
107#undef C1
108#undef C2
109#undef D1
110#undef D2
111#undef E1
112#undef E2
113#undef F1
114#undef F2
115#undef G1
116#undef G2
117#undef H1
118#undef H2
119#undef I1
120#undef I2
121}
122
123ne10_result_t ne10_mulmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count)
124{
125#define A1 src1[ itr ].c1.r1
126#define A2 src2[ itr ].c1.r1
127#define B1 src1[ itr ].c1.r2
128#define B2 src2[ itr ].c1.r2
129#define C1 src1[ itr ].c1.r3
130#define C2 src2[ itr ].c1.r3
131#define D1 src1[ itr ].c1.r4
132#define D2 src2[ itr ].c1.r4
133
134#define E1 src1[ itr ].c2.r1
135#define E2 src2[ itr ].c2.r1
136#define F1 src1[ itr ].c2.r2
137#define F2 src2[ itr ].c2.r2
138#define G1 src1[ itr ].c2.r3
139#define G2 src2[ itr ].c2.r3
140#define H1 src1[ itr ].c2.r4
141#define H2 src2[ itr ].c2.r4
142
143#define I1 src1[ itr ].c3.r1
144#define I2 src2[ itr ].c3.r1
145#define J1 src1[ itr ].c3.r2
146#define J2 src2[ itr ].c3.r2
147#define K1 src1[ itr ].c3.r3
148#define K2 src2[ itr ].c3.r3
149#define L1 src1[ itr ].c3.r4
150#define L2 src2[ itr ].c3.r4
151
152#define M1 src1[ itr ].c4.r1
153#define M2 src2[ itr ].c4.r1
154#define N1 src1[ itr ].c4.r2
155#define N2 src2[ itr ].c4.r2
156#define O1 src1[ itr ].c4.r3
157#define O2 src2[ itr ].c4.r3
158#define P1 src1[ itr ].c4.r4
159#define P2 src2[ itr ].c4.r4
160
161 NE10_X_OPERATION_FLOAT_C
162 (
163 dst[ itr ].c1.r1 = (A1 * A2) + (E1 * B2) + (I1 * C2) + (M1 * D2);
164 dst[ itr ].c1.r2 = (B1 * A2) + (F1 * B2) + (J1 * C2) + (N1 * D2);
165 dst[ itr ].c1.r3 = (C1 * A2) + (G1 * B2) + (K1 * C2) + (O1 * D2);
166 dst[ itr ].c1.r4 = (D1 * A2) + (H1 * B2) + (L1 * C2) + (P1 * D2);
167
168 dst[ itr ].c2.r1 = (A1 * E2) + (E1 * F2) + (I1 * G2) + (M1 * H2);
169 dst[ itr ].c2.r2 = (B1 * E2) + (F1 * F2) + (J1 * G2) + (N1 * H2);
170 dst[ itr ].c2.r3 = (C1 * E2) + (G1 * F2) + (K1 * G2) + (O1 * H2);
171 dst[ itr ].c2.r4 = (D1 * E2) + (H1 * F2) + (L1 * G2) + (P1 * H2);
172
173 dst[ itr ].c3.r1 = (A1 * I2) + (E1 * J2) + (I1 * K2) + (M1 * L2);
174 dst[ itr ].c3.r2 = (B1 * I2) + (F1 * J2) + (J1 * K2) + (N1 * L2);
175 dst[ itr ].c3.r3 = (C1 * I2) + (G1 * J2) + (K1 * K2) + (O1 * L2);
176 dst[ itr ].c3.r4 = (D1 * I2) + (H1 * J2) + (L1 * K2) + (P1 * L2);
177
178 dst[ itr ].c4.r1 = (A1 * M2) + (E1 * N2) + (I1 * O2) + (M1 * P2);
179 dst[ itr ].c4.r2 = (B1 * M2) + (F1 * N2) + (J1 * O2) + (N1 * P2);
180 dst[ itr ].c4.r3 = (C1 * M2) + (G1 * N2) + (K1 * O2) + (O1 * P2);
181 dst[ itr ].c4.r4 = (D1 * M2) + (H1 * N2) + (L1 * O2) + (P1 * P2);
182 );
183
184#undef A1
185#undef A2
186#undef B1
187#undef B2
188#undef C1
189#undef C2
190#undef D1
191#undef D2
192#undef E1
193#undef E2
194#undef F1
195#undef F2
196#undef G1
197#undef G2
198#undef H1
199#undef H2
200#undef I1
201#undef I2
202#undef J1
203#undef J2
204#undef K1
205#undef K2
206#undef L1
207#undef L2
208#undef M1
209#undef M2
210#undef N1
211#undef N2
212#undef O1
213#undef O2
214#undef P1
215#undef P2
216}