/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
 */

/* Inspired by m4ri's mzd implementation, but completely re-written for our use-case. */

#ifndef MZD_ADDITIONAL_H
#define MZD_ADDITIONAL_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "macros.h"
#include "compat.h"

PICNIC_BEGIN_C_DECL

typedef uint64_t word;
#define WORD_C(v) UINT64_C(v)

typedef ATTR_ALIGNED(32) struct {
  word w64[4];
} block_t;

/**
* Representation of matrices and vectors
*
 * The basic memory unit is a block of 256 bits. Each row is stored in (possibly multiple)
 * blocks, depending on the number of columns. Matrices with up to 128 columns are the only
 * exception: in this case, a block contains two rows. The row with even index is stored in
 * w64[0] and w64[1], and the row with odd index in w64[2] and w64[3].
 */
typedef block_t mzd_local_t;

mzd_local_t* mzd_local_init_ex(unsigned int r, unsigned int c, bool clear) ATTR_ASSUME_ALIGNED(32);
#define mzd_local_init(r, c) mzd_local_init_ex(r, c, true)

static inline void mzd_local_free(mzd_local_t* v) {
  picnic_aligned_free(v);
}
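
/*
 * Example (illustrative sketch, not part of the API): allocate, use and release a
 * 1x128 vector with the helpers above; mzd_local_init zero-initializes the blocks.
 *
 *   mzd_local_t* v = mzd_local_init(1, 128);
 *   // ... operate on v ...
 *   mzd_local_free(v);
 */
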
void mzd_copy_uint64_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_uint64_192(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_uint64_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s128_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s128_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s256_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s256_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
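
/*
 * Naming convention (a reading of the declarations in this header, not an
 * authoritative specification): the uint64_* variants are the portable
 * implementations on 64-bit words, the s128_* variants use 128-bit SIMD (SSE2 or
 * NEON), and the s256_* variants use AVX2; the trailing number gives the bit width
 * of the operands.
 */
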
/**
* mzd_xor variants
*/
void mzd_xor_uint64_128(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_192(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_256(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_640(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_960(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_1216(mzd_local_t* res, mzd_local_t const* first,
                         mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_128(mzd_local_t* res, mzd_local_t const* first,
                      mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_256(mzd_local_t* res, mzd_local_t const* first,
                      mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_640(mzd_local_t* res, mzd_local_t const* first,
                      mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_1024(mzd_local_t* res, mzd_local_t const* first,
                       mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_1280(mzd_local_t* res, mzd_local_t const* first,
                       mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_128(mzd_local_t* res, mzd_local_t const* first,
                      mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_256(mzd_local_t* res, mzd_local_t const* first,
                      mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_768(mzd_local_t* res, mzd_local_t const* first,
                      mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_1024(mzd_local_t* res, mzd_local_t const* first,
                       mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_1280(mzd_local_t* res, mzd_local_t const* first,
                       mzd_local_t const* second) ATTR_NONNULL;
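
/*
 * Example (illustrative sketch): XOR two 128-bit vectors into res with the portable
 * variant; all operands are assumed to be properly allocated 1x128 vectors.
 *
 *   mzd_xor_uint64_128(res, a, b);
 */
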
/**
* mzd_and variants
*/
void mzd_and_uint64_128(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_uint64_192(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;
void mzd_and_uint64_256(mzd_local_t* res, mzd_local_t const* first,
                        mzd_local_t const* second) ATTR_NONNULL;

/**
 * shifts and rotations
 */
void mzd_shift_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_shift_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
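
/*
 * Sketch of the intended semantics (an assumption from the names, not a verified
 * contract): the value is treated as one wide bit string, so e.g.
 * mzd_shift_left_uint64_128(res, val, 3) computes res = val << 3 over the full
 * 128-bit value, carrying bits across the 64-bit word boundary.
 */
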
#if defined(PICNIC_STATIC)
/* only needed for tests */
void mzd_rotate_left_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_right_uint64_128(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_left_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_right_uint64_192(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_left_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
void mzd_rotate_right_uint64_256(mzd_local_t* res, const mzd_local_t* val, unsigned int count);
#endif

/**
 * Compute v * A optimized for v being a vector.
 */
void mzd_mul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_128_640(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_192_960(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_256_1216(mzd_local_t* c, mzd_local_t const* v,
                               mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_128_640(mzd_local_t* c, mzd_local_t const* v,
                            mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_192_1024(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_256_1280(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_128_768(mzd_local_t* c, mzd_local_t const* v,
                            mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s256_192_1024(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s256_256_1280(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* At) ATTR_NONNULL;
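
/*
 * Example (illustrative sketch): c = v * A over GF(2) for a 1x128 vector and a
 * 128x128 matrix, using the portable variant. The matrix is assumed to already be
 * in the layout this implementation expects (the At parameter name of the uint64
 * variants suggests a transposed layout; this sketch does not construct it).
 *
 *   mzd_mul_v_uint64_128(c, v, A);
 */
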
/**
 * Compute c + v * A optimized for v being a vector, for specific sizes depending on the
 * instance. Only works for specific sizes and the RLL_NEXT algorithm, using uint64
 * operations.
 */
void mzd_addmul_v_uint64_30_128(mzd_local_t* c, mzd_local_t const* v,
                                mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_30_192(mzd_local_t* c, mzd_local_t const* v,
                                mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_30_256(mzd_local_t* c, mzd_local_t const* v,
                                mzd_local_t const* A) ATTR_NONNULL;

/**
 * Use SSE2 or NEON
 */
void mzd_addmul_v_s128_30_128(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_30_192(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_30_256(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* A) ATTR_NONNULL;

/**
 * Use AVX2
 */
void mzd_addmul_v_s256_30_128(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_30_192(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_30_256(mzd_local_t* c, mzd_local_t const* v,
                              mzd_local_t const* A) ATTR_NONNULL;

/**
 * Compute v * A using the parity-based algorithm.
 */
void mzd_mul_v_parity_uint64_128_30(mzd_local_t* c, mzd_local_t const* v,
                                    mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_192_30(mzd_local_t* c, mzd_local_t const* v,
                                    mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_256_30(mzd_local_t* c, mzd_local_t const* v,
                                    mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_s256_256_30(mzd_local_t* c, mzd_local_t const* v,
                                  mzd_local_t const* A) ATTR_NONNULL;
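
/*
 * Sketch of the idea (an assumption based on the name, not a statement of this
 * implementation): each output bit is the parity of the AND of v with one matrix
 * column, e.g. per 64-bit word
 *
 *   out_bit ^= __builtin_parityll(v->w64[i] & col->w64[i]);
 */
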
/**
 * Compute c + v * A optimized for c and v being vectors.
 */
void mzd_addmul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_129(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v,
                             mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_129(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
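
/*
 * Example (illustrative sketch): accumulate a product into c, i.e. c ^= v * A over
 * GF(2) (addition in GF(2) is XOR), using the portable variant:
 *
 *   mzd_addmul_v_uint64_128(c, v, A);
 */
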
/**
 * Shuffle vector x according to the information in mask. Needed for the OLLE
 * optimizations.
 */
void mzd_shuffle_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
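
/*
 * Note (an assumption from the names): the mzd_shuffle_pext_* variants rely on the
 * BMI2 PEXT instruction and should only be selected on CPUs that support it.
 */
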
#define BLOCK(v, b) ((block_t*)ASSUME_ALIGNED(&(v)[(b)], 32))
#define CONST_BLOCK(v, b) ((const block_t*)ASSUME_ALIGNED(&(v)[(b)], 32))
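
/*
 * Example (illustrative sketch): BLOCK/CONST_BLOCK return the b-th 256-bit block of
 * a vector together with the 32-byte alignment promise, e.g. reading the first word:
 *
 *   const word w = CONST_BLOCK(v, 0)->w64[0];
 */
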
PICNIC_END_C_DECL
#endif