-
Notifications
You must be signed in to change notification settings - Fork 1
/
stringx.s
183 lines (174 loc) · 4.59 KB
/
stringx.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/*
* Hex-to-ascii routines for various bitnesses
*
* Copyright (C) 2019-2021 Martin Krastev <[email protected]>
*/
// Assemble for the ARMv8-A architecture.
.arch armv8-a
// Exported entry points of the hex conversion routines.
.global string_x8
.global string_x16
.global string_x16_1
.global string_x16_2
.global string_x32
.global string_x64
.global string_x64_1
// Everything that follows is emitted into the code section.
.text
// convert x8 to string: two lowercase hex digits, high nibble first
// x0: output buffer; exactly 2 bytes are stored, no terminator is written
// w1: value to convert, bits [7:0]; higher bits are ignored
// clobbers: x1, x2, x3, x4, x5, NZCV flags
.align 4
string_x8:
        ubfx    w2, w1, 4, 4            // w2 = high nibble -> first digit
        and     w1, w1, 15              // w1 = low nibble  -> second digit
        mov     w4, '0' - 0x0           // bias mapping 0..9 onto '0'..'9'
        mov     w5, 'a' - 0xa           // bias mapping 10..15 onto 'a'..'f'

        // first digit
        cmp     w2, 0xa
        csel    w3, w4, w5, LO          // below 10: decimal bias, otherwise letter bias
        add     w2, w2, w3

        // second digit; the '0' bias in w4 is consumed here and replaced by the chosen bias
        cmp     w1, 0xa
        csel    w4, w4, w5, LO
        add     w1, w1, w4

        strb    w2, [x0, 0]
        strb    w1, [x0, 1]
        ret
// convert x16 to string: four lowercase hex digits, most significant nibble first
// x0: output buffer; exactly 4 bytes are stored one at a time, no terminator is written
// w1: value to convert, bits [15:0]; higher bits are ignored
// clobbers: x1, x2, x3, x4, x5, x6, x7, x8, x9, NZCV flags
.align 4
string_x16:
        mov     w5, '0' - 0x0           // bias mapping 0..9 onto '0'..'9'
        mov     w6, 'a' - 0xa           // bias mapping 10..15 onto 'a'..'f'

        // split the halfword into nibbles: w4 = bits [15:12] ... w1 = bits [3:0]
        ubfx    w4, w1, 12, 4
        ubfx    w3, w1, 8, 4
        ubfx    w2, w1, 4, 4
        and     w1, w1, 15

        // digit 0
        cmp     w4, 0xa
        csel    w7, w5, w6, LO          // below 10: decimal bias, otherwise letter bias
        add     w4, w4, w7
        strb    w4, [x0, 0]

        // digit 1
        cmp     w3, 0xa
        csel    w8, w5, w6, LO
        add     w3, w3, w8
        strb    w3, [x0, 1]

        // digit 2
        cmp     w2, 0xa
        csel    w9, w5, w6, LO
        add     w2, w2, w9
        strb    w2, [x0, 2]

        // digit 3; the '0' bias in w5 is consumed here and replaced by the chosen bias
        cmp     w1, 0xa
        csel    w5, w5, w6, LO
        add     w1, w1, w5
        strb    w1, [x0, 3]
        ret
// convert x16 to string: four lowercase hex digits, most significant nibble first
// variant that keeps two nibbles per register (one in each of the two low bytes)
// x0: output buffer; exactly 4 bytes are stored one at a time, no terminator is written
// w1: value to convert, bits [15:0]; higher bits are ignored
// clobbers: x1, x2, x3, x4, x5, x6, x7, NZCV flags
.align 4
string_x16_1:
        // separate the nibbles: each register ends up as 0x0000_0N0M
        mov     w6, 0x0f0f
        lsr     w2, w1, 4
        and     w1, w1, w6              // even-index nibbles: bits [3:0] in byte 0, bits [11:8] in byte 1
        and     w2, w2, w6              // odd-index nibbles: bits [7:4] in byte 0, bits [15:12] in byte 1

        mov     w5, 0xa                 // first non-decimal nibble value
        mov     w3, '0' - 0x0           // bias mapping 0..9 onto '0'..'9'
        mov     w4, 'a' - 0xa           // bias mapping 10..15 onto 'a'..'f'

        // byte 0 of each register -> the two least significant digits
        cmp     w5, w1, UXTB            // HI means 10 > nibble, i.e. a decimal digit
        csel    w6, w3, w4, HI
        add     w1, w1, w6              // nibble + bias is at most 0x66, so byte 1 is not disturbed
        strb    w1, [x0, 3]
        cmp     w5, w2, UXTB
        csel    w7, w3, w4, HI
        add     w2, w2, w7
        strb    w2, [x0, 2]

        // byte 1 of each register -> the two most significant digits
        lsr     w1, w1, 8
        lsr     w2, w2, 8
        cmp     w5, w1, UXTB
        csel    w6, w3, w4, HI
        add     w1, w1, w6
        strb    w1, [x0, 1]
        cmp     w5, w2, UXTB
        csel    w7, w3, w4, HI
        add     w2, w2, w7
        strb    w2, [x0, 0]
        ret
// convert x16 to string: four lowercase hex digits, most significant nibble first
// SIMD variant: all digits are computed in byte lanes and written with one 32-bit store
// x0: output buffer; exactly 4 bytes are stored, no terminator is written
// w1: value to convert, bits [15:0]; higher bits do not reach the stored bytes
// clobbers: x1, v0, v1, v3, v4, v5, v6
// NOTE(review): digit order relies on lane 0 being stored at the lowest address
// (little-endian data) -- confirm for the target
.align 4
string_x16_2:
        // digit order fix-up, part one: swap the two octets so the high octet sits in lane 0
        rev16   w1, w1
        fmov    s0, w1

        // split every octet into its high nibble (v1) and low nibble (v0)
        movi    v6.8b, 0xf
        ushr    v1.8b, v0.8b, 4
        and     v0.8b, v0.8b, v6.8b

        // digit order fix-up, part two: interleave so each high nibble precedes its low nibble
        zip1    v0.8b, v1.8b, v0.8b

        // per lane: all-ones where 10 > nibble (decimal digit), zero otherwise
        movi    v5.8b, 0xa
        cmhi    v1.8b, v5.8b, v0.8b

        // per lane: pick the '0' bias where the mask is set, the 'a' - 10 bias elsewhere
        movi    v3.8b, '0' - 0x0
        movi    v4.8b, 'a' - 0xa
        bsl     v1.8b, v3.8b, v4.8b

        add     v0.8b, v0.8b, v1.8b
        str     s0, [x0]
        ret
// convert x32 to string: eight lowercase hex digits, most significant nibble first
// all digits are computed in byte lanes and written with one 64-bit store
// x0: output buffer; exactly 8 bytes are stored, no terminator is written
// w1: value to convert, bits [31:0]
// clobbers: x1, v0, v1, v3, v4, v5, v6
// NOTE(review): digit order relies on lane 0 being stored at the lowest address
// (little-endian data) -- confirm for the target
.align 4
string_x32:
        // digit order fix-up, part one: reverse the octets so the top octet sits in lane 0
        rev     w1, w1
        fmov    s0, w1

        // split every octet into its high nibble (v1) and low nibble (v0)
        movi    v6.8b, 0xf
        ushr    v1.8b, v0.8b, 4
        and     v0.8b, v0.8b, v6.8b

        // digit order fix-up, part two: interleave so each high nibble precedes its low nibble
        zip1    v0.8b, v1.8b, v0.8b

        // per lane: all-ones where 10 > nibble (decimal digit), zero otherwise
        movi    v5.8b, 0xa
        cmhi    v1.8b, v5.8b, v0.8b

        // per lane: pick the '0' bias where the mask is set, the 'a' - 10 bias elsewhere
        movi    v3.8b, '0' - 0x0
        movi    v4.8b, 'a' - 0xa
        bsl     v1.8b, v3.8b, v4.8b

        add     v0.8b, v0.8b, v1.8b
        str     d0, [x0]
        ret
// convert x64 to string
// produces the 16 lowercase hex digits of x1 in byte lanes and writes them with one
// 128-bit store; no terminator is written
// x0: output buffer, 16 bytes are stored
// x1: value to convert, bits [63:0]
// clobbers: x1, v0, v1, v3, v4, v5, v6
// NOTE(review): digit order relies on lane 0 being stored at the lowest address
// (little-endian data) -- confirm for the target
.align 4
string_x64:
rev x1, x1 // we write the result via a single store op, so correct for digit order, part one: swap octet order
// per-lane constants: decimal bias, letter bias, first non-decimal nibble value, nibble mask
movi v3.16b, '0' - 0x0
movi v4.16b, 'a' - 0xa
movi v5.16b, 0xa
movi v6.16b, 0xf
// move the byte-reversed value into the low 64 bits of v0
fmov d0, x1
// split each of the 8 octets: high nibbles into v1, low nibbles stay in v0
ushr v1.8b, v0.8b, 4
and v0.8b, v0.8b, v6.8b
// interleave the low 8 lanes of v1 and v0 into 16 lanes, high nibble first
zip1 v0.16b, v1.16b, v0.16b // we write the result via a single store op, so correct for digit order, part two: swap nibble order
// per lane: all-ones where 10 > nibble (decimal digit), zero otherwise
cmhi v1.16b, v5.16b, v0.16b
// per lane: select the '0' bias where the mask is set, the 'a' - 10 bias elsewhere
bsl v1.16b, v3.16b, v4.16b
// nibble + bias = ASCII digit
add v0.16b, v0.16b, v1.16b
str q0, [x0]
ret
// convert x64 to string
// same instructions as string_x64, issued in a different order: the nibble mask and the
// transfer into v0 come before the remaining constants are materialized
// produces the 16 lowercase hex digits of x1 in byte lanes and writes them with one
// 128-bit store; no terminator is written
// x0: output buffer, 16 bytes are stored
// x1: value to convert, bits [63:0]
// clobbers: x1, v0, v1, v3, v4, v5, v6
// NOTE(review): digit order relies on lane 0 being stored at the lowest address
// (little-endian data) -- confirm for the target
.align 4
string_x64_1:
rev x1, x1 // we write the result via a single store op, so correct for digit order, part one: swap octet order
// nibble mask, needed by the 'and' below
movi v6.16b, 0xf
// move the byte-reversed value into the low 64 bits of v0
fmov d0, x1
// per-lane constants used later: decimal bias, letter bias, first non-decimal nibble value
movi v3.16b, '0' - 0x0
movi v4.16b, 'a' - 0xa
movi v5.16b, 0xa
// split each of the 8 octets: high nibbles into v1, low nibbles stay in v0
ushr v1.8b, v0.8b, 4
and v0.8b, v0.8b, v6.8b
// interleave the low 8 lanes of v1 and v0 into 16 lanes, high nibble first
zip1 v0.16b, v1.16b, v0.16b // we write the result via a single store op, so correct for digit order, part two: swap nibble order
// per lane: all-ones where 10 > nibble (decimal digit), zero otherwise
cmhi v1.16b, v5.16b, v0.16b
// per lane: select the '0' bias where the mask is set, the 'a' - 10 bias elsewhere
bsl v1.16b, v3.16b, v4.16b
// nibble + bias = ASCII digit
add v0.16b, v0.16b, v1.16b
str q0, [x0]
ret