-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Baseline Avx2 + Scalar GetPointerToFirstInvalidByte /w tests and benchmarks #10
Conversation
…imdutf/SimdUnicode into AVX2_GetIndexFirstNonAsciiByte
This reverts commit d161523.
…est, no benchmark.
Just a quick update to par since last week: A) I partially fixed the memory allocation. Turns out this part
inside is_incomplete was the culprit, so I made it static and moved it out of the function. There are some light improvements. B) I also ported this optimization: simdjson/simdjson#2113
|
I'm looking... |
Using https://marketplace.visualstudio.com/items?itemName=EgorBogatov.Disasmo&ssr=false#overview I am getting the following assembly: ; Method SimdUnicode.Utf8Utility:GetPointerToFirstInvalidByte(ulong,int):ulong (FullOpts)
G_M000_IG01: ;; offset=0x0000
push rbp
push r15
push r14
push rdi
push rsi
push rbx
sub rsp, 824
vzeroupper
vmovaps xmmword ptr [rsp+0x320], xmm6
vmovaps xmmword ptr [rsp+0x310], xmm7
vmovaps xmmword ptr [rsp+0x300], xmm8
vmovaps xmmword ptr [rsp+0x2F0], xmm9
vmovaps xmmword ptr [rsp+0x2E0], xmm10
vmovaps xmmword ptr [rsp+0x2D0], xmm11
vmovaps xmmword ptr [rsp+0x2C0], xmm12
vmovaps xmmword ptr [rsp+0x2B0], xmm13
vmovaps xmmword ptr [rsp+0x2A0], xmm14
vmovaps xmmword ptr [rsp+0x290], xmm15
lea rbp, [rsp+0x360]
vxorps xmm4, xmm4, xmm4
mov rax, -432
vmovdqa xmmword ptr [rbp+rax-0xE0], xmm4
vmovdqa xmmword ptr [rbp+rax-0xD0], xmm4
vmovdqa xmmword ptr [rbp+rax-0xC0], xmm4
add rax, 48
jne SHORT -5 instr
mov qword ptr [rbp-0xE0], rax
mov rax, 0x24B5C9AFE044
mov qword ptr [rbp-0xD8], rax
G_M000_IG02: ;; offset=0x00BB
mov ebx, edx
mov rsi, rcx
test rsi, rsi
je SHORT G_M000_IG04
G_M000_IG03: ;; offset=0x00C5
test ebx, ebx
jg G_M000_IG07
G_M000_IG04: ;; offset=0x00CD
mov rax, rsi
mov rcx, 0x24B5C9AFE044
cmp qword ptr [rbp-0xD8], rcx
je SHORT G_M000_IG05
call CORINFO_HELP_FAIL_FAST
G_M000_IG05: ;; offset=0x00E8
nop
G_M000_IG06: ;; offset=0x00E9
vmovaps xmm6, xmmword ptr [rsp+0x320]
vmovaps xmm7, xmmword ptr [rsp+0x310]
vmovaps xmm8, xmmword ptr [rsp+0x300]
vmovaps xmm9, xmmword ptr [rsp+0x2F0]
vmovaps xmm10, xmmword ptr [rsp+0x2E0]
vmovaps xmm11, xmmword ptr [rsp+0x2D0]
vmovaps xmm12, xmmword ptr [rsp+0x2C0]
vmovaps xmm13, xmmword ptr [rsp+0x2B0]
vmovaps xmm14, xmmword ptr [rsp+0x2A0]
vmovaps xmm15, xmmword ptr [rsp+0x290]
vzeroupper
add rsp, 824
pop rbx
pop rsi
pop rdi
pop r14
pop r15
pop rbp
ret
G_M000_IG07: ;; offset=0x0156
test byte ptr [(reloc 0x7ff89cc90961)], 1
je G_M000_IG23
G_M000_IG08: ;; offset=0x0163
mov rdx, 0x19447C01D78
mov rdx, gword ptr [rdx]
mov r8d, dword ptr [rdx+0x08]
cmp r8d, 32
jl G_M000_IG24
vmovups ymm6, ymmword ptr [rdx+0x10]
vxorps ymm7, ymm7, ymm7
vxorps ymm8, ymm8, ymm8
vxorps ymm9, ymm9, ymm9
xor edi, edi
cmp ebx, 32
jl G_M000_IG14
G_M000_IG09: ;; offset=0x019C
movsxd rdx, edi
vmovups ymm10, ymmword ptr [rsi+rdx]
vpmovmskb edx, ymm10
test edx, edx
je G_M000_IG13
G_M000_IG10: ;; offset=0x01B1
vxorps ymm0, ymm0, ymm0
vmovups ymmword ptr [rbp-0x130], ymm0
vmovups ymmword ptr [rbp-0x2B0], ymm10
vperm2i128 ymm0, ymm8, ymm10, 33
vmovups ymmword ptr [rbp-0x2D0], ymm0
lea rdx, [rbp-0x2B0]
lea r8, [rbp-0x2D0]
lea rcx, [rbp-0x130]
mov r9d, 15
vextractf128 xmm9, ymm10, 1
vextractf128 xmm11, ymm8, 1
vextractf128 xmm12, ymm7, 1
vextractf128 xmm13, ymm6, 1
call [System.Runtime.Intrinsics.X86.Avx2:AlignRight(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vxorps ymm0, ymm0, ymm0
vmovups ymmword ptr [rbp-0x170], ymm0
vxorps ymm0, ymm0, ymm0
vmovups ymmword ptr [rbp-0x190], ymm0
vmovups ymm0, ymmword ptr [rbp-0x130]
vpsrlw ymm0, ymm0, 4
vmovups ymmword ptr [rbp-0x2B0], ymm0
mov dword ptr [rsp+0x20], 2
mov dword ptr [rsp+0x28], 2
mov dword ptr [rsp+0x30], 2
mov dword ptr [rsp+0x38], 2
mov dword ptr [rsp+0x40], 2
mov dword ptr [rsp+0x48], 2
mov dword ptr [rsp+0x50], 128
mov dword ptr [rsp+0x58], 128
mov dword ptr [rsp+0x60], 128
mov dword ptr [rsp+0x68], 128
mov dword ptr [rsp+0x70], 33
mov dword ptr [rsp+0x78], 1
mov dword ptr [rsp+0x80], 21
mov dword ptr [rsp+0x88], 73
lea rdx, [rbp-0x2B0]
lea rcx, [rbp-0x150]
mov r8d, 2
mov r9d, 2
call [Vector256Extensions:Lookup16(System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vmovups ymm0, ymmword ptr [rbp-0x130]
vpand ymm0, ymm0, ymmword ptr [reloc @RWD00]
vmovups ymmword ptr [rbp-0x2B0], ymm0
mov dword ptr [rsp+0x20], 131
mov dword ptr [rsp+0x28], 131
mov dword ptr [rsp+0x30], 139
mov dword ptr [rsp+0x38], 203
mov dword ptr [rsp+0x40], 203
mov dword ptr [rsp+0x48], 203
mov dword ptr [rsp+0x50], 203
mov dword ptr [rsp+0x58], 203
mov dword ptr [rsp+0x60], 203
mov dword ptr [rsp+0x68], 203
mov dword ptr [rsp+0x70], 203
mov dword ptr [rsp+0x78], 219
mov dword ptr [rsp+0x80], 203
mov dword ptr [rsp+0x88], 203
lea rdx, [rbp-0x2B0]
lea rcx, [rbp-0x170]
mov r8d, 231
mov r9d, 163
call [Vector256Extensions:Lookup16(System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
G_M000_IG11: ;; offset=0x037D
vinsertf128 ymm10, ymm10, xmm9, 1
vpsrlw ymm0, ymm10, 4
vmovups ymmword ptr [rbp-0x2B0], ymm0
mov dword ptr [rsp+0x20], 1
mov dword ptr [rsp+0x28], 1
mov dword ptr [rsp+0x30], 1
mov dword ptr [rsp+0x38], 1
mov dword ptr [rsp+0x40], 1
mov dword ptr [rsp+0x48], 1
mov dword ptr [rsp+0x50], 230
mov dword ptr [rsp+0x58], 174
mov dword ptr [rsp+0x60], 186
mov dword ptr [rsp+0x68], 186
mov dword ptr [rsp+0x70], 1
mov dword ptr [rsp+0x78], 1
mov dword ptr [rsp+0x80], 1
mov dword ptr [rsp+0x88], 1
lea rdx, [rbp-0x2B0]
lea rcx, [rbp-0x190]
mov r8d, 1
mov r9d, 1
vextractf128 xmm9, ymm10, 1
call [Vector256Extensions:Lookup16(System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vmovups ymm0, ymmword ptr [rbp-0x150]
vpand ymm0, ymm0, ymmword ptr [rbp-0x170]
vpand ymm14, ymm0, ymmword ptr [rbp-0x190]
vxorps ymm0, ymm0, ymm0
vmovups ymmword ptr [rbp-0x1B0], ymm0
vxorps ymm0, ymm0, ymm0
vmovups ymmword ptr [rbp-0x1D0], ymm0
vinsertf128 ymm10, ymm10, xmm9, 1
vmovups ymmword ptr [rbp-0x2B0], ymm10
vinsertf128 ymm8, ymm8, xmm11, 1
vperm2i128 ymm0, ymm8, ymm10, 33
vmovups ymmword ptr [rbp-0x2D0], ymm0
lea rdx, [rbp-0x2B0]
lea r8, [rbp-0x2D0]
lea rcx, [rbp-0x1B0]
mov r9d, 14
vextractf128 xmm9, ymm10, 1
vextractf128 xmm11, ymm8, 1
vextractf128 xmm15, ymm14, 1
call [System.Runtime.Intrinsics.X86.Avx2:AlignRight(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vinsertf128 ymm10, ymm10, xmm9, 1
vmovups ymmword ptr [rbp-0x2B0], ymm10
vinsertf128 ymm8, ymm8, xmm11, 1
vperm2i128 ymm0, ymm8, ymm10, 33
vmovups ymmword ptr [rbp-0x2D0], ymm0
lea rdx, [rbp-0x2B0]
lea r8, [rbp-0x2D0]
lea rcx, [rbp-0x1D0]
mov r9d, 13
vextractf128 xmm9, ymm10, 1
call [System.Runtime.Intrinsics.X86.Avx2:AlignRight(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vmovups ymm0, ymmword ptr [rbp-0x1B0]
vpsubusb ymm0, ymm0, ymmword ptr [reloc @RWD32]
vmovups ymm1, ymmword ptr [rbp-0x1D0]
vpsubusb ymm1, ymm1, ymmword ptr [reloc @RWD64]
vpor ymm0, ymm0, ymm1
vpand ymm0, ymm0, ymmword ptr [reloc @RWD96]
vinsertf128 ymm14, ymm14, xmm15, 1
vpxor ymm0, ymm0, ymm14
G_M000_IG12: ;; offset=0x0532
vinsertf128 ymm7, ymm7, xmm12, 1
vpor ymm7, ymm7, ymm0
vinsertf128 ymm10, ymm10, xmm9, 1
vinsertf128 ymm6, ymm6, xmm13, 1
vpsubusw ymm0, ymm10, ymm6
vmovaps ymm9, ymm0
G_M000_IG13: ;; offset=0x0550
vmovaps ymm8, ymm10
add edi, 32
lea edx, [rdi+0x20]
cmp edx, ebx
jle G_M000_IG09
G_M000_IG14: ;; offset=0x0563
cmp edi, ebx
jge G_M000_IG20
lea r14, [rbp-0xF8]
xor edx, edx
mov r15d, ebx
sub r15d, edi
test r15d, r15d
jle SHORT G_M000_IG16
align [1 bytes for IG15]
G_M000_IG15: ;; offset=0x0580
cmp edx, 32
jae G_M000_IG25
mov ecx, edx
lea eax, [rdi+rdx]
cdqe
movzx rax, byte ptr [rax+rsi]
mov byte ptr [r14+rcx], al
inc edx
cmp r15d, edx
jg SHORT G_M000_IG15
G_M000_IG16: ;; offset=0x059F
mov edx, 32
mov rcx, 0x7FF89CC99F60
vextractf128 xmm11, ymm8, 1
vextractf128 xmm12, ymm7, 1
vextractf128 xmm10, ymm9, 1
vextractf128 xmm13, ymm6, 1
call CORINFO_HELP_NEWARR_1_VC
lea rdx, bword ptr [rax+0x10]
vmovdqu ymm0, ymmword ptr [r14]
vmovdqu ymmword ptr [rdx], ymm0
mov edx, dword ptr [rax+0x08]
cmp edx, 32
vinsertf128 ymm8, ymm8, xmm11, 1
vinsertf128 ymm7, ymm7, xmm12, 1
vinsertf128 ymm9, ymm9, xmm10, 1
vinsertf128 ymm6, ymm6, xmm13, 1
jl G_M000_IG24
vmovups ymm10, ymmword ptr [rax+0x10]
vpmovmskb edx, ymm10
test edx, edx
je G_M000_IG19
vmovups ymmword ptr [rbp-0x2B0], ymm10
vperm2i128 ymm0, ymm8, ymm10, 33
vmovups ymmword ptr [rbp-0x2D0], ymm0
lea rdx, [rbp-0x2B0]
lea r8, [rbp-0x2D0]
lea rcx, [rbp-0x1F0]
mov r9d, 15
vextractf128 xmm11, ymm8, 1
vextractf128 xmm12, ymm7, 1
vextractf128 xmm9, ymm10, 1
vextractf128 xmm13, ymm6, 1
call [System.Runtime.Intrinsics.X86.Avx2:AlignRight(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vmovups ymm0, ymmword ptr [rbp-0x1F0]
vpsrlw ymm0, ymm0, 4
vmovups ymmword ptr [rbp-0x2B0], ymm0
mov dword ptr [rsp+0x20], 2
mov dword ptr [rsp+0x28], 2
mov dword ptr [rsp+0x30], 2
mov dword ptr [rsp+0x38], 2
mov dword ptr [rsp+0x40], 2
mov dword ptr [rsp+0x48], 2
mov dword ptr [rsp+0x50], 128
mov dword ptr [rsp+0x58], 128
mov dword ptr [rsp+0x60], 128
mov dword ptr [rsp+0x68], 128
mov dword ptr [rsp+0x70], 33
mov dword ptr [rsp+0x78], 1
mov dword ptr [rsp+0x80], 21
mov dword ptr [rsp+0x88], 73
lea rdx, [rbp-0x2B0]
lea rcx, [rbp-0x210]
mov r8d, 2
mov r9d, 2
call [Vector256Extensions:Lookup16(System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vmovups ymm0, ymmword ptr [rbp-0x1F0]
vpand ymm0, ymm0, ymmword ptr [reloc @RWD00]
vmovups ymmword ptr [rbp-0x2B0], ymm0
mov dword ptr [rsp+0x20], 131
G_M000_IG17: ;; offset=0x0728
mov dword ptr [rsp+0x28], 131
mov dword ptr [rsp+0x30], 139
mov dword ptr [rsp+0x38], 203
mov dword ptr [rsp+0x40], 203
mov dword ptr [rsp+0x48], 203
mov dword ptr [rsp+0x50], 203
mov dword ptr [rsp+0x58], 203
mov dword ptr [rsp+0x60], 203
mov dword ptr [rsp+0x68], 203
mov dword ptr [rsp+0x70], 203
mov dword ptr [rsp+0x78], 219
mov dword ptr [rsp+0x80], 203
mov dword ptr [rsp+0x88], 203
lea rdx, [rbp-0x2B0]
lea rcx, [rbp-0x230]
mov r8d, 231
mov r9d, 163
call [Vector256Extensions:Lookup16(System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vinsertf128 ymm10, ymm10, xmm9, 1
vpsrlw ymm0, ymm10, 4
vmovups ymmword ptr [rbp-0x2B0], ymm0
mov dword ptr [rsp+0x20], 1
mov dword ptr [rsp+0x28], 1
mov dword ptr [rsp+0x30], 1
mov dword ptr [rsp+0x38], 1
mov dword ptr [rsp+0x40], 1
mov dword ptr [rsp+0x48], 1
mov dword ptr [rsp+0x50], 230
mov dword ptr [rsp+0x58], 174
mov dword ptr [rsp+0x60], 186
mov dword ptr [rsp+0x68], 186
mov dword ptr [rsp+0x70], 1
mov dword ptr [rsp+0x78], 1
mov dword ptr [rsp+0x80], 1
mov dword ptr [rsp+0x88], 1
lea rdx, [rbp-0x2B0]
lea rcx, [rbp-0x250]
mov r8d, 1
mov r9d, 1
vextractf128 xmm9, ymm10, 1
call [Vector256Extensions:Lookup16(System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte,ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vmovups ymm0, ymmword ptr [rbp-0x210]
vpand ymm0, ymm0, ymmword ptr [rbp-0x230]
vpand ymm14, ymm0, ymmword ptr [rbp-0x250]
vinsertf128 ymm10, ymm10, xmm9, 1
vmovups ymmword ptr [rbp-0x2B0], ymm10
vinsertf128 ymm8, ymm8, xmm11, 1
vperm2i128 ymm0, ymm8, ymm10, 33
vmovups ymmword ptr [rbp-0x2D0], ymm0
lea rdx, [rbp-0x2B0]
lea r8, [rbp-0x2D0]
lea rcx, [rbp-0x270]
mov r9d, 14
vextractf128 xmm11, ymm8, 1
vextractf128 xmm9, ymm10, 1
vextractf128 xmm15, ymm14, 1
call [System.Runtime.Intrinsics.X86.Avx2:AlignRight(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vinsertf128 ymm10, ymm10, xmm9, 1
vmovups ymmword ptr [rbp-0x2B0], ymm10
vinsertf128 ymm8, ymm8, xmm11, 1
vperm2i128 ymm0, ymm8, ymm10, 33
vmovups ymmword ptr [rbp-0x2D0], ymm0
G_M000_IG18: ;; offset=0x08F5
lea rdx, [rbp-0x2B0]
lea r8, [rbp-0x2D0]
lea rcx, [rbp-0x290]
mov r9d, 13
vextractf128 xmm9, ymm10, 1
call [System.Runtime.Intrinsics.X86.Avx2:AlignRight(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],ubyte):System.Runtime.Intrinsics.Vector256`1[ubyte]]
vmovups ymm0, ymmword ptr [rbp-0x270]
vpsubusb ymm0, ymm0, ymmword ptr [reloc @RWD32]
vmovups ymm1, ymmword ptr [rbp-0x290]
vpsubusb ymm1, ymm1, ymmword ptr [reloc @RWD64]
vpor ymm0, ymm0, ymm1
vpand ymm0, ymm0, ymmword ptr [reloc @RWD96]
vinsertf128 ymm14, ymm14, xmm15, 1
vpxor ymm0, ymm0, ymm14
vinsertf128 ymm7, ymm7, xmm12, 1
vpor ymm7, ymm7, ymm0
vinsertf128 ymm10, ymm10, xmm9, 1
vinsertf128 ymm6, ymm6, xmm13, 1
vpsubusw ymm0, ymm10, ymm6
vmovaps ymm9, ymm0
G_M000_IG19: ;; offset=0x0971
add r15d, edi
mov edi, r15d
G_M000_IG20: ;; offset=0x0977
vpor ymm7, ymm7, ymm9
movsxd rax, ebx
add rax, rsi
movsxd rcx, edi
add rcx, rsi
vptest ymm7, ymm7
cmovne rax, rcx
mov rcx, 0x24B5C9AFE044
cmp qword ptr [rbp-0xD8], rcx
je SHORT G_M000_IG21
call CORINFO_HELP_FAIL_FAST
G_M000_IG21: ;; offset=0x09A9
nop
G_M000_IG22: ;; offset=0x09AA
vmovaps xmm6, xmmword ptr [rsp+0x320]
vmovaps xmm7, xmmword ptr [rsp+0x310]
vmovaps xmm8, xmmword ptr [rsp+0x300]
vmovaps xmm9, xmmword ptr [rsp+0x2F0]
vmovaps xmm10, xmmword ptr [rsp+0x2E0]
vmovaps xmm11, xmmword ptr [rsp+0x2D0]
vmovaps xmm12, xmmword ptr [rsp+0x2C0]
vmovaps xmm13, xmmword ptr [rsp+0x2B0]
vmovaps xmm14, xmmword ptr [rsp+0x2A0]
vmovaps xmm15, xmmword ptr [rsp+0x290]
vzeroupper
add rsp, 824
pop rbx
pop rsi
pop rdi
pop r14
pop r15
pop rbp
ret
G_M000_IG23: ;; offset=0x0A17
mov rcx, 0x7FF89CC90928
mov edx, 9
call CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
jmp G_M000_IG08
G_M000_IG24: ;; offset=0x0A30
call [System.ThrowHelper:ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException()]
int3
G_M000_IG25: ;; offset=0x0A37
call CORINFO_HELP_RNGCHKFAIL
int3
RWD00 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
RWD32 dq 6060606060606060h, 6060606060606060h, 6060606060606060h, 6060606060606060h
RWD64 dq 7070707070707070h, 7070707070707070h, 7070707070707070h, 7070707070707070h
RWD96 dq 8080808080808080h, 8080808080808080h, 8080808080808080h, 8080808080808080h
; Total bytes of code: 2621 |
src/UTF8_validation.cs
Outdated
checker.check_eof(); | ||
if (checker.errors()) | ||
{ | ||
return pInputBuffer + processedLength; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function only checks for errors at the end. So I expect that whether checker.errors()
is true or false, it will still scan the entire input.
Now, this is fine per se, but what you have implemented is a function to check whether the input is valid or invalid. The equivalent of GetPointerToFirstInvalidByte in simdutf is validate_utf8_with_errors
and it seems that you have implemented validate_utf8. No big deal, but it makes any benchmarking premature.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha! I'm on it first thing in the morning.
Some possible optimizations.
…dUnicode into AVX2_UTF8_validation
This is a draft pull request.
What I've done thus far:
-Added the content of the scalar GetPointerToFirstInvalidByte's PR
-Added tests + benchmarks for said function
-Rechecked the code for the utf8 random generator and used C#'s native functions more.
-For the AVX2 part, everything compiles. It still needs a lot of polish, and there are one or two functions that I know do the wrong thing, but grosso modo the structure is there, so hopefully it doesn't take too long to finish it.
I am adding the final benchmarks for the scalar version and will finalize my review of last week's PR this morning.