-
Notifications
You must be signed in to change notification settings - Fork 561
/
Copy pathhandy.h
3166 lines (2725 loc) · 144 KB
/
handy.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* handy.h
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000,
* 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2012 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
/* IMPORTANT NOTE: Everything whose name begins with an underscore is for
* internal core Perl use only. */
#ifndef PERL_HANDY_H_ /* Guard against nested #inclusion */
#define PERL_HANDY_H_
#ifndef PERL_CORE
/* Null(type): NULL cast to the given pointer type.  Like everything inside
 * the enclosing #ifndef, it is only defined when PERL_CORE is not. */
# define Null(type) ((type)NULL)
/*
=for apidoc_section $string
=for apidoc AmnU||Nullch
Null character pointer. (No longer available when C<PERL_CORE> is
defined.)
=for apidoc_section $SV
=for apidoc AmnU||Nullsv
Null SV pointer. (No longer available when C<PERL_CORE> is defined.)
=cut
Below are signatures of functions from config.h which can't easily be gleaned
from it, and are very unlikely to change
=for apidoc_defn Am|int|Sigsetjmp|jmp_buf env|int savesigs
=for apidoc_defn Am|void|Siglongjmp|jmp_buf env|int val
=for apidoc_defn Am|void *|FILE_ptr|FILE * f
=for apidoc_defn Am|Size_t|FILE_cnt|FILE * f
=for apidoc_defn Am|void *|FILE_base|FILE * f
=for apidoc_defn Am|Size_t|FILE_bufsiz|FILE *f
=for apidoc_defn Amu|token|CAT2|token x|token y
=for apidoc_defn Amu|string|STRINGIFY|token x
=for apidoc_defn Am|double|Drand01
=for apidoc_defn Am|void|seedDrand01|Rand_seed_t x
=for apidoc_defn Am|char *|Gconvert|double x|Size_t n|bool t|char * b
=cut
*/
/* Typed null pointers built on Null(); they sit inside the same
 * '#ifndef PERL_CORE' region, so they are unavailable when PERL_CORE is
 * defined. */
# define Nullch Null(char*)
# define Nullfp Null(PerlIO*)
# define Nullsv Null(SV*)
#endif
/* Force TRUE and FALSE to the values below, discarding any definitions that
 * were already in effect.  #undef of a name that is not currently a macro is
 * a no-op, so no #ifdef guard is required. */
#undef TRUE
#undef FALSE
#define FALSE (0)
#define TRUE (1)
/*
=for apidoc_section $SV
=for apidoc Am |AV * |MUTABLE_AV |AV * p
=for apidoc_item |CV * |MUTABLE_CV |CV * p
=for apidoc_item |GV * |MUTABLE_GV |GV * p
=for apidoc_item |HV * |MUTABLE_HV |HV * p
=for apidoc_item |IO * |MUTABLE_IO |IO * p
=for apidoc_item |void *|MUTABLE_PTR|void * p
=for apidoc_item |SV * |MUTABLE_SV |SV * p
The C<MUTABLE_I<*>>() macros cast pointers to the types shown, in such a way
(compiler permitting) that casting away const-ness will give a warning;
e.g.:
const SV *sv = ...;
AV *av1 = (AV*)sv; <== BAD: the const has been silently
cast away
AV *av2 = MUTABLE_AV(sv); <== GOOD: it may warn
C<MUTABLE_PTR> is the base macro used to derive new casts. The other
already-built-in ones return pointers to what their names indicate.
=cut
The brace group version will raise a diagnostic if 'p' is const; the other
blindly casts away const.
*/
/* MUTABLE_PTR(ptr) yields 'ptr' as a plain 'void *'.  With GCC brace groups
 * the value is routed through a 'void *' temporary, so a const-qualified
 * argument can draw a compiler diagnostic; the fallback is a bare cast. */
#if ! defined(PERL_USE_GCC_BRACE_GROUPS)
#  define MUTABLE_PTR(ptr) ((void *) (ptr))
#else
#  define MUTABLE_PTR(ptr) ({ void *p_ = (ptr); p_; })
#endif

/* Typed wrappers: each casts the result of MUTABLE_PTR() to the type named
 * in the macro. */
#define MUTABLE_AV(ptr) ((AV *)MUTABLE_PTR(ptr))
#define MUTABLE_CV(ptr) ((CV *)MUTABLE_PTR(ptr))
#define MUTABLE_GV(ptr) ((GV *)MUTABLE_PTR(ptr))
#define MUTABLE_HV(ptr) ((HV *)MUTABLE_PTR(ptr))
#define MUTABLE_IO(ptr) ((IO *)MUTABLE_PTR(ptr))
#define MUTABLE_SV(ptr) ((SV *)MUTABLE_PTR(ptr))
/*
=for apidoc_section $SV
=for apidoc Am |AV *|AV_FROM_REF|SV * ref
=for apidoc_item |CV *|CV_FROM_REF|SV * ref
=for apidoc_item |GV *|GV_FROM_REF|SV * ref
=for apidoc_item |HV *|HV_FROM_REF|SV * ref
The C<I<*>V_FROM_REF> macros extract the C<SvRV()> from a given reference SV
and return a suitably-cast pointer to the referenced SV. When running
under C<-DDEBUGGING>, assertions are also applied that check that I<ref> is
definitely a reference SV that refers to an SV of the right type.
=cut
*/
/* xV_FROM_REF(XV, ref): SvRV(ref) cast to 'XV *'.  The checked variant is
 * used only when both DEBUGGING and GCC brace groups are available; it
 * evaluates 'ref' once into a temporary and asserts that it is a reference
 * whose referent has type SVt_PV<XV> before yielding the cast pointer. */
#if ! defined(DEBUGGING) || ! defined(PERL_USE_GCC_BRACE_GROUPS)
#  define xV_FROM_REF(XV, ref) ((XV *)(SvRV(ref)))
#else
#  define xV_FROM_REF(XV, ref) \
    ({ SV *ref_ = ref; \
       assert(SvROK(ref_)); \
       assert(SvTYPE(SvRV(ref_)) == SVt_PV ## XV); \
       (XV *)(SvRV(ref_)); })
#endif

/* One public macro per supported referent type */
#define AV_FROM_REF(ref) xV_FROM_REF(AV, ref)
#define CV_FROM_REF(ref) xV_FROM_REF(CV, ref)
#define GV_FROM_REF(ref) xV_FROM_REF(GV, ref)
#define HV_FROM_REF(ref) xV_FROM_REF(HV, ref)
#ifndef __cplusplus
# include <stdbool.h>
#endif
/*
=for apidoc_section $casting
=for apidoc Am|bool|cBOOL|bool expr
Cast-to-bool. When Perl was able to be compiled on pre-C99 compilers, a
C<(bool)> cast didn't necessarily do the right thing, so this macro was
created (and made somewhat complicated to work around bugs in old
compilers). Now, many years later, and C99 is used, this is no longer
required, but is kept for backwards compatibility.
=cut
*/
/* cBOOL(expr): 'expr' converted to bool with an ordinary cast */
#define cBOOL(expr) ((bool) (expr))
/* Try to figure out __func__ or __FUNCTION__ equivalent, if any.
* XXX Should really be a Configure probe, with HAS__FUNCTION__
* and FUNCTION__ as results.
* XXX Similarly, a Configure probe for __FILE__ and __LINE__ is needed. */
/* Pick a spelling for "name of the enclosing function":
 *   - C99 (or Sun's compiler): the standard __func__
 *   - DEC C (__DECC_VER):      no usable identifier, so string placeholders
 *   - anything else:           the __FUNCTION__ extension
 * FUNCTION__ and SAFE_FUNCTION__ differ only in the DEC C case, where the
 * former is the empty string and the latter is "UNKNOWN". */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__SUNPRO_C)) /* C99 or close enough. */
# define FUNCTION__ __func__
# define SAFE_FUNCTION__ __func__
#elif (defined(__DECC_VER)) /* Tru64 or VMS, and strict C89 being used, but not modern enough cc (in Tru64, -c99 not known, only -std1). */
# define FUNCTION__ ("")
# define SAFE_FUNCTION__ ("UNKNOWN")
#else
# define FUNCTION__ __FUNCTION__ /* Common extension. */
# define SAFE_FUNCTION__ __FUNCTION__ /* Common extension. */
#endif
/* XXX A note on the perl source internal type system. The
original intent was that I32 be *exactly* 32 bits.
Currently, we only guarantee that I32 is *at least* 32 bits.
Specifically, if int is 64 bits, then so is I32. (This is the case
for the Cray.) This has the advantage of meshing nicely with
standard library calls (where we pass an I32 and the library is
expecting an int), but the disadvantage that an I32 is not 32 bits.
Andy Dougherty August 1996
There is no guarantee that there is *any* integral type with
exactly 32 bits. It is perfectly legal for a system to have
sizeof(short) == sizeof(int) == sizeof(long) == 8.
Similarly, there is no guarantee that I16 and U16 have exactly 16
bits.
For dealing with issues that may arise from various 32/64-bit
systems, we will ask Configure to check out
SHORTSIZE == sizeof(short)
INTSIZE == sizeof(int)
LONGSIZE == sizeof(long)
LONGLONGSIZE == sizeof(long long) (if HAS_LONG_LONG)
PTRSIZE == sizeof(void *)
DOUBLESIZE == sizeof(double)
LONG_DOUBLESIZE == sizeof(long double) (if HAS_LONG_DOUBLE).
*/
/* When I_INTTYPES is defined (presumably by Configure -- confirm), pull in
 * <inttypes.h>.  If the *_MIN_BROKEN symbols are also defined, replace the
 * system's INT32_MIN / INT64_MIN with explicit constants.  Each replacement
 * is written as (-MAX - 1) so that no out-of-range positive literal is
 * needed. */
#ifdef I_INTTYPES /* e.g. Linux has int64_t without <inttypes.h> */
# include <inttypes.h>
# ifdef INT32_MIN_BROKEN
# undef INT32_MIN
# define INT32_MIN (-2147483647-1)
# endif
# ifdef INT64_MIN_BROKEN
# undef INT64_MIN
# define INT64_MIN (-9223372036854775807LL-1)
# endif
#endif
/* Perl's basic fixed-name integer typedefs.  The underlying ?TYPE names are
 * defined outside this part of the file.  As the note above explains, these
 * are only guaranteed to be *at least* the indicated width.  The 64-bit pair
 * exists only when QUADKIND is defined. */
typedef I8TYPE I8;
typedef U8TYPE U8;
typedef I16TYPE I16;
typedef U16TYPE U16;
typedef I32TYPE I32;
typedef U32TYPE U32;
#ifdef QUADKIND
typedef I64TYPE I64;
typedef U64TYPE U64;
#endif
/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
Please search CHAR_MAX in perl.h for further details. */
/* Limits for U8, I16 and U16.  Each one uses the corresponding <stdint.h>
 * style macro when that macro is defined, and otherwise falls back to the
 * PERL_* limit of the matching native C type. */

/* U8 */
#ifndef UINT8_MAX
#  define U8_MAX PERL_UCHAR_MAX
#else
#  define U8_MAX UINT8_MAX
#endif
#ifndef UINT8_MIN
#  define U8_MIN PERL_UCHAR_MIN
#else
#  define U8_MIN UINT8_MIN
#endif

/* I16 */
#ifndef INT16_MAX
#  define I16_MAX PERL_SHORT_MAX
#else
#  define I16_MAX INT16_MAX
#endif
#ifndef INT16_MIN
#  define I16_MIN PERL_SHORT_MIN
#else
#  define I16_MIN INT16_MIN
#endif

/* U16 */
#ifndef UINT16_MAX
#  define U16_MAX PERL_USHORT_MAX
#else
#  define U16_MAX UINT16_MAX
#endif
#ifndef UINT16_MIN
#  define U16_MIN PERL_USHORT_MIN
#else
#  define U16_MIN UINT16_MIN
#endif
/* Limits for I32: prefer INT32_MAX / INT32_MIN when defined.  Otherwise use
 * the limits of 'int' when LONGSIZE exceeds 4, else the limits of 'long'. */
#ifndef INT32_MAX
#  if LONGSIZE > 4
#    define I32_MAX PERL_INT_MAX
#  else
#    define I32_MAX PERL_LONG_MAX
#  endif
#else
#  define I32_MAX INT32_MAX
#endif
#ifndef INT32_MIN
#  if LONGSIZE > 4
#    define I32_MIN PERL_INT_MIN
#  else
#    define I32_MIN PERL_LONG_MIN
#  endif
#else
#  define I32_MIN INT32_MIN
#endif
/* U32_MAX: upper limit for U32.
 *
 * When the system supplies UINT32_MAX, use it -- unless UINT32_MAX_BROKEN is
 * defined, in which case the literal value is spelled out.  The value taken
 * must be UINT32_MAX itself, not UINT_MAX: UINT_MAX follows the width of
 * 'unsigned int', which need not be 32 bits, and is declared in <limits.h>
 * rather than by the header that provides UINT32_MAX.
 *
 * Without UINT32_MAX, fall back to the limit of 'unsigned int' when LONGSIZE
 * exceeds 4, else to the limit of 'unsigned long'. */
#ifdef UINT32_MAX
# ifndef UINT32_MAX_BROKEN /* e.g. HP-UX with gcc messes this up */
# define U32_MAX UINT32_MAX
# else
# define U32_MAX 4294967295U
# endif
#elif LONGSIZE > 4
# define U32_MAX PERL_UINT_MAX
#else
# define U32_MAX PERL_ULONG_MAX
#endif
/* U32_MIN: use UINT32_MIN when some header defines it; otherwise the
 * minimum of 'unsigned int' when LONGSIZE exceeds 4, else that of
 * 'unsigned long'. */
#ifndef UINT32_MIN
#  if LONGSIZE > 4
#    define U32_MIN PERL_UINT_MIN
#  else
#    define U32_MIN PERL_ULONG_MIN
#  endif
#else
#  define U32_MIN UINT32_MIN
#endif
/*
=for apidoc_section $integer
=for apidoc Ay|| PERL_INT_FAST8_T
=for apidoc_item PERL_INT_FAST16_T
=for apidoc_item PERL_UINT_FAST8_T
=for apidoc_item PERL_UINT_FAST16_T
These are equivalent to the correspondingly-named C99 typedefs on platforms
that have those; they evaluate to C<int> and C<unsigned int> on platforms that
don't, so that you can portably take advantage of this C99 feature.
=cut
*/
/* "Fast" 8- and 16-bit integer types.  Without I_STDINT they are plain
 * int / unsigned int; with it they alias the C99 *_fast*_t typedefs. */
#ifndef I_STDINT
typedef int          PERL_INT_FAST8_T;
typedef unsigned int PERL_UINT_FAST8_T;
typedef int          PERL_INT_FAST16_T;
typedef unsigned int PERL_UINT_FAST16_T;
#else
typedef int_fast8_t   PERL_INT_FAST8_T;
typedef uint_fast8_t  PERL_UINT_FAST8_T;
typedef int_fast16_t  PERL_INT_FAST16_T;
typedef uint_fast16_t PERL_UINT_FAST16_T;
#endif
/* log(2) (i.e., log base 10 of 2) is pretty close to 0.30103, just in case
* anyone is grepping for it. So BIT_DIGITS gives the number of decimal digits
* required to represent any possible unsigned number containing N bits.
* TYPE_DIGITS gives the number of decimal digits required to represent any
* possible unsigned number of type T. */
/* BIT_DIGITS(N):  decimal digits needed for an N-bit unsigned value, using
 *                 the integer approximation log10(2) =~ 146/485.
 * TYPE_DIGITS(T): the same for type T, counting 8 bits per sizeof unit.
 * TYPE_CHARS(T):  TYPE_DIGITS plus two more characters. */
#define BIT_DIGITS(N) (1 + ((N)*146)/485) /* log10(2) =~ 146/485 */
#define TYPE_DIGITS(T) BIT_DIGITS(8 * sizeof(T))
#define TYPE_CHARS(T) (2 + TYPE_DIGITS(T)) /* sign, NUL */
/* Unused by core; should be deprecated */
/* Ctl(ch): keep only the low five bits of 'ch' */
#define Ctl(ch) ((ch) & 0x1F)
#if defined(PERL_CORE) || defined(PERL_EXT)
/* MIN / MAX are only supplied when nothing else has defined them already.
 * Each argument appears more than once in the expansion, so an argument with
 * side effects may be evaluated more than once. */
# if ! defined(MIN)
#   define MIN(x,y) ((x) < (y) ? (x) : (y))
# endif
# if ! defined(MAX)
#   define MAX(x,y) ((x) > (y) ? (x) : (y))
# endif
#endif
/* Returns a boolean as to whether the input unsigned number is a power of 2
* (2**0, 2**1, etc). In other words if it has just a single bit set.
* If not, subtracting 1 would leave the uppermost bit set, so the & would
* yield non-zero */
#if defined(PERL_CORE) || defined(PERL_EXT)
/* Non-zero, and clearing the lowest set bit leaves nothing behind */
# define isPOWER_OF_2(num) ((num) != 0 && ((num) & ((num) - 1)) == 0)
#endif
/* Returns a mask with the lowest n bits set */
/* nBIT_MASK(nbits): a uintmax_t with the lowest 'nbits' bits set.  The shift
 * is performed on UINTMAX_C(1), so 'nbits' must be less than the width of
 * uintmax_t. */
#define nBIT_MASK(nbits) ((UINTMAX_C(1) << (nbits)) - UINTMAX_C(1))
/* The largest unsigned number that will fit into n bits */
#define nBIT_UMAX(nbits) nBIT_MASK(nbits)
/*
=for apidoc_section $directives
=for apidoc Am||__ASSERT_|bool expr
This is a helper macro to avoid preprocessor issues, replaced by nothing
unless under DEBUGGING, where it expands to an assert of its argument,
followed by a comma (hence the comma operator). If we just used a straight
assert(), we would get a comma with nothing before it when not DEBUGGING.
=cut
We also use empty definition under Coverity since the __ASSERT_
checks often check for things that Really Cannot Happen, and Coverity
detects that and gets all excited. */
/* __ASSERT_(statement) expands to nothing unless this is a DEBUGGING build
 * that is neither being scanned by Coverity nor restricted by
 * PERL_SMALL_MACRO_BUFFER; in that one case it expands to
 * 'assert(statement),' -- note the trailing comma. */
#if    ! defined(DEBUGGING)                                         \
    ||   defined(__COVERITY__)                                      \
    ||   defined(PERL_SMALL_MACRO_BUFFER)
#  define __ASSERT_(statement)
#else
#  define __ASSERT_(statement) assert(statement),
#endif
/*
=for apidoc_section $SV
=for apidoc Ama|SV*|newSVpvs|"literal string"
Like C<newSVpvn>, but takes a literal string instead of a
string/length pair.
=for apidoc Ama|SV*|newSVpvs_flags|"literal string"|U32 flags
Like C<newSVpvn_flags>, but takes a literal string instead of
a string/length pair.
=for apidoc Ama|SV*|newSVpvs_share|"literal string"
Like C<newSVpvn_share>, but takes a literal string instead of
a string/length pair and omits the hash parameter.
=for apidoc Am|SV *|sv_setref_pvs|SV *const rv|const char *const classname|"literal string"
Like C<sv_setref_pvn>, but takes a literal string instead of
a string/length pair.
=cut
*/
/* Sandwich the argument between two empty string literals.  Adjacent string
 * literals concatenate, so a literal argument comes through unchanged, while
 * anything that is not a string literal fails to compile. */
#define ASSERT_IS_LITERAL(lit) ("" lit "")
/*
=for apidoc_section $string
=for apidoc Amu|pair|STR_WITH_LEN|"literal string"
Returns two comma separated tokens of the input literal string, and its length.
This is a convenience macro which helps out in some API calls.
Note that it can't be used as an argument to macros or functions that under
some configurations might be macros, which means that it requires the full
Perl_xxx(aTHX_ ...) form for any API calls where it's used.
=cut
*/
/* Expands to two comma-separated expressions: the literal itself (checked
 * by ASSERT_IS_LITERAL) and its length excluding the terminating NUL. */
#define STR_WITH_LEN(lit) ASSERT_IS_LITERAL(lit), (sizeof(lit) - 1)
/* STR_WITH_LEN() shortcuts */
/* Literal-string front ends for the Perl_newSVpvn* functions: the literal is
 * expanded by STR_WITH_LEN into the pointer/length argument pair.
 * newSVpvs_share passes 0 as the final argument. */
#define newSVpvs(lit) Perl_newSVpvn(aTHX_ STR_WITH_LEN(lit))
#define newSVpvs_flags(lit, flags) \
    Perl_newSVpvn_flags(aTHX_ STR_WITH_LEN(lit), flags)
#define newSVpvs_share(lit) Perl_newSVpvn_share(aTHX_ STR_WITH_LEN(lit), 0)
/*
=for apidoc_defn Am|void|sv_catpvs_flags|SV * const dsv|"literal string"|I32 flags
=for apidoc_defn Am|void|sv_catpvs_nomg|SV * const dsv|"literal string"
=for apidoc_defn Am|void|sv_catpvs|SV * const dsv|"literal string"
=for apidoc_defn Am|void|sv_catpvs_mg|SV * const dsv|"literal string"
=cut
*/
/* Append a literal string to dsv via Perl_sv_catpvn_flags().  The three
 * convenience forms differ only in the flags they pass:
 *   sv_catpvs_nomg  0
 *   sv_catpvs       SV_GMAGIC
 *   sv_catpvs_mg    SV_GMAGIC|SV_SMAGIC */
#define sv_catpvs_flags(dsv, lit, flags) \
    Perl_sv_catpvn_flags(aTHX_ dsv, STR_WITH_LEN(lit), flags)
#define sv_catpvs_nomg(dsv, lit) sv_catpvs_flags(dsv, lit, 0)
#define sv_catpvs(dsv, lit)      sv_catpvs_flags(dsv, lit, SV_GMAGIC)
#define sv_catpvs_mg(dsv, lit)   sv_catpvs_flags(dsv, lit, SV_GMAGIC|SV_SMAGIC)
/*
=for apidoc_defn Am|void|sv_setpvs |SV *const sv|"literal string"
=for apidoc_defn Am|void|sv_setpvs_mg|SV *const sv|"literal string"
=cut
*/
/* Literal-string front ends for Perl_sv_setpvn(), Perl_sv_setpvn_mg() and
 * Perl_sv_setref_pvn(): STR_WITH_LEN supplies the pointer/length pair. */
#define sv_setpvs(dsv, lit) \
    Perl_sv_setpvn(aTHX_ dsv, STR_WITH_LEN(lit))
#define sv_setpvs_mg(dsv, lit) \
    Perl_sv_setpvn_mg(aTHX_ dsv, STR_WITH_LEN(lit))
#define sv_setref_pvs(rv, classname, lit) \
    Perl_sv_setref_pvn(aTHX_ rv, classname, STR_WITH_LEN(lit))
/*
=for apidoc_defn Ama|char*|savepvs|"literal string"
=for apidoc_defn Ama|char*|savesharedpvs|"literal string"
=cut
*/
/* Literal-string front ends for Perl_savepvn() and Perl_savesharedpvn() */
#define savepvs(lit) \
    Perl_savepvn(aTHX_ STR_WITH_LEN(lit))
#define savesharedpvs(lit) \
    Perl_savesharedpvn(aTHX_ STR_WITH_LEN(lit))
/*
=for apidoc_defn Am|HV*|gv_stashpvs|"name"|I32 create
=cut
*/
/* Literal-string front end for Perl_gv_stashpvn() */
#define gv_stashpvs(lit, create) Perl_gv_stashpvn(aTHX_ STR_WITH_LEN(lit), create)
/*
=for apidoc_defn Am|GV *|gv_fetchpvs|"name"|I32 flags|const svtype sv_type
=for apidoc_defn Am|GV *|gv_fetchpvn|const char * nambeg|STRLEN full_len|I32 flags|const svtype sv_type
=cut
*/
/* gv_fetchpvs: literal-string front end for Perl_gv_fetchpvn_flags().
 * gv_fetchpvn: plain alias for gv_fetchpvn_flags. */
#define gv_fetchpvs(lit, flags, sv_type) \
    Perl_gv_fetchpvn_flags(aTHX_ STR_WITH_LEN(lit), flags, sv_type)
#define gv_fetchpvn gv_fetchpvn_flags
/*
=for apidoc_defn mx|void|lex_stuff_pvs|"pv"|U32 flags
=cut
*/
/* Literal-string front end for Perl_lex_stuff_pvn() */
#define lex_stuff_pvs(lit, flags) \
    Perl_lex_stuff_pvn(aTHX_ STR_WITH_LEN(lit), flags)
/*
=for apidoc_defn Am|CV *|get_cvs|"name"|I32 flags
=cut
*/
/* Literal-string front end for Perl_get_cvn_flags() */
#define get_cvs(lit, flags) Perl_get_cvn_flags(aTHX_ STR_WITH_LEN(lit), (flags))
/* internal helpers */
/* Transitional */
/* For each of the three version components: if the new-style
 * PERL_VERSION_* name already exists, remove the corresponding old-style
 * name; otherwise define the new-style name as an alias for the old one. */
#ifdef PERL_VERSION_MAJOR
#  undef PERL_REVISION /* We don't want code to be using these */
#else
#  define PERL_VERSION_MAJOR PERL_REVISION
#endif

#ifdef PERL_VERSION_MINOR
#  undef PERL_VERSION
#else
#  define PERL_VERSION_MINOR PERL_VERSION
#endif

#ifdef PERL_VERSION_PATCH
#  undef PERL_SUBVERSION
#else
#  define PERL_VERSION_PATCH PERL_SUBVERSION
#endif
/* Pack a (major, minor, patch) triple into one integer that orders the same
 * way versions do: three decimal digits each for minor and patch, with the
 * whole value scaled by ten. */
#define PERL_JNP_TO_DECIMAL_(j_, n_, p_)                                \
    /* '10*' leaves room for things like alpha, beta, releases */       \
    (10 * (((j_) * 1000000) + ((n_) * 1000) + (p_)))

/* The perl being compiled, in that same packed form */
#define PERL_DECIMAL_VERSION_                                           \
    PERL_JNP_TO_DECIMAL_(PERL_VERSION_MAJOR,                            \
                         PERL_VERSION_MINOR,                            \
                         PERL_VERSION_PATCH)
/*
=for apidoc_section $versioning
=for apidoc AmR|bool|PERL_VERSION_EQ|const U8 major|const U8 minor|const U8 patch
=for apidoc_item PERL_VERSION_GE
=for apidoc_item PERL_VERSION_GT
=for apidoc_item PERL_VERSION_LE
=for apidoc_item PERL_VERSION_LT
=for apidoc_item PERL_VERSION_NE
Returns whether or not the perl currently being compiled has the specified
relationship to the perl given by the parameters. For example,
#if PERL_VERSION_GT(5,24,2)
code that will only be compiled on perls after v5.24.2
#else
fallback code
#endif
Note that this is usable in making compile-time decisions.
You may use the special value '*' for the final number to mean ALL possible
values for it. Thus,
#if PERL_VERSION_EQ(5,31,'*')
means all perls in the 5.31 series. And
#if PERL_VERSION_NE(5,24,'*')
means all perls EXCEPT 5.24 ones. And
#if PERL_VERSION_LE(5,9,'*')
is effectively
#if PERL_VERSION_LT(5,10,0)
This means you don't have to think so much when converting from the existing
deprecated C<PERL_VERSION> to using this macro:
#if PERL_VERSION <= 9
becomes
#if PERL_VERSION_LE(5,9,'*')
=cut
*/
/* N.B. These don't work if the patch version is 42 or 92, as those are what
* '*' is in ASCII and EBCDIC respectively */
/* Version comparison predicates, usable in #if.  (vj, vn, vp) is the
 * major/minor/patch triple to compare the current perl against; passing
 * '*' as vp is a wildcard for the patch level. */

/* EQ: an exact packed-value match, or with '*' a match on major and minor */
#define PERL_VERSION_EQ(vj,vn,vp)                                           \
    (((vp) != '*')                                                          \
     ? (PERL_DECIMAL_VERSION_ == PERL_JNP_TO_DECIMAL_(vj,vn,vp))            \
     : (   (vj) == PERL_VERSION_MAJOR                                       \
        && (vn) == PERL_VERSION_MINOR))
#define PERL_VERSION_NE(vj,vn,vp) (! PERL_VERSION_EQ(vj,vn,vp))

/* LT: '*' is treated as patch level 0; GE is its negation */
#define PERL_VERSION_LT(vj,vn,vp) /* < '*' effectively means < 0 */         \
    (PERL_DECIMAL_VERSION_ < PERL_JNP_TO_DECIMAL_( (vj),                    \
                                                   (vn),                    \
                                            (((vp) != '*') ? (vp) : 0)))
#define PERL_VERSION_GE(vj,vn,vp) (! PERL_VERSION_LT(vj,vn,vp))

/* LE: '*' is treated as patch level 999; GT is its negation */
#define PERL_VERSION_LE(vj,vn,vp) /* <= '*' effectively means <= 999 */     \
    (PERL_DECIMAL_VERSION_ <= PERL_JNP_TO_DECIMAL_( (vj),                   \
                                                    (vn),                   \
                                            (((vp) != '*') ? (vp) : 999)))
#define PERL_VERSION_GT(vj,vn,vp) (! PERL_VERSION_LE(vj,vn,vp))
/*
=for apidoc_section $string
=for apidoc Am|bool|strNE|char* s1|char* s2
Test two C<NUL>-terminated strings to see if they are different. Returns true
or false.
=for apidoc Am|bool|strEQ|char* s1|char* s2
Test two C<NUL>-terminated strings to see if they are equal. Returns true or
false.
=for apidoc Am|bool|strLT|char* s1|char* s2
Test two C<NUL>-terminated strings to see if the first, C<s1>, is less than the
second, C<s2>. Returns true or false.
=for apidoc Am|bool|strLE|char* s1|char* s2
Test two C<NUL>-terminated strings to see if the first, C<s1>, is less than or
equal to the second, C<s2>. Returns true or false.
=for apidoc Am|bool|strGT|char* s1|char* s2
Test two C<NUL>-terminated strings to see if the first, C<s1>, is greater than
the second, C<s2>. Returns true or false.
=for apidoc Am|bool|strGE|char* s1|char* s2
Test two C<NUL>-terminated strings to see if the first, C<s1>, is greater than
or equal to the second, C<s2>. Returns true or false.
=for apidoc Am|bool|strnNE|char* s1|char* s2|STRLEN len
Test two C<NUL>-terminated strings to see if they are different. The C<len>
parameter indicates the number of bytes to compare. Returns true or false. (A
wrapper for C<strncmp>).
=for apidoc Am|bool|strnEQ|char* s1|char* s2|STRLEN len
Test two C<NUL>-terminated strings to see if they are equal. The C<len>
parameter indicates the number of bytes to compare. Returns true or false. (A
wrapper for C<strncmp>).
=for apidoc Am|bool|memEQ|char* s1|char* s2|STRLEN len
Test two buffers (which may contain embedded C<NUL> characters) to see if they
are equal. The C<len> parameter indicates the number of bytes to compare.
Returns true or false. It is undefined behavior if either of the buffers
doesn't contain at least C<len> bytes.
=for apidoc Am|bool|memEQs|char* s1|STRLEN l1|"s2"
Like L</memEQ>, but the second string is a literal enclosed in double quotes,
C<l1> gives the number of bytes in C<s1>.
Returns true or false.
=for apidoc Am|bool|memNE|char* s1|char* s2|STRLEN len
Test two buffers (which may contain embedded C<NUL> characters) to see if they
are not equal. The C<len> parameter indicates the number of bytes to compare.
Returns true or false. It is undefined behavior if either of the buffers
doesn't contain at least C<len> bytes.
=for apidoc Am|bool|memNEs|char* s1|STRLEN l1|"s2"
Like L</memNE>, but the second string is a literal enclosed in double quotes,
C<l1> gives the number of bytes in C<s1>.
Returns true or false.
=for apidoc Am|bool|memCHRs|"list"|char c
Returns the position of the first occurrence of the byte C<c> in the literal
string C<"list">, or NULL if C<c> doesn't appear in C<"list">. All bytes are
treated as unsigned char. Thus this macro can be used to determine if C<c> is
in a set of particular characters. Unlike L<strchr(3)>, it works even if C<c>
is C<NUL> (and the set doesn't include C<NUL>).
=cut
New macros should use the following conventions for their names (which are
based on the underlying C library functions):
(mem | str n? ) (EQ | NE | LT | LE | GT | GE | (( BEGIN | END ) P? )) l? s?
Each has two main parameters, string-like operands that are compared
against each other, as specified by the macro name. Some macros may
additionally have one or potentially even two length parameters. If a length
parameter applies to both string parameters, it will be positioned third;
otherwise any length parameter immediately follows the string parameter it
applies to.
If the prefix to the name is 'str', the string parameter is a pointer to a C
language string. Such a string does not contain embedded NUL bytes; its
length may be unknown, but can be calculated by C<strlen()>, since it is
terminated by a NUL, which isn't included in its length.
The optional 'n' following 'str' means that there is a third parameter,
giving the maximum number of bytes to look at in each string. Even if both
strings are longer than the length parameter, those extra bytes will be
unexamined.
The 's' suffix means that the 2nd byte string parameter is a literal C
double-quoted string. Its length will automatically be calculated by the
macro, so no length parameter will ever be needed for it.
If the prefix is 'mem', the string parameters don't have to be C strings;
they may contain embedded NUL bytes, do not necessarily have a terminating
NUL, and their lengths can be known only through other means, which in
practice are additional parameter(s) passed to the function. All 'mem'
functions have at least one length parameter. Barring any 'l' or 's' suffix,
there is a single length parameter, in position 3, which applies to both
string parameters. The 's' suffix means, as described above, that the 2nd
string is a literal double-quoted C string (hence its length is calculated by
the macro, and the length parameter to the function applies just to the first
string parameter, and hence is positioned just after it). An 'l' suffix
means that the 2nd string parameter has its own length parameter, and the
signature will look like memFOOl(s1, l1, s2, l2).
BEGIN (and END) are for testing if the 2nd string is an initial (or final)
substring of the 1st string. 'P' if present indicates that the substring
must be a "proper" one in the mathematical sense that the first one must be
strictly larger than the 2nd.
*/
/* NUL-terminated string comparisons: thin wrappers around strcmp() */
#define strNE(a,b) (strcmp(a,b) != 0)
#define strEQ(a,b) (strcmp(a,b) == 0)
#define strLT(a,b) (strcmp(a,b) < 0)
#define strLE(a,b) (strcmp(a,b) <= 0)
#define strGT(a,b) (strcmp(a,b) > 0)
#define strGE(a,b) (strcmp(a,b) >= 0)

/* The same, limited to the first 'n' bytes: wrappers around strncmp() */
#define strnNE(a,b,n) (strncmp(a,b,n) != 0)
#define strnEQ(a,b,n) (strncmp(a,b,n) == 0)

/* Raw buffer (in)equality over 'n' bytes, via memcmp() */
#define memEQ(a,b,n) (memcmp(((const void *) (a)), ((const void *) (b)), n) == 0)
#define memNE(a,b,n) (! memEQ(a,b,n))

/* memEQ and memNE where second comparand is a string constant: the lengths
 * must match before the bytes are compared */
#define memEQs(buf, len, lit)                                               \
    (   (len) == (sizeof(lit)-1)                                            \
     && memEQ((buf), ASSERT_IS_LITERAL(lit), (sizeof(lit)-1)))
#define memNEs(buf, len, lit) (! memEQs(buf, len, lit))
/* Keep these private until we decide it was a good idea */
#if defined(PERL_CORE) || defined(PERL_EXT) || defined(PERL_EXT_POSIX)
/* Prefix / suffix tests against a string literal 'lit'.
 *   strBEGINs   NUL-terminated 'str' starts with lit
 *   memBEGINs   the 'len'-byte buffer starts with lit
 *   memBEGINPs  as memBEGINs, and the buffer is strictly longer than lit
 *   memENDs     the 'len'-byte buffer ends with lit
 * The mem* forms check the length first, so the bytes are only compared
 * when the buffer is long enough to hold lit. */
#define strBEGINs(str,lit) \
    (strncmp(str,ASSERT_IS_LITERAL(lit), sizeof(lit)-1) == 0)
#define memBEGINs(buf, len, lit)                                            \
    (   (Ptrdiff_t) (len) >= (Ptrdiff_t) sizeof(lit) - 1                    \
     && memEQ(buf, ASSERT_IS_LITERAL(lit), sizeof(lit)-1))
#define memBEGINPs(buf, len, lit)                                           \
    (   (Ptrdiff_t) (len) > (Ptrdiff_t) sizeof(lit) - 1                     \
     && memEQ(buf, ASSERT_IS_LITERAL(lit), sizeof(lit)-1))
#define memENDs(buf, len, lit)                                              \
    (   (Ptrdiff_t) (len) >= (Ptrdiff_t) sizeof(lit) - 1                    \
     && memEQ(buf + (len) - (sizeof(lit) - 1),                              \
              ASSERT_IS_LITERAL(lit), sizeof(lit)-1))
/* memENDPs(s1, l, s2): true if the literal s2 is a *proper* suffix of the
 * l-byte buffer s1, i.e. s1 ends with s2 and is strictly longer than it.
 *
 * The length test compares against sizeof(s2) - 1, the literal's length
 * without its terminating NUL, exactly as memBEGINPs does.  Comparing
 * against sizeof(s2) instead would wrongly reject a buffer that is exactly
 * one byte longer than the literal.  The length test also guarantees that
 * the pointer arithmetic in the second operand stays inside the buffer. */
#define memENDPs(s1, l, s2) \
( (Ptrdiff_t) (l) > (Ptrdiff_t) sizeof(s2) - 1 \
&& memEQ(s1 + (l) - (sizeof(s2) - 1), ASSERT_IS_LITERAL(s2), sizeof(s2)-1))
#endif /* End of making macros private */
/* Ordering comparisons over the first 'n' bytes of two buffers, via memcmp() */
#define memLT(a,b,n) (memcmp(a,b,n) < 0)
#define memLE(a,b,n) (memcmp(a,b,n) <= 0)
#define memGT(a,b,n) (memcmp(a,b,n) > 0)
#define memGE(a,b,n) (memcmp(a,b,n) >= 0)

/* memCHRs(list, c): pointer to the first occurrence of byte 'c' in the
 * literal 'list', or NULL if there is none.  The literal's terminating NUL
 * is not part of the searched range. */
#define memCHRs(list,c) \
    ((const char *) memchr(ASSERT_IS_LITERAL(list) , c, sizeof(list)-1))
/*
* Character classes.
*
* Unfortunately, the introduction of locales means that we
* can't trust isupper(), etc. to tell the truth. And when
* it comes to /\w+/ with tainting enabled, we *must* be able
* to trust our character classes.
*
* Therefore, the default tests in the text of Perl will be independent of
* locale. Any code that wants to depend on the current locale will use the
* macros that contain _LC in their names
*/
/* USE_LOCALE_CTYPE implies CTYPE256; define it unless it is defined already */
#if defined(USE_LOCALE_CTYPE) && ! defined(CTYPE256)
#  define CTYPE256
#endif
/*
=head1 Character classification
This section is about functions (really macros) that classify characters
into types, such as punctuation versus alphabetic, etc. Most of these are
analogous to regular expression character classes. (See
L<perlrecharclass/POSIX Character Classes>.) There are several variants for
each class. (Not all macros have all variants; each item below lists the
ones valid for it.) None are affected by C<use bytes>, and only the ones
with C<LC> in the name are affected by the current locale.
The base function, e.g., C<isALPHA()>, takes any signed or unsigned value,
treating it as a code point, and returns a boolean as to whether or not the
character represented by it is (or on non-ASCII platforms, corresponds to) an
ASCII character in the named class based on platform, Unicode, and Perl rules.
If the input is a number that doesn't fit in an octet, FALSE is returned.
Variant C<isI<FOO>_A> (e.g., C<isALPHA_A()>) is identical to the base function
with no suffix C<"_A">. This variant is used to emphasize by its name that
only ASCII-range characters can return TRUE.
Variant C<isI<FOO>_L1> imposes the Latin-1 (or EBCDIC equivalent) character set
onto the platform. That is, the code points that are ASCII are unaffected,
since ASCII is a subset of Latin-1. But the non-ASCII code points are treated
as if they are Latin-1 characters. For example, C<isWORDCHAR_L1()> will return
true when called with the code point 0xDF, which is a word character on both
ASCII and EBCDIC platforms (though it represents different characters in each).
If the input is a number that doesn't fit in an octet, FALSE is returned.
(Perl's documentation uses a colloquial definition of Latin-1, to include all
code points below 256.)
Variant C<isI<FOO>_uvchr> is exactly like the C<isI<FOO>_L1> variant, for
inputs below 256, but if the code point is larger than 255, Unicode rules are
used to determine if it is in the character class. For example,
C<isWORDCHAR_uvchr(0x100)> returns TRUE, since 0x100 is LATIN CAPITAL LETTER A
WITH MACRON in Unicode, and is a word character.
Variants C<isI<FOO>_utf8> and C<isI<FOO>_utf8_safe> are like C<isI<FOO>_uvchr>,
but are used for UTF-8 encoded strings. The two forms are different names for
the same thing. Each call to one of these classifies the first character of
the string starting at C<p>. The second parameter, C<e>, points to anywhere in
the string beyond the first character, up to one byte past the end of the
entire string. Although both variants are identical, the suffix C<_safe> in
one name emphasizes that it will not attempt to read beyond S<C<e - 1>>,
provided that the constraint S<C<p E<lt> e>> is true (this is asserted in
C<-DDEBUGGING> builds). If the UTF-8 for the input character is malformed in
some way, the program may croak, or the function may return FALSE, at the
discretion of the implementation, and subject to change in future releases.
Variant C<isI<FOO>_LC> is like the C<isI<FOO>_A> and C<isI<FOO>_L1> variants,
but the result is based on the current locale, which is what C<LC> in the name
stands for. If Perl can determine that the current locale is a UTF-8 locale,
it uses the published Unicode rules; otherwise, it uses the C library function
that gives the named classification. For example, C<isDIGIT_LC()> when not in
a UTF-8 locale returns the result of calling C<isdigit()>. FALSE is always
returned if the input won't fit into an octet. On some platforms where the C
library function is known to be defective, Perl changes its result to follow
the POSIX standard's rules.
Variant C<isI<FOO>_LC_uvchr> acts exactly like C<isI<FOO>_LC> for inputs less
than 256, but for larger ones it returns the Unicode classification of the code
point.
Variants C<isI<FOO>_LC_utf8> and C<isI<FOO>_LC_utf8_safe> are like
C<isI<FOO>_LC_uvchr>, but are used for UTF-8 encoded strings. The two forms
are different names for the same thing. Each call to one of these classifies
the first character of the string starting at C<p>. The second parameter,
C<e>, points to anywhere in the string beyond the first character, up to one
byte past the end of the entire string. Although both variants are identical,
the suffix C<_safe> in one name emphasizes that it will not attempt to read
beyond S<C<e - 1>>, provided that the constraint S<C<p E<lt> e>> is true (this
is asserted in C<-DDEBUGGING> builds). If the UTF-8 for the input
character is malformed in some way, the program may croak, or the function may
return FALSE, at the discretion of the implementation, and subject to change in
future releases.
=for apidoc Am|bool|isALPHA|UV ch
=for apidoc_item ||isALPHA_A|UV ch
=for apidoc_item ||isALPHA_LC|UV ch
=for apidoc_item ||isALPHA_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isALPHA_LC_uvchr|UV ch
=for apidoc_item ||isALPHA_L1|UV ch
=for apidoc_item ||isALPHA_utf8|U8 * s|U8 * end
=for apidoc_item ||isALPHA_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isALPHA_uvchr|UV ch
Returns a boolean indicating whether the specified input is one of C<[A-Za-z]>,
analogous to C<m/[[:alpha:]]/>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
=cut
Here and below, we add the prototypes of these macros for downstream programs
that would be interested in them, such as Devel::PPPort
=for apidoc Am|bool|isALPHANUMERIC|UV ch
=for apidoc_item ||isALPHANUMERIC_A|UV ch
=for apidoc_item ||isALPHANUMERIC_LC|UV ch
=for apidoc_item ||isALPHANUMERIC_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isALPHANUMERIC_LC_uvchr|UV ch
=for apidoc_item ||isALPHANUMERIC_L1|UV ch
=for apidoc_item ||isALPHANUMERIC_utf8|U8 * s|U8 * end
=for apidoc_item ||isALPHANUMERIC_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isALPHANUMERIC_uvchr|UV ch
Returns a boolean indicating whether the specified character is one of
C<[A-Za-z0-9]>, analogous to C<m/[[:alnum:]]/>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
=for apidoc Am|bool|isALNUMC|UV ch
=for apidoc_item ||isALNUMC_A|UV ch
=for apidoc_item ||isALNUMC_LC|UV ch
=for apidoc_item ||isALNUMC_LC_uvchr|UV ch
=for apidoc_item ||isALNUMC_L1|UV ch
These are discouraged, backward compatibility macros for L</C<isALPHANUMERIC>>.
That is, each returns a boolean indicating whether the specified character is
one of C<[A-Za-z0-9]>, analogous to C<m/[[:alnum:]]/>.
The C<C> suffix in the names was meant to indicate that they correspond to the
C language L<C<isalnum(3)>>.
=for apidoc Am|bool|isASCII|UV ch
=for apidoc_item ||isASCII_A|UV ch
=for apidoc_item ||isASCII_LC|UV ch
=for apidoc_item ||isASCII_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isASCII_LC_uvchr|UV ch
=for apidoc_item ||isASCII_L1|UV ch
=for apidoc_item ||isASCII_utf8|U8 * s|U8 * end
=for apidoc_item ||isASCII_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isASCII_uvchr|UV ch
Returns a boolean indicating whether the specified character is one of the 128
characters in the ASCII character set, analogous to C<m/[[:ascii:]]/>.
On non-ASCII platforms, it returns TRUE iff this
character corresponds to an ASCII character. Variants C<isASCII_A()> and
C<isASCII_L1()> are identical to C<isASCII()>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
Note, however, that some platforms do not have the C library routine
C<isascii()>. In these cases, the variants whose names contain C<LC> are the
same as the corresponding ones without.
Also note that, because all ASCII characters are UTF-8 invariant (meaning they
have the exact same representation (always a single byte) whether encoded in
UTF-8 or not), C<isASCII> will give the correct results when called with any
byte in any string encoded or not in UTF-8. And similarly C<isASCII_utf8> and
C<isASCII_utf8_safe> will work properly on any string encoded or not in UTF-8.
=for apidoc Am|bool|isBLANK|UV ch
=for apidoc_item ||isBLANK_A|UV ch
=for apidoc_item ||isBLANK_LC|UV ch
=for apidoc_item ||isBLANK_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isBLANK_LC_uvchr|UV ch
=for apidoc_item ||isBLANK_L1|UV ch
=for apidoc_item ||isBLANK_utf8|U8 * s|U8 * end
=for apidoc_item ||isBLANK_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isBLANK_uvchr|UV ch
Returns a boolean indicating whether the specified character is a
character considered to be a blank, analogous to C<m/[[:blank:]]/>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
Note,
however, that some platforms do not have the C library routine
C<isblank()>. In these cases, the variants whose names contain C<LC> are
the same as the corresponding ones without.
=for apidoc Am|bool|isCNTRL|UV ch
=for apidoc_item ||isCNTRL_A|UV ch
=for apidoc_item ||isCNTRL_LC|UV ch
=for apidoc_item ||isCNTRL_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isCNTRL_LC_uvchr|UV ch
=for apidoc_item ||isCNTRL_L1|UV ch
=for apidoc_item ||isCNTRL_utf8|U8 * s|U8 * end
=for apidoc_item ||isCNTRL_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isCNTRL_uvchr|UV ch
Returns a boolean indicating whether the specified character is a
control character, analogous to C<m/[[:cntrl:]]/>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
On EBCDIC platforms, you almost always want to use the C<isCNTRL_L1> variant.
=for apidoc Am|bool|isDIGIT|UV ch
=for apidoc_item ||isDIGIT_A|UV ch
=for apidoc_item ||isDIGIT_LC|UV ch
=for apidoc_item ||isDIGIT_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isDIGIT_LC_uvchr|UV ch
=for apidoc_item ||isDIGIT_L1|UV ch
=for apidoc_item ||isDIGIT_utf8|U8 * s|U8 * end
=for apidoc_item ||isDIGIT_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isDIGIT_uvchr|UV ch
Returns a boolean indicating whether the specified character is a
digit, analogous to C<m/[[:digit:]]/>.
Variants C<isDIGIT_A> and C<isDIGIT_L1> are identical to C<isDIGIT>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
=for apidoc Am|bool|isGRAPH|UV ch
=for apidoc_item ||isGRAPH_A|UV ch
=for apidoc_item ||isGRAPH_LC|UV ch
=for apidoc_item ||isGRAPH_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isGRAPH_LC_uvchr|UV ch
=for apidoc_item ||isGRAPH_L1|UV ch
=for apidoc_item ||isGRAPH_utf8|U8 * s|U8 * end
=for apidoc_item ||isGRAPH_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isGRAPH_uvchr|UV ch
Returns a boolean indicating whether the specified character is a
graphic character, analogous to C<m/[[:graph:]]/>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
=for apidoc Am|bool|isLOWER|UV ch
=for apidoc_item ||isLOWER_A|UV ch
=for apidoc_item ||isLOWER_LC|UV ch
=for apidoc_item ||isLOWER_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isLOWER_LC_uvchr|UV ch
=for apidoc_item ||isLOWER_L1|UV ch
=for apidoc_item ||isLOWER_utf8|U8 * s|U8 * end
=for apidoc_item ||isLOWER_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isLOWER_uvchr|UV ch
Returns a boolean indicating whether the specified character is a
lowercase character, analogous to C<m/[[:lower:]]/>.
See the L<top of this section|/Character classification> for an explanation of
the variants.
=for apidoc Am|bool|isOCTAL|UV ch
=for apidoc_item ||isOCTAL_A|UV ch
=for apidoc_item ||isOCTAL_L1|UV ch
Returns a boolean indicating whether the specified character is an
octal digit, [0-7].
The only two variants are C<isOCTAL_A> and C<isOCTAL_L1>; each is identical to
C<isOCTAL>.
=for apidoc Am|bool|isPUNCT|UV ch
=for apidoc_item ||isPUNCT_A|UV ch
=for apidoc_item ||isPUNCT_LC|UV ch
=for apidoc_item ||isPUNCT_LC_utf8_safe|U8 * s| U8 *end
=for apidoc_item ||isPUNCT_LC_uvchr|UV ch
=for apidoc_item ||isPUNCT_L1|UV ch
=for apidoc_item ||isPUNCT_utf8|U8 * s|U8 * end
=for apidoc_item ||isPUNCT_utf8_safe|U8 * s|U8 * end
=for apidoc_item ||isPUNCT_uvchr|UV ch