diff options
| author | Avi Halachmi (:avih) <avihpit@yahoo.com> | 2026-04-13 01:09:11 +0300 |
|---|---|---|
| committer | Ron Yorston <rmy@pobox.com> | 2026-04-16 10:42:13 +0100 |
| commit | 9f1aec537dafda10c3f297b802eca348616bf288 (patch) | |
| tree | b2ee8e0a4f2a7669b743067aa60184f19b9b2c35 | |
| parent | 7c49824cb6137c05583a3ed343975114d12ff0a4 (diff) | |
| download | busybox-w32-9f1aec537dafda10c3f297b802eca348616bf288.tar.gz busybox-w32-9f1aec537dafda10c3f297b802eca348616bf288.tar.bz2 busybox-w32-9f1aec537dafda10c3f297b802eca348616bf288.zip | |
win32: actype: code shrink
Unify actype/actail into a single function, and make actype
a wrapper macro which invokes actail(str, NULL). Using a macro avoids
a separate actype function that would merely trampoline to actail with NULL.
This concludes the integration of actype/isactype.
Overall:
- Unified/standard-ish char-class handling in fnmatch/regcomp/tr.
- Saved about 1400 bytes in the x64 binary.
- regcomp.c is negligibly faster (actype/isactype are O(1)).
- tr.c now also supports [:graph:] and [:print:], and is not slower.
fnmatch.c (tested using scripts/patbench.sh):
Before actype was integrated, alnum was fastest and xdigit slowest
due to their order in the names strings list and in the switch/case:
[35] 1305 ms OK (M:12-45) '*[0-9A-Za-z]*[0-9A-Za-z]'
[36] 1928 ms OK (M:12-45) '*[[:alnum:]]*[[:alnum:]]'
[38] 1446 ms OK (M:12-45) '*[a-bc-de-z]*[a-bc-de-z]'
[39] 3192 ms OK (M:12-45) '*[[:lower:]]*[[:lower:]]'
[40] 1586 ms OK (M:---4-) '*[0-9A-Fa-ef]*[0-9A-Fa-ef]'
[41] 4601 ms OK (M:---4-) '*[[:xdigit:]]*[[:xdigit:]]'
Now all classes cost about the same relative to an equivalent range,
and are just barely slower than "alnum" was before actype was added
(still slower than a range due to the additional temp name buffer):
[35] 1402 ms OK (M:12-45) '*[0-9A-Za-z]*[0-9A-Za-z]'
[36] 1962 ms OK (M:12-45) '*[[:alnum:]]*[[:alnum:]]'
[38] 1453 ms OK (M:12-45) '*[a-bc-de-z]*[a-bc-de-z]'
[39] 1965 ms OK (M:12-45) '*[[:lower:]]*[[:lower:]]'
[40] 1683 ms OK (M:---4-) '*[0-9A-Fa-ef]*[0-9A-Fa-ef]'
[41] 2117 ms OK (M:---4-) '*[[:xdigit:]]*[[:xdigit:]]'
| -rw-r--r-- | win32/actype.c | 27 | ||||
| -rw-r--r-- | win32/actype.h | 4 |
2 files changed, 13 insertions, 18 deletions
diff --git a/win32/actype.c b/win32/actype.c index 4d0cf91a3..967de4644 100644 --- a/win32/actype.c +++ b/win32/actype.c | |||
| @@ -31,10 +31,8 @@ along with this program; if not, see https://www.gnu.org/licenses/ . | |||
| 31 | #endif | 31 | #endif |
| 32 | 32 | ||
| 33 | #if ACTYPE_OPTIMIZE_FOR_SIZE | 33 | #if ACTYPE_OPTIMIZE_FOR_SIZE |
| 34 | #include <string.h> /* strcmp */ | ||
| 35 | char *is_prefixed_with(const char*, const char*); /* libbb */ | 34 | char *is_prefixed_with(const char*, const char*); /* libbb */ |
| 36 | #define IS_PREFIXED_WITH_CLASS is_prefixed_with | 35 | #define IS_PREFIXED_WITH_CLASS is_prefixed_with |
| 37 | #define IS_CLASS(s, c) (!strcmp((s), (c))) | ||
| 38 | #else | 36 | #else |
| 39 | /* c[0]-c[4] are not 0, so we can skip these 0-tests, and do it inline. | 37 | /* c[0]-c[4] are not 0, so we can skip these 0-tests, and do it inline. |
| 40 | * in x64 this adds ~ 40 bytes, and actype/actail are almost x2 faster. | 38 | * in x64 this adds ~ 40 bytes, and actype/actail are almost x2 faster. |
| @@ -44,9 +42,6 @@ along with this program; if not, see https://www.gnu.org/licenses/ . | |||
| 44 | ? c[5] ? /* xdigit */ s[5] == 't' ? s+6 : 0 \ | 42 | ? c[5] ? /* xdigit */ s[5] == 't' ? s+6 : 0 \ |
| 45 | : s+5 \ | 43 | : s+5 \ |
| 46 | : 0) | 44 | : 0) |
| 47 | |||
| 48 | /* modifies s */ | ||
| 49 | #define IS_CLASS(s, c) ((s = IS_PREFIXED_WITH_CLASS(s, c)) && !*s) | ||
| 50 | #endif | 45 | #endif |
| 51 | 46 | ||
| 52 | 47 | ||
| @@ -107,27 +102,23 @@ const _isactype_t _actype_fns[] = { | |||
| 107 | "alnum\0alpha\0blank\0cntrl\0digit\0graph\0" \ | 102 | "alnum\0alpha\0blank\0cntrl\0digit\0graph\0" \ |
| 108 | "lower\0print\0punct\0space\0upper\0xdigit" | 103 | "lower\0print\0punct\0space\0upper\0xdigit" |
| 109 | 104 | ||
| 110 | actype_t actype(const char *s) | ||
| 111 | { | ||
| 112 | int i = POTENTIAL_CLASS_IDX(s); | ||
| 113 | const char *c = CHAR_CLASSES + i * 6; | ||
| 114 | |||
| 115 | if (!IS_CLASS(s, c)) | ||
| 116 | return 0; | ||
| 117 | return ACTYPE_FROM_IDX(i); | ||
| 118 | } | ||
| 119 | |||
| 120 | actype_t actail(const char *s, int *len) | 105 | actype_t actail(const char *s, int *len) |
| 121 | { | 106 | { |
| 122 | int i = POTENTIAL_CLASS_IDX(s); | 107 | int i = POTENTIAL_CLASS_IDX(s); |
| 123 | const char *c = CHAR_CLASSES + i * 6; | 108 | const char *c = CHAR_CLASSES + i * 6; |
| 124 | 109 | ||
| 125 | s = IS_PREFIXED_WITH_CLASS(s, c); | 110 | s = IS_PREFIXED_WITH_CLASS(s, c); |
| 126 | 111 | if (!s) | |
| 127 | if (!s || *s++ != ':' || *s != ']') | ||
| 128 | return 0; | 112 | return 0; |
| 129 | 113 | ||
| 130 | *len = 7 + (i == 11); /* xdigit is 6, others are 5 */ | 114 | /* done: s is just past the matched name, verify tail based on len */ |
| 115 | if (len) { | ||
| 116 | if (*s++ != ':' || *s != ']') | ||
| 117 | return 0; | ||
| 118 | *len = 7 + (i == 11); /* xdigit is 6, others are 5 */ | ||
| 119 | } else if (*s) { | ||
| 120 | return 0; | ||
| 121 | } | ||
| 131 | return ACTYPE_FROM_IDX(i); | 122 | return ACTYPE_FROM_IDX(i); |
| 132 | } | 123 | } |
| 133 | 124 | ||
diff --git a/win32/actype.h b/win32/actype.h index d0a2da39f..4e906a7f1 100644 --- a/win32/actype.h +++ b/win32/actype.h | |||
| @@ -46,6 +46,9 @@ typedef int (*_isactype_t)(int); /* isalpha et-al prototype */ | |||
| 46 | extern actype_t actype(const char *name); | 46 | extern actype_t actype(const char *name); |
| 47 | extern int isactype(int c, actype_t t); | 47 | extern int isactype(int c, actype_t t); |
| 48 | 48 | ||
| 49 | /* actype above is the official prototype, but in practice we invoke actail */ | ||
| 50 | #define actype(name) (actail((name), NULL)) | ||
| 51 | |||
| 49 | 52 | ||
| 50 | /* extensions */ | 53 | /* extensions */ |
| 51 | 54 | ||
| @@ -58,6 +61,7 @@ extern int isactype(int c, actype_t t); | |||
| 58 | * set *len=strlen("NAME:]") and return non-0 actype("NAME"). | 61 | * set *len=strlen("NAME:]") and return non-0 actype("NAME"). |
| 59 | * else return 0. | 62 | * else return 0. |
| 60 | * useful in typical char-class parsing scenarios. | 63 | * useful in typical char-class parsing scenarios. |
| 64 | * if len is NULL: identical to actype. | ||
| 61 | */ | 65 | */ |
| 62 | extern actype_t actail(const char *str, int *len); | 66 | extern actype_t actail(const char *str, int *len); |
| 63 | 67 | ||
