diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2021-12-31 16:57:07 -0800 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2021-12-31 16:57:07 -0800 |
commit | 8678871f18f4dd51101a9db1e37791f975969079 (patch) | |
tree | 4db677c163317d56fefa7f52aaa440271fe4c7eb | |
parent | c3f3043f7aa80750245f8166a338c4877020b589 (diff) | |
download | zlib-8678871f18f4dd51101a9db1e37791f975969079.tar.gz zlib-8678871f18f4dd51101a9db1e37791f975969079.tar.bz2 zlib-8678871f18f4dd51101a9db1e37791f975969079.zip |
Replace black/white with allow/block. (theresa-m)
-rw-r--r-- | doc/txtvsbin.txt | 12 | ||||
-rw-r--r-- | trees.c | 18 |
2 files changed, 15 insertions, 15 deletions
diff --git a/doc/txtvsbin.txt b/doc/txtvsbin.txt index 3d0f063..2a901ea 100644 --- a/doc/txtvsbin.txt +++ b/doc/txtvsbin.txt | |||
@@ -38,15 +38,15 @@ The Algorithm | |||
38 | 38 | ||
39 | The algorithm works by dividing the set of bytecodes [0..255] into three | 39 | The algorithm works by dividing the set of bytecodes [0..255] into three |
40 | categories: | 40 | categories: |
41 | - The white list of textual bytecodes: | 41 | - The allow list of textual bytecodes: |
42 | 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255. | 42 | 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255. |
43 | - The gray list of tolerated bytecodes: | 43 | - The gray list of tolerated bytecodes: |
44 | 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC). | 44 | 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC). |
45 | - The black list of undesired, non-textual bytecodes: | 45 | - The block list of undesired, non-textual bytecodes: |
46 | 0 (NUL) to 6, 14 to 25, 28 to 31. | 46 | 0 (NUL) to 6, 14 to 25, 28 to 31. |
47 | 47 | ||
48 | If a file contains at least one byte that belongs to the white list and | 48 | If a file contains at least one byte that belongs to the allow list and |
49 | no byte that belongs to the black list, then the file is categorized as | 49 | no byte that belongs to the block list, then the file is categorized as |
50 | plain text; otherwise, it is categorized as binary. (The boundary case, | 50 | plain text; otherwise, it is categorized as binary. (The boundary case, |
51 | when the file is empty, automatically falls into the latter category.) | 51 | when the file is empty, automatically falls into the latter category.) |
52 | 52 | ||
@@ -84,9 +84,9 @@ consistent results, regardless of what alphabet encoding is being used. | |||
84 | results on a text encoded, say, using ISO-8859-16 versus UTF-8.) | 84 | results on a text encoded, say, using ISO-8859-16 versus UTF-8.) |
85 | 85 | ||
86 | There is an extra category of plain text files that are "polluted" with | 86 | There is an extra category of plain text files that are "polluted" with |
87 | one or more black-listed codes, either by mistake or by peculiar design | 87 | one or more block-listed codes, either by mistake or by peculiar design |
88 | considerations. In such cases, a scheme that tolerates a small fraction | 88 | considerations. In such cases, a scheme that tolerates a small fraction |
89 | of black-listed codes would provide an increased recall (i.e. more true | 89 | of block-listed codes would provide an increased recall (i.e. more true |
90 | positives). This, however, incurs a reduced precision overall, since | 90 | positives). This, however, incurs a reduced precision overall, since |
91 | false positives are more likely to appear in binary files that contain | 91 | false positives are more likely to appear in binary files that contain |
92 | large chunks of textual data. Furthermore, "polluted" plain text should | 92 | large chunks of textual data. Furthermore, "polluted" plain text should |
diff --git a/trees.c b/trees.c --- a/trees.c +++ b/trees.c | |||
@@ -1091,9 +1091,9 @@ local void compress_block(s, ltree, dtree) | |||
1091 | * Check if the data type is TEXT or BINARY, using the following algorithm: | 1091 | * Check if the data type is TEXT or BINARY, using the following algorithm: |
1092 | * - TEXT if the two conditions below are satisfied: | 1092 | * - TEXT if the two conditions below are satisfied: |
1093 | * a) There are no non-portable control characters belonging to the | 1093 | * a) There are no non-portable control characters belonging to the |
1094 | * "black list" (0..6, 14..25, 28..31). | 1094 | * "block list" (0..6, 14..25, 28..31). |
1095 | * b) There is at least one printable character belonging to the | 1095 | * b) There is at least one printable character belonging to the |
1096 | * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). | 1096 | * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). |
1097 | * - BINARY otherwise. | 1097 | * - BINARY otherwise. |
1098 | * - The following partially-portable control characters form a | 1098 | * - The following partially-portable control characters form a |
1099 | * "gray list" that is ignored in this detection algorithm: | 1099 | * "gray list" that is ignored in this detection algorithm: |
@@ -1103,19 +1103,19 @@ local void compress_block(s, ltree, dtree) | |||
1103 | local int detect_data_type(s) | 1103 | local int detect_data_type(s) |
1104 | deflate_state *s; | 1104 | deflate_state *s; |
1105 | { | 1105 | { |
1106 | /* black_mask is the bit mask of black-listed bytes | 1106 | /* block_mask is the bit mask of block-listed bytes |
1107 | * set bits 0..6, 14..25, and 28..31 | 1107 | * set bits 0..6, 14..25, and 28..31 |
1108 | * 0xf3ffc07f = binary 11110011111111111100000001111111 | 1108 | * 0xf3ffc07f = binary 11110011111111111100000001111111 |
1109 | */ | 1109 | */ |
1110 | unsigned long black_mask = 0xf3ffc07fUL; | 1110 | unsigned long block_mask = 0xf3ffc07fUL; |
1111 | int n; | 1111 | int n; |
1112 | 1112 | ||
1113 | /* Check for non-textual ("black-listed") bytes. */ | 1113 | /* Check for non-textual ("block-listed") bytes. */ |
1114 | for (n = 0; n <= 31; n++, black_mask >>= 1) | 1114 | for (n = 0; n <= 31; n++, block_mask >>= 1) |
1115 | if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) | 1115 | if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) |
1116 | return Z_BINARY; | 1116 | return Z_BINARY; |
1117 | 1117 | ||
1118 | /* Check for textual ("white-listed") bytes. */ | 1118 | /* Check for textual ("allow-listed") bytes. */ |
1119 | if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 | 1119 | if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 |
1120 | || s->dyn_ltree[13].Freq != 0) | 1120 | || s->dyn_ltree[13].Freq != 0) |
1121 | return Z_TEXT; | 1121 | return Z_TEXT; |
@@ -1123,7 +1123,7 @@ local int detect_data_type(s) | |||
1123 | if (s->dyn_ltree[n].Freq != 0) | 1123 | if (s->dyn_ltree[n].Freq != 0) |
1124 | return Z_TEXT; | 1124 | return Z_TEXT; |
1125 | 1125 | ||
1126 | /* There are no "black-listed" or "white-listed" bytes: | 1126 | /* There are no "block-listed" or "allow-listed" bytes: |
1127 | * this stream either is empty or has tolerated ("gray-listed") bytes only. | 1127 | * this stream either is empty or has tolerated ("gray-listed") bytes only. |
1128 | */ | 1128 | */ |
1129 | return Z_BINARY; | 1129 | return Z_BINARY; |