summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Adler <madler@alumni.caltech.edu> 2021-12-31 16:57:07 -0800
committer Mark Adler <madler@alumni.caltech.edu> 2021-12-31 16:57:07 -0800
commit 8678871f18f4dd51101a9db1e37791f975969079 (patch)
tree 4db677c163317d56fefa7f52aaa440271fe4c7eb
parent c3f3043f7aa80750245f8166a338c4877020b589 (diff)
download zlib-8678871f18f4dd51101a9db1e37791f975969079.tar.gz
zlib-8678871f18f4dd51101a9db1e37791f975969079.tar.bz2
zlib-8678871f18f4dd51101a9db1e37791f975969079.zip
Replace black/white with allow/block. (theresa-m)
-rw-r--r-- doc/txtvsbin.txt 12
-rw-r--r-- trees.c 18
2 files changed, 15 insertions, 15 deletions
diff --git a/doc/txtvsbin.txt b/doc/txtvsbin.txt
index 3d0f063..2a901ea 100644
--- a/doc/txtvsbin.txt
+++ b/doc/txtvsbin.txt
@@ -38,15 +38,15 @@ The Algorithm
38 38
39The algorithm works by dividing the set of bytecodes [0..255] into three 39The algorithm works by dividing the set of bytecodes [0..255] into three
40categories: 40categories:
41- The white list of textual bytecodes: 41- The allow list of textual bytecodes:
42 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255. 42 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255.
43- The gray list of tolerated bytecodes: 43- The gray list of tolerated bytecodes:
44 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC). 44 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC).
45- The black list of undesired, non-textual bytecodes: 45- The block list of undesired, non-textual bytecodes:
46 0 (NUL) to 6, 14 to 25, 28 to 31. 46 0 (NUL) to 6, 14 to 25, 28 to 31.
47 47
48If a file contains at least one byte that belongs to the white list and 48If a file contains at least one byte that belongs to the allow list and
49no byte that belongs to the black list, then the file is categorized as 49no byte that belongs to the block list, then the file is categorized as
50plain text; otherwise, it is categorized as binary. (The boundary case, 50plain text; otherwise, it is categorized as binary. (The boundary case,
51when the file is empty, automatically falls into the latter category.) 51when the file is empty, automatically falls into the latter category.)
52 52
@@ -84,9 +84,9 @@ consistent results, regardless of what alphabet encoding is being used.
84results on a text encoded, say, using ISO-8859-16 versus UTF-8.) 84results on a text encoded, say, using ISO-8859-16 versus UTF-8.)
85 85
86There is an extra category of plain text files that are "polluted" with 86There is an extra category of plain text files that are "polluted" with
87one or more black-listed codes, either by mistake or by peculiar design 87one or more block-listed codes, either by mistake or by peculiar design
88considerations. In such cases, a scheme that tolerates a small fraction 88considerations. In such cases, a scheme that tolerates a small fraction
89of black-listed codes would provide an increased recall (i.e. more true 89of block-listed codes would provide an increased recall (i.e. more true
90positives). This, however, incurs a reduced precision overall, since 90positives). This, however, incurs a reduced precision overall, since
91false positives are more likely to appear in binary files that contain 91false positives are more likely to appear in binary files that contain
92large chunks of textual data. Furthermore, "polluted" plain text should 92large chunks of textual data. Furthermore, "polluted" plain text should
diff --git a/trees.c b/trees.c
index decaeb7..6896067 100644
--- a/trees.c
+++ b/trees.c
@@ -1091,9 +1091,9 @@ local void compress_block(s, ltree, dtree)
1091 * Check if the data type is TEXT or BINARY, using the following algorithm: 1091 * Check if the data type is TEXT or BINARY, using the following algorithm:
1092 * - TEXT if the two conditions below are satisfied: 1092 * - TEXT if the two conditions below are satisfied:
1093 * a) There are no non-portable control characters belonging to the 1093 * a) There are no non-portable control characters belonging to the
1094 * "black list" (0..6, 14..25, 28..31). 1094 * "block list" (0..6, 14..25, 28..31).
1095 * b) There is at least one printable character belonging to the 1095 * b) There is at least one printable character belonging to the
1096 * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). 1096 * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1097 * - BINARY otherwise. 1097 * - BINARY otherwise.
1098 * - The following partially-portable control characters form a 1098 * - The following partially-portable control characters form a
1099 * "gray list" that is ignored in this detection algorithm: 1099 * "gray list" that is ignored in this detection algorithm:
@@ -1103,19 +1103,19 @@ local void compress_block(s, ltree, dtree)
1103local int detect_data_type(s) 1103local int detect_data_type(s)
1104 deflate_state *s; 1104 deflate_state *s;
1105{ 1105{
1106 /* black_mask is the bit mask of black-listed bytes 1106 /* block_mask is the bit mask of block-listed bytes
1107 * set bits 0..6, 14..25, and 28..31 1107 * set bits 0..6, 14..25, and 28..31
1108 * 0xf3ffc07f = binary 11110011111111111100000001111111 1108 * 0xf3ffc07f = binary 11110011111111111100000001111111
1109 */ 1109 */
1110 unsigned long black_mask = 0xf3ffc07fUL; 1110 unsigned long block_mask = 0xf3ffc07fUL;
1111 int n; 1111 int n;
1112 1112
1113 /* Check for non-textual ("black-listed") bytes. */ 1113 /* Check for non-textual ("block-listed") bytes. */
1114 for (n = 0; n <= 31; n++, black_mask >>= 1) 1114 for (n = 0; n <= 31; n++, block_mask >>= 1)
1115 if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) 1115 if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
1116 return Z_BINARY; 1116 return Z_BINARY;
1117 1117
1118 /* Check for textual ("white-listed") bytes. */ 1118 /* Check for textual ("allow-listed") bytes. */
1119 if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 1119 if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
1120 || s->dyn_ltree[13].Freq != 0) 1120 || s->dyn_ltree[13].Freq != 0)
1121 return Z_TEXT; 1121 return Z_TEXT;
@@ -1123,7 +1123,7 @@ local int detect_data_type(s)
1123 if (s->dyn_ltree[n].Freq != 0) 1123 if (s->dyn_ltree[n].Freq != 0)
1124 return Z_TEXT; 1124 return Z_TEXT;
1125 1125
1126 /* There are no "black-listed" or "white-listed" bytes: 1126 /* There are no "block-listed" or "allow-listed" bytes:
1127 * this stream either is empty or has tolerated ("gray-listed") bytes only. 1127 * this stream either is empty or has tolerated ("gray-listed") bytes only.
1128 */ 1128 */
1129 return Z_BINARY; 1129 return Z_BINARY;