text_metadata.rs source code [crates/png/src/text_metadata.rs]

1	//! # Text chunks (tEXt/zTXt/iTXt) structs and functions
2	//!
3	//! The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#11textinfo) optionally allows for
4	//! embedded text chunks in the file. They may appear either before or after the image data
5	//! chunks. There are three kinds of text chunks.
6	//! - `tEXt`: This has a `keyword` and `text` field, and is ISO 8859-1 encoded.
7	//! - `zTXt`: This is semantically the same as `tEXt`, i.e. it has the same fields and
8	//! encoding, but the `text` field is compressed before being written into the PNG file.
9	//! - `iTXt`: This chunk allows for its `text` field to be any valid UTF-8, and supports
10	//! compression of the text field as well.
11	//!
12	//! The `ISO 8859-1` encoding technically doesn't allow any control characters
13	//! to be used, but in practice these values are encountered anyway. This can
14	//! either be the extended `ISO-8859-1` encoding with control characters or the
15	//! `Windows-1252` encoding. This crate assumes the `ISO-8859-1` encoding is
16	//! used.
17	//!
18	//! ## Reading text chunks
19	//!
//! As a PNG is decoded, any text chunk encountered is appended to the
21	//! [`Info`](`crate::common::Info`) struct, in the `uncompressed_latin1_text`,
22	//! `compressed_latin1_text`, and the `utf8_text` fields depending on whether the encountered
23	//! chunk is `tEXt`, `zTXt`, or `iTXt`.
24	//!
25	//! ```
26	//! use std::fs::File;
27	//! use std::iter::FromIterator;
28	//! use std::path::PathBuf;
29	//!
30	//! // Opening a png file that has a zTXt chunk
31	//! let decoder = png::Decoder::new(
32	//! File::open(PathBuf::from_iter([
33	//! "tests",
34	//! "text_chunk_examples",
35	//! "ztxt_example.png",
36	//! ]))
37	//! .unwrap(),
38	//! );
39	//! let mut reader = decoder.read_info().unwrap();
40	//! // If the text chunk is before the image data frames, `reader.info()` already contains the text.
41	//! for text_chunk in &reader.info().compressed_latin1_text {
42	//! println!("{:?}", text_chunk.keyword); // Prints the keyword
43	//! println!("{:#?}", text_chunk); // Prints out the text chunk.
44	//! // To get the uncompressed text, use the `get_text` method.
45	//! println!("{}", text_chunk.get_text().unwrap());
46	//! }
47	//! ```
48	//!
49	//! ## Writing text chunks
50	//!
51	//! There are two ways to write text chunks: the first is to add the appropriate text structs directly to the encoder header before the header is written to file.
52	//! To add a text chunk at any point in the stream, use the `write_text_chunk` method.
53	//!
54	//! ```
55	//! # use png::text_metadata::{ITXtChunk, ZTXtChunk};
56	//! # use std::env;
57	//! # use std::fs::File;
58	//! # use std::io::BufWriter;
59	//! # use std::iter::FromIterator;
60	//! # use std::path::PathBuf;
61	//! # let file = File::create(PathBuf::from_iter(["target", "text_chunk.png"])).unwrap();
62	//! # let ref mut w = BufWriter::new(file);
//! let mut encoder = png::Encoder::new(w, 2, 1); // Width is 2 pixels and height is 1.
64	//! encoder.set_color(png::ColorType::Rgba);
65	//! encoder.set_depth(png::BitDepth::Eight);
66	//! // Adding text chunks to the header
67	//! encoder
68	//! .add_text_chunk(
69	//! "Testing tEXt".to_string(),
70	//! "This is a tEXt chunk that will appear before the IDAT chunks.".to_string(),
71	//! )
72	//! .unwrap();
73	//! encoder
74	//! .add_ztxt_chunk(
75	//! "Testing zTXt".to_string(),
76	//! "This is a zTXt chunk that is compressed in the png file.".to_string(),
77	//! )
78	//! .unwrap();
79	//! encoder
80	//! .add_itxt_chunk(
81	//! "Testing iTXt".to_string(),
82	//! "iTXt chunks support all of UTF8. Example: हिंदी.".to_string(),
83	//! )
84	//! .unwrap();
85	//!
86	//! let mut writer = encoder.write_header().unwrap();
87	//!
//! let data = [255, 0, 0, 255, 0, 0, 0, 255]; // An array containing a RGBA sequence. First pixel is red and second pixel is black.
89	//! writer.write_image_data(&data).unwrap(); // Save
90	//!
91	//! // We can add a tEXt/zTXt/iTXt at any point before the encoder is dropped from scope. These chunks will be at the end of the png file.
92	//! let tail_ztxt_chunk = ZTXtChunk::new("Comment".to_string(), "A zTXt chunk after the image data.".to_string());
93	//! writer.write_text_chunk(&tail_ztxt_chunk).unwrap();
94	//!
95	//! // The fields of the text chunk are public, so they can be mutated before being written to the file.
96	//! let mut tail_itxt_chunk = ITXtChunk::new("Author".to_string(), "सायंतन खान".to_string());
//! tail_itxt_chunk.compressed = true;
98	//! tail_itxt_chunk.language_tag = "hi".to_string();
99	//! tail_itxt_chunk.translated_keyword = "लेखक".to_string();
100	//! writer.write_text_chunk(&tail_itxt_chunk).unwrap();
101	//! ```
102
103	#![warn(missing_docs)]
104
105	use crate::{chunk, encoder, DecodingError, EncodingError};
106	use fdeflate::BoundedDecompressionError;
107	use flate2::write::ZlibEncoder;
108	use flate2::Compression;
109	use std::{convert::TryFrom, io::Write};
110
/// Default upper bound, in bytes, on the decompressed size of a compressed text chunk (2 MiB).
pub const DECOMPRESSION_LIMIT: usize = 2 * 1024 * 1024;
113
/// Text encoding errors that are wrapped by the standard `EncodingError` type.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextEncodingError {
    /// The string contains characters that cannot be represented in the target encoding
    Unrepresentable,
    /// Keyword longer than 79 bytes or empty
    InvalidKeywordSize,
    /// Error encountered while compressing text
    CompressionError,
}
124
/// Text decoding errors that are wrapped by the standard `DecodingError` type.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TextDecodingError {
    /// Unrepresentable characters in string
    Unrepresentable,
    /// Keyword longer than 79 bytes or empty
    InvalidKeywordSize,
    /// Missing null separator
    MissingNullSeparator,
    /// Compressed text cannot be uncompressed
    InflationError,
    /// Needs more space to decompress
    OutOfDecompressionSpace,
    /// Using an unspecified value for the compression method
    InvalidCompressionMethod,
    /// Using a byte that is not 0 or 1 as compression flag in iTXt chunk
    InvalidCompressionFlag,
    /// Missing the compression flag
    MissingCompressionFlag,
}
145
/// A generalized text chunk trait, implemented by every text chunk type that can be written
/// to a PNG stream.
pub trait EncodableTextChunk {
    /// Serializes the text chunk and writes it to `w`.
    ///
    /// Returns an `EncodingError` if the chunk cannot be encoded or written.
    fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError>;
}
151
/// Struct representing a tEXt chunk: a keyword plus uncompressed Latin-1 text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TEXtChunk {
    /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Text field of tEXt chunk. Can be at most 2GB.
    pub text: String,
}
160
/// Decodes ISO 8859-1 (Latin-1) bytes into a `String`.
///
/// Every byte maps to the Unicode code point with the same numeric value, so this
/// conversion cannot fail.
fn decode_iso_8859_1(text: &[u8]) -> String {
    let mut decoded = String::with_capacity(text.len());
    for &byte in text {
        decoded.push(char::from(byte));
    }
    decoded
}
164
165	pub(crate) fn encode_iso_8859_1(text: &str) -> Result<Vec<u8>, TextEncodingError> {
166	encode_iso_8859_1_iter(text).collect()
167	}
168
169	fn encode_iso_8859_1_into(buf: &mut Vec<u8>, text: &str) -> Result<(), TextEncodingError> {
170	for b: Result in encode_iso_8859_1_iter(text) {
171	buf.push(b?);
172	}
173	Ok(())
174	}
175
176	fn encode_iso_8859_1_iter(text: &str) -> impl Iterator<Item = Result<u8, TextEncodingError>> + '_ {
177	textChars<'_>.chars()
178	.map(\|c: char\| u8::try_from(c as u32).map_err(\|_\| TextEncodingError::Unrepresentable))
179	}
180
181	fn decode_ascii(text: &[u8]) -> Result<&str, TextDecodingError> {
182	if text.is_ascii() {
183	// `from_utf8` cannot panic because we're already checked that `text` is ASCII-7.
184	// And this is the only safe way to get ASCII-7 string from `&[u8]`.
185	Ok(std::str::from_utf8(text).expect(msg:"unreachable"))
186	} else {
187	Err(TextDecodingError::Unrepresentable)
188	}
189	}
190
191	impl TEXtChunk {
192	/// Constructs a new TEXtChunk.
193	/// Not sure whether it should take &str or String.
194	pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
195	Self {
196	keyword: keyword.into(),
197	text: text.into(),
198	}
199	}
200
201	/// Decodes a slice of bytes to a String using Latin-1 decoding.
202	/// The decoder runs in strict mode, and any decoding errors are passed along to the caller.
203	pub(crate) fn decode(
204	keyword_slice: &[u8],
205	text_slice: &[u8],
206	) -> Result<Self, TextDecodingError> {
207	if keyword_slice.is_empty() \|\| keyword_slice.len() > `79` {
208	return Err(TextDecodingError::InvalidKeywordSize);
209	}
210
211	Ok(Self {
212	keyword: decode_iso_8859_1(keyword_slice),
213	text: decode_iso_8859_1(text_slice),
214	})
215	}
216	}
217
218	impl EncodableTextChunk for TEXtChunk {
219	/// Encodes TEXtChunk to a Writer. The keyword and text are separated by a byte of zeroes.
220	fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
221	let mut data: Vec = encode_iso_8859_1(&self.keyword)?;
222
223	if data.is_empty() \|\| data.len() > `79` {
224	return Err(TextEncodingError::InvalidKeywordSize.into());
225	}
226
227	data.push(`0`);
228
229	encode_iso_8859_1_into(&mut data, &self.text)?;
230
231	encoder::write_chunk(w, name:chunk::tEXt, &data)
232	}
233	}
234
/// Struct representing a zTXt chunk
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ZTXtChunk {
    /// Keyword field of the zTXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Text field of the zTXt chunk, held either as compressed bytes or as an uncompressed string.
    text: OptCompressed,
}
243
/// Private enum encoding the compressed and uncompressed states of zTXt/iTXt text field.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OptCompressed {
    /// Compressed version of text field. Can be at most 2GB.
    Compressed(Vec<u8>),
    /// Uncompressed text field.
    Uncompressed(String),
}
252
253	impl ZTXtChunk {
254	/// Creates a new ZTXt chunk.
255	pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
256	Self {
257	keyword: keyword.into(),
258	text: OptCompressed::Uncompressed(text.into()),
259	}
260	}
261
262	pub(crate) fn decode(
263	keyword_slice: &[u8],
264	compression_method: u8,
265	text_slice: &[u8],
266	) -> Result<Self, TextDecodingError> {
267	if keyword_slice.is_empty() \|\| keyword_slice.len() > `79` {
268	return Err(TextDecodingError::InvalidKeywordSize);
269	}
270
271	if compression_method != `0` {
272	return Err(TextDecodingError::InvalidCompressionMethod);
273	}
274
275	Ok(Self {
276	keyword: decode_iso_8859_1(keyword_slice),
277	text: OptCompressed::Compressed(text_slice.to_vec()),
278	})
279	}
280
281	/// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
282	pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
283	self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
284	}
285
286	/// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
287	pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
288	match &self.text {
289	OptCompressed::Compressed(v) => {
290	let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(&v[..], limit) {
291	Ok(s) => s,
292	Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
293	return Err(DecodingError::from(
294	TextDecodingError::OutOfDecompressionSpace,
295	));
296	}
297	Err(_) => {
298	return Err(DecodingError::from(TextDecodingError::InflationError));
299	}
300	};
301	self.text = OptCompressed::Uncompressed(decode_iso_8859_1(&uncompressed_raw));
302	}
303	OptCompressed::Uncompressed(_) => {}
304	};
305	Ok(())
306	}
307
308	/// Decompresses the inner text, and returns it as a `String`.
309	/// If decompression uses more the 2MiB, first call decompress with limit, and then this method.
310	pub fn get_text(&self) -> Result<String, DecodingError> {
311	match &self.text {
312	OptCompressed::Compressed(v) => {
313	let uncompressed_raw = fdeflate::decompress_to_vec(v)
314	.map_err(\|_\| DecodingError::from(TextDecodingError::InflationError))?;
315	Ok(decode_iso_8859_1(&uncompressed_raw))
316	}
317	OptCompressed::Uncompressed(s) => Ok(s.clone()),
318	}
319	}
320
321	/// Compresses the inner text, mutating its own state.
322	pub fn compress_text(&mut self) -> Result<(), EncodingError> {
323	match &self.text {
324	OptCompressed::Uncompressed(s) => {
325	let uncompressed_raw = encode_iso_8859_1(s)?;
326	let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
327	encoder
328	.write_all(&uncompressed_raw)
329	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?;
330	self.text = OptCompressed::Compressed(
331	encoder
332	.finish()
333	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?,
334	);
335	}
336	OptCompressed::Compressed(_) => {}
337	}
338
339	Ok(())
340	}
341	}
342
343	impl EncodableTextChunk for ZTXtChunk {
344	fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
345	let mut data = encode_iso_8859_1(&self.keyword)?;
346
347	if data.is_empty() \|\| data.len() > `79` {
348	return Err(TextEncodingError::InvalidKeywordSize.into());
349	}
350
351	// Null separator
352	data.push(`0`);
353
354	// Compression method: the only valid value is 0, as of 2021.
355	data.push(`0`);
356
357	match &self.text {
358	OptCompressed::Compressed(v) => {
359	data.extend_from_slice(&v[..]);
360	}
361	OptCompressed::Uncompressed(s) => {
362	// This code may have a bug. Check for correctness.
363	let uncompressed_raw = encode_iso_8859_1(s)?;
364	let mut encoder = ZlibEncoder::new(data, Compression::fast());
365	encoder
366	.write_all(&uncompressed_raw)
367	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?;
368	data = encoder
369	.finish()
370	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?;
371	}
372	};
373
374	encoder::write_chunk(w, chunk::zTXt, &data)
375	}
376	}
377
/// Struct encoding an iTXt chunk
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ITXtChunk {
    /// The keyword field. This needs to be between 1-79 bytes when encoded as Latin-1.
    pub keyword: String,
    /// Indicates whether the text will be (or was) compressed in the PNG.
    pub compressed: bool,
    /// A hyphen separated list of languages that the keyword is translated to. This is ASCII-7 encoded.
    pub language_tag: String,
    /// Translated keyword. This is UTF-8 encoded.
    pub translated_keyword: String,
    /// Text field of the iTXt chunk, held either as compressed bytes or as an uncompressed string.
    text: OptCompressed,
}
392
393	impl ITXtChunk {
394	/// Constructs a new iTXt chunk. Leaves all but keyword and text to default values.
395	pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self {
396	Self {
397	keyword: keyword.into(),
398	compressed: `false`,
399	language_tag: "".to_string(),
400	translated_keyword: "".to_string(),
401	text: OptCompressed::Uncompressed(text.into()),
402	}
403	}
404
405	pub(crate) fn decode(
406	keyword_slice: &[u8],
407	compression_flag: u8,
408	compression_method: u8,
409	language_tag_slice: &[u8],
410	translated_keyword_slice: &[u8],
411	text_slice: &[u8],
412	) -> Result<Self, TextDecodingError> {
413	if keyword_slice.is_empty() \|\| keyword_slice.len() > `79` {
414	return Err(TextDecodingError::InvalidKeywordSize);
415	}
416	let keyword = decode_iso_8859_1(keyword_slice);
417
418	let compressed = match compression_flag {
419	`0` => `false`,
420	`1` => `true`,
421	_ => return Err(TextDecodingError::InvalidCompressionFlag),
422	};
423
424	if compressed && compression_method != `0` {
425	return Err(TextDecodingError::InvalidCompressionMethod);
426	}
427
428	let language_tag = decode_ascii(language_tag_slice)?.to_owned();
429
430	let translated_keyword = std::str::from_utf8(translated_keyword_slice)
431	.map_err(\|_\| TextDecodingError::Unrepresentable)?
432	.to_string();
433	let text = if compressed {
434	OptCompressed::Compressed(text_slice.to_vec())
435	} else {
436	OptCompressed::Uncompressed(
437	String::from_utf8(text_slice.to_vec())
438	.map_err(\|_\| TextDecodingError::Unrepresentable)?,
439	)
440	};
441
442	Ok(Self {
443	keyword,
444	compressed,
445	language_tag,
446	translated_keyword,
447	text,
448	})
449	}
450
451	/// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes.
452	pub fn decompress_text(&mut self) -> Result<(), DecodingError> {
453	self.decompress_text_with_limit(DECOMPRESSION_LIMIT)
454	}
455
456	/// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes.
457	pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> {
458	match &self.text {
459	OptCompressed::Compressed(v) => {
460	let uncompressed_raw = match fdeflate::decompress_to_vec_bounded(v, limit) {
461	Ok(s) => s,
462	Err(BoundedDecompressionError::OutputTooLarge { .. }) => {
463	return Err(DecodingError::from(
464	TextDecodingError::OutOfDecompressionSpace,
465	));
466	}
467	Err(_) => {
468	return Err(DecodingError::from(TextDecodingError::InflationError));
469	}
470	};
471	self.text = OptCompressed::Uncompressed(
472	String::from_utf8(uncompressed_raw)
473	.map_err(\|_\| TextDecodingError::Unrepresentable)?,
474	);
475	}
476	OptCompressed::Uncompressed(_) => {}
477	};
478	Ok(())
479	}
480
481	/// Decompresses the inner text, and returns it as a `String`.
482	/// If decompression takes more than 2 MiB, try `decompress_text_with_limit` followed by this method.
483	pub fn get_text(&self) -> Result<String, DecodingError> {
484	match &self.text {
485	OptCompressed::Compressed(v) => {
486	let uncompressed_raw = fdeflate::decompress_to_vec(v)
487	.map_err(\|_\| DecodingError::from(TextDecodingError::InflationError))?;
488	String::from_utf8(uncompressed_raw)
489	.map_err(\|_\| TextDecodingError::Unrepresentable.into())
490	}
491	OptCompressed::Uncompressed(s) => Ok(s.clone()),
492	}
493	}
494
495	/// Compresses the inner text, mutating its own state.
496	pub fn compress_text(&mut self) -> Result<(), EncodingError> {
497	match &self.text {
498	OptCompressed::Uncompressed(s) => {
499	let uncompressed_raw = s.as_bytes();
500	let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast());
501	encoder
502	.write_all(uncompressed_raw)
503	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?;
504	self.text = OptCompressed::Compressed(
505	encoder
506	.finish()
507	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?,
508	);
509	}
510	OptCompressed::Compressed(_) => {}
511	}
512
513	Ok(())
514	}
515	}
516
517	impl EncodableTextChunk for ITXtChunk {
518	fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> {
519	// Keyword
520	let mut data = encode_iso_8859_1(&self.keyword)?;
521
522	if data.is_empty() \|\| data.len() > `79` {
523	return Err(TextEncodingError::InvalidKeywordSize.into());
524	}
525
526	// Null separator
527	data.push(`0`);
528
529	// Compression flag
530	if self.compressed {
531	data.push(`1`);
532	} else {
533	data.push(`0`);
534	}
535
536	// Compression method
537	data.push(`0`);
538
539	// Language tag
540	if !self.language_tag.is_ascii() {
541	return Err(EncodingError::from(TextEncodingError::Unrepresentable));
542	}
543	data.extend(self.language_tag.as_bytes());
544
545	// Null separator
546	data.push(`0`);
547
548	// Translated keyword
549	data.extend_from_slice(self.translated_keyword.as_bytes());
550
551	// Null separator
552	data.push(`0`);
553
554	// Text
555	if self.compressed {
556	match &self.text {
557	OptCompressed::Compressed(v) => {
558	data.extend_from_slice(&v[..]);
559	}
560	OptCompressed::Uncompressed(s) => {
561	let uncompressed_raw = s.as_bytes();
562	let mut encoder = ZlibEncoder::new(data, Compression::fast());
563	encoder
564	.write_all(uncompressed_raw)
565	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?;
566	data = encoder
567	.finish()
568	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?;
569	}
570	}
571	} else {
572	match &self.text {
573	OptCompressed::Compressed(v) => {
574	let uncompressed_raw = fdeflate::decompress_to_vec(v)
575	.map_err(\|_\| EncodingError::from(TextEncodingError::CompressionError))?;
576	data.extend_from_slice(&uncompressed_raw[..]);
577	}
578	OptCompressed::Uncompressed(s) => {
579	data.extend_from_slice(s.as_bytes());
580	}
581	}
582	}
583
584	encoder::write_chunk(w, chunk::iTXt, &data)
585	}
586	}
587