From 23ab6cb1f2e12c07cb269798217f924df42bffec Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 28 Feb 2025 08:11:21 -0800 Subject: [PATCH 1/5] Windows support (#24) --- Cargo.toml | 2 +- src/file_util.rs | 33 +++++++++++++++++++++++---------- src/xml_util.rs | 14 +++++++------- src/zip_util.rs | 10 +++++----- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ef678f..b89279f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "docxtools" -version = "0.9.1-SNAPSHOT" +version = "0.10.0-SNAPSHOT" edition = "2021" authors = ["David Bosschaert "] license = "Apache-2.0" diff --git a/src/file_util.rs b/src/file_util.rs index bb42435..08adee4 100644 --- a/src/file_util.rs +++ b/src/file_util.rs @@ -1,24 +1,37 @@ -use std::path::Path; +use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR, Path}; pub struct FileUtil { } impl FileUtil { + pub fn normalize_path(s: &str) -> String { + let src_char = if MAIN_SEPARATOR == '/' { + "\\" + } else { + "/" + }; + + s.replace(src_char, MAIN_SEPARATOR_STR) + } + pub fn get_sub_path(path: &Path, base_dir: &str) -> String { + let nbase_dir = FileUtil::normalize_path(base_dir); + let base; - if base_dir.ends_with("/") { - base = base_dir.to_owned(); + if nbase_dir.ends_with(MAIN_SEPARATOR_STR) { + base = nbase_dir; } else { - base = base_dir.to_owned() + "/"; + base = nbase_dir + MAIN_SEPARATOR_STR; } let sub_path; let full_path = path.to_string_lossy(); - if full_path.starts_with(&base) { - sub_path = &full_path[base.len()..]; + let nfull_path = FileUtil::normalize_path(&full_path); + if nfull_path.starts_with(&base) { + sub_path = &nfull_path[base.len()..]; } else { - sub_path = &full_path; + sub_path = &nfull_path; } sub_path.to_owned() @@ -34,20 +47,20 @@ mod tests { fn test_get_sub_path() { let p = Path::new("/some/where/on/the/rainbow.docx"); let b = "/some/where/on/"; - assert_eq!("the/rainbow.docx", FileUtil::get_sub_path(p, b)); + assert_eq!(FileUtil::normalize_path("the/rainbow.docx"), FileUtil::get_sub_path(p, b)); } #[test] fn test_get_sub_path1() { let p = Path::new("/some/where/on/the/rainbow.docx"); let b = "/some/where/on"; - assert_eq!("the/rainbow.docx", FileUtil::get_sub_path(p, b)); + assert_eq!(FileUtil::normalize_path("the/rainbow.docx"), FileUtil::get_sub_path(p, b)); } #[test] fn test_get_sub_path2() { let b = "/some/where/on/"; let p = Path::new("/elsewhere/cloud.docx"); - assert_eq!("/elsewhere/cloud.docx", FileUtil::get_sub_path(p, b)); + assert_eq!(FileUtil::normalize_path("/elsewhere/cloud.docx"), FileUtil::get_sub_path(p, b)); } } \ No newline at end of file diff --git a/src/xml_util.rs b/src/xml_util.rs index 1620a75..fc705ed 100644 --- a/src/xml_util.rs +++ b/src/xml_util.rs @@ -7,7 +7,7 @@ use regex::Regex; use std::collections::{BTreeMap, HashMap}; use std::fs::{File, self}; use std::io::{BufReader, BufWriter}; -use std::path::{Path, PathBuf}; +use std::path::{Path, PathBuf, MAIN_SEPARATOR, MAIN_SEPARATOR_STR}; use std::str; use uuid::Uuid; use unicase::UniCase; @@ -154,7 +154,7 @@ impl XMLUtil { let mut rels_files = vec!(); for f in files { - let last_slash = f.rfind('/').expect(&f); + let last_slash = f.rfind(MAIN_SEPARATOR).expect(&f); let mut new_fn = String::new(); new_fn.push_str(&f[..last_slash]); new_fn.push_str("/_"); @@ -162,7 +162,7 @@ impl XMLUtil { new_fn.push_str(&f[last_slash..]); new_fn.push('.'); new_fn.push_str(rels_extension); - rels_files.push(new_fn); + rels_files.push(FileUtil::normalize_path(&new_fn)); } rels_files @@ -175,9 +175,9 @@ impl XMLUtil { /// `pattern` and `replacement` are used to search/replace operations. /// `output_file` optionally specifies a different output file for replacement operations. fn snr_xml(mode: Mode, dir: &str, src_file: &str, files: Option>, output_file: Option<&str>) { - let mut base_dir = dir.to_owned(); - if !dir.ends_with("/") { - base_dir.push('/'); + let mut base_dir = FileUtil::normalize_path(dir); + if !base_dir.ends_with(MAIN_SEPARATOR_STR) { + base_dir.push(MAIN_SEPARATOR); } for entry in WalkDir::new(dir).into_iter() @@ -838,7 +838,7 @@ impl XMLUtil { } else { rel_pn = pn; } - mappings.insert(rel_pn.to_string(), + mappings.insert(FileUtil::normalize_path(rel_pn), str::from_utf8(cv.value.as_ref()).unwrap().to_string()); } } diff --git a/src/zip_util.rs b/src/zip_util.rs index 39e079a..bf285e4 100644 --- a/src/zip_util.rs +++ b/src/zip_util.rs @@ -151,7 +151,7 @@ impl ZipUtil { mod tests { use crate::file_util::FileUtil; use super::ZipUtil; - use std::{path::Path, fs, io}; + use std::{path::MAIN_SEPARATOR_STR, path::Path, fs, io}; use walkdir::WalkDir; use testdir::testdir; @@ -165,13 +165,13 @@ mod tests { let wd = WalkDir::new(&outdir); let extracts: Vec = wd.into_iter() .map(|e| FileUtil::get_sub_path(&e.unwrap().path(), &outdir.to_string_lossy())) - .filter(|e| !e.starts_with("/")) + .filter(|e| !e.starts_with(MAIN_SEPARATOR_STR)) .filter(|e| e.contains('.')) .collect(); assert!(extracts.contains(&"foo.test.txt".into())); assert!(extracts.contains(&"empty.file".into())); - assert!(extracts.contains(&"sub/sub/[Content_Types].xml".into())); + assert!(extracts.contains(&FileUtil::normalize_path("sub/sub/[Content_Types].xml"))); assert_eq!(3, extracts.len(), "Should be only 3 files"); let empty_file = Path::new(&outdir).join("empty.file"); @@ -209,14 +209,14 @@ mod tests { let extracts: Vec = WalkDir::new(&expldir).into_iter() .map(|e| FileUtil::get_sub_path(&e.unwrap().path(), &expldir.to_string_lossy())) - .filter(|e| !e.starts_with("/")) + .filter(|e| !e.starts_with(MAIN_SEPARATOR_STR)) .filter(|e| e.contains('.')) .collect(); assert_eq!(3, extracts.len()); assert!(extracts.contains(&"foo.test.txt".into())); assert!(extracts.contains(&"empty.file".into())); - assert!(extracts.contains(&"sub/sub/[Content_Types].xml".into())); + assert!(extracts.contains(&FileUtil::normalize_path("sub/sub/[Content_Types].xml"))); let empty_file = Path::new(&expldir).join("empty.file"); assert!(empty_file.is_file()); From 17f68d8567165f606042d564385fabd19224733a Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 28 Feb 2025 16:14:16 +0000 Subject: [PATCH 2/5] Move to version 0.10.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b89279f..59218e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "docxtools" -version = "0.10.0-SNAPSHOT" +version = "0.10.0" edition = "2021" authors = ["David Bosschaert "] license = "Apache-2.0" From 6e4666f74d61796d687a12a431e4f95fa14d1a7b Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Tue, 11 Mar 2025 05:26:13 -0700 Subject: [PATCH 3/5] Add test to show how to match for an ending link pattern (#26) --- src/test/test_tree7/[Content_Types].xml | 20 ++ .../test_tree7/word/_rels/document2.xml.rels | 10 + src/test/test_tree7/word/document2.xml | 305 ++++++++++++++++++ src/xml_util.rs | 30 ++ 4 files changed, 365 insertions(+) create mode 100644 src/test/test_tree7/[Content_Types].xml create mode 100644 src/test/test_tree7/word/_rels/document2.xml.rels create mode 100644 src/test/test_tree7/word/document2.xml diff --git a/src/test/test_tree7/[Content_Types].xml b/src/test/test_tree7/[Content_Types].xml new file mode 100644 index 0000000..bb8704e --- /dev/null +++ b/src/test/test_tree7/[Content_Types].xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/test/test_tree7/word/_rels/document2.xml.rels b/src/test/test_tree7/word/_rels/document2.xml.rels new file mode 100644 index 0000000..c303745 --- /dev/null +++ b/src/test/test_tree7/word/_rels/document2.xml.rels @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/src/test/test_tree7/word/document2.xml b/src/test/test_tree7/word/document2.xml new file mode 100644 index 0000000..ad6bc7e --- /dev/null +++ b/src/test/test_tree7/word/document2.xml @@ -0,0 +1,305 @@ + + + + + + + + + + + + + + Testing 123 + + + + + + + + + + + + + And some some more text + + + + + + and then some + + + + + + + + + + + + + + + + + + + + Something here + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Blah + + + + + + + + + + + + + + + + + + some + + + + + + + + + + + + + + + + + + None + + + + + + + + + + + + + + + + + + + + Some + + + + + + + some + + + + + + + + + + + + + + some + + + + + + + + + + + + + + + + + + + hello + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Here’s a hyperlink: + + + + + + + + link + + + + + + + + + + + + + + + + + + + + + And here’s just some text: + + + + + + + www.example.com/a + + + + + + www.example.com/a/b + + + + + + + + + + + \ No newline at end of file diff --git a/src/xml_util.rs b/src/xml_util.rs index fc705ed..2f3a04b 100644 --- a/src/xml_util.rs +++ b/src/xml_util.rs @@ -1210,6 +1210,36 @@ mod tests { Ok(()) } + #[test] + fn test_replace_hyperlink2() -> io::Result<()> { + let orgdir = "./src/test/test_tree7"; + let testdir = testdir!(); + + copy_dir_all(orgdir, &testdir)?; + + let before_doc = fs::read_to_string("./src/test/test_tree7/word/document2.xml")?; + let before = fs::read_to_string("./src/test/test_tree7/word/_rels/document2.xml.rels")?; + + assert!(before.contains("Target=\"http://www.example.com/a\""), "Precondition"); + assert!(before_doc.contains(">www.example.com/a<"), "Precondition"); + assert!(before.contains("Target=\"http://www.example.com/a/b\""), "Precondition"); + assert!(before_doc.contains(">www.example.com/a/b<"), "Precondition"); + + XMLUtil::replace_rel_attr(&testdir.to_string_lossy(), "my-source.docx", + "www.example.com/a$", "www.example.com/a/", + &Some(&testdir.join("output-2.docx").to_string_lossy())); + + let after_doc = fs::read_to_string(testdir.join("word/document2.xml"))?; + let after = fs::read_to_string(testdir.join("word/_rels/document2.xml.rels"))?; + + assert!(after.contains("Target=\"http://www.example.com/a/\"")); + assert!(after_doc.contains(">www.example.com/a<"), "Should not have changed the document text"); + assert!(after.contains("Target=\"http://www.example.com/a/b\""), "Should not have changed this link"); + assert!(after_doc.contains(">www.example.com/a/b<"), "Should not have changed the document text"); + + Ok(()) + } + #[test] fn test_replace_both() -> io::Result<()> { let orgdir = "./src/test/test_tree3"; From d2a3eff17b769fb8475804108fa96777414593d3 Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Wed, 9 Apr 2025 12:35:04 +0100 Subject: [PATCH 4/5] Move to 0.11.0-SNAPSHOT --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 59218e7..81d9b4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "docxtools" -version = "0.10.0" +version = "0.11.0-SNAPSHOT" edition = "2021" authors = ["David Bosschaert "] license = "Apache-2.0" From 494385d52bb10d4527c62a8867c75487f80be351 Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Tue, 15 Apr 2025 14:12:46 +0100 Subject: [PATCH 5/5] Update dependencies --- Cargo.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 81d9b4d..fd19d08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "docxtools" version = "0.11.0-SNAPSHOT" edition = "2021" -authors = ["David Bosschaert "] +authors = ["David Bosschaert ", "contributors"] license = "Apache-2.0" description = "A command-line tool to work with docx files, for example to make bulk-changes in them without the need to open a word processor." readme = "README.md" @@ -10,20 +10,20 @@ keywords = ["cli", "docx"] categories = ["command-line-utilities"] [dependencies] -regex = "1.9" +regex = "1.11" tempfile = "3.8" zip = "2.2.1" walkdir = "2.3" unicode-bom = "2.0" clap = { version = "4.3.19", features = ["derive"] } -quick-xml = "0.31" +quick-xml = "0.37" uuid = { version = "1.6", features = ["v4"] } unicase = "2.7" [dev-dependencies] gag = "1.0" -serial_test = "2.0" -testdir = "0.8" +serial_test = "3.2" +testdir = "0.9" [profile.release] opt-level = 'z' # Optimize for size.