From d16bc36e83155c01ad8d82795fd1815ee92a6b55 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Wed, 13 May 2026 09:32:11 +0200 Subject: [PATCH 1/4] Use relative paths in tree-sitter extractor diagnostics Diagnostic `location.file` entries were using absolute paths (e.g. `/home/runner/work/...`), causing broken links in the GitHub UI. Now relativize against CWD (the source root during extraction), falling back to a properly percent-encoded `file:` URI for paths outside it. Fixes https://github.com/github/codeql/issues/21802 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 1 + ql/Cargo.lock | 527 +++++++++++++++--- ruby/extractor/src/extractor.rs | 7 +- .../syntax-error/diagnostics.expected | 4 +- .../unknown-encoding/diagnostics.expected | 2 +- shared/tree-sitter-extractor/Cargo.toml | 1 + .../src/extractor/mod.rs | 18 +- .../tree-sitter-extractor/src/file_paths.rs | 60 ++ 8 files changed, 521 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4fab55a6444f..171f4b52c420 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -456,6 +456,7 @@ dependencies = [ "tree-sitter", "tree-sitter-json", "tree-sitter-ql", + "url", "yeast", "zstd", ] diff --git a/ql/Cargo.lock b/ql/Cargo.lock index 6632bf162eec..ba31581cc233 100644 --- a/ql/Cargo.lock +++ b/ql/Cargo.lock @@ -17,12 +17,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -119,6 +113,8 @@ version = "1.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -130,11 +126,10 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name 
= "chrono" -version = "0.4.40" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", @@ -200,6 +195,9 @@ dependencies = [ "tracing", "tracing-subscriber", "tree-sitter", + "url", + "yeast", + "zstd", ] [[package]] @@ -265,6 +263,17 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.13.0" @@ -335,6 +344,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "flate2" version = "1.1.0" @@ -345,6 +360,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + [[package]] name = "globset" version = "0.4.15" @@ -354,10 +378,16 @@ dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.8", - "regex-syntax 0.8.5", + "regex-automata", + "regex-syntax", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" @@ -366,9 +396,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.3.9" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "iana-time-zone" @@ -393,6 +423,119 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -405,6 +548,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] 
name = "js-sys" version = "0.3.72" @@ -426,6 +578,12 @@ version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + [[package]] name = "log" version = "0.4.22" @@ -434,11 +592,11 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "matchers" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ - "regex-automata 0.1.10", + "regex-automata", ] [[package]] @@ -458,12 +616,11 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.46.0" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "overload", - "winapi", + "windows-sys", ] [[package]] @@ -477,9 +634,9 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ "hermit-abi", "libc", @@ -492,10 +649,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] -name = "overload" -version = "0.1.1" +name = "percent-encoding" +version = "2.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pin-project-lite" @@ -503,6 +660,21 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + [[package]] name = "proc-macro2" version = "1.0.89" @@ -514,18 +686,18 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.37" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.10.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -533,9 +705,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ "crossbeam-deque", 
"crossbeam-utils", @@ -543,42 +715,27 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.1" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.8", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" version = "0.8.5" @@ -623,6 +780,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -644,6 +814,12 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "stable_deref_trait" 
+version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "streaming-iterator" version = "0.1.9" @@ -667,6 +843,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -677,11 +864,21 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -690,9 +887,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.28" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -701,9 +898,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", 
"valuable", @@ -722,14 +919,14 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.19" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", "once_cell", - "regex", + "regex-automata", "sharded-slab", "smallvec", "thread_local", @@ -746,7 +943,7 @@ checksum = "b67baf55e7e1b6806063b1e51041069c90afff16afcbbccd278d899f9d84bca4" dependencies = [ "cc", "regex", - "regex-syntax 0.8.5", + "regex-syntax", "streaming-iterator", "tree-sitter-language", ] @@ -775,6 +972,16 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8ddffe35a0e5eeeadf13ff7350af564c6e73993a24db62caee1822b185c2600" +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-ql" version = "0.23.0" @@ -793,12 +1000,46 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "url" +version = "2.5.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -866,28 +1107,6 @@ version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.52.0" @@ -899,9 +1118,9 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" @@ -975,3 +1194,137 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "yeast" +version = "0.1.0" +dependencies = [ + "clap", + "serde", + "serde_json", + "serde_yaml", + "tree-sitter", + "tree-sitter-python", + "tree-sitter-ruby", + "yeast-macros", +] + +[[package]] +name = "yeast-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 4849f473ccbc..817db97c0ef1 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -94,11 +94,14 @@ pub fn run(options: Options) -> std::io::Result<()> { node_types::read_node_types_str("erb", tree_sitter_embedded_template::NODE_TYPES)?; let lines: std::io::Result> = std::io::BufReader::new(file_list).lines().collect(); let lines = lines?; + let source_root = std::env::current_dir().ok(); lines .par_iter() .try_for_each(|line| { let mut diagnostics_writer = diagnostics.logger(); let path = PathBuf::from(line).canonicalize()?; + let diagnostics_file_path = + file_paths::relativize_for_diagnostic(&path, source_root.as_deref()); match &overlay_changed_files { Some(changed_files) if 
!changed_files.contains(&path) => { // We are extracting an overlay and this file is not in the list of changes files, so we should skip it. @@ -165,7 +168,7 @@ pub fn run(options: Options) -> std::io::Result<()> { "character-decoding-error", "Character decoding error", ) - .file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref())) + .file(&diagnostics_file_path) .message( "Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.", &[ @@ -185,7 +188,7 @@ pub fn run(options: Options) -> std::io::Result<()> { diagnostics_writer.write( diagnostics_writer .new_entry("unknown-character-encoding", "Could not process some files due to an unknown character encoding") - .file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref())) + .file(&diagnostics_file_path) .message( "Unknown character encoding {} in {} {}.", &[ diff --git a/ruby/ql/integration-tests/diagnostics/syntax-error/diagnostics.expected b/ruby/ql/integration-tests/diagnostics/syntax-error/diagnostics.expected index d9ae8e1b617c..b688f22e39ac 100644 --- a/ruby/ql/integration-tests/diagnostics/syntax-error/diagnostics.expected +++ b/ruby/ql/integration-tests/diagnostics/syntax-error/diagnostics.expected @@ -5,7 +5,7 @@ "location": { "endColumn": 5, "endLine": 1, - "file": "/bad.rb", + "file": "bad.rb", "startColumn": 4, "startLine": 1 }, @@ -28,7 +28,7 @@ "location": { "endColumn": 7, "endLine": 3, - "file": "/bad.rb", + "file": "bad.rb", "startColumn": 8, "startLine": 3 }, diff --git a/ruby/ql/integration-tests/diagnostics/unknown-encoding/diagnostics.expected b/ruby/ql/integration-tests/diagnostics/unknown-encoding/diagnostics.expected index 1c9caa49824c..2470d9304303 100644 --- a/ruby/ql/integration-tests/diagnostics/unknown-encoding/diagnostics.expected +++ b/ruby/ql/integration-tests/diagnostics/unknown-encoding/diagnostics.expected @@ -3,7 +3,7 @@ 
"https://docs.ruby-lang.org/en/master/syntax/comments_rdoc.html#label-encoding+Directive" ], "location": { - "file": "/encoding.rb" + "file": "encoding.rb" }, "markdownMessage": "Unknown character encoding `silly` in `#encoding:` [directive](https://docs.ruby-lang.org/en/master/syntax/comments_rdoc.html#label-encoding+Directive).", "plaintextMessage": "Unknown character encoding silly in #encoding: directive.", diff --git a/shared/tree-sitter-extractor/Cargo.toml b/shared/tree-sitter-extractor/Cargo.toml index 1ad18a6df5a5..10b1d8db35cd 100644 --- a/shared/tree-sitter-extractor/Cargo.toml +++ b/shared/tree-sitter-extractor/Cargo.toml @@ -18,6 +18,7 @@ lazy_static = "1.5.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" chrono = { version = "0.4.42", features = ["serde"] } +url = "2.5" num_cpus = "1.17.0" zstd = "0.13.3" yeast = { path = "../yeast" } diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index 0c3e13660817..9eaa1f93cc75 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -298,6 +298,9 @@ pub fn extract( yeast_runner: Option<&yeast::Runner<'_>>, ) { let path_str = file_paths::normalize_and_transform_path(path, transformer); + let source_root = std::env::current_dir().ok(); + let diagnostics_path = + file_paths::relativize_for_diagnostic(path, source_root.as_deref()); let span = tracing::span!( tracing::Level::TRACE, "extract", @@ -318,7 +321,7 @@ pub fn extract( source, diagnostics_writer, trap_writer, - &path_str, + &diagnostics_path, file_label, language_prefix, schema, @@ -343,8 +346,9 @@ struct ChildNode { } struct Visitor<'a> { - /// The file path of the source code (as string) - path: &'a str, + /// A path suitable for diagnostic locations: relative to the source root if possible, + /// otherwise a file: URI + diagnostics_path: &'a str, /// The label to use whenever we need to refer to the 
`@file` entity of this /// source file. file_label: trap::Label, @@ -376,13 +380,13 @@ impl<'a> Visitor<'a> { source: &'a [u8], diagnostics_writer: &'a mut diagnostics::LogWriter, trap_writer: &'a mut trap::Writer, - path: &'a str, + diagnostics_path: &'a str, file_label: trap::Label, language_prefix: &str, schema: &'a NodeTypeMap, ) -> Visitor<'a> { Visitor { - path, + diagnostics_path, file_label, source, diagnostics_writer, @@ -433,7 +437,7 @@ impl<'a> Visitor<'a> { ); mesg.severity(diagnostics::Severity::Warning) .location( - self.path, + self.diagnostics_path, loc.start_line, loc.start_column, loc.end_line, @@ -553,7 +557,7 @@ impl<'a> Visitor<'a> { ) .severity(diagnostics::Severity::Warning) .location( - self.path, + self.diagnostics_path, loc.start_line, loc.start_column, loc.end_line, diff --git a/shared/tree-sitter-extractor/src/file_paths.rs b/shared/tree-sitter-extractor/src/file_paths.rs index bdb9dd035f06..2728f10a5f88 100644 --- a/shared/tree-sitter-extractor/src/file_paths.rs +++ b/shared/tree-sitter-extractor/src/file_paths.rs @@ -3,6 +3,25 @@ use std::{ path::{Path, PathBuf}, }; +/// Given an absolute path, returns a relative path if it's under `source_root`, +/// otherwise a properly-encoded `file:` URI. This is used for diagnostic locations, which +/// should use relative paths per the CodeQL diagnostic message format spec. +pub fn relativize_for_diagnostic(path: &Path, source_root: Option<&Path>) -> String { + source_root + .and_then(|root| path.strip_prefix(root).ok()) + .and_then(|rel| rel.to_str()) + .map(|s| s.to_owned()) + .unwrap_or_else(|| path_to_file_uri(path)) +} + +/// Convert a path to a `file:` URI, using the `url` crate for proper percent-encoding. +/// Falls back to a simple `file://` prefix if the `url` crate can't handle the path. 
+fn path_to_file_uri(path: &Path) -> String { + url::Url::from_file_path(path) + .map(|u| u.to_string()) + .unwrap_or_else(|()| format!("file://{}", path.display())) +} + /// This represents the minimum supported path transformation that is needed to support extracting /// overlay databases. Specifically, it represents a transformer where one path prefix is replaced /// with a different prefix. @@ -224,3 +243,44 @@ pub fn path_for( } result } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn relativize_under_source_root() { + let path = Path::new("/home/runner/work/repo/src/foo.rb"); + let result = + relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo"))); + assert_eq!(result, "src/foo.rb"); + } + + #[test] + fn relativize_outside_source_root_produces_file_uri() { + let path = Path::new("/other/location/foo.rb"); + let result = + relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo"))); + assert_eq!(result, "file:///other/location/foo.rb"); + } + + #[test] + fn relativize_no_source_root_produces_file_uri() { + let path = Path::new("/home/runner/work/repo/src/foo.rb"); + let result = relativize_for_diagnostic(path, None); + assert_eq!(result, "file:///home/runner/work/repo/src/foo.rb"); + } + + #[test] + fn path_to_file_uri_encodes_spaces() { + let result = path_to_file_uri(Path::new("/home/user/my project/foo.rb")); + assert_eq!(result, "file:///home/user/my%20project/foo.rb"); + } + + #[test] + fn relativize_exact_root_path() { + let path = Path::new("/repo/foo.rb"); + let result = relativize_for_diagnostic(path, Some(Path::new("/repo"))); + assert_eq!(result, "foo.rb"); + } +} From 57ac0192c018d40e5c5584cb1028f9ebc7dad11d Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Wed, 13 May 2026 09:48:45 +0200 Subject: [PATCH 2/4] Fix formatting Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- shared/tree-sitter-extractor/src/extractor/mod.rs | 3 +-- 
shared/tree-sitter-extractor/src/file_paths.rs | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index 9eaa1f93cc75..a4f4eccf28cc 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -299,8 +299,7 @@ pub fn extract( ) { let path_str = file_paths::normalize_and_transform_path(path, transformer); let source_root = std::env::current_dir().ok(); - let diagnostics_path = - file_paths::relativize_for_diagnostic(path, source_root.as_deref()); + let diagnostics_path = file_paths::relativize_for_diagnostic(path, source_root.as_deref()); let span = tracing::span!( tracing::Level::TRACE, "extract", diff --git a/shared/tree-sitter-extractor/src/file_paths.rs b/shared/tree-sitter-extractor/src/file_paths.rs index 2728f10a5f88..e4698e533db7 100644 --- a/shared/tree-sitter-extractor/src/file_paths.rs +++ b/shared/tree-sitter-extractor/src/file_paths.rs @@ -251,16 +251,14 @@ mod tests { #[test] fn relativize_under_source_root() { let path = Path::new("/home/runner/work/repo/src/foo.rb"); - let result = - relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo"))); + let result = relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo"))); assert_eq!(result, "src/foo.rb"); } #[test] fn relativize_outside_source_root_produces_file_uri() { let path = Path::new("/other/location/foo.rb"); - let result = - relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo"))); + let result = relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo"))); assert_eq!(result, "file:///other/location/foo.rb"); } From c3cf7c2bcac83df67de1ab45cc784ba987442011 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Wed, 13 May 2026 10:28:27 +0200 Subject: [PATCH 3/4] Use absolute path fallback instead of `file:` URI Drop the `url` crate dependency. 
When a path can't be relativized against the source root, emit it as a bare absolute path and let the CLI's SARIF generator handle URI conversion downstream. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 1 - shared/tree-sitter-extractor/Cargo.toml | 1 - .../tree-sitter-extractor/src/file_paths.rs | 27 +++++-------------- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 171f4b52c420..4fab55a6444f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -456,7 +456,6 @@ dependencies = [ "tree-sitter", "tree-sitter-json", "tree-sitter-ql", - "url", "yeast", "zstd", ] diff --git a/shared/tree-sitter-extractor/Cargo.toml b/shared/tree-sitter-extractor/Cargo.toml index 10b1d8db35cd..1ad18a6df5a5 100644 --- a/shared/tree-sitter-extractor/Cargo.toml +++ b/shared/tree-sitter-extractor/Cargo.toml @@ -18,7 +18,6 @@ lazy_static = "1.5.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" chrono = { version = "0.4.42", features = ["serde"] } -url = "2.5" num_cpus = "1.17.0" zstd = "0.13.3" yeast = { path = "../yeast" } diff --git a/shared/tree-sitter-extractor/src/file_paths.rs b/shared/tree-sitter-extractor/src/file_paths.rs index e4698e533db7..dc53fc7b9256 100644 --- a/shared/tree-sitter-extractor/src/file_paths.rs +++ b/shared/tree-sitter-extractor/src/file_paths.rs @@ -4,22 +4,15 @@ use std::{ }; /// Given an absolute path, returns a relative path if it's under `source_root`, -/// otherwise a properly-encoded `file:` URI. This is used for diagnostic locations, which +/// otherwise the absolute path as-is. This is used for diagnostic locations, which /// should use relative paths per the CodeQL diagnostic message format spec. +/// Absolute path fallback is handled downstream by the CLI's SARIF generator. 
pub fn relativize_for_diagnostic(path: &Path, source_root: Option<&Path>) -> String { source_root .and_then(|root| path.strip_prefix(root).ok()) .and_then(|rel| rel.to_str()) .map(|s| s.to_owned()) - .unwrap_or_else(|| path_to_file_uri(path)) -} - -/// Convert a path to a `file:` URI, using the `url` crate for proper percent-encoding. -/// Falls back to a simple `file://` prefix if the `url` crate can't handle the path. -fn path_to_file_uri(path: &Path) -> String { - url::Url::from_file_path(path) - .map(|u| u.to_string()) - .unwrap_or_else(|()| format!("file://{}", path.display())) + .unwrap_or_else(|| path.display().to_string()) } /// This represents the minimum supported path transformation that is needed to support extracting @@ -256,23 +249,17 @@ mod tests { } #[test] - fn relativize_outside_source_root_produces_file_uri() { + fn relativize_outside_source_root_returns_absolute() { let path = Path::new("/other/location/foo.rb"); let result = relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo"))); - assert_eq!(result, "file:///other/location/foo.rb"); + assert_eq!(result, "/other/location/foo.rb"); } #[test] - fn relativize_no_source_root_produces_file_uri() { + fn relativize_no_source_root_returns_absolute() { let path = Path::new("/home/runner/work/repo/src/foo.rb"); let result = relativize_for_diagnostic(path, None); - assert_eq!(result, "file:///home/runner/work/repo/src/foo.rb"); - } - - #[test] - fn path_to_file_uri_encodes_spaces() { - let result = path_to_file_uri(Path::new("/home/user/my project/foo.rb")); - assert_eq!(result, "file:///home/user/my%20project/foo.rb"); + assert_eq!(result, "/home/runner/work/repo/src/foo.rb"); } #[test] From c2fc0cf1118eca02630b08301c92ddff3cc79ef5 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Wed, 13 May 2026 10:31:48 +0200 Subject: [PATCH 4/4] Fix Windows path handling in diagnostic relativization Canonicalize `current_dir()` to match canonicalized file paths (avoids `\\?\` prefix mismatch 
on Windows), and normalize backslashes to forward slashes in relative diagnostic paths. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ruby/extractor/src/extractor.rs | 2 +- shared/tree-sitter-extractor/src/extractor/mod.rs | 4 +++- shared/tree-sitter-extractor/src/file_paths.rs | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 817db97c0ef1..d418c144bfc9 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -94,7 +94,7 @@ pub fn run(options: Options) -> std::io::Result<()> { node_types::read_node_types_str("erb", tree_sitter_embedded_template::NODE_TYPES)?; let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect(); let lines = lines?; - let source_root = std::env::current_dir().ok(); + let source_root = std::env::current_dir().ok().and_then(|d| d.canonicalize().ok()); lines .par_iter() .try_for_each(|line| { diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index a4f4eccf28cc..3611a328daaf 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -298,7 +298,9 @@ pub fn extract( yeast_runner: Option<&yeast::Runner<'_>>, ) { let path_str = file_paths::normalize_and_transform_path(path, transformer); - let source_root = std::env::current_dir().ok(); + let source_root = std::env::current_dir() + .ok() + .and_then(|d| d.canonicalize().ok()); let diagnostics_path = file_paths::relativize_for_diagnostic(path, source_root.as_deref()); let span = tracing::span!( tracing::Level::TRACE, "extract", diff --git a/shared/tree-sitter-extractor/src/file_paths.rs b/shared/tree-sitter-extractor/src/file_paths.rs index dc53fc7b9256..71b9ca0469a4 100644 --- a/shared/tree-sitter-extractor/src/file_paths.rs +++ b/shared/tree-sitter-extractor/src/file_paths.rs @@ -11,7 +11,7 @@ pub fn
relativize_for_diagnostic(path: &Path, source_root: Option<&Path>) -> Str source_root .and_then(|root| path.strip_prefix(root).ok()) .and_then(|rel| rel.to_str()) - .map(|s| s.to_owned()) + .map(|s| s.replace('\\', "/")) .unwrap_or_else(|| path.display().to_string()) }