diff --git a/src/web/extractors/rustdoc.rs b/src/web/extractors/rustdoc.rs index 37e2d1a54..9b6c64682 100644 --- a/src/web/extractors/rustdoc.rs +++ b/src/web/extractors/rustdoc.rs @@ -20,6 +20,13 @@ use std::borrow::Cow; const INDEX_HTML: &str = "index.html"; const FOLDER_AND_INDEX_HTML: &str = "/index.html"; +pub(crate) const ROOT_RUSTDOC_HTML_FILES: &[&str] = &[ + "all.html", + "help.html", + "settings.html", + "scrape-examples-help.html", +]; + #[derive(Clone, Debug, PartialEq)] pub(crate) enum PageKind { Rustdoc, @@ -723,27 +730,38 @@ fn generate_rustdoc_path_for_url( && !path.is_empty() && path != INDEX_HTML { - if let Some(target_name) = target_name - && path == target_name - { - // Sometimes people add a path that is just the target name. - // This matches our `rustdoc_redirector_handler` route, - // without trailing slash (`/{name}/{version}/{target}`). - // - // Even though the rustdoc html handler would also be capable - // handling this input, we want to redirect to the trailing - // slash version (`/{name}/{version}/{target}/`), - // so they are separate. - format!("{}/", target_name) - } else { + // for none-elements paths we have to guarantee that we have a + // trailing slash, otherwise the rustdoc-url won't hit the html-handler and + // lead to redirect loops. + if path.contains('/') { // just use the given inner to start, if: // * it's not empty // * it's not just "index.html" + // * we have a slash in the path. path.to_string() + } else if ROOT_RUSTDOC_HTML_FILES.contains(&path) { + // special case: some files are at the root of the rustdoc output, + // without a trailing slash, and the routes are fine with that. + // e.g. `/help.html`, `/settings.html`, `/all.html`, ... + path.to_string() + } else if let Some(target_name) = target_name { + if target_name == path { + // when we have the target name as path, without a trailing slash, + // just add the slash. + format!("{}/", path) + } else { + // when someone just attaches some path to the URL, like + // `/{krate}/{version}/somefile.html`, we assume they meant + // `/{krate}/{version}/{target_name}/somefile.html`. + format!("{}/{}", target_name, path) + } + } else { + // fallback: just attach a slash and redirect. + format!("{}/", path) } } else if let Some(target_name) = target_name { // after having no usable given path, we generate one with the - // target name, if we have one. + // target name, if we have one/. format!("{}/", target_name) } else { // no usable given path: @@ -1000,12 +1018,17 @@ mod tests { )] #[test_case( None, Some("something"), false, - None, "something", "something"; + None, "something", "krate/something"; "without trailing slash" )] + #[test_case( + None, Some("settings.html"), false, + None, "settings.html", "settings.html"; + "without trailing slash, but known root name" + )] #[test_case( None, Some("/something"), false, - None, "something", "something"; + None, "something", "krate/something"; "leading slash is cut" )] #[test_case( @@ -1027,7 +1050,7 @@ mod tests { )] #[test_case( None, Some(&format!("{DEFAULT_TARGET}/one")), false, - Some(DEFAULT_TARGET), "one", "one"; + Some(DEFAULT_TARGET), "one", "krate/one"; "target + one without trailing slash" )] #[test_case( @@ -1073,7 +1096,7 @@ mod tests { )] #[test_case( Some(UNKNOWN_TARGET), None, false, - None, UNKNOWN_TARGET, UNKNOWN_TARGET; + None, UNKNOWN_TARGET, &format!("krate/{UNKNOWN_TARGET}"); "unknown target without trailing slash" )] #[test_case( @@ -1639,23 +1662,26 @@ mod tests { assert_eq!(params.rustdoc_url(), "/krate/latest/krate/"); } - #[test_case(Some(PageKind::Rustdoc))] - #[test_case(None)] - fn test_only_target_name_without_slash(page_kind: Option) { + #[test_case("other_path.html", "/krate/latest/krate/other_path.html")] + #[test_case("other_path", "/krate/latest/krate/other_path"; "without .html")] + #[test_case("other_path.html", "/krate/latest/krate/other_path.html"; "with .html")] + #[test_case("settings.html", "/krate/latest/settings.html"; "static routes")] + #[test_case(KRATE, "/krate/latest/krate/"; "same as target name, without slash")] + fn test_redirect_some_odd_paths_we_saw(inner_path: &str, expected_url: &str) { // test for https://github.com/rust-lang/docs.rs/issues/2989 let params = RustdocParams::new(KRATE) - .with_maybe_page_kind(page_kind) - .try_with_original_uri("/dummy/latest/dummy") + .with_page_kind(PageKind::Rustdoc) + .try_with_original_uri(format!("/{KRATE}/latest/{inner_path}")) .unwrap() .with_req_version(ReqVersion::Latest) .with_maybe_doc_target(None::) - .with_inner_path(KRATE) + .with_inner_path(inner_path) .with_default_target(DEFAULT_TARGET) .with_target_name(KRATE) .with_doc_targets(TARGETS.iter().cloned()); dbg!(¶ms); - assert_eq!(params.rustdoc_url(), "/krate/latest/krate/"); + assert_eq!(params.rustdoc_url(), expected_url); } } diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 51d8504d8..5a8c17599 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -34,7 +34,7 @@ use axum::{ http::StatusCode, response::{IntoResponse, Response as AxumResponse}, }; -use http::{HeaderValue, Uri, header}; +use http::{HeaderValue, Uri, header, uri::Authority}; use serde::Deserialize; use std::{ collections::HashMap, @@ -219,6 +219,7 @@ pub(crate) async fn rustdoc_redirector_handler( let params = params.with_page_kind(PageKind::Rustdoc); fn redirect_to_doc( + original_uri: Option<&Uri>, url: EscapedURI, cache_policy: CachePolicy, path_in_crate: Option<&str>, @@ -229,6 +230,19 @@ pub(crate) async fn rustdoc_redirector_handler( url }; + if let Some(original_uri) = original_uri + && original_uri.path() == url.path() + && (url.authority().is_none() + || url.authority() == Some(&Authority::from_static("docs.rs"))) + { + return Err(anyhow!( + "infinite redirect detected, \noriginal_uri = {}, redirect_url = {}", + original_uri, + url + ) + .into()); + } + trace!(%url, ?cache_policy, path_in_crate, "redirect to doc"); Ok(axum_cached_redirect(url, cache_policy)?) } @@ -266,6 +280,7 @@ pub(crate) async fn rustdoc_redirector_handler( let target_uri = EscapedURI::from_uri(description.href.clone()).append_raw_query(original_query); return redirect_to_doc( + params.original_uri(), target_uri, CachePolicy::ForeverInCdnAndStaleInBrowser, path_in_crate.as_deref(), @@ -329,6 +344,7 @@ pub(crate) async fn rustdoc_redirector_handler( if matched_release.rustdoc_status() { Ok(redirect_to_doc( + params.original_uri(), params.rustdoc_url().append_raw_query(original_query), if matched_release.is_latest_url() { CachePolicy::ForeverInCdn @@ -3041,7 +3057,7 @@ mod test { web.assert_redirect_cached_unchecked( "/clap/latest/clapproc%20macro%20%60Parser%60%20not%20expanded:%20Cannot%20create%20expander%20for", - "/clap/latest/clapproc%20macro%20%60Parser%60%20not%20expanded:%20Cannot%20create%20expander%20for", + "/clap/latest/clap/clapproc%20macro%20%60Parser%60%20not%20expanded:%20Cannot%20create%20expander%20for", CachePolicy::ForeverInCdn, env.config(), ).await?; @@ -3345,7 +3361,7 @@ mod test { #[tokio::test(flavor = "multi_thread")] #[test_case("/dummy/"; "only krate")] #[test_case("/dummy/latest/"; "with version")] - #[test_case("/dummy/latest/dummy"; "full without trailing slash")] + #[test_case("/dummy/latest/dummy"; "target-name as path, without trailing slash")] #[test_case("/dummy/latest/dummy/"; "final target")] async fn test_full_latest_url_without_trailing_slash(path: &str) -> Result<()> { // test for https://github.com/rust-lang/docs.rs/issues/2989 @@ -3368,6 +3384,38 @@ mod test { .await?; } + Ok(()) + } + #[tokio::test(flavor = "multi_thread")] + #[test_case( + "/dummy/latest/other_path", + "/dummy/latest/dummy/other_path"; + "other path, without trailing slash" + )] + #[test_case( + "/dummy/latest/other_path.html", + "/dummy/latest/dummy/other_path.html"; + "other html path, without trailing slash" + )] + async fn test_full_latest_url_some_path_but_trailing_slash( + path: &str, + expected_redirect: &str, + ) -> Result<()> { + // test for https://github.com/rust-lang/docs.rs/issues/2989 + + let env = TestEnvironment::new().await?; + + env.fake_release() + .await + .name("dummy") + .version("1.0.0") + .create() + .await?; + + let web = env.web_app().await; + web.assert_redirect_unchecked(path, expected_redirect) + .await?; + Ok(()) } }