fix content-type for zstd files, additionally build & serve gzip compressed rustdoc json by syphar · Pull Request #2848 · rust-lang/docs.rs · GitHub
[go: up one dir, main page]

Skip to content

fix content-type for zstd files, additionally build & serve gzip compressed rustdoc json #2848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ prometheus = { version = "0.14.0", default-features = false }
rustwide = { version = "0.19.0", features = ["unstable-toolchain-ci", "unstable"] }
mime_guess = "2"
zstd = "0.13.0"
flate2 = "1.1.1"
hostname = "0.4.0"
path-slash = "0.2.0"
once_cell = { version = "1.4.0", features = ["parking_lot"] }
Expand Down
12 changes: 12 additions & 0 deletions benches/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ pub fn regex_capture_matches(c: &mut Criterion) {
5 * 1024 * 1024,
)
});
})
.bench_function("compress gzip", |b| {
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Gzip));
})
.bench_function("decompress gzip", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
CompressionAlgorithm::Gzip,
5 * 1024 * 1024,
)
});
});
}

Expand Down
3 changes: 2 additions & 1 deletion src/db/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ mod tests {
use super::*;
use crate::db::ReleaseId;
use crate::registry_api::{CrateOwner, OwnerKind};
use crate::storage::rustdoc_json_path;
use crate::storage::{CompressionAlgorithm, rustdoc_json_path};
use crate::test::{async_wrapper, fake_release_that_failed_before_build};
use test_case::test_case;

Expand Down Expand Up @@ -413,6 +413,7 @@ mod tests {
version,
"x86_64-unknown-linux-gnu",
crate::storage::RustdocJsonFormatVersion::Latest,
Some(CompressionAlgorithm::Zstd),
))
.await
}
Expand Down
26 changes: 26 additions & 0 deletions src/db/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ pub(crate) fn detect_mime(file_path: impl AsRef<Path>) -> Mime {
Some("toml") => mimes::TEXT_TOML.clone(),
Some("js") => mime::TEXT_JAVASCRIPT,
Some("json") => mime::APPLICATION_JSON,
Some("gz") => mimes::APPLICATION_GZIP.clone(),
Some("zst") => mimes::APPLICATION_ZSTD.clone(),
_ => mime,
}
}
Expand Down Expand Up @@ -103,3 +105,27 @@ pub(crate) fn file_list_to_json(files: impl IntoIterator<Item = FileEntry>) -> V
.collect(),
)
}

#[cfg(test)]
mod tests {
    use super::*;
    use test_case::test_case;

    // Each case feeds a file name with the given extension through
    // `detect_mime` and checks the resulting mime type.
    //
    // extensions that mime-guess already resolves on its own
    #[test_case("txt", &mime::TEXT_PLAIN)]
    #[test_case("html", &mime::TEXT_HTML)]
    // extensions where we override the guessed type, or supply a
    // default because mime-guess has no entry for them
    #[test_case("md", &mimes::TEXT_MARKDOWN)]
    #[test_case("rs", &mimes::TEXT_RUST)]
    #[test_case("markdown", &mimes::TEXT_MARKDOWN)]
    #[test_case("css", &mime::TEXT_CSS)]
    #[test_case("toml", &mimes::TEXT_TOML)]
    #[test_case("js", &mime::TEXT_JAVASCRIPT)]
    #[test_case("json", &mime::APPLICATION_JSON)]
    #[test_case("zst", &mimes::APPLICATION_ZSTD)]
    #[test_case("gz", &mimes::APPLICATION_GZIP)]
    fn test_detect_mime(ext: &str, expected: &Mime) {
        let file_name = format!("something.{ext}");
        let detected = detect_mime(file_name);

        assert_eq!(&detected, expected);
    }
}
2 changes: 2 additions & 0 deletions src/db/mimes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ macro_rules! mime {
}

mime!(APPLICATION_ZIP, "application/zip");
mime!(APPLICATION_ZSTD, "application/zstd");
mime!(APPLICATION_GZIP, "application/gzip");
mime!(TEXT_MARKDOWN, "text/markdown");
mime!(TEXT_RUST, "text/rust");
mime!(TEXT_TOML, "text/toml");
3 changes: 3 additions & 0 deletions src/docbuilder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ mod rustwide_builder;
pub(crate) use self::limits::Limits;
pub(crate) use self::rustwide_builder::DocCoverage;
pub use self::rustwide_builder::{BuildPackageSummary, PackageKind, RustwideBuilder};

#[cfg(test)]
pub use self::rustwide_builder::RUSTDOC_JSON_COMPRESSION_ALGORITHMS;
88 changes: 57 additions & 31 deletions src/docbuilder/rustwide_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ use crate::docbuilder::Limits;
use crate::error::Result;
use crate::repositories::RepositoryStatsUpdater;
use crate::storage::{
RustdocJsonFormatVersion, get_file_list, rustdoc_archive_path, rustdoc_json_path,
source_archive_path,
CompressionAlgorithm, RustdocJsonFormatVersion, compress, get_file_list, rustdoc_archive_path,
rustdoc_json_path, source_archive_path,
};
use crate::utils::{
CargoMetadata, ConfigName, copy_dir_all, get_config, parse_rustc_version, report_error,
Expand Down Expand Up @@ -45,6 +45,9 @@ const COMPONENTS: &[&str] = &["llvm-tools-preview", "rustc-dev", "rustfmt"];
const DUMMY_CRATE_NAME: &str = "empty-library";
const DUMMY_CRATE_VERSION: &str = "1.0.0";

/// Compression algorithms used when storing rustdoc JSON output.
///
/// The builder compresses the generated JSON file once per entry in this
/// list and stores each compressed copy under its own storage path.
pub const RUSTDOC_JSON_COMPRESSION_ALGORITHMS: &[CompressionAlgorithm] =
&[CompressionAlgorithm::Zstd, CompressionAlgorithm::Gzip];

/// read the format version from a rustdoc JSON file.
fn read_format_version_from_rustdoc_json(
reader: impl std::io::Read,
Expand Down Expand Up @@ -909,12 +912,25 @@ impl RustwideBuilder {
.context("couldn't parse rustdoc json to find format version")?
};

for format_version in [format_version, RustdocJsonFormatVersion::Latest] {
let _span = info_span!("store_json", %format_version).entered();
let path = rustdoc_json_path(name, version, target, format_version);
for alg in RUSTDOC_JSON_COMPRESSION_ALGORITHMS {
let compressed_json: Vec<u8> = {
let _span =
info_span!("compress_json", file_size = json_filename.metadata()?.len(), algorithm=%alg)
.entered();

compress(BufReader::new(File::open(&json_filename)?), *alg)?
};

self.storage.store_path(&path, &json_filename)?;
self.storage.set_public_access(&path, true)?;
for format_version in [format_version, RustdocJsonFormatVersion::Latest] {
let path = rustdoc_json_path(name, version, target, format_version, Some(*alg));
let _span =
info_span!("store_json", %format_version, algorithm=%alg, target_path=%path)
.entered();

self.storage
.store_one_uncompressed(&path, compressed_json.clone())?;
self.storage.set_public_access(&path, true)?;
}
}

Ok(())
Expand Down Expand Up @@ -1279,7 +1295,7 @@ mod tests {
use super::*;
use crate::db::types::Feature;
use crate::registry_api::ReleaseData;
use crate::storage::CompressionAlgorithm;
use crate::storage::{CompressionAlgorithm, compression};
use crate::test::{AxumRouterTestExt, TestEnvironment, wrapper};
use std::{io, iter};
use test_case::test_case;
Expand Down Expand Up @@ -1467,29 +1483,39 @@ mod tests {

// other targets too
for target in DEFAULT_TARGETS {
// check if rustdoc json files exist for all targets
let path = rustdoc_json_path(
crate_,
version,
target,
RustdocJsonFormatVersion::Latest,
);
assert!(storage.exists(&path)?);
assert!(storage.get_public_access(&path)?);

let json_prefix = format!("rustdoc-json/{crate_}/{version}/{target}/");
let mut json_files: Vec<_> = storage
.list_prefix(&json_prefix)
.filter_map(|res| res.ok())
.map(|f| f.strip_prefix(&json_prefix).unwrap().to_owned())
.collect();
json_files.sort();
assert!(json_files[0].starts_with(&format!("empty-library_1.0.0_{target}_")));
assert!(json_files[0].ends_with(".json"));
assert_eq!(
json_files[1],
format!("empty-library_1.0.0_{target}_latest.json")
);
for alg in RUSTDOC_JSON_COMPRESSION_ALGORITHMS {
// check if rustdoc json files exist for all targets
let path = rustdoc_json_path(
crate_,
version,
target,
RustdocJsonFormatVersion::Latest,
Some(*alg),
);
assert!(storage.exists(&path)?);
assert!(storage.get_public_access(&path)?);

let ext = compression::file_extension_for(*alg);

let json_prefix = format!("rustdoc-json/{crate_}/{version}/{target}/");
let mut json_files: Vec<_> = storage
.list_prefix(&json_prefix)
.filter_map(|res| res.ok())
.map(|f| f.strip_prefix(&json_prefix).unwrap().to_owned())
.collect();
json_files.retain(|f| f.ends_with(&format!(".json.{ext}")));
json_files.sort();
dbg!(&json_files);
assert!(
json_files[0].starts_with(&format!("empty-library_1.0.0_{target}_"))
);

assert!(json_files[0].ends_with(&format!(".json.{ext}")));
assert_eq!(
json_files[1],
format!("empty-library_1.0.0_{target}_latest.json.{ext}")
);
}

if target == &default_target {
continue;
Expand Down
55 changes: 49 additions & 6 deletions src/storage/compression.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::Error;
use bzip2::Compression;
use bzip2::read::{BzDecoder, BzEncoder};
use flate2::read::{GzDecoder, GzEncoder};
use serde::{Deserialize, Serialize};
use std::{
collections::HashSet,
Expand Down Expand Up @@ -29,6 +29,13 @@ pub enum CompressionAlgorithm {
#[default]
Zstd = 0,
Bzip2 = 1,
Gzip = 2,
}

impl CompressionAlgorithm {
    /// File-name extension (without a leading dot) for data compressed with
    /// this algorithm.
    ///
    /// Convenience wrapper that forwards to [`file_extension_for`].
    pub fn file_extension(&self) -> &'static str {
        let algorithm = *self;
        file_extension_for(algorithm)
    }
}

impl std::convert::TryFrom<i32> for CompressionAlgorithm {
Expand All @@ -45,17 +52,40 @@ impl std::convert::TryFrom<i32> for CompressionAlgorithm {
}
}

pub(crate) fn file_extension_for(algorithm: CompressionAlgorithm) -> &'static str {
match algorithm {
CompressionAlgorithm::Zstd => "zst",
CompressionAlgorithm::Bzip2 => "bz2",
CompressionAlgorithm::Gzip => "gz",
}
}

/// Looks up the compression algorithm identified by the file extension `ext`.
///
/// `ext` is compared exactly (case-sensitive, no leading dot); any extension
/// that is not listed yields `None`.
pub(crate) fn compression_from_file_extension(ext: &str) -> Option<CompressionAlgorithm> {
    const KNOWN_EXTENSIONS: [(&str, CompressionAlgorithm); 3] = [
        ("zst", CompressionAlgorithm::Zstd),
        ("bz2", CompressionAlgorithm::Bzip2),
        ("gz", CompressionAlgorithm::Gzip),
    ];

    KNOWN_EXTENSIONS
        .iter()
        .find(|(known, _)| *known == ext)
        .map(|(_, algorithm)| *algorithm)
}

// public for benchmarking
pub fn compress(content: impl Read, algorithm: CompressionAlgorithm) -> Result<Vec<u8>, Error> {
match algorithm {
CompressionAlgorithm::Zstd => Ok(zstd::encode_all(content, 9)?),
CompressionAlgorithm::Bzip2 => {
let mut compressor = BzEncoder::new(content, Compression::best());
let mut compressor = BzEncoder::new(content, bzip2::Compression::best());

let mut data = vec![];
compressor.read_to_end(&mut data)?;
Ok(data)
}
CompressionAlgorithm::Gzip => {
let mut compressor = GzEncoder::new(content, flate2::Compression::default());
let mut data = vec![];
compressor.read_to_end(&mut data)?;
Ok(data)
}
}
}

Expand All @@ -72,6 +102,9 @@ pub fn decompress(
CompressionAlgorithm::Bzip2 => {
io::copy(&mut BzDecoder::new(content), &mut buffer)?;
}
CompressionAlgorithm::Gzip => {
io::copy(&mut GzDecoder::new(content), &mut buffer)?;
}
}

Ok(buffer.into_inner())
Expand All @@ -81,6 +114,7 @@ pub fn decompress(
mod tests {
use super::*;
use strum::IntoEnumIterator;
use test_case::test_case;

#[test]
fn test_compression() {
Expand Down Expand Up @@ -134,9 +168,18 @@ mod tests {
}
}

#[test]
fn test_enum_display() {
assert_eq!(CompressionAlgorithm::Zstd.to_string(), "Zstd");
assert_eq!(CompressionAlgorithm::Bzip2.to_string(), "Bzip2");
#[test_case(CompressionAlgorithm::Zstd, "Zstd")]
#[test_case(CompressionAlgorithm::Bzip2, "Bzip2")]
#[test_case(CompressionAlgorithm::Gzip, "Gzip")]
fn test_enum_display(alg: CompressionAlgorithm, expected: &str) {
assert_eq!(alg.to_string(), expected);
}

// the extension <-> algorithm mapping has to agree in both directions
#[test_case(CompressionAlgorithm::Zstd, "zst")]
#[test_case(CompressionAlgorithm::Bzip2, "bz2")]
#[test_case(CompressionAlgorithm::Gzip, "gz")]
fn test_file_extensions(alg: CompressionAlgorithm, expected: &str) {
    let extension = file_extension_for(alg);
    assert_eq!(extension, expected);

    let parsed = compression_from_file_extension(expected);
    assert_eq!(parsed, Some(alg));
}
}
Loading
Loading
0