rust-s3
rust-s3 copied to clipboard
Cannot upload objects larger than a few MB using `put_object_stream_blocking`
Describe the bug
When:
- uploading a large file (> 4MB) with `put_object_stream_blocking`
- with a custom header (without the custom header, the bug does not trigger)
- I receive a status code 200
- however the file is not uploaded to s3
- I do see
`400 Bad Request` responses being logged by the `reqwest` crate.
Switching to put_object_blocking solves the issue.
To Reproduce
/// Reproduction of the failing path: streams the file at `local_path` to
/// `s3_path` after attaching a custom metadata header, and returns the HTTP
/// status code reported by the library.
fn broken(mut bucket: Bucket, local_path: &Path, s3_path: &str) -> u16 {
    bucket.add_header("x-amz-meta-sha256", "sometext");
    let code = bucket
        .put_object_stream_blocking(local_path, s3_path)
        .unwrap();
    info!("Status code after uploading file: {}", code);
    code
}
fn working(mut bucket: Bucket, local_path: &Path, s3_path: &str) -> u16 {
bucket.add_header("x-amz-meta-sha256", "sometext");
let mut contents: Vec<u8> = Vec::with_capacity(100000);
let mut file = File::open(local_path).unwrap();
file.read_to_end(&mut contents).unwrap();
let (_, status_code) = bucket.put_object_blocking(s3_path, &contents).unwrap();
info!("Status code after uploading file: {}", status_code);
status_code
}
Expected behavior
- That the object is uploaded correctly even for large objects.
- If there is an error during the upload, that the
`Result` is an `Err` (or `Ok(400)`), and not an `Ok(200)`.
Environment
- Rust version: rustc 1.55.0 (c8dfcfe04 2021-09-06)
- lib version: 0.26.4
Additional context Add any other context about the problem here.
Full reproduction scenario:
Cargo.toml
[dependencies]
rust-s3 = "0.26.4"
log = "0.4.14"
env_logger = "0.9.0"
url = "2.2.2"
main.rs
use std::path::PathBuf;
use chrono::Utc;
use lazy_static::lazy_static;
use log::trace;
mod bundle;
mod catalog;
mod command;
mod config;
mod error;
mod util;
// File name of the manifest — presumably written/read next to the bundled data; confirm against the bundle module.
const MANIFEST_FILE: &str = "manifest.json";
// Prefix S3 applies to user-defined object metadata headers.
const AWS_METADATA_PREFIX: &str = "x-amz-meta-";
// Metadata key (combined with the prefix above) for the object's SHA-256 checksum.
const AWS_METADATA_SHA256: &str = "sha256";
// Checksum values are carried as plain strings.
type Checksum = String;
lazy_static! {
// Absolute path of the currently running executable, resolved once on first access.
// NOTE(review): unwrap() panics if current_exe() fails (e.g. the binary was deleted).
pub(crate) static ref BINARY_LOCATION: PathBuf = std::env::current_exe().unwrap();
}
// Crate version baked in at compile time from Cargo.toml.
const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Entry point: loads the configuration, initializes logging, backfills AWS
/// environment variables from pgBackRest ones, and runs the requested action.
/// Exits with status 1 when the action fails.
fn main() {
    let cfg = config::Config::new();
    if cfg.is_chatty() {
        eprintln!(
            "[{} STARTUP {}] Starting with version {}",
            Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true),
            env!("CARGO_PKG_NAME"),
            VERSION
        );
    }
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(cfg.get_loglevel()))
        .format_timestamp_millis()
        .init();
    if cfg.is_chatty() {
        trace!("User supplied configuration: {:#?}", cfg);
    }
    use_pgbackrest_environment();
    match cfg.run() {
        Err(e) => {
            println!("{:?}", e);
            std::process::exit(1)
        }
        Ok(_) => {
            if cfg.is_chatty() {
                eprintln!(
                    "[{} DONE {}] Action complete successfully",
                    Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true),
                    env!("CARGO_PKG_NAME"),
                );
            }
        }
    }
}
/// use_pgbackrest_environment is a convenience method for those environments that do not have the AWS_* environment
/// variables set. This function populates missing AWS_* environment variables using the PGBACKREST_REPO1_S3_* variables,
/// if they *are* actually set
/// use_pgbackrest_environment is a convenience method for environments that do
/// not have the AWS_* environment variables set. It backfills each missing
/// AWS_* variable from its PGBACKREST_REPO1_S3_* counterpart, when the latter
/// is actually set.
fn use_pgbackrest_environment() {
    let mappings = [
        ("AWS_ACCESS_KEY_ID", "PGBACKREST_REPO1_S3_KEY"),
        ("AWS_SECRET_ACCESS_KEY", "PGBACKREST_REPO1_S3_KEY_SECRET"),
        ("AWS_REGION", "PGBACKREST_REPO1_S3_REGION"),
    ];
    for (target, source) in mappings {
        // Only populate when the AWS variable is absent AND the pgBackRest one exists.
        if std::env::var_os(target).is_none() {
            if let Some(value) = std::env::var_os(source) {
                trace!("Populating environment variable {} using envvar {}", target, source);
                std::env::set_var(target, value);
            }
        }
    }
}
Reproduce
dd if=/dev/random of=30mb-3.test bs=1M count=30
RUST_LOG=debug cargo run --release -- 30mb-3.test 's3://example-bucket/testing/'
working part
[2021-09-30T08:33:44.910Z DEBUG hyper::client::pool] pooling idle connection for ("https", example-bucket.s3.amazonaws.com)
[2021-09-30T08:33:44.910Z DEBUG reqwest::async_impl::client] response '200 OK' for https://example-bucket.s3.amazonaws.com/testing/30mb-3.test-working
[2021-09-30T08:33:44.914Z INFO broken] Status code after uploading file: 200
[...]
[2021-09-30T08:34:03.684Z INFO broken] Object seems to exist just fine: /testing/30mb-3.test-working
broken part
[2021-09-30T08:34:02.690Z DEBUG hyper::proto::h1::decode] incoming chunked header: 0x16B (363 bytes)
[2021-09-30T08:34:02.690Z DEBUG reqwest::async_impl::client] response '400 Bad Request' for https://example-bucket.s3.amazonaws.com/testing/30mb-3.test-broken?partNumber=4&uploadId=6xaRKlcu6pMOJk0edUjkQMM3FB6Ou4awpTpMrbjwdL4Q9EvobezX1cFIsOoBGYr1iE_TNtEa4Ocksz9XA4hQ_O5VUH7AAbEEA9NxATBbFZ1TV2jDgR78rvUJvouYNcJz
[2021-09-30T08:34:02.692Z DEBUG hyper::proto::h1::io] read 5 bytes
[2021-09-30T08:34:02.692Z DEBUG hyper::proto::h1::conn] incoming body completed
[2021-09-30T08:34:02.696Z DEBUG reqwest::connect] starting new connection: https://example-bucket.s3.amazonaws.com/
[2021-09-30T08:34:02.696Z DEBUG hyper::client::connect::dns] resolving host="example-bucket.s3.amazonaws.com"
[2021-09-30T08:34:02.725Z DEBUG hyper::client::connect::http] connecting to 52.217.44.52:443
[2021-09-30T08:34:02.834Z DEBUG hyper::client::connect::http] connected to 52.217.44.52:443
[2021-09-30T08:34:03.063Z DEBUG hyper::proto::h1::io] flushed 766 bytes
[2021-09-30T08:34:03.063Z DEBUG hyper::proto::h1::io] flushed 1711 bytes
[2021-09-30T08:34:03.186Z DEBUG hyper::proto::h1::io] read 287 bytes
[2021-09-30T08:34:03.186Z DEBUG hyper::proto::h1::io] parsed 7 headers
[2021-09-30T08:34:03.186Z DEBUG hyper::proto::h1::conn] incoming body is chunked encoding
[2021-09-30T08:34:03.186Z DEBUG hyper::proto::h1::io] read 336 bytes
[2021-09-30T08:34:03.186Z DEBUG hyper::proto::h1::decode] incoming chunked header: 0x144 (324 bytes)
[2021-09-30T08:34:03.186Z DEBUG reqwest::async_impl::client] response '400 Bad Request' for https://example-bucket.s3.amazonaws.com/testing/30mb-3.test-broken?uploadId=6xaRKlcu6pMOJk0edUjkQMM3FB6Ou4awpTpMrbjwdL4Q9EvobezX1cFIsOoBGYr1iE_TNtEa4Ocksz9XA4hQ_O5VUH7AAbEEA9NxATBbFZ1TV2jDgR78rvUJvouYNcJz
[2021-09-30T08:34:03.186Z DEBUG hyper::proto::h1::conn] incoming body completed
[2021-09-30T08:34:03.187Z INFO broken] Status code after uploading file: 200
[2021-09-30T08:34:03.187Z INFO broken] Broken path, got status code: 200
[...]
[2021-09-30T08:34:04.187Z DEBUG reqwest::async_impl::client] response '404 Not Found' for https://example-bucket.s3.amazonaws.com/testing/30mb-3.test-broken
[2021-09-30T08:34:04.190Z ERROR broken] Object does not exist: /testing/30mb-3.test-broken
Smaller files work just fine
dd if=/dev/random of=4mb.test bs=1M count=4
RUST_LOG=debug cargo run --release -- 4mb.test 's3://example-bucket/testing/'
[...]
[2021-09-30T08:36:04.827Z INFO broken] Object seems to exist just fine: /testing/4mb.test-working
[...]
[2021-09-30T08:36:05.344Z INFO broken] Object seems to exist just fine: /testing/4mb.test-broken
Interesting — the existing tests only use 10 MB objects; I'll look into this.
@feikesteenbergen unfortunately I can't reproduce this, I've bumped test size to 20mb, still seems to go through fine