Incorrect handling of concatenated gzip contents
Describe the bug
Uploading a file (or byte array) whose contents are multiple gzip streams concatenated together does not work with the AWS CRT-based S3 async client. It results in either a checksum exception or only part of the data being uploaded.
Expected Behavior
Uploading a file (or byte array) consisting of concatenated gzip streams works with the CRT-based client and uploads the complete content.
Current Behavior
With the CRT-based client, the upload either fails with a checksum exception or uploads only part of the data.
Reproduction Steps
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.create();
do_test_concatentated(false, asFile, s3AsyncClient);
}
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated_transfer_manager(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.create();
do_test_concatentated(false,
asFile,
s3AsyncClient,
S3TransferManager.builder().s3Client(s3AsyncClient).build());
}
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated_crt(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.crtCreate();
do_test_concatentated(false, asFile, s3AsyncClient);
}
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated_crt_transfer_manager(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.crtCreate();
do_test_concatentated(false,
asFile,
s3AsyncClient,
S3TransferManager.builder().s3Client(s3AsyncClient).build());
}
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated_gzips(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.create();
do_test_concatentated(true, asFile, s3AsyncClient);
}
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated_gzips_transfer_manager(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.create();
do_test_concatentated(true,
asFile,
s3AsyncClient,
S3TransferManager.builder().s3Client(s3AsyncClient).build());
}
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated_gzips_crt(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.crtCreate();
do_test_concatentated(true, asFile, s3AsyncClient);
}
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void test_concatentated_gzips_crt_transfer_manager(boolean asFile) throws Exception {
S3AsyncClient s3AsyncClient = S3AsyncClient.crtCreate();
do_test_concatentated(true,
asFile,
s3AsyncClient,
S3TransferManager.builder().s3Client(s3AsyncClient).build());
}
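// Shared test body: builds the concatenated content, uploads it (via the async client or
// the transfer manager), downloads it, and compares the round-tripped contents.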
protected void do_test_concatentated(boolean gzip,
boolean asFile,
S3AsyncClient s3AsyncClient) throws Exception {
do_test_concatentated(gzip, asFile, s3AsyncClient, null);
}
protected void do_test_concatentated(boolean gzip,
boolean asFile,
S3AsyncClient s3AsyncClient,
S3TransferManager s3TransferManager) throws Exception {
Path concatentated = getConcatentated(gzip);
String bucket = UUID.randomUUID().toString();
String key = UUID.randomUUID().toString();
try {
s3AsyncClient.createBucket(r -> r.bucket(bucket)).join();
if (s3TransferManager != null) {
upload(s3TransferManager, bucket, key, concatentated, asFile);
} else {
upload(s3AsyncClient, bucket, key, concatentated, asFile);
}
Path downloaded;
if (s3TransferManager != null) {
downloaded = download(s3TransferManager, bucket, key);
} else {
downloaded = download(s3AsyncClient, bucket, key);
}
try {
var expectedContents = readContent(concatentated, gzip);
var actualContents = readContent(downloaded, gzip);
assertAll(() -> assertEquals(expectedContents,
actualContents,
"downloaded contents do not match uploaded contents"));
} finally {
Files.delete(downloaded);
}
} finally {
s3AsyncClient.deleteObject(r -> r.bucket(bucket).key(key)).join();
s3AsyncClient.deleteBucket(r -> r.bucket(bucket)).join();
Files.delete(concatentated);
}
}
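// Builds a temp file with two pieces of content ("hello" then "world"); with gzip=true each
// piece is written as its own gzip member, producing a concatenated multi-member gzip file.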
protected Path getConcatentated(boolean gzip) throws Exception {
Path concatentated = Files.createTempFile(null, null);
appendContent(concatentated, "hello", gzip);
appendContent(concatentated, "world", gzip);
return concatentated;
}
protected void appendContent(Path path, String content, boolean gzip) throws Exception {
OutputStream output = Files.newOutputStream(path, StandardOpenOption.APPEND);
if (gzip) {
output = new GZIPOutputStream(output);
}
try (OutputStreamWriter writer = new OutputStreamWriter(output)) {
writer.append(content);
}
}
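// Reads the file back as text; java.util.zip.GZIPInputStream decodes all concatenated gzip
// members, so the expected content for the gzip case is "helloworld".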
protected String readContent(Path path, boolean gzip) throws Exception {
InputStream input;
if (gzip) {
input = new GZIPInputStream(Files.newInputStream(path));
} else {
input = Files.newInputStream(path);
}
ByteArrayOutputStream output = new ByteArrayOutputStream();
try (input; output) {
input.transferTo(output);
}
return output.toString();
}
protected void upload(S3AsyncClient s3AsyncClient,
String bucket,
String key,
Path path,
boolean asFile) throws Exception {
if (asFile) {
s3AsyncClient.putObject(r -> r.bucket(bucket).key(key), AsyncRequestBody.fromFile(path))
.join();
} else {
s3AsyncClient.putObject(r -> r.bucket(bucket).key(key),
AsyncRequestBody.fromBytes(Files.readAllBytes(path)))
.join();
}
}
protected void upload(S3TransferManager s3TransferManager,
String bucket,
String key,
Path path,
boolean asFile) throws Exception {
if (asFile) {
s3TransferManager.uploadFile(ufr -> ufr.putObjectRequest(por -> por.bucket(bucket)
.key(key))
.source(path))
.completionFuture()
.join();
} else {
byte[] bytes = Files.readAllBytes(path);
s3TransferManager.upload(ur -> ur.putObjectRequest(por -> por.bucket(bucket).key(key))
.requestBody(AsyncRequestBody.fromBytes(bytes)))
.completionFuture()
.join();
}
}
public Path download(S3AsyncClient s3AsyncClient, String bucket, String key) throws Exception {
Path path = Files.createTempFile(null, null);
s3AsyncClient.getObject(r -> r.bucket(bucket).key(key),
AsyncResponseTransformer.toFile(path,
FileTransformerConfiguration.defaultCreateOrReplaceExisting()))
.join();
return path;
}
protected Path download(S3TransferManager s3TransferManager,
String bucket,
String key) throws Exception {
Path path = Files.createTempFile(null, null);
s3TransferManager.downloadFile(dfr -> dfr.getObjectRequest(gor -> gor.bucket(bucket)
.key(key))
.destination(path))
.completionFuture()
.join();
return path;
}
Possible Solution
No response
Additional Information/Context
No response
AWS Java SDK version used
2.20.157
JDK version used
openjdk 17.0.8 2023-07-18
Operating System and version
macOS 13.6
I also noticed that on Linux, we do not encounter (or are otherwise somehow ignoring) the checksum error. As a result, only the first gzip portion of the concatenated content is uploaded. The size reported by aws s3 ls matches the full file, while the size reported by aws s3api get-object matches the first gzip portion of the content.
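For reference, here is a rough SDK-based sketch of the same check (the bucket and key values are placeholders, not taken from the reproduction above): it compares the size that ListObjectsV2 reports (what aws s3 ls shows) against the number of bytes a GET actually returns (what aws s3api get-object writes).
import software.amazon.awssdk.core.async.AsyncResponseTransformer;
import software.amazon.awssdk.services.s3.S3AsyncClient;

public class SizeCheck {
    public static void main(String[] args) {
        S3AsyncClient s3 = S3AsyncClient.create();
        String bucket = "my-bucket"; // placeholder
        String key = "my-key";       // placeholder

        // Size as reported by listing the object (what `aws s3 ls` shows).
        long listedSize = s3.listObjectsV2(r -> r.bucket(bucket).prefix(key))
                            .join()
                            .contents()
                            .get(0)
                            .size();

        // Number of bytes actually returned by a GET (what `aws s3api get-object` writes).
        long downloadedSize = s3.getObject(r -> r.bucket(bucket).key(key),
                                           AsyncResponseTransformer.toBytes())
                                .join()
                                .asByteArray()
                                .length;

        // Per the observation above, on Linux the listed size matches the full concatenated
        // file while the downloaded size matches only the first gzip member.
        System.out.println("listed=" + listedSize + " downloaded=" + downloadedSize);
    }
}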
As a workaround, we manually gunzip and re-gzip the file to convert it into a single gzip stream:
try {
var temp = Files.createTempFile(null,
null);
try (var input = new GZIPInputStream(Files.newInputStream(concatenated));
var output = new GZIPOutputStream(Files.newOutputStream(temp))) {
input.transferTo(output);
} finally {
Files.deleteIfExists(concatenated);
}
return temp;
} catch (IOException e) {
throw new UncheckedIOException(e);
}