aws-sdk-java-v2 icon indicating copy to clipboard operation
aws-sdk-java-v2 copied to clipboard

Incorrect handling of concatenated gzip contents

Open chrischall opened this issue 2 years ago • 2 comments

Describe the bug

Uploading a file or byte array that consists of concatenated gzip members does not work with the CRT-based client. The upload either fails with a checksum exception or uploads only part of the data.

Expected Behavior

Uploading a file or bytes consisting of concatenated gzip files or bytes works with CRT and uploads the complete file.

Current Behavior

Uploading a file or byte array that consists of concatenated gzip members does not work with the CRT-based client. The upload either fails with a checksum exception or uploads only part of the data.

Reproduction Steps

    /**
     * Round-trips plain (non-gzip) concatenated content through a client obtained from
     * {@code S3AsyncClient.create()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated(boolean asFile) throws Exception {
        final boolean gzip = false;
        S3AsyncClient client = S3AsyncClient.create();
        do_test_concatentated(gzip, asFile, client);
    }

    /**
     * Round-trips plain (non-gzip) concatenated content through an {@code S3TransferManager}
     * built on a client obtained from {@code S3AsyncClient.create()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated_transfer_manager(boolean asFile) throws Exception {
        final boolean gzip = false;
        S3AsyncClient client = S3AsyncClient.create();
        S3TransferManager transferManager = S3TransferManager.builder()
                                                             .s3Client(client)
                                                             .build();
        do_test_concatentated(gzip, asFile, client, transferManager);
    }

    /**
     * Round-trips plain (non-gzip) concatenated content through a client obtained from
     * {@code S3AsyncClient.crtCreate()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated_crt(boolean asFile) throws Exception {
        final boolean gzip = false;
        S3AsyncClient crtClient = S3AsyncClient.crtCreate();
        do_test_concatentated(gzip, asFile, crtClient);
    }

    /**
     * Round-trips plain (non-gzip) concatenated content through an {@code S3TransferManager}
     * built on a client obtained from {@code S3AsyncClient.crtCreate()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated_crt_transfer_manager(boolean asFile) throws Exception {
        final boolean gzip = false;
        S3AsyncClient crtClient = S3AsyncClient.crtCreate();
        S3TransferManager transferManager = S3TransferManager.builder()
                                                             .s3Client(crtClient)
                                                             .build();
        do_test_concatentated(gzip, asFile, crtClient, transferManager);
    }

    /**
     * Round-trips concatenated gzip content through a client obtained from
     * {@code S3AsyncClient.create()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated_gzips(boolean asFile) throws Exception {
        final boolean gzip = true;
        S3AsyncClient client = S3AsyncClient.create();
        do_test_concatentated(gzip, asFile, client);
    }

    /**
     * Round-trips concatenated gzip content through an {@code S3TransferManager} built on a
     * client obtained from {@code S3AsyncClient.create()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated_gzips_transfer_manager(boolean asFile) throws Exception {
        final boolean gzip = true;
        S3AsyncClient client = S3AsyncClient.create();
        S3TransferManager transferManager = S3TransferManager.builder()
                                                             .s3Client(client)
                                                             .build();
        do_test_concatentated(gzip, asFile, client, transferManager);
    }

    /**
     * Round-trips concatenated gzip content through a client obtained from
     * {@code S3AsyncClient.crtCreate()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated_gzips_crt(boolean asFile) throws Exception {
        final boolean gzip = true;
        S3AsyncClient crtClient = S3AsyncClient.crtCreate();
        do_test_concatentated(gzip, asFile, crtClient);
    }

    /**
     * Round-trips concatenated gzip content through an {@code S3TransferManager} built on a
     * client obtained from {@code S3AsyncClient.crtCreate()}.
     *
     * @param asFile whether the payload is uploaded as a file ({@code true}) or as bytes
     */
    @ParameterizedTest
    @ValueSource(booleans = { true, false })
    public void test_concatentated_gzips_crt_transfer_manager(boolean asFile) throws Exception {
        final boolean gzip = true;
        S3AsyncClient crtClient = S3AsyncClient.crtCreate();
        S3TransferManager transferManager = S3TransferManager.builder()
                                                             .s3Client(crtClient)
                                                             .build();
        do_test_concatentated(gzip, asFile, crtClient, transferManager);
    }

    /**
     * Runs the round-trip check using only the async client, i.e. delegates to the
     * four-argument overload with no transfer manager.
     *
     * @param gzip          whether each concatenated part is gzip-compressed
     * @param asFile        whether the payload is uploaded as a file ({@code true}) or as bytes
     * @param s3AsyncClient client used for every S3 call
     */
    protected void do_test_concatentated(boolean gzip,
                                         boolean asFile,
                                         S3AsyncClient s3AsyncClient) throws Exception {
        final S3TransferManager noTransferManager = null;
        do_test_concatentated(gzip, asFile, s3AsyncClient, noTransferManager);
    }

    /**
     * Uploads a concatenated file to a freshly created bucket, downloads it again and asserts
     * that the downloaded contents equal the uploaded contents.
     *
     * <p>The bucket name and object key are random UUIDs. The local source file, the
     * downloaded file, the object and the bucket are removed afterwards.
     *
     * @param gzip              whether each concatenated part is gzip-compressed
     * @param asFile            whether the payload is uploaded as a file ({@code true}) or as bytes
     * @param s3AsyncClient     client used to create/delete the bucket and, when no transfer
     *                          manager is given, to upload and download
     * @param s3TransferManager transfer manager used for upload and download, or {@code null}
     *                          to use {@code s3AsyncClient} directly
     * @throws Exception if any file or S3 operation fails
     */
    protected void do_test_concatentated(boolean gzip,
                                         boolean asFile,
                                         S3AsyncClient s3AsyncClient,
                                         S3TransferManager s3TransferManager) throws Exception {
        Path concatentated = getConcatentated(gzip);
        String bucket = UUID.randomUUID().toString();
        String key = UUID.randomUUID().toString();
        try {
            s3AsyncClient.createBucket(r -> r.bucket(bucket)).join();
            Path downloaded;
            if (s3TransferManager != null) {
                upload(s3TransferManager, bucket, key, concatentated, asFile);
                downloaded = download(s3TransferManager, bucket, key);
            } else {
                upload(s3AsyncClient, bucket, key, concatentated, asFile);
                downloaded = download(s3AsyncClient, bucket, key);
            }
            try {
                var expectedContents = readContent(concatentated, gzip);
                // Must read the DOWNLOADED copy; reading the source file twice would make
                // the assertion trivially true and hide truncated uploads.
                var actualContents = readContent(downloaded, gzip);
                assertAll(() -> assertEquals(expectedContents,
                                             actualContents,
                                             "downloaded contents do not match uploaded contents"));
            } finally {
                Files.delete(downloaded);
            }
        } finally {
            try {
                // Wait for the object to be gone before removing the bucket: the calls are
                // asynchronous and S3 refuses to delete a bucket that still holds objects.
                s3AsyncClient.deleteObject(r -> r.bucket(bucket).key(key)).join();
                s3AsyncClient.deleteBucket(r -> r.bucket(bucket)).join();
            } catch (RuntimeException ignored) {
                // Remote cleanup is best-effort (as before, its outcome is not part of the
                // test); a cleanup failure must not mask the real test result.
            } finally {
                Files.delete(concatentated);
            }
        }
    }

    /**
     * Creates a temporary file containing the parts {@code "hello"} and {@code "world"},
     * appended one after the other.
     *
     * @param gzip whether each part is written through its own gzip stream
     * @return the path of the newly created temporary file
     * @throws Exception if the file cannot be created or written
     */
    protected Path getConcatentated(boolean gzip) throws Exception {
        final Path target = Files.createTempFile(null, null);
        for (String part : new String[] { "hello", "world" }) {
            appendContent(target, part, gzip);
        }
        return target;
    }

    /**
     * Appends {@code content} to the existing file at {@code path}, encoded as UTF-8.
     *
     * <p>When {@code gzip} is {@code true} the content is written through a new
     * {@link java.util.zip.GZIPOutputStream}, so every call adds a separate gzip member
     * to the file.
     *
     * @param path    file to append to; it is opened with {@code StandardOpenOption.APPEND}
     * @param content text to append
     * @param gzip    whether to gzip-compress the appended content
     * @throws Exception if the file cannot be opened or written
     */
    protected void appendContent(Path path, String content, boolean gzip) throws Exception {
        // The file stream is its own resource so it is closed even if the GZIPOutputStream
        // constructor (which already writes the gzip header) throws.
        try (OutputStream file = Files.newOutputStream(path, StandardOpenOption.APPEND)) {
            OutputStream sink = gzip ? new GZIPOutputStream(file) : file;
            // Explicit charset: the result must not depend on the platform default encoding.
            try (OutputStreamWriter writer =
                     new OutputStreamWriter(sink, java.nio.charset.StandardCharsets.UTF_8)) {
                writer.append(content);
            }
        }
    }

    /**
     * Reads the whole file at {@code path} and returns it as a UTF-8 decoded string.
     *
     * @param path file to read
     * @param gzip whether the file is read through a {@link java.util.zip.GZIPInputStream}
     *             (i.e. decompressed) or read as-is
     * @return the (optionally decompressed) file contents
     * @throws Exception if the file cannot be opened, read or decompressed
     */
    protected String readContent(Path path, boolean gzip) throws Exception {
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        // The file stream is its own resource so it is closed even if the GZIPInputStream
        // constructor (which reads the gzip header) throws, e.g. on an empty file.
        try (InputStream file = Files.newInputStream(path);
             InputStream input = gzip ? new GZIPInputStream(file) : file) {
            input.transferTo(output);
        }
        // Explicit charset: the result must not depend on the platform default encoding.
        return output.toString(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Uploads the file at {@code path} to {@code bucket}/{@code key} with
     * {@code S3AsyncClient.putObject} and waits for the upload to finish.
     *
     * @param s3AsyncClient client performing the upload
     * @param bucket        destination bucket
     * @param key           destination object key
     * @param path          local file holding the payload
     * @param asFile        {@code true} to send a file-backed request body, {@code false} to
     *                      read the file into memory and send a byte-backed request body
     * @throws Exception if the file cannot be read or the upload fails
     */
    protected void upload(S3AsyncClient s3AsyncClient,
                          String bucket,
                          String key,
                          Path path,
                          boolean asFile) throws Exception {
        AsyncRequestBody body = asFile
                                ? AsyncRequestBody.fromFile(path)
                                : AsyncRequestBody.fromBytes(Files.readAllBytes(path));
        s3AsyncClient.putObject(request -> request.bucket(bucket).key(key), body)
                     .join();
    }

    /**
     * Uploads the file at {@code path} to {@code bucket}/{@code key} through the transfer
     * manager and waits for the transfer to complete.
     *
     * @param s3TransferManager transfer manager performing the upload
     * @param bucket            destination bucket
     * @param key               destination object key
     * @param path              local file holding the payload
     * @param asFile            {@code true} to use {@code uploadFile} with the path as source,
     *                          {@code false} to read the file into memory and use
     *                          {@code upload} with a byte-backed request body
     * @throws Exception if the file cannot be read or the upload fails
     */
    protected void upload(S3TransferManager s3TransferManager,
                          String bucket,
                          String key,
                          Path path,
                          boolean asFile) throws Exception {
        if (!asFile) {
            // In-memory variant: the whole file is loaded before the transfer starts.
            byte[] payload = Files.readAllBytes(path);
            var pendingUpload = s3TransferManager.upload(
                ur -> ur.putObjectRequest(por -> por.bucket(bucket).key(key))
                        .requestBody(AsyncRequestBody.fromBytes(payload)));
            pendingUpload.completionFuture().join();
            return;
        }

        // File variant: the transfer manager reads the source path itself.
        var pendingFileUpload = s3TransferManager.uploadFile(
            ufr -> ufr.putObjectRequest(por -> por.bucket(bucket).key(key))
                      .source(path));
        pendingFileUpload.completionFuture().join();
    }

    /**
     * Downloads {@code bucket}/{@code key} into a new temporary file with
     * {@code S3AsyncClient.getObject} and waits for the download to finish.
     *
     * @param s3AsyncClient client performing the download
     * @param bucket        source bucket
     * @param key           source object key
     * @return the path of the temporary file that received the object
     * @throws Exception if the temporary file cannot be created or the download fails
     */
    public Path download(S3AsyncClient s3AsyncClient, String bucket, String key) throws Exception {
        final Path target = Files.createTempFile(null, null);
        // createTempFile has already created the file, hence the create-or-replace configuration.
        FileTransformerConfiguration overwrite =
            FileTransformerConfiguration.defaultCreateOrReplaceExisting();
        s3AsyncClient.getObject(request -> request.bucket(bucket).key(key),
                                AsyncResponseTransformer.toFile(target, overwrite))
                     .join();
        return target;
    }

    /**
     * Downloads {@code bucket}/{@code key} into a new temporary file through the transfer
     * manager and waits for the transfer to complete.
     *
     * @param s3TransferManager transfer manager performing the download
     * @param bucket            source bucket
     * @param key               source object key
     * @return the path of the temporary file used as the download destination
     * @throws Exception if the temporary file cannot be created or the download fails
     */
    protected Path download(S3TransferManager s3TransferManager,
                            String bucket,
                            String key) throws Exception {
        final Path target = Files.createTempFile(null, null);
        var pendingDownload = s3TransferManager.downloadFile(
            dfr -> dfr.getObjectRequest(gor -> gor.bucket(bucket).key(key))
                      .destination(target));
        pendingDownload.completionFuture().join();
        return target;
    }

Possible Solution

No response

Additional Information/Context

No response

AWS Java SDK version used

2.20.157

JDK version used

openjdk 17.0.8 2023-07-18

Operating System and version

Mac OS 13.6

chrischall avatar Oct 02 '23 18:10 chrischall

I also noticed that on Linux we do not encounter the checksum error (or it is somehow being ignored). As a result, only the first gzip portion of the concatenated content is uploaded. The size reported by `aws s3 ls` matches the full file, while the size reported by `aws s3api get-object` matches only the first gzip portion of the content.

chrischall avatar Oct 02 '23 19:10 chrischall

As a workaround, we manually gunzip and then re-gzip the file to convert it into a single gzip file:

// Workaround: decompress `concatenated` and re-compress it into a new temporary file, so
// the result is a single gzip stream. The original file is always deleted afterwards;
// the new file is returned. I/O failures surface as UncheckedIOException.
try {
    var temp = Files.createTempFile(null,
                                    null);
    // Every stream is its own resource so the raw file streams are closed even when the
    // constructor of a gzip wrapper throws.
    try (var rawInput = Files.newInputStream(concatenated);
            var input = new GZIPInputStream(rawInput);
            var rawOutput = Files.newOutputStream(temp);
            var output = new GZIPOutputStream(rawOutput)) {
        input.transferTo(output);
    } catch (IOException | RuntimeException e) {
        // Do not leave a partially written temp file behind when the conversion fails.
        try {
            Files.deleteIfExists(temp);
        } catch (IOException cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw e;
    } finally {
        Files.deleteIfExists(concatenated);
    }
    return temp;
} catch (IOException e) {
    throw new UncheckedIOException(e);
}

chrischall avatar Oct 02 '23 19:10 chrischall