physfs icon indicating copy to clipboard operation
physfs copied to clipboard

POSIX GNU tar read support

Open jopadan opened this issue 2 years ago • 7 comments

  • supporting tared csm.bin of Chasm: The Rift Remastered at https://store.steampowered.com/app/2061230/Chasm_The_Rift/
  • See @Panzerschrek Chasm-Reverse pull request for a working implementation

jopadan avatar Jul 22 '23 15:07 jopadan

This can't be merged as-is, as it contains code licensed under the GPL. Sorry.

icculus avatar Jul 24 '23 02:07 icculus

Here is the working and clean build fix against your latest commit:

diff --git a/src/physfs_archiver_tar.c b/src/physfs_archiver_tar.c
index c0d5f44..41a3920 100644
--- a/src/physfs_archiver_tar.c
+++ b/src/physfs_archiver_tar.c
@@ -14,10 +14,14 @@
 
 static bool TAR_loadEntries(PHYSFS_Io *io, void *arc)
 {
-	union block zero_block = { .buffer = { 0 } };
-	union block current_block = { .buffer = { 0 } };
+	union block zero_block;
+	union block current_block;
 	PHYSFS_uint64 count = 0;
 
+
+	memset(zero_block.buffer, 0, sizeof(zero_block.buffer));
+	memset(current_block.buffer, 0, sizeof(current_block.buffer));
+
 	/* read header block until zero-only terminated block */
 	for(; __PHYSFS_readAll(io, current_block.buffer, BLOCKSIZE); count++)
 	{
@@ -25,8 +29,7 @@ static bool TAR_loadEntries(PHYSFS_Io *io, void *arc)
 			return true;
 
 		/* verify magic */
-		int format = TAR_magic(&current_block);
-		switch(format)
+		switch(TAR_magic(&current_block))
 		{
 			case POSIX_FORMAT:
 				TAR_posix_block(io, arc, &current_block, &count);
@@ -53,14 +56,15 @@ static void *TAR_openArchive(PHYSFS_Io *io, const char *name,
                               int forWriting, int *claimed)
 {
     void *unpkarc = NULL;
+    union block first;
+    enum archive_format format;
 
     assert(io != NULL);  /* shouldn't ever happen. */
 
     BAIL_IF(forWriting, PHYSFS_ERR_READ_ONLY, NULL);
 
-    union block first;
     BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, first.buffer, BLOCKSIZE), NULL);
-    int format = TAR_magic(&first);
+    format = TAR_magic(&first);
     io->seek(io, 0);
     *claimed = format == DEFAULT_FORMAT ? 0 : 1;
 
diff --git a/src/physfs_tar.h b/src/physfs_tar.h
index 30b920a..ec51f0e 100644
--- a/src/physfs_tar.h
+++ b/src/physfs_tar.h
@@ -7,6 +7,7 @@
 #include <stdbool.h>
 #include <string.h>
 #include <ctype.h>
+#include <sys/stat.h>
 #include <fcntl.h>
 #include <time.h>
 
@@ -137,12 +138,12 @@ static bool TAR_encodeOctal(char* data, size_t size, PHYSFS_uint64 i) {
     return true;
 }
 
-static int TAR_magic(union block* block) {
+static enum archive_format TAR_magic(union block* block) {
 	if(strcmp(block->header.magic, OLDGNU_MAGIC) == 0 && strncmp(block->header.version, TVERSION, 2) == 0)
-		return (int)OLDGNU_FORMAT;
+		return OLDGNU_FORMAT;
 	if(strncmp(block->header.magic, TMAGIC, TMAGLEN - 1) == 0)
-		return (int)POSIX_FORMAT;
-	return (int)DEFAULT_FORMAT;
+		return POSIX_FORMAT;
+	return DEFAULT_FORMAT;
 }
 
 static size_t TAR_fileSize(union block* block) {

sezero avatar Jul 26 '23 21:07 sezero

The mode procedures aren't used: I'm guessing that commenting them out will make windows builds succeed.

sezero avatar Jul 27 '23 10:07 sezero

The following patch makes a clean-up of the whole thing:

diff --git a/src/physfs_tar.h b/src/physfs_tar.h
index e15bc68..aca7e40 100644
--- a/src/physfs_tar.h
+++ b/src/physfs_tar.h
@@ -1,15 +1,7 @@
 #ifndef _INCLUDE_PHYSFS_TAR_H_
 #define _INCLUDE_PHYSFS_TAR_H_
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
 #include <stdbool.h>
-#include <string.h>
-#include <ctype.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <time.h>
 
 #ifndef PATH_MAX
 #define PATH_MAX 1024
@@ -132,21 +124,15 @@ static PHYSFS_uint64 TAR_decodeOctal(char* data, size_t size) {
     return sum;
 }
 
-static bool TAR_encodeOctal(char* data, size_t size, PHYSFS_uint64 i) {
-    if(	snprintf(data, size, "%llo", i) < 0 )
-    	return false;
-    return true;
-}
-
 static int TAR_magic(union block* block) {
 	if(strcmp(block->header.magic, OLDGNU_MAGIC) == 0 && strncmp(block->header.version, TVERSION, 2) == 0)
-		return (int)OLDGNU_FORMAT;
+		return OLDGNU_FORMAT;
 	if(strncmp(block->header.magic, TMAGIC, TMAGLEN - 1) == 0)
-		return (int)POSIX_FORMAT;
-	return (int)DEFAULT_FORMAT;
+		return POSIX_FORMAT;
+	return DEFAULT_FORMAT;
 }
 
-static size_t TAR_fileSize(union block* block) {
+static PHYSFS_uint64 TAR_fileSize(union block* block) {
 	return TAR_decodeOctal(block->header.size, sizeof(block->header.size));
 }
 
@@ -169,12 +155,12 @@ static bool TAR_checksum(union block* block) {
 	return (reference_chksum == unsigned_sum || reference_chksum == signed_sum);
 }
 
-time_t TAR_time(union block* block)
+static PHYSFS_uint64 TAR_time(union block* block)
 {
 	return TAR_decodeOctal(block->header.mtime, 12);
 }
 
-bool TAR_posix_block(PHYSFS_Io* io, void* arc, union block* block, PHYSFS_uint64* count)
+static bool TAR_posix_block(PHYSFS_Io* io, void* arc, union block* block, PHYSFS_uint64* count)
 {
 	static bool long_name = false;
 	const PHYSFS_Allocator* allocator = PHYSFS_getAllocator();

long_name having persistent storage in TAR_posix_block is a problem. Haven't tried to follow the logic much, but if you truly want following it then I suggest passing it as a reference from its caller, e.g. like below: (compile-tested only !!)

diff --git a/src/physfs_archiver_tar.c b/src/physfs_archiver_tar.c
index e9ad4f0..03dcab6 100644
--- a/src/physfs_archiver_tar.c
+++ b/src/physfs_archiver_tar.c
@@ -17,6 +17,7 @@ static bool TAR_loadEntries(PHYSFS_Io *io, void *arc)
 	union block zero_block;
 	union block current_block;
 	PHYSFS_uint64 count = 0;
+	bool long_name = false;
 
 	memset(zero_block.buffer, 0, sizeof(BLOCKSIZE));
 	memset(current_block.buffer, 0, sizeof(BLOCKSIZE));
@@ -31,7 +32,7 @@ static bool TAR_loadEntries(PHYSFS_Io *io, void *arc)
 		switch(TAR_magic(&current_block))
 		{
 			case POSIX_FORMAT:
-				TAR_posix_block(io, arc, &current_block, &count);
+				TAR_posix_block(io, arc, &current_block, &count, &long_name);
 				break;
 			case OLDGNU_FORMAT:
 				break;
diff --git a/src/physfs_tar.h b/src/physfs_tar.h
index aca7e40..63f2974 100644
--- a/src/physfs_tar.h
+++ b/src/physfs_tar.h
@@ -160,9 +160,8 @@ static PHYSFS_uint64 TAR_time(union block* block)
 	return TAR_decodeOctal(block->header.mtime, 12);
 }
 
-static bool TAR_posix_block(PHYSFS_Io* io, void* arc, union block* block, PHYSFS_uint64* count)
+static bool TAR_posix_block(PHYSFS_Io* io, void* arc, union block* block, PHYSFS_uint64* count, bool *long_name)
 {
-	static bool long_name = false;
 	const PHYSFS_Allocator* allocator = PHYSFS_getAllocator();
         char name[PATH_MAX] = { 0 };
 	PHYSFS_sint64 time  = 0;
@@ -190,10 +189,10 @@ static bool TAR_posix_block(PHYSFS_Io* io, void* arc, union block* block, PHYSFS
 	/* add file type entry */
 	if (block->header.typeflag == REGTYPE || block->header.typeflag == 0) {
 		/* support long file names */
-		if(long_name) {
+		if(*long_name) {
 			strcpy(&name[0], block->header.name);
 			BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, block->buffer, BLOCKSIZE), 0);
-			long_name = false;
+			*long_name = false;
 			(*count)++;
 		}
 		size = TAR_fileSize(block);
@@ -214,7 +213,7 @@ static bool TAR_posix_block(PHYSFS_Io* io, void* arc, union block* block, PHYSFS
 	/* long name mode */
 	else if(block->header.typeflag == GNUTYPE_LONGNAME)
 	{
-		long_name = true;
+		*long_name = true;
 	}
 	else
 	{

The rest is up to @icculus.

sezero avatar Jul 27 '23 23:07 sezero

@sezero: Thanks for the changes and hints I've integrated them into the pull request.

jopadan avatar Jul 28 '23 19:07 jopadan

@sezero: Thanks for the changes and hints I've integrated them into the pull request.

Thanks: all my change requests are addressed. It's in @icculus hands now.

sezero avatar Jul 28 '23 21:07 sezero

This can't be merged as-is, as it contains code licensed under the GPL. Sorry.

@icculus I've modified the code not to contain the original code licensed under the GPL now. zlib license seems to be compatible with GPL but not the other way around.

jopadan avatar Aug 12 '23 08:08 jopadan