tls-client-api icon indicating copy to clipboard operation
tls-client-api copied to clipboard

Issues retrieving binary data?

Open jlcd opened this issue 8 months ago • 2 comments

I tried using the out of the box API, with no tweaks, to download an image and a pdf.

Neither returned a valid resource: both the PDF and the PNG were "corrupted" after delivery.

Was able to download it but had to make an alternative tls_client_cffi_src.BuildResponse()

I can dig deeper later, but just wanted to leave the question here in case anyone knows more.

jlcd avatar Aug 27 '25 22:08 jlcd

@jlcd In the tls-client repo we have an example of how to download or upload images using the shared library API: https://github.com/bogdanfinn/tls-client/blob/master/cffi_dist/example_node/index_image.js

You can use the same payload with the standalone api.

Please note the isByteResponse field, which you need to set to true. See the linked example code.

bogdanfinn avatar Aug 28 '25 07:08 bogdanfinn

That makes sense, but mandates that we know what to expect from a request.

I came up with a different approach for the API:

New response-builder.go:

package api

import (
	"encoding/base64"
	"io"
	"mime"
	"strings"
	"unicode/utf8"

	http "github.com/bogdanfinn/fhttp"
	tls_client_cffi_src "github.com/bogdanfinn/tls-client/cffi_src"
	"github.com/google/uuid"
)

// CustomResponse is the JSON payload produced by BuildCustomResponse. It
// carries the forwarded response plus a flag telling the client whether Body
// is base64-encoded binary data.
type CustomResponse struct {
	// Id is a freshly generated UUID identifying this response.
	Id                string                       `json:"id"`
	// SessionId is only populated when the response is built with a session;
	// otherwise it is left empty and omitted from the JSON output.
	SessionId         string                       `json:"sessionId,omitempty"`
	// Status is the HTTP status code of the upstream response.
	Status            int                          `json:"status"`
	// Target is the URL of the request that produced the response.
	Target            string                       `json:"target"`
	// Body is the response body: base64 (standard encoding) when
	// EncodedBinaryData is true, the raw body text otherwise.
	Body              string                       `json:"body"`
	// Headers holds the upstream response headers.
	Headers           map[string][]string          `json:"headers"`
	// Cookies holds the session cookies converted to the cffi_src Cookie type.
	Cookies           []tls_client_cffi_src.Cookie `json:"cookies"`
	// EncodedBinaryData is true when Body was detected as binary and
	// base64-encoded; clients must decode Body in that case.
	EncodedBinaryData bool                         `json:"encodedBinaryData"`
}

// BuildCustomResponse reads resp fully and converts it into a CustomResponse,
// base64-encoding the body when it is detected as binary.
//
// sessionId is copied into the result only when withSession is true.
// sessionCookies are converted with transformCookies (defined elsewhere in
// this package). input is accepted so the signature mirrors
// tls_client_cffi_src.BuildResponse; it is not used here.
//
// The response body is always closed before returning. A non-nil error is
// returned only when reading the body fails, in which case the result is nil.
func BuildCustomResponse(sessionId string, withSession bool, resp *http.Response, sessionCookies []*http.Cookie, input tls_client_cffi_src.RequestInput) (*CustomResponse, error) {
	// Register the close before reading so the body is released even when
	// the read fails part-way through.
	defer resp.Body.Close()

	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// Binary payloads cannot be carried safely in a JSON string, so they are
	// base64-encoded and flagged for the client to decode.
	encodedBinaryData := isBinaryContent(resp.Header.Get("Content-Type"), bodyBytes)

	var bodyString string
	if encodedBinaryData {
		bodyString = base64.StdEncoding.EncodeToString(bodyBytes)
	} else {
		bodyString = string(bodyBytes)
	}

	// Copy the header map into a plain map[string][]string for serialization.
	headers := make(map[string][]string, len(resp.Header))
	for key, values := range resp.Header {
		headers[key] = values
	}

	response := &CustomResponse{
		Id:                uuid.New().String(),
		Status:            resp.StatusCode,
		Target:            resp.Request.URL.String(),
		Body:              bodyString,
		Headers:           headers,
		Cookies:           transformCookies(sessionCookies),
		EncodedBinaryData: encodedBinaryData,
	}

	if withSession {
		response.SessionId = sessionId
	}

	return response, nil
}

// isBinaryContent reports whether a response body should be treated as binary.
//
// The decision is driven by the Content-Type header value in contentType:
//   - text/*, *+json, *+xml, application/json, application/xml and
//     application/javascript are text;
//   - image/*, audio/*, video/*, font/*, application/pdf, application/zip and
//     application/octet-stream are binary;
//   - anything else, or a header that cannot be parsed (including an empty
//     one), falls back to inspecting data with isBinaryData.
//
// Structured-syntax suffixes are checked before the top-level type so that
// XML- or JSON-based formats such as image/svg+xml stay text.
func isBinaryContent(contentType string, data []byte) bool {
	mediaType, _, err := mime.ParseMediaType(contentType)
	if err != nil {
		// Missing or malformed header: decide from the bytes themselves.
		return isBinaryData(data)
	}

	switch {
	case strings.HasPrefix(mediaType, "text/"),
		strings.HasSuffix(mediaType, "+json"),
		strings.HasSuffix(mediaType, "+xml"):
		return false
	case strings.HasPrefix(mediaType, "image/"),
		strings.HasPrefix(mediaType, "audio/"),
		strings.HasPrefix(mediaType, "video/"),
		strings.HasPrefix(mediaType, "font/"):
		return true
	}

	switch mediaType {
	case "application/json", "application/xml", "application/javascript":
		return false
	case "application/pdf", "application/zip", "application/octet-stream":
		return true
	}

	// Unknown media type: fall back to content analysis.
	return isBinaryData(data)
}

// isBinaryData reports whether data looks like binary rather than text.
//
// Data is considered binary when either:
//   - more than 1% of its first 512 bytes are NUL bytes, or
//   - it is not valid UTF-8 as a whole.
//
// The UTF-8 check matters because the body ends up in a JSON string, and a
// body that is not valid UTF-8 would be altered by JSON encoding. The whole
// slice is validated rather than the sample so that a multi-byte rune
// straddling the sample boundary is not mistaken for invalid input.
//
// Empty data is reported as not binary.
func isBinaryData(data []byte) bool {
	if len(data) == 0 {
		return false
	}

	// Only a leading sample is scanned for NUL bytes.
	const sniffLen = 512
	sample := data
	if len(sample) > sniffLen {
		sample = sample[:sniffLen]
	}

	nullBytes := 0
	for _, b := range sample {
		if b == 0 {
			nullBytes++
		}
	}

	// More than 1% NUL bytes in the sample: likely binary.
	if float64(nullBytes)/float64(len(sample)) > 0.01 {
		return true
	}

	// NUL-free binary formats (e.g. compressed streams) are caught here.
	return !utf8.Valid(data)
}

and in tls-client-api/api/forward-request-handler.go I did:

-	response, err := tls_client_cffi_src.BuildResponse(sessionId, withSession, resp, sessionCookies, *input)
-
-	if err != nil {
-		return handleErrorResponse(fh.logger, sessionId, withSession, err)
-	}
-
-	return apiserver.NewJsonResponse(response), nil
+	customResponse, buildErr := BuildCustomResponse(sessionId, withSession, resp, sessionCookies, *input)
+
+	if buildErr != nil {
+		clientErr := tls_client_cffi_src.NewTLSClientError(buildErr)
+		return handleErrorResponse(fh.logger, sessionId, withSession, clientErr)
+	}
+
+	return apiserver.NewJsonResponse(customResponse), nil

It basically auto-detects binary data and base64-encodes it; the client then has to decode the body whenever the new boolean property EncodedBinaryData is true.

jlcd avatar Aug 28 '25 13:08 jlcd