pyvips How to convert _cffi_backend.buffer object from crop to numpy

Hi,

I am trying to use the code mentioned in #100 to generate patches for training a deep learning model. Specifically, I want to generate 256×256 patches from all 300 WSIs I have and then select the right patches based on an Otsu threshold.

def fetch(region, patch_size, x, y):
    """Read the square patch at grid cell (x, y) from ``region``.

    ``x`` and ``y`` are tile indices rather than pixel offsets: each one is
    scaled by ``patch_size`` before being handed to ``region.fetch``, and the
    requested area is ``patch_size`` by ``patch_size``.  Whatever
    ``region.fetch`` returns is passed back unchanged.
    """
    left = patch_size * x
    top = patch_size * y
    return region.fetch(left, top, patch_size, patch_size)

# Directory holding the whole-slide images; every entry in it is opened.
folder = "/media/exx/Crucial X8/datsets/Camelyon"
files = os.listdir(folder)

# map vips formats to np dtypes
format_to_dtype = {
    'uchar': np.uint8,
    'char': np.int8,
    'ushort': np.uint16,
    'short': np.int16,
    'uint': np.uint32,
    'int': np.int32,
    'float': np.float32,
    'double': np.float64,
    'complex': np.complex64,
    'dpcomplex': np.complex128,
}

for i in files:
    file = os.path.join(folder, i)
    image = pyvips.Image.new_from_file(file)

    # Tile grid: integer division means only whole patches are counted, so a
    # partial strip at the right/bottom edge is never visited.
    patch_size = 256
    n_across = image.width // patch_size
    n_down = image.height // patch_size
    # x_max / y_max are computed but not used anywhere in this snippet.
    x_max = n_across - 1
    y_max = n_down - 1

    n_patches = 0
    for y in range(0, n_down):
        print("row {} ...".format(y))
        for x in range(0, n_across):
            patch = image.crop(x * patch_size, y * patch_size,
                            patch_size, patch_size)
            patch_f = patch.fliphor()

            # Eight variants per tile: the patch and its horizontal flip,
            # each unrotated and after rot90/rot180/rot270, written out with
            # write_to_memory().
            patch0 = patch.write_to_memory()
            patch1 = patch.rot90().write_to_memory()
            patch2 = patch.rot180().write_to_memory()
            patch3 = patch.rot270().write_to_memory()

            patch4 = patch_f.write_to_memory()
            patch5 = patch_f.rot90().write_to_memory()
            patch6 = patch_f.rot180().write_to_memory()
            patch7 = patch_f.rot270().write_to_memory()

            # NOTE(review): patch0 is the object returned by
            # write_to_memory() (a _cffi_backend.buffer), not a pyvips image,
            # so the .format lookup below raises AttributeError; .height,
            # .width and .bands are presumably missing on it as well.
            np_img = np.ndarray(
                buffer=patch0,
                dtype=format_to_dtype[patch0.format],
                shape=[patch0.height, patch0.width, patch0.bands]
            )
            # Counts all eight variants even though only patch0 is wrapped.
            n_patches += 8

    # Reported once per slide; n_patches is reset for every file.
    print("{} patches generated".format(n_patches))

Unfortunately this is throwing an error:

dtype=format_to_dtype[patch0.format],
AttributeError: '_cffi_backend.buffer' object has no attribute 'format'

Jan 24 '23 19:01 lafith

Hi @lafith,

Check the docs:

https://libvips.github.io/pyvips/intro.html#numpy-and-pil

You need eg.:

            # Convert the flipped patch image to a NumPy array in one call.
            numpy_array_patch4 = patch_f.numpy()

I think I'd expect fetch to be quicker for your case (though I've not benchmarked it).

Jan 24 '23 19:01 jcupitt

I modified this snippet using:

# Walk the tile grid row by row, cropping one patch per cell.
for y in range(0, n_down):
        print("row {} ...".format(y))
        for x in range(0, n_across):
            patch = image.crop(x * patch_size, y * patch_size,
                            patch_size, patch_size)
            patch_f = patch.fliphor()
            
            # Convert the flipped patch to a NumPy array.
            # NOTE(review): with the pyvips version installed when this was
            # run, numpy() was dispatched as a libvips operation name and
            # failed with "no such operation numpy".
            np_img = patch_f.numpy()

Now it is giving the following error:

Traceback (most recent call last):
  File "/home/exx/projects/norway/src/test.py", line 49, in <module>
    np_img = patch_f.numpy()
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/vimage.py", line 921, in call_function
    return pyvips.Operation.call(name, self, *args, **kwargs)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 222, in call
    intro = Introspect.get(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 141, in get
    cls._introspect_cache[operation_name] = Introspect(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 38, in __init__
    op = Operation.new_from_name(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 181, in new_from_name
    raise Error('no such operation {0}'.format(operation_name))
pyvips.error.Error: no such operation numpy
  VipsOperation: class "numpy" not found

Jan 24 '23 19:01 lafith

I modified this snippet using:

# Walk the tile grid row by row, cropping one patch per cell.
for y in range(0, n_down):
        print("row {} ...".format(y))
        for x in range(0, n_across):
            patch = image.crop(x * patch_size, y * patch_size,
                            patch_size, patch_size)
            patch_f = patch.fliphor()
            
            # Convert the flipped patch to a NumPy array.
            # NOTE(review): with the pyvips version installed when this was
            # run, numpy() was dispatched as a libvips operation name and
            # failed with "no such operation numpy".
            np_img = patch_f.numpy()

Now it is giving the following error:

Traceback (most recent call last):
  File "/home/exx/projects/norway/src/test.py", line 49, in <module>
    np_img = patch_f.numpy()
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/vimage.py", line 921, in call_function
    return pyvips.Operation.call(name, self, *args, **kwargs)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 222, in call
    intro = Introspect.get(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 141, in get
    cls._introspect_cache[operation_name] = Introspect(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 38, in __init__
    op = Operation.new_from_name(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 181, in new_from_name
    raise Error('no such operation {0}'.format(operation_name))
pyvips.error.Error: no such operation numpy
  VipsOperation: class "numpy" not found

Hey @jcupitt,

This error is resolved after updating the packages.

Jan 24 '23 19:01 lafith

@jcupitt

I am looking at the code using fetch from #100. If I replace image[1] through image[7] with different WSIs, will your code extract patches from all 8 different WSIs at the same time?

#!/usr/bin/python3
  
import sys
import pyvips

def fetch(region, patch_size, x, y):
    """Read the square patch at grid cell (x, y) from ``region``.

    ``x`` and ``y`` are tile indices rather than pixel offsets: each one is
    scaled by ``patch_size`` before being handed to ``region.fetch``, and the
    requested area is ``patch_size`` by ``patch_size``.  Whatever
    ``region.fetch`` returns is passed back unchanged.
    """
    left = patch_size * x
    top = patch_size * y
    return region.fetch(left, top, patch_size, patch_size)

# Eight views of the same slide: the original plus its three rotations, and
# the horizontal flip plus its three rotations.  The list is created before
# the item assignments, and every variant is derived from image[0] /
# image[4], so no undefined name is referenced.
image = [None] * 8
image[0] = pyvips.Image.new_from_file(sys.argv[1])
image[1] = image[0].rot90()
image[2] = image[0].rot180()
image[3] = image[0].rot270()

image[4] = image[0].fliphor()
image[5] = image[4].rot90()
image[6] = image[4].rot180()
image[7] = image[4].rot270()

# One region per view; patches are read from the regions via fetch().
reg = [pyvips.Region.new(x) for x in image]

# Tile grid of the unrotated image.  Integer division means only whole
# patches are counted.
patch_size = 64
n_across = image[0].width // patch_size
n_down = image[0].height // patch_size
x_max = n_across - 1
y_max = n_down - 1

n_patches = 0
for y in range(0, n_down):
    print("row {} ...".format(y))
    for x in range(0, n_across):
        # For each grid cell of the original, read the corresponding cell
        # from every rotated/flipped view; the tile indices are remapped
        # with x_max / y_max to follow the transform.
        patch0 = fetch(reg[0], patch_size, x, y)
        patch1 = fetch(reg[1], patch_size, y_max - y, x)
        patch2 = fetch(reg[2], patch_size, x_max - x, y_max - y)
        patch3 = fetch(reg[3], patch_size, y, x_max - x)

        patch4 = fetch(reg[4], patch_size, x_max - x, y)
        patch5 = fetch(reg[5], patch_size, y_max - y, x_max - x)
        patch6 = fetch(reg[6], patch_size, x, y_max - y)
        patch7 = fetch(reg[7], patch_size, y, x)

        n_patches += 8

print("{} patches generated".format(n_patches))

Jan 24 '23 20:01 lafith

Sure, it should be fine. Don't you need the rotates and flips, though?

Jan 24 '23 20:01 jcupitt

I thought I will add that step in PyTorch Dataset.py.

Jan 24 '23 20:01 lafith

@jcupitt

NumPy conversion on the result of fetch is giving an error. What would be the solution in this case?

patch0 = fetch(reg[0], patch_size, x, y)
# NOTE(review): fetch() hands back a raw _cffi_backend.buffer here, not a
# pyvips image, so it has no numpy() method.
np_img = patch0.numpy()

Error:

AttributeError: '_cffi_backend.buffer' object has no attribute 'numpy'

Jan 24 '23 20:01 lafith

You'll need something more like your original code.

            # Wrap the fetched buffer as a patch_size x patch_size array of
            # 8-bit pixels with 4 bands.
            # assumes the slide loads as uchar RGBA — TODO confirm per image
            np_img = np.ndarray(
                buffer=patch0,
                dtype=np.uint8, 
                shape=[patch_size, patch_size, 4], 
            )

Also, try the rgb option to load to get an RGB (not RGBA) image from openslide.

Jan 24 '23 20:01 jcupitt

This line worked, thank you. Where should I add the rgb option? I did not understand that part.

Currently I am using PIL Image.Convert for converting to RGB.

Jan 24 '23 20:01 lafith

Add it to new_from_file(), eg:

# rgb=True asks the loader for RGB rather than RGBA pixels.
image = pyvips.Image.new_from_file(sys.argv[1], rgb=True)

Though you'll need libvips 8.14.

Jan 24 '23 22:01 jcupitt

I am on Ubuntu 20.04, and `sudo apt install libvips` reports that it is already at the newest version (i.e., 8.9). Is there a solution for this?

Jan 24 '23 22:01 lafith

Another question related to this,

I am looking at the code using fetch from #100. If I replace image[1] through image[7] with different WSIs, will your code extract patches from all 8 different WSIs at the same time?

If I use a different WSI for each pipeline, would all of the images need to be the same size? If that's the case, is there a way to read a WSI at specific dimensions?

Jan 24 '23 22:01 lafith

libvips doesn't mind, you can fetch any size tile from any image.

Jan 25 '23 10:01 jcupitt

I have never worked with libvips or pyvips before, so I have a question about accessing the tiles. In the following code I am making 20 pipelines, each of which reads a different WSI from my dataset. What should go inside the for loop to access tiles from all of them?

def fetch(region, patch_size, x, y):
    """Read the square patch at grid cell (x, y) from ``region``.

    ``x`` and ``y`` are tile indices rather than pixel offsets: each one is
    scaled by ``patch_size`` before being handed to ``region.fetch``, and the
    requested area is ``patch_size`` by ``patch_size``.  Whatever
    ``region.fetch`` returns is passed back unchanged.
    """
    left = patch_size * x
    top = patch_size * y
    return region.fetch(left, top, patch_size, patch_size)

folder = "/media/exx/Crucial X8/datsets/Norway"
files = os.listdir(folder)

# Slides are opened in batches of `step`, so at most that many pipelines
# exist at once.
start = 0
end = len(files)
step = 20
for i in range(start, end, step):
    # Open one pipeline per file in this batch.  The final batch may hold
    # fewer than `step` files, so the list is sized by the slice instead of
    # being padded with integer placeholders, which are not images and
    # cannot be passed to Region.new().
    image_pipelines = [
        pyvips.Image.new_from_file(os.path.join(folder, file))
        for file in files[i:i + step]
    ]

    reg = [pyvips.Region.new(img) for img in image_pipelines]

    # The tile grid is taken from the first slide of the batch; integer
    # division means only whole patches are visited.
    patch_size = 256
    n_across = image_pipelines[0].width // patch_size
    n_down = image_pipelines[0].height // patch_size

    n_patches = 0
    for y in range(0, n_down):
        for x in range(0, n_across):
            print("row {}/{},{}/{} ...".format(y, n_down, x, n_across))
            # Only the first pipeline of the batch is tiled here; the other
            # entries of `reg` are not read yet.
            patch0 = fetch(reg[0], patch_size, x, y)

            # Wrap the fetched buffer as an array.  It gets its own name so
            # the loop index `x` stays intact for the file name below.
            # assumes 8-bit, 4-band (RGBA) pixels — TODO confirm per slide
            tile = np.ndarray(
                buffer=patch0,
                dtype=np.uint8,
                shape=[patch_size, patch_size, 4],
            )
            # `tile` is already a NumPy array, so it goes to PIL directly.
            img = Image.fromarray(tile).convert('RGB')
            img.save(os.path.join(
                "/media/exx/Crucial X8/datsets", "tiles",
                "{}_{}_{}.jpg".format(i, x, y)
            ))
            # Exactly one tile is written per grid cell.
            n_patches += 1

    # Reported once per batch; n_patches is reset for every batch.
    print("{} patches generated".format(n_patches))

libvips doesn't mind, you can fetch any size tile from any image.

I meant the dimensions of the WSI itself, not of the tile size.

Jan 25 '23 15:01 lafith