How to convert _cffi_backend.buffer object from crop to numpy
Hi,
I am trying to use the code mentioned in #100 to generate patches for training a deep learning model. Specifically, I want to generate 256x256 patches from all 300 WSIs I have and then select the right patches based on an Otsu threshold.
def fetch(region, patch_size, x, y):
    """Fetch one square tile from a region, addressed by tile index.

    Args:
        region: object exposing ``fetch(left, top, width, height)``.
        patch_size: tile edge length in pixels.
        x: tile column index; the pixel offset is ``patch_size * x``.
        y: tile row index; the pixel offset is ``patch_size * y``.

    Returns:
        Whatever ``region.fetch`` returns for the requested tile.
    """
    return region.fetch(patch_size * x, patch_size * y, patch_size, patch_size)
folder = "/media/exx/Crucial X8/datsets/Camelyon"
files = os.listdir(folder)

# map vips formats to np dtypes
format_to_dtype = {
    'uchar': np.uint8,
    'char': np.int8,
    'ushort': np.uint16,
    'short': np.int16,
    'uint': np.uint32,
    'int': np.int32,
    'float': np.float32,
    'double': np.float64,
    'complex': np.complex64,
    'dpcomplex': np.complex128,
}

for i in files:
    file = os.path.join(folder, i)
    image = pyvips.Image.new_from_file(file)

    # Tile grid for this image; partial tiles at the right/bottom are dropped.
    patch_size = 256
    n_across = image.width // patch_size
    n_down = image.height // patch_size
    x_max = n_across - 1
    y_max = n_down - 1
    n_patches = 0

    for y in range(0, n_down):
        print("row {} ...".format(y))
        for x in range(0, n_across):
            patch = image.crop(x * patch_size, y * patch_size,
                               patch_size, patch_size)
            patch_f = patch.fliphor()

            # Eight orientations of the tile: four rotations of the
            # original and four rotations of the horizontal flip.
            patch0 = patch.write_to_memory()
            patch1 = patch.rot90().write_to_memory()
            patch2 = patch.rot180().write_to_memory()
            patch3 = patch.rot270().write_to_memory()
            patch4 = patch_f.write_to_memory()
            patch5 = patch_f.rot90().write_to_memory()
            patch6 = patch_f.rot180().write_to_memory()
            patch7 = patch_f.rot270().write_to_memory()

            # NOTE: this is the statement that raises the AttributeError
            # reported below: patch0 is the raw buffer returned by
            # write_to_memory(), not an image, so it has no .format
            # (nor .height/.width/.bands).
            np_img = np.ndarray(
                buffer=patch0,
                dtype=format_to_dtype[patch0.format],
                shape=[patch0.height, patch0.width, patch0.bands]
            )
            n_patches += 8

    print("{} patches generated".format(n_patches))
Unfortunately this is throwing an error:
dtype=format_to_dtype[patch0.format],
AttributeError: '_cffi_backend.buffer' object has no attribute 'format'
Hi @lafith,
Check the docs:
https://libvips.github.io/pyvips/intro.html#numpy-and-pil
You need eg.:
numpy_array_patch4 = patch_f.numpy()
I think I'd expect fetch to be quicker for your case (though I've not benchmarked it).
I modified this snippet using:
for y in range(0, n_down):
    print("row {} ...".format(y))
    for x in range(0, n_across):
        patch = image.crop(x * patch_size, y * patch_size,
                           patch_size, patch_size)
        patch_f = patch.fliphor()
        # On the pyvips install used here this call failed with
        # "no such operation numpy"; updating the packages resolved it.
        np_img = patch_f.numpy()
Now it is giving the following error:
Traceback (most recent call last):
File "/home/exx/projects/norway/src/test.py", line 49, in <module>
np_img = patch_f.numpy()
File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/vimage.py", line 921, in call_function
return pyvips.Operation.call(name, self, *args, **kwargs)
File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 222, in call
intro = Introspect.get(operation_name)
File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 141, in get
cls._introspect_cache[operation_name] = Introspect(operation_name)
File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 38, in __init__
op = Operation.new_from_name(operation_name)
File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 181, in new_from_name
raise Error('no such operation {0}'.format(operation_name))
pyvips.error.Error: no such operation numpy
VipsOperation: class "numpy" not found
I modified this snippet using:
for y in range(0, n_down):
    print("row {} ...".format(y))
    for x in range(0, n_across):
        patch = image.crop(x * patch_size, y * patch_size,
                           patch_size, patch_size)
        patch_f = patch.fliphor()
        np_img = patch_f.numpy()
Now it is giving the following error:
Traceback (most recent call last):
  File "/home/exx/projects/norway/src/test.py", line 49, in <module>
    np_img = patch_f.numpy()
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/vimage.py", line 921, in call_function
    return pyvips.Operation.call(name, self, *args, **kwargs)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 222, in call
    intro = Introspect.get(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 141, in get
    cls._introspect_cache[operation_name] = Introspect(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 38, in __init__
    op = Operation.new_from_name(operation_name)
  File "/home/exx/.conda/envs/ai/lib/python3.9/site-packages/pyvips/voperation.py", line 181, in new_from_name
    raise Error('no such operation {0}'.format(operation_name))
pyvips.error.Error: no such operation numpy
  VipsOperation: class "numpy" not found
Hey @jcupitt,
This error is resolved after updating the packages.
@jcupitt
I am looking at the code using fetch from #100. If I replace image[1] through image[7] with different WSIs, will your code extract patches from all 8 different WSIs at the same time?
#!/usr/bin/python3
import sys
import pyvips
def fetch(region, patch_size, x, y):
    """Return the tile at grid position (x, y) from ``region``.

    Args:
        region: object exposing ``fetch(left, top, width, height)``.
        patch_size: tile edge length in pixels.
        x: tile column index; converted to a pixel offset internally.
        y: tile row index; converted to a pixel offset internally.

    Returns:
        The result of ``region.fetch`` for the ``patch_size`` square whose
        top-left corner is ``(patch_size * x, patch_size * y)``.
    """
    return region.fetch(patch_size * x, patch_size * y, patch_size, patch_size)
# Load the slide once, then derive the seven other orientations from it.
# The list is built in a single literal: assigning to image[0] before
# `image` exists, or referring to an undefined `image0`, raises NameError.
image0 = pyvips.Image.new_from_file(sys.argv[1])
image4 = image0.fliphor()
image = [
    image0,
    image0.rot90(),
    image0.rot180(),
    image0.rot270(),
    image4,
    image4.rot90(),
    image4.rot180(),
    image4.rot270(),
]

# One region per orientation, so tiles can be fetched from each of them.
reg = [pyvips.Region.new(img) for img in image]

patch_size = 64
n_across = image0.width // patch_size
n_down = image0.height // patch_size
x_max = n_across - 1
y_max = n_down - 1

n_patches = 0
for y in range(0, n_down):
    print("row {} ...".format(y))
    for x in range(0, n_across):
        # The tile indices are mirrored/swapped per orientation so that all
        # eight fetches refer to the same source tile.
        # NOTE(review): this presumably relies on width and height being
        # exact multiples of patch_size -- confirm for other image sizes.
        patch0 = fetch(reg[0], patch_size, x, y)
        patch1 = fetch(reg[1], patch_size, y_max - y, x)
        patch2 = fetch(reg[2], patch_size, x_max - x, y_max - y)
        patch3 = fetch(reg[3], patch_size, y, x_max - x)
        patch4 = fetch(reg[4], patch_size, x_max - x, y)
        patch5 = fetch(reg[5], patch_size, y_max - y, x_max - x)
        patch6 = fetch(reg[6], patch_size, x, y_max - y)
        patch7 = fetch(reg[7], patch_size, y, x)
        n_patches += 8

print("{} patches generated".format(n_patches))
Sure, it should be fine. Don't you need the rotates and flips, though?
I thought I would add that step in the PyTorch Dataset.py.
@jcupitt
NumPy conversion on the result of fetch is giving an error. What would be the solution in this case?
patch0 = fetch(reg[0], patch_size, x, y)
# This is the call that raises the AttributeError shown below: the object
# returned by fetch() is a _cffi_backend.buffer and has no .numpy().
np_img = patch0.numpy()
Error:
AttributeError: '_cffi_backend.buffer' object has no attribute 'numpy'
You'll need something more like your original code.
# Wrap the buffer returned by fetch() as an array directly, supplying the
# dtype and shape by hand.
np_img = np.ndarray(
    buffer=patch0,
    # Hard-coded element type -- presumably 8-bit samples; confirm for
    # other inputs.
    dtype=np.uint8,
    # 4 bands: presumably RGBA, as loaded via openslide without the rgb
    # option -- confirm.
    shape=[patch_size, patch_size, 4],
)
Also, try the rgb option to load to get an RGB (not RGBA) image from openslide.
This line worked, thank you. Where should I add the rgb option? I did not get that part.
Currently I am using PIL's Image.convert to convert to RGB.
Add it to new_from_file(), eg:
image = pyvips.Image.new_from_file(sys.argv[1], rgb=True)
Though you'll need libvips 8.14.
My Ubuntu is 20.04, and sudo apt install libvips says it is already at the newest version (i.e., 8.9). Is there a solution for this?
Another question related to this,
I am looking at the code using fetch from #100. If I replace image[1] through image[7] with different WSIs, will your code extract patches from all 8 different WSIs at the same time?
If I use a different WSI for each pipeline, would all of them need to be the same size? If that's the case, is there a way to read a WSI with specific dimensions?
libvips doesn't mind, you can fetch any size tile from any image.
I have never worked with libvips or pyvips before, so I have a question about accessing the tiles. In the following code I am making 20 pipelines; each pipeline will read a different WSI from my dataset. What should be inside the for loop to access tiles from all of them?
def fetch(region, patch_size, x, y):
    """Fetch the ``patch_size`` square tile at tile coordinates (x, y).

    Args:
        region: object exposing ``fetch(left, top, width, height)``.
        patch_size: tile edge length in pixels.
        x: tile column index (not a pixel offset).
        y: tile row index (not a pixel offset).

    Returns:
        Whatever ``region.fetch`` returns for that tile.
    """
    return region.fetch(patch_size * x, patch_size * y, patch_size, patch_size)
folder = "/media/exx/Crucial X8/datsets/Norway"
files = os.listdir(folder)
tile_dir = os.path.join("/media/exx/Crucial X8/datsets", "tiles")

start = 0
end = len(files)
step = 20  # number of WSIs opened per batch
patch_size = 256

for i in range(start, end, step):
    # Open only the files actually present in this batch. The last batch may
    # hold fewer than `step` files, and padding the list with placeholders
    # would hand a non-image to pyvips.Region.new().
    image_pipelines = [
        pyvips.Image.new_from_file(os.path.join(folder, file))
        for file in files[i:i + step]
    ]
    reg = [pyvips.Region.new(wsi) for wsi in image_pipelines]

    n_patches = 0
    for j, (wsi, region) in enumerate(zip(image_pipelines, reg)):
        # Every WSI has its own dimensions, so the tile grid is computed per
        # image rather than from the first image of the batch.
        n_across = wsi.width // patch_size
        n_down = wsi.height // patch_size

        for y in range(0, n_down):
            for x in range(0, n_across):
                print("row {}/{},{}/{} ...".format(y, n_down, x, n_across))
                patch = fetch(region, patch_size, x, y)

                # Kept under its own name: reusing `x` for the array would
                # clobber the column index used in the file name below.
                # NOTE(review): uint8 with 4 bands is hard-coded --
                # presumably RGBA as loaded via openslide; confirm.
                tile = np.ndarray(
                    buffer=patch,
                    dtype=np.uint8,
                    shape=[patch_size, patch_size, 4],
                )

                # `tile` is already an ndarray, so it goes to PIL directly.
                img = Image.fromarray(tile).convert('RGB')

                # i + j is the index of the source file in `files`, which
                # keeps names unique across the images of a batch.
                img.save(os.path.join(
                    tile_dir,
                    str(i + j) + "_" + str(x) + "_" + str(y) + ".jpg"
                ))
                n_patches += 1

    print("{} patches generated".format(n_patches))
libvips doesn't mind, you can fetch any size tile from any image.
I meant the dimensions of the WSI itself, not of the tile size.