In a SLURM cluster I am submitting a shell script that calls a python script (both scripts can be found below). When the shell script executes it gets to the point where the python script is called, but then nothing happens: there is no output, no error message and the SLURM job keeps running.

I assume the entire contents of the python script are not relevant (but I included it anyway for completeness). For debugging purposes I inserted the print("script started") line at the very beginning to see if it gets run, but it doesn't. The last thing I see in the output is "moved to directory".

I tried calling a test.py script containing print("test") right before this and it gets executed normally.

What could be the reason the python script doesn't start and how can I fix it?
Edit: As user jakub recommended, changing print("script started") to print("script started", flush=True) makes the line get printed successfully. Adding several more of these statements revealed that the script was actually running perfectly fine, it just didn't output anything. Putting the same statement inside the for loop that is constantly executed also makes all the previously missing print() statements appear.

The question then becomes: why do the print() statements in this script need flush=True when they don't in other scripts?
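For reference, here is a minimal sketch of the workaround. The explanation is my assumption: since SLURM redirects stdout to the --output file rather than a terminal, Python block-buffers it, so plain print() calls can sit in the buffer until it fills or the process exits. The loop below is only illustrative, not part of the actual pipeline script.

import sys
import time

# Assumption: stdout goes to a file under SLURM, so it is block-buffered, not line-buffered.
print("script started", flush=True)    # flush=True pushes the line out immediately

for i in range(5):
    time.sleep(1)
    print("still working on step {}".format(i))
    sys.stdout.flush()                 # alternative: flush the stream explicitly

If buffering is indeed the cause, running the interpreter unbuffered (python -u nucleus-pipeline2.py splitTMA in the shell script, or setting PYTHONUNBUFFERED=1) should make all print() output appear without changing the python script.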
Shell script:
#!/bin/bash
#SBATCH --mail-user=lukas.baehler@pathology.unibe.ch
#SBATCH --mail-type=end,fail
#SBATCH --output=out-ncl
#SBATCH --error=err-ncl
#SBATCH --job-name="Mask RCNN nucleus training and detection"
#SBATCH --time=24:00:00
#SBATCH --partition=gpu
#SBATCH --mem-per-cpu=64G
#SBATCH --gres=gpu:gtx1080ti:1
#SBATCH --constraint=rtx2080
conda init bash
source ~/.bashrc
conda activate nucl
cd MRCNN/samples/nucleus
echo "moved to directory"
python nucleus-pipeline2.py splitTMA
echo "Split TMAs"
Python script:
print("script started")
if __name__ == '__main__':
    import argparse
    import os

    # Copied from later in script because the argparse part was moved up and is
    # needed as default in --logs.
    ROOT_DIR = os.path.abspath("../../")
    DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Mask R-CNN for nuclei counting and segmentation')
    parser.add_argument("command",
                        metavar="<command>",
                        help="'splitTMA', 'splitSpot', 'structure', 'train' or 'detect'")
    parser.add_argument('--dataset', required=False,
                        metavar="/path/to/dataset/",
                        help='Root directory of the dataset')
    parser.add_argument('--weights', required=False,
                        metavar="/path/to/weights.h5",
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False,
                        default=DEFAULT_LOGS_DIR,
                        metavar="/path/to/logs/",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--subset', required=False,
                        metavar="Dataset sub-directory",
                        help="Subset of dataset to run prediction on")

    # Own arguments
    parser.add_argument("--input", required=False,
                        metavar="path/to/input/folder",
                        help="Optionally specify the input directory. Should only be used with splitTMA, splitSpot and structure.")
    parser.add_argument("--output", required=False,
                        metavar="path/to/output/folder",
                        help="Optionally specify the output directory. Should only be used with splitTMA, splitSpot and structure.")

    args = parser.parse_args()

    assert args.command in ["train", "detect", "splitTMA", "splitSpot", "structure"], "Must set command."
    ################################################################################
    # splitTMA
    ################################################################################
    # The original script for this is tma-spot.py
    # Splits a TMA into images of its spots.

    if args.command == "splitTMA":
        import os
        import cv2
        import numpy as np
        from openslide import open_slide
        from matplotlib import pyplot as plt

        ###################
        # CONFIGURATION

        # Defines the level of resolution for spot recognition
        level = 7  # Default 7

        # Defines the level of resolution to use for the new images
        newLevel = 0  # Default 0 (highest resolution)

        # Defines the spot size in pixels (has to be changed if newLevel is changed)
        SpotSize = 3072  # Default 3500

        # # Shift values are for alignment of the two slides.
        # shiftX = 445 - 10
        # shiftY = -64 + 10

        print("Using the following parameters:\nlevel = {}\nnewLevel = {}\nSpotSize = {}".format(level, newLevel, SpotSize))
        ###################

        # NUCLEUS_DIR = "MRCNN/samples/nucleus"
        NUCLEUS_DIR = os.path.abspath("")
        os.chdir(NUCLEUS_DIR)

        if args.input:
            INPUT_DIR = args.input
        else:
            INPUT_DIR = "slides"
        print("Using '{}' as input folder.".format(INPUT_DIR))

        if args.output:
            OUTPUT_DIR = args.output
        else:
            OUTPUT_DIR = "spots"
        print("Using '{}' as output folder.".format(OUTPUT_DIR))

        # mrxs_filenames = [filename for filename in os.listdir("slides") if filename[-5:] == ".mrxs"]
        mrxs_filenames = [filename for filename in os.listdir(INPUT_DIR) if filename[-5:] == ".mrxs"]
        print("\nFound {} MIRAX files.".format(len(mrxs_filenames)))

        # Loop through all .mrxs files.
        for filename in mrxs_filenames:
            print("\nReading {}\n".format(filename))
            # filename = mrxs_filenames[0]
            img = open_slide("{}/{}".format(INPUT_DIR, filename))

            # # Use if you want to see the resolution of all the levels.
            # for i in range(img.level_count):
            #     print("Level", i, "dimension", img.level_dimensions[i], "down factor", img.level_downsamples[i])

            # Use the level set previously and read the slide as an RGB image.
            x_img = img.read_region((0, 0), level, img.level_dimensions[level])
            x_img = np.array(x_img)
            rgb = np.zeros_like(x_img)
            rgb[x_img == 0] = 255
            rgba_im = cv2.add(rgb, x_img)
            imgLevel = cv2.cvtColor(rgba_im, cv2.COLOR_RGBA2RGB)
            # plt.imsave("./Output/level" + str(level) + ".png", imgLevel)  # <---------- USE FOR DEBUGGING

            # Converts the image to gray levels and applies a gaussian blur.
            gray = cv2.cvtColor(imgLevel, cv2.COLOR_BGR2GRAY)
            gray_blur = cv2.GaussianBlur(gray, (3, 3), 0)
            # cv2.imwrite("./Output/gray.png", gray_blur)  # <-------------------------- USE FOR DEBUGGING

            # Use an Otsu binarization to generate a mask for where tissue is.
            ret3, thresh = cv2.threshold(gray_blur, 8, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            thresh = ~thresh
            cont_img = thresh.copy()
            # cv2.imwrite("spots/cd3/contour.png", cont_img)  # <------------------------ USE FOR DEBUGGING

            # Finds the contours of the mask generated.
            contours, hierarchy = cv2.findContours(cont_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            # Loop through all contours
            spot_nr = 0
            for cnt in contours:
                # Decide based on the area of the contour if it is a spot
                area = cv2.contourArea(cnt)
                spotInfo = []
                x, y, w, h = cv2.boundingRect(cnt)
                if area < 100 or area > 2000:
                    spotInfo.append([-1, x, y, w, h])
                    continue
                if len(cnt) < 5:
                    spotInfo.append([-1, x, y, w, h])
                    continue

                # Calculate the center of the spot
                centerX = x + int(w/2)
                centerY = y + int(h/2)

                # Calculate how much it needs to be scaled
                factorOld = img.level_downsamples[level]
                factorNew = img.level_downsamples[newLevel]

                # Read the spot region
                spot = img.read_region((int(centerX * factorOld) - int(SpotSize/2),
                                        int(centerY * factorOld) - int(SpotSize/2)),
                                       newLevel, (SpotSize, SpotSize))
                spot = cv2.cvtColor(np.array(spot), cv2.COLOR_RGBA2RGB)

                # Create directory and save the image
                if not os.path.isdir("{}/{}".format(OUTPUT_DIR, filename[:-5])):
                    os.makedirs("{}/{}".format(OUTPUT_DIR, filename[:-5]))
                spot_name = "{0}/{1}/{1}-{2}.png".format(OUTPUT_DIR, filename[:-5], str(spot_nr).zfill(3))
                plt.imsave(spot_name, spot)
                spot_nr += 1
                print("Spot {} saved - Center X and Y: {}, {}".format(spot_nr, centerX, centerY))

        exit()
    ################################################################################
    # splitSpot
    ################################################################################
    # This is copied from spot-annotation.py
    # Splits spots into tiles

    if args.command == "splitSpot":
        import os
        import sys
        import argparse
        import re
        import numpy as np
        import cv2
        from matplotlib import pyplot as plt

        # VARIABLES

        # Change the resolution of the tiles here. Note the image resolution
        # must be an integer multiple of the tile resolutions (both dimensions).
        tile_resolution = [768, 768]

        # NUCLEUS_DIR = "MRCNN/samples/nucleus"
        NUCLEUS_DIR = os.path.abspath("")
        os.chdir(NUCLEUS_DIR)

        if args.input:
            INPUT_DIR = args.input
        else:
            INPUT_DIR = "spots"
        print("\nUsing '{}' as input folder.".format(INPUT_DIR))

        if args.output:
            OUTPUT_DIR = args.output
        else:
            OUTPUT_DIR = "tiles"
        print("Using '{}' as output folder.".format(OUTPUT_DIR))

        # EXECUTION
        TMA_folders = os.listdir(INPUT_DIR)
        spot_names = []
        spot_count = 0
        for name in TMA_folders:
            spot_names.append(os.listdir("{}/{}".format(INPUT_DIR, name)))
            spot_count += len(spot_names[-1])
        print("\nFound {} TMA folders with a total of {} spot images.".format(len(TMA_folders), spot_count))

        for a, TMA in enumerate(TMA_folders):
            for b, spot in enumerate(spot_names[a]):
                print("TMA: {}/{} - Spot: {}/{}".format(a+1, len(TMA_folders), b+1, len(spot_names[a])), end="\r")

                # Read the image
                img = cv2.imread("{}/{}/{}".format(INPUT_DIR, TMA, spot))

                # Calculate how many tiles will be produced
                tilesX = img.shape[0]/tile_resolution[0]
                tilesY = img.shape[1]/tile_resolution[1]
                assert (tilesX == int(tilesX) and tilesY == int(tilesY)), "Image resolution is not an integer multiple of the tile resolution."
                tilesX, tilesY = int(tilesX), int(tilesY)

                # Create the np array that will hold the tiles
                tiles = np.zeros([tilesY, tilesX, tile_resolution[0], tile_resolution[1], 3])

                # Loop through all tiles and store them in tiles
                for i in range(tilesX):
                    for j in range(tilesY):
                        tiles[j, i] = img[i*tile_resolution[0]:(i+1)*tile_resolution[0],
                                          j*tile_resolution[1]:(j+1)*tile_resolution[1]]
                tiles = tiles.astype("uint8")
                # print("\nImage was split into {} tiles.".format(tiles.shape[0]*tiles.shape[1]))

                # Save all the tiles
                for x in range(tiles.shape[0]):
                    for y in range(tiles.shape[1]):
                        # Displays progression
                        # print("Saving {}/{} images...".format(str(x*tiles.shape[0]+y+1), tiles.shape[0]*tiles.shape[1]), end="\r")

                        # Using the plt.imsave() gives alterations in color which is
                        # presumably bad. Using cv2.imwrite() is also ca. 10 times faster.
                        imdir = "{}/{}/{}".format(OUTPUT_DIR, TMA, spot[:-4])
                        imname = "{}-{}-{}.png".format(spot[:-4], str(x).zfill(2), str(y).zfill(2))
                        if not os.path.isdir(imdir):
                            os.makedirs(imdir)
                        cv2.imwrite("{}/{}".format(imdir, imname), tiles[x, y])

        print("\nSaved images in {} as [spot_name]-x-y.png.".format(OUTPUT_DIR))
        exit()
    ################################################################################
    # Prepare Data Structure
    ################################################################################
    # Adapted from prepare-data-structure.py
    # Creates the data structure required for the network

    if args.command == "structure":
        import os
        from shutil import copyfile

        NUCLEUS_DIR = os.path.abspath("")
        os.chdir(NUCLEUS_DIR)

        # Setting input and output directories
        if args.input:
            INPUT_DIR = args.input
        else:
            INPUT_DIR = "tiles"
        print("\nUsing '{}' as input folder.".format(INPUT_DIR))

        if args.output:
            OUTPUT_DIR = args.output
        else:
            OUTPUT_DIR = "data"
        print("Using '{}' as output folder.".format(OUTPUT_DIR))

        # Creates a list with the paths of all tiles. Also stores just the
        # filename itself with and without file extension
        file_names = []
        for path, _, files in os.walk(INPUT_DIR):
            for f in files:
                file_names.append(["{}/{}".format(path, f), f, f[:-4]])
        print("\nFound {} images.".format(len(file_names)))
        assert file_names != [], "No images found in input folder."

        # The dataset needs to be stored inside another folder (default "own_data")
        subset = "own_data"

        # For each file creates the appropriate sub-folders and copies the file.
        skip_count = 0
        for i, info in enumerate(file_names):
            print("Saving {}/{} images.".format(i+1, len(file_names)), end="\r")
            dirname = "{}/{}/{}/images".format(OUTPUT_DIR, subset, info[2])
            try:
                os.makedirs(dirname)
            except:
                skip_count += 1
                continue
            copyfile(info[0], "{}/{}".format(dirname, info[1]))
        print("\n\nSaved dataset in {}/{}".format(OUTPUT_DIR, subset))
        if skip_count > 0:
            print("Skipped {} files because they already existed.".format(skip_count))
        print("")
        exit()