diff --git a/.DS_Store b/.DS_Store
index 80213c8c65..6654c61282 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.idea/Ass3_Report.iml b/.idea/Ass3_Report.iml
new file mode 100644
index 0000000000..d0876a78d0
--- /dev/null
+++ b/.idea/Ass3_Report.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000000..15a15b218a
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000000..105ce2da2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index f56ad02b95..5f3cf1b24c 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000000..ac664ed0b9
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 0000000000..ade8a46ffb
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,74 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {
+ "keyToString": {
+ "Project.color": "4e1919",
+ "RunOnceActivity.OpenProjectViewOnStart": "true",
+ "RunOnceActivity.ShowReadmeOnStart": "true",
+ "settings.editor.selected.configurable": "preferences.lookFeel"
+ }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1665620130767
+
+
+ 1665620130767
+
+
+
+
\ No newline at end of file
diff --git a/recognition/.DS_Store b/recognition/.DS_Store
index ae199051c1..784cdc9c5b 100644
Binary files a/recognition/.DS_Store and b/recognition/.DS_Store differ
diff --git a/recognition/45799930/.DS_Store b/recognition/45799930/.DS_Store
new file mode 100644
index 0000000000..2952789d31
Binary files /dev/null and b/recognition/45799930/.DS_Store differ
diff --git a/recognition/45799930/README.md b/recognition/45799930/README.md
new file mode 100644
index 0000000000..88dc7b7533
--- /dev/null
+++ b/recognition/45799930/README.md
@@ -0,0 +1,90 @@
+# Segmentation of the ISIC Dataset with the Improved UNet
+
+---
+
+Author: Jessica Sullivan
+
+Student id: 45799930
+
+Assignment: COMP3710 Report Semester 2, 2022
+
+---
+
+## Description of the Algorithm and Problem it Solves
+
+The ISIC dataset contains images of skin lesions collected by the International Skin Imaging Collaboration (ISIC). A new dataset is released every year, and segmenting it has become a major machine learning task aimed at detecting skin cancer and assessing whether skin lesions are malignant (as stated [here](https://pubmed.ncbi.nlm.nih.gov/34852988/#:~:text=The%20International%20Skin%20Imaging%20Collaboration,cancer%20detection%20and%20malignancy%20assessment.)). The task given was to segment this data using the improved UNet and to ensure that all labels have a minimum Dice coefficient of 0.8. UNet is a convolutional neural network that segments an image, categorising parts of the image based on what the network has learned during training. The ISIC dataset comes with training data and ground-truth training data. The ground truth is the mask we want the network to produce for the corresponding training image. An example of this is:
+
+![Example skin lesion](images/original.jpg)
+![Corresponding ground-truth mask](images/mask.png)
+where in the second image (mask) the black is the background and the white is the skin lesion. The Dice coefficient measures how accurate the model is: in this case, how much the segmented regions produced by the model overlap with the mask (ground-truth data). The higher the Dice coefficient, the more accurate the model; a Dice coefficient greater than 0.8 means the predicted region and the true region overlap by more than 80%. The benefit of this model is that accurately detecting skin lesions from images would help further advances in medicine, allowing the model to identify problem areas on the skin.
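+
+For illustration, and mirroring the metric implemented in `train.py`, the Dice coefficient of a predicted mask and a ground-truth mask can be computed along these lines (`dice_coefficient` is just an illustrative name):
+
+```python
+from tensorflow import reduce_sum
+
+
+def dice_coefficient(x, y):
+    # 2 * |X ∩ Y| / (|X| + |Y|) for two masks held as float tensors
+    return (2 * reduce_sum(x * y)) / (reduce_sum(x) + reduce_sum(y))
+```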
+
+## How the Algorithm Works
+
+The model is based on the improved UNet structure, which updates the original UNet structure in an attempt to make the model more accurate. The structure of the original UNet model looks like:
+
+![Original UNet structure](images/UNet.png)
+
+This model has a 'U' shape: going down the left side of the U contracts the image, and going up the right side expands it back out (based on [this](https://towardsdatascience.com/unet-line-by-line-explanation-9b191c76baf5)). The contracting path reduces the size of the image so that the pixels can be classified, using repeated 2D convolutions with a 3x3 kernel. After the two convolutions in each layer, a max pooling reduces the spatial dimensions. Once the bottom layer has been reached, after its two convolutions, upsampling increases the dimensions again. At the top layer, a convolution with a 1x1 kernel finishes the process. Because information is lost during downsampling (max pooling), the output of the convolutions from the same level of the contracting path is concatenated onto the current layer before its two convolutions, reducing the information lost on the way back up. A minimal sketch of these two blocks is given below.
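+
+As a minimal sketch (using the same Keras layers as `modules.py`; the helper names here are illustrative, not part of the submitted code), one contracting step and its matching expanding step of the original UNet look roughly like this:
+
+```python
+from keras.layers import Conv2D, MaxPooling2D, UpSampling2D, concatenate
+
+
+def down_block(layer, filters):
+    # two 3x3 convolutions, then 2x2 max pooling to halve the resolution
+    conv = Conv2D(filters, 3, activation='relu', padding='same')(layer)
+    conv = Conv2D(filters, 3, activation='relu', padding='same')(conv)
+    return conv, MaxPooling2D(2)(conv)
+
+
+def up_block(layer, skip, filters):
+    # upsample, concatenate the matching encoder output, then two 3x3 convolutions
+    up = concatenate([UpSampling2D(2)(layer), skip])
+    conv = Conv2D(filters, 3, activation='relu', padding='same')(up)
+    return Conv2D(filters, 3, activation='relu', padding='same')(conv)
+```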
+
+The improved UNet structure, by contrast, is:
+
+![Improved UNet structure](images/Improved_UNet.png)
+
+Now, this is very similar, but a couple of key things have been modified. The first major change is the localisation module: in the improved UNet it is how a lower level is brought back up to a higher spatial resolution (referenced from [here](https://arxiv.org/pdf/1802.10508v1.pdf)), by upsampling and then applying convolutions afterwards. The second major difference is the segmentation layers, whose purpose is to retain as much information as possible from the lower levels while moving upwards. This involves performing a convolution with a 3x3 kernel and upsampling before doing an element-wise sum with the segmentation layer above. A condensed sketch of both modules is given below.
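+
+Condensed from `modules.py`, the two modules described above look roughly like this (`segmentation_sum` is just an illustrative name for the element-wise summing that is done inline in `modules.py`):
+
+```python
+from keras.layers import Add, Conv2D, UpSampling2D
+
+
+def localisation_module(layer, filters):
+    # refine the upsampled features with a 3x3 convolution followed by a 1x1 convolution
+    conv = Conv2D(filters, 3, activation='relu', padding='same')(layer)
+    return Conv2D(filters, 1, activation='relu', padding='same')(conv)
+
+
+def segmentation_sum(deeper_seg, shallower_seg):
+    # upscale the deeper segmentation map and add it element-wise to the shallower one
+    return Add()([UpSampling2D()(deeper_seg), shallower_seg])
+```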
+
+---
+
+## Dependencies
+
+### Versions Required:
+
+```commandline
+Tensorflow: 2.10.0
+Matplotlib: 3.5.3
+```
+
+### Address Reproducibility:
+
+To ensure that the code can run, you will need to download the training dataset and the corresponding ground-truth dataset (the one in the first row, which provides the binary masks in PNG format). Both can be downloaded from [here](https://challenge.isic-archive.com/data/#2017). Once downloaded, these folders should be moved into the recognition/45799930 directory, keeping the names they are given when extracted. The directories that should have been added are therefore:
+
+* ISIC-2017_Training_Data
+* ISIC-2017_Training_Part1_GroundTruth
+
+### Running The Code:
+
+Once the dependencies have been set up and the files have been added as described in Address Reproducibility, the code can be run by running the predict file. Please be aware that the code currently fails with an error roughly 50% of the time it is run; re-running the program fixes it.
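+
+For example, assuming Python 3 and the dependencies above are installed, from the recognition/45799930 directory:
+
+```commandline
+python predict.py
+```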
+
+---
+
+## Justification
+
+### Specific Pre-Processing
+
+Some pre-processing was done on the data to ensure that the images were loaded and processed correctly, that they were all the same size and that the colouring was correct. This was all done in the `dataset.py` file. Each image is read from its path and decoded according to its file type (JPEG for the images, PNG for the truth images), then resized so that every image has the size (256, 256). After casting everything to `tensorflow.float32`, each image is normalised by dividing by 255.
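+
+Condensed from `pre_process` in `dataset.py`, the steps for one input image are roughly as follows (`load_image` is just an illustrative name):
+
+```python
+import tensorflow as tf
+
+
+def load_image(path):
+    # read the file, decode it as a 3-channel JPEG, resize to 256x256 and scale to [0, 1]
+    image = tf.io.read_file(path)
+    image = tf.io.decode_jpeg(image, channels=3)
+    image = tf.image.resize(image, (256, 256))
+    return tf.cast(image, tf.float32) / 255.
+```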
+
+### Training, Validation and Testing Splits
+
+As only the training data from the link provided was downloaded (along with its corresponding truth values), that data was split into training, validation and testing sets. The ratio chosen was 80% of the data for training, 10% for validation and the final 10% for testing. The initial dataset was shuffled (together with the truth images, so that corresponding entries stay aligned in their respective lists), meaning a random 80% of the data is selected for training and random 10% portions are allocated to validation and testing. This ratio was chosen because it is ideal to have as much data as possible for training, so that the model can become as accurate as possible.
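+
+In `dataset.py` this amounts to slicing the shuffled, aligned path lists, roughly as below (the same slicing is applied to the ground-truth list):
+
+```python
+from glob import glob
+from math import floor
+
+# either the training images or their truth masks (shuffled together in dataset.py)
+filenames = sorted(glob('./ISIC-2017_Training_Data/*.jpg'))
+length = len(filenames)
+training = filenames[:floor(length * 0.8)]
+testing = filenames[floor(length * 0.8):floor(length * 0.9)]
+validation = filenames[floor(length * 0.9):]
+```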
+
+---
+
+## Examples
+
+### Example Input:
+
+No input is needed at the moment, as the code trains, validates and tests the model entirely from the ISIC 2017 dataset. However, a future adaptation of this code could, once the model is trained and tested, ask for an image or a dataset of images as input and then predict the corresponding masks. This would give it a more real-world application and could begin its use within the medical field.
+
+### Example Output:
+
+When running the predict file, the epoch-by-epoch training output was:
+
+![Epoch output from training](images/Epoch_Result.png)
+
+which led to these graphs:
+
+![Dice similarity coefficient over the epochs](images/accuracy.png)
+![Model loss over the epochs](images/Loss.png)
+
+It is quite clear here that the implementation of the Dice Similarity Coefficient is incorrect: it should not be constant across all the epochs. However, looking at the model loss, it is clearly visible that the more the model is trained, the lower the loss becomes.
+
+---
diff --git a/recognition/45799930/dataset.py b/recognition/45799930/dataset.py
new file mode 100644
index 0000000000..2073b558d4
--- /dev/null
+++ b/recognition/45799930/dataset.py
@@ -0,0 +1,94 @@
+import glob
+import tensorflow as tf
+from sklearn.utils import shuffle
+from math import floor
+
+"""
+Creates a class to load and sort the ISIC data set. The training images and their ground-truth masks are loaded from
+disk and split into training, testing and validation sets.
+"""
+
+
+class DataSet:
+
+ def __init__(self):
+ self.validate = None
+ self.testing = None
+ self.training = None
+ self.download_dataset()
+ self.image_shape = (256, 256)
+
+ def download_dataset(self):
+ """
+ This sets up the datasets we need for training, testing or validating. We need both the dataset and the truth
+ sets so that we know what it should be after processing. They have been combined in a multidimensional
+ tensor.
+
+ :return:
+
+ bool: True if all data sets and their respective truth data sets are the same size, false otherwise.
+
+ """
+        # get all the training image paths and their ground-truth paths, sorted so that each image lines up with its truth
+ training_truth_filenames = sorted(glob.glob('./ISIC-2017_Training_Part1_GroundTruth/*.png'))
+ training_filenames = sorted(glob.glob('./ISIC-2017_Training_Data/*.jpg'))
+
+        # shuffle the image and truth path lists together (sklearn keeps them aligned) before taking 80/10/10 splits
+        train, truth = shuffle(training_filenames, training_truth_filenames)
+ length = len(train)
+ training_truth_filenames = truth[:floor(length * 0.8)]
+ training_filenames = train[:floor(length * 0.8)]
+ testing_truth_filenames = truth[floor(length * 0.8):floor(length * 0.9)]
+ testing_filenames = train[floor(length * 0.8):floor(length * 0.9)]
+ validate_truth_filenames = truth[floor(length * 0.9):]
+ validate_filenames = train[floor(length * 0.9):]
+
+ # convert this into tensorflow array
+ self.training = tf.data.Dataset.from_tensor_slices((training_filenames, training_truth_filenames))
+ self.testing = tf.data.Dataset.from_tensor_slices((testing_filenames, testing_truth_filenames))
+ self.validate = tf.data.Dataset.from_tensor_slices((validate_filenames, validate_truth_filenames))
+
+ self.training = self.training.map(self.pre_process)
+ self.testing = self.testing.map(self.pre_process)
+ self.validate = self.validate.map(self.pre_process)
+
+ if len(training_truth_filenames) != len(training_filenames) or len(testing_filenames) != len(
+ testing_truth_filenames) or len(validate_truth_filenames) != len(validate_filenames):
+ return False
+ return True
+
+ def pre_process(self, image, truth_image):
+ """
+        Preprocess the data, since at this point all we have is the path to the image and to the truth image. Each file
+        is read and decoded according to its type (JPEG for the image, PNG for the truth image), resized so that all
+        images are the same size, and then cast to float32 and normalised.
+
+ :param image: the path to the image.
+ :param truth_image: the path to the truth image
+ :return: the processed image and truth image.
+ """
+ image = tf.io.read_file(image)
+        # decode as a 3-channel RGB image (the ISIC input images are JPEGs)
+ image = tf.io.decode_jpeg(image, channels=3)
+ image = tf.image.resize(image, (256, 256))
+ image = tf.cast(image, tf.float32) / 255.
+
+ truth_image = tf.io.read_file(truth_image)
+        # decode the truth mask; channels=0 keeps however many channels are stored in the PNG
+ truth_image = tf.io.decode_png(truth_image, channels=0)
+ truth_image = tf.image.resize(truth_image, (256, 256))
+ truth_image = tf.cast(truth_image, tf.float32) / 255.
+ return image, truth_image
+
+    def split_data(self, data, truths):
+        """
+        Split the image paths and their truth paths into training (80%), validation (10%) and testing (10%)
+        portions, keeping corresponding entries aligned.
+
+        :param data: list of image paths.
+        :param truths: list of the corresponding truth image paths.
+        :return: ((train, train_truths), (val, val_truths), (test, test_truths))
+        """
+        data, truths = shuffle(data, truths)
+        length = len(data)
+        train_end = floor(length * 0.8)
+        val_end = floor(length * 0.9)
+        train = (data[:train_end], truths[:train_end])
+        val = (data[train_end:val_end], truths[train_end:val_end])
+        test = (data[val_end:], truths[val_end:])
+        return train, val, test
diff --git a/recognition/45799930/images/.DS_Store b/recognition/45799930/images/.DS_Store
new file mode 100644
index 0000000000..5008ddfcf5
Binary files /dev/null and b/recognition/45799930/images/.DS_Store differ
diff --git a/recognition/45799930/images/Epoch_Result.png b/recognition/45799930/images/Epoch_Result.png
new file mode 100644
index 0000000000..a5b660057f
Binary files /dev/null and b/recognition/45799930/images/Epoch_Result.png differ
diff --git a/recognition/45799930/images/Improved_UNet.png b/recognition/45799930/images/Improved_UNet.png
new file mode 100644
index 0000000000..fa5706b83c
Binary files /dev/null and b/recognition/45799930/images/Improved_UNet.png differ
diff --git a/recognition/45799930/images/Loss.png b/recognition/45799930/images/Loss.png
new file mode 100644
index 0000000000..56b4ae39ed
Binary files /dev/null and b/recognition/45799930/images/Loss.png differ
diff --git a/recognition/45799930/images/UNet.png b/recognition/45799930/images/UNet.png
new file mode 100644
index 0000000000..3b424312c7
Binary files /dev/null and b/recognition/45799930/images/UNet.png differ
diff --git a/recognition/45799930/images/accuracy.png b/recognition/45799930/images/accuracy.png
new file mode 100644
index 0000000000..618fc81b32
Binary files /dev/null and b/recognition/45799930/images/accuracy.png differ
diff --git a/recognition/45799930/images/mask.png b/recognition/45799930/images/mask.png
new file mode 100644
index 0000000000..f1ab42a41f
Binary files /dev/null and b/recognition/45799930/images/mask.png differ
diff --git a/recognition/45799930/images/original.jpg b/recognition/45799930/images/original.jpg
new file mode 100644
index 0000000000..935609dcd7
Binary files /dev/null and b/recognition/45799930/images/original.jpg differ
diff --git a/recognition/45799930/modules.py b/recognition/45799930/modules.py
new file mode 100644
index 0000000000..d3ef7adff5
--- /dev/null
+++ b/recognition/45799930/modules.py
@@ -0,0 +1,152 @@
+from keras.models import Model
+from keras.layers import Input, Conv2D, Add, concatenate, UpSampling2D, Dropout, BatchNormalization
+
+# Dense, Flatten,
+DROPOUT_PROB = 0.3
+INPUT_SHAPE = (256, 256, 3)
+
+
+def context_module(layer, num_filters):
+ """
+    Creates the context module as defined in the improved UNet. The input is batch-normalised before every
+    Conv2D call. This module includes two 3x3 Conv2D layers with a dropout layer in between.
+
+ :param layer: the input layer within this module.
+ :param num_filters: the number of filters to pass into Conv2D.
+ :return: the final layer of this context module.
+ """
+ norm = BatchNormalization()(layer)
+ encoder = Conv2D(num_filters, 3, activation='relu', padding='same')(norm)
+ dropout = Dropout(DROPOUT_PROB)(encoder)
+ norm = BatchNormalization()(dropout)
+ return Conv2D(num_filters, 3, activation='relu', padding='same')(norm)
+
+
+def upsample_module(layer, num_filters):
+ """
+    Creates the up-sampling module based on the improved UNet structure: an up-sample followed by a
+    3x3 convolution.
+
+ :param layer: the input layer within this module.
+ :param num_filters: the number of filters to pass into Conv2D.
+ :return: the final layer of this up-sample module.
+ """
+ up = UpSampling2D(2)(layer)
+ return Conv2D(num_filters, 3, activation='relu', padding='same')(up)
+
+
+def localisation_module(layer, num_filters):
+ """
+    Creates the localisation module based on the improved UNet structure. This is two convolutions:
+    the first with a 3x3 kernel and the second with a 1x1 kernel.
+
+ :param layer: the input layer within this module.
+ :param num_filters: the number of filters to pass into Conv2D.
+ :return: the final layer of this localisation module.
+ """
+ decoder = Conv2D(num_filters, 3, activation='relu', padding='same')(layer)
+ return Conv2D(num_filters, 1, activation='relu', padding='same')(decoder)
+
+
+def create_model(input_shape=INPUT_SHAPE):
+ """
+ Create the improved unet model.
+
+ :param input_shape: The shape of the images that we will be passing into the model.
+                        As we are using the one type of image we set the default to (256, 256, 3)
+ but added this feature so that it could be easily adapted to another model
+ if required in the future.
+ :return: the module that has been created.
+ """
+
+ # Create the input for the model.
+ input = Input(shape=input_shape)
+
+ # initial 3x3 conv with filter of 16
+ setup = Conv2D(16, 3, activation='relu', padding='same')(input)
+
+ # Create the encoder:
+    # First layer : context module with 16 filters (two 3x3 convolutions, stride 1, with a dropout in between).
+ encoder_layer1 = context_module(setup, 16)
+ # element wise sum of pre context module and post context module.
+ sum1 = Add()([setup, encoder_layer1])
+
+    # 3x3 convolution with stride 2 to halve the spatial dimensions.
+ pool1 = Conv2D(32, 3, 2, activation='relu', padding='same')(sum1)
+    # Second layer : context module with 32 filters (two 3x3 convolutions, stride 1, with a dropout in between).
+ encoder_layer2 = context_module(pool1, 32)
+ # element wise sum of pre context module and post context module.
+ sum2 = Add()([pool1, encoder_layer2])
+
+    # 3x3 convolution with stride 2 to halve the spatial dimensions.
+ pool2 = Conv2D(64, 3, 2, activation='relu', padding='same')(sum2)
+    # Third layer : context module with 64 filters (two 3x3 convolutions, stride 1, with a dropout in between).
+ encoder_layer3 = context_module(pool2, 64)
+ # element wise sum of pre context module and post context module.
+ sum3 = Add()([pool2, encoder_layer3])
+
+    # 3x3 convolution with stride 2 to halve the spatial dimensions.
+ pool3 = Conv2D(128, 3, 2, activation='relu', padding='same')(sum3)
+    # Fourth layer : context module with 128 filters (two 3x3 convolutions, stride 1, with a dropout in between).
+ encoder_layer4 = context_module(pool3, 128)
+ # element wise sum of pre context module and post context module.
+ sum4 = Add()([pool3, encoder_layer4])
+
+    # 3x3 convolution with stride 2 to halve the spatial dimensions.
+ pool4 = Conv2D(256, 3, 2, activation='relu', padding='same')(sum4)
+    # Fifth layer : context module with 256 filters (two 3x3 convolutions, stride 1, with a dropout in between).
+ encoder_layer5 = context_module(pool4, 256)
+ # element wise sum of pre context module and post context module.
+ sum5 = Add()([pool4, encoder_layer5])
+
+ # Create the decoder:
+
+ # up sample layer 5 to get to layer 4
+ up4 = upsample_module(sum5, 128)
+
+ # concatenate the output of sum4 with the up sample
+ con4 = concatenate([up4, sum4])
+    # Fourth layer : localisation module with 128 filters (a 3x3 convolution followed by a 1x1 convolution).
+ decoder_layer4 = localisation_module(con4, 128)
+ # up sample layer 4 to get to layer 3
+ up3 = upsample_module(decoder_layer4, 64)
+
+ # concatenate the output of sum3 with the up sample
+ con3 = concatenate([up3, sum3])
+    # Third layer : localisation module with 64 filters (a 3x3 convolution followed by a 1x1 convolution).
+ decoder_layer3 = localisation_module(con3, 64)
+ # up sample layer 3 to get to layer 2
+ up2 = upsample_module(decoder_layer3, 32)
+
+ # concatenate the output of sum2 with the up sample
+ con2 = concatenate([up2, sum2])
+    # Second layer : localisation module with 32 filters (a 3x3 convolution followed by a 1x1 convolution).
+ decoder_layer2 = localisation_module(con2, 32)
+ # up sample layer 2 to get to layer 1
+ up1 = upsample_module(decoder_layer2, 16)
+
+ # concatenate the output of sum1 with the up sample
+ con1 = concatenate([up1, sum1])
+    # First layer : a single 3x3 convolution with 32 filters.
+ decoder_layer1 = Conv2D(32, 3, activation='relu', padding='same')(con1)
+
+ # Now do all the segmentation layers.
+ seg_layer3 = Conv2D(16, 3, activation='relu', padding='same')(decoder_layer3)
+ seg_layer3 = UpSampling2D()(seg_layer3)
+ seg_layer2 = Conv2D(16, 3, activation='relu', padding='same')(decoder_layer2)
+ seg_layer1 = Conv2D(16, 3, activation='relu', padding='same')(decoder_layer1)
+
+ # element wise sum of segmentation layers
+ seg_sum_2 = Add()([seg_layer3, seg_layer2])
+ seg_sum_2 = UpSampling2D()(seg_sum_2)
+ seg_sum_1 = Add()([seg_layer1, seg_sum_2])
+
+ # softmax to finish off
+ # todo: should this filter size be 2?
+ output = Conv2D(2, 3, activation='softmax', padding='same')(seg_sum_1)
+
+ return Model(inputs=input, outputs=output)
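+
+
+if __name__ == "__main__":
+    # Quick sanity check (illustrative only, not part of the training pipeline):
+    # build the model and print its summary to confirm the layer shapes line up.
+    model = create_model()
+    model.summary()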
diff --git a/recognition/45799930/predict.py b/recognition/45799930/predict.py
new file mode 100644
index 0000000000..732fc7bdda
--- /dev/null
+++ b/recognition/45799930/predict.py
@@ -0,0 +1,8 @@
+import tensorflow as tf
+from train import train_model
+
+if __name__ == "__main__":
+ model, dataset = train_model()
+
+    pred = model.predict(dataset.testing.batch(10))
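+
+    # Hypothetical extension (not part of the original pipeline): show the first
+    # channel of the first predicted mask next to its input image.
+    import matplotlib.pyplot as plt  # imported locally so this sketch stays self-contained
+
+    image, truth = next(iter(dataset.testing.batch(1)))
+    plt.subplot(1, 2, 1)
+    plt.title("input")
+    plt.imshow(image[0])
+    plt.subplot(1, 2, 2)
+    plt.title("prediction")
+    plt.imshow(pred[0, :, :, 0], cmap="gray")
+    plt.show()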
+
diff --git a/recognition/45799930/train.py b/recognition/45799930/train.py
new file mode 100644
index 0000000000..5d0fcedf7f
--- /dev/null
+++ b/recognition/45799930/train.py
@@ -0,0 +1,46 @@
+from modules import create_model
+from dataset import DataSet
+from tensorflow import reduce_sum
+import matplotlib.pyplot as plt
+
+
+def train_model():
+ dataset = DataSet()
+ model = create_model()
+
+ # Train the model
+ model.compile(optimizer='adam', loss="binary_crossentropy", metrics=[dice_sim_co])
+ history = model.fit(dataset.training.batch(10), epochs=10, validation_data=dataset.validate.batch(10))
+ print_history(history)
+
+ return model, dataset
+
+
+def print_history(history):
+ plt.plot(history.history['dice_sim_co'])
+ plt.plot(history.history['val_dice_sim_co'])
+ plt.title('model accuracy')
+ plt.ylabel('accuracy')
+ plt.xlabel('epoch')
+ plt.legend(['train', 'val'], loc='upper left')
+ plt.show()
+
+ plt.plot(history.history['loss'])
+ plt.plot(history.history['val_loss'])
+ plt.title('model loss')
+ plt.ylabel('loss')
+ plt.xlabel('epoch')
+ plt.legend(['train', 'val'], loc='upper left')
+ plt.show()
+
+
+def dice_sim_co(x, y):
+ """
+    This implements the Dice similarity coefficient as described in the link on the task sheet.
+ To get the coefficient the equation is
+ (2 * |X ∩ Y|)/(|X| + |Y|)
+ :param x : An image in the form of a tensor
+ :param y : An image to compare to in the form of a tensor.
+    :return: the Dice similarity coefficient of the two tensors.
+ """
+ return (2 * reduce_sum(x * y)) / (reduce_sum(x) + reduce_sum(y))