diff --git a/NeuralNetwork.NET/APIs/CuDnnNetworkLayers.cs b/NeuralNetwork.NET/APIs/CuDnnNetworkLayers.cs index caa09b8..b449d66 100644 --- a/NeuralNetwork.NET/APIs/CuDnnNetworkLayers.cs +++ b/NeuralNetwork.NET/APIs/CuDnnNetworkLayers.cs @@ -1,10 +1,8 @@ -using System; -using System.Linq; -using JetBrains.Annotations; +using JetBrains.Annotations; using NeuralNetworkNET.APIs.Delegates; using NeuralNetworkNET.APIs.Enums; using NeuralNetworkNET.APIs.Structs; -using NeuralNetworkNET.Extensions; +using NeuralNetworkNET.cuDNN; using NeuralNetworkNET.Networks.Layers.Cuda; namespace NeuralNetworkNET.APIs @@ -17,22 +15,7 @@ public static class CuDnnNetworkLayers /// /// Gets whether or not the Cuda acceleration is supported on the current system /// - public static bool IsCudaSupportAvailable - { - get - { - try - { - // Calling this directly would could a crash in the loader due to the missing .dll files - return CuDnnSupportHelper.IsGpuAccelerationSupported(); - } - catch (TypeInitializationException) - { - // Missing .dll file - return false; - } - } - } + public static bool IsCudaSupportAvailable => CuDnnService.IsAvailable; /// /// Creates a new fully connected layer with the specified number of input and output neurons, and the given activation function @@ -132,41 +115,14 @@ public static LayerFactory Convolutional( public static LayerFactory Inception(InceptionInfo info, BiasInitializationMode biasMode = BiasInitializationMode.Zero) => input => new CuDnnInceptionLayer(input, info, biasMode); - #region Feature helper - /// - /// A private class that is used to create a new standalone type that contains the actual test method (decoupling is needed to <Module> loading crashes) + /// Creates a new batch normalization layer /// - private static class CuDnnSupportHelper - { - /// - /// Checks whether or not the Cuda features are currently supported - /// - public static bool IsGpuAccelerationSupported() - { - try - { - // CUDA test - Alea.Gpu gpu = Alea.Gpu.Default; - if 
(gpu == null) return false; - if (!Alea.cuDNN.Dnn.IsAvailable) return false; // cuDNN - using (Alea.DeviceMemory sample_gpu = gpu.AllocateDevice(1024)) - { - Alea.deviceptr ptr = sample_gpu.Ptr; - void Kernel(int i) => ptr[i] = i; - Alea.Parallel.GpuExtension.For(gpu, 0, 1024, Kernel); // JIT test - float[] sample = Alea.Gpu.CopyToHost(sample_gpu); - return Enumerable.Range(0, 1024).Select(i => i).ToArray().ContentEquals(sample); - } - } - catch - { - // Missing .dll or other errors - return false; - } - } - } - - #endregion + /// The normalization mode to use for the new layer + /// The desired activation function to use in the network layer + [PublicAPI] + [Pure, NotNull] + public static LayerFactory BatchNormalization(NormalizationMode mode, ActivationType activation) + => input => new CuDnnBatchNormalizationLayer(input, mode, activation); } } \ No newline at end of file diff --git a/NeuralNetwork.NET/APIs/DatasetLoader.cs b/NeuralNetwork.NET/APIs/DatasetLoader.cs index 03f6da5..2186fc0 100644 --- a/NeuralNetwork.NET/APIs/DatasetLoader.cs +++ b/NeuralNetwork.NET/APIs/DatasetLoader.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Linq; using JetBrains.Annotations; +using NeuralNetworkNET.APIs.Enums; using NeuralNetworkNET.APIs.Interfaces.Data; using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Helpers; @@ -53,30 +54,42 @@ public static class DatasetLoader /// /// Creates a new instance to train a network from the input data, where each input sample is an image in a specified format /// - /// The type of image pixels. It must be either , or + /// The type of image pixels. 
It must be either , , or /// A list of items, where the first element is the image path and the second is a vector with the expected outputs /// The desired dataset batch size - /// An optional to modify each sample image when loading the dataset + /// The desired image normalization mode to use when loading the images + /// The optional instances to use to modify the loaded image. If no modifiers are provided, each loaded image will not me tweaked. If one or more + /// modifiers are passed to the method, a different image will be added to the dataset for each given modifier. This can be used to easily expand an image dataset. [PublicAPI] [Pure, NotNull] [CollectionAccess(CollectionAccessType.Read)] - public static ITrainingDataset Training([NotNull] IEnumerable<(String X, float[] Y)> data, int size, [CanBeNull] Action> modify = null) + public static ITrainingDataset Training([NotNull] IEnumerable<(String X, float[] Y)> data, int size, ImageNormalizationMode normalization, [NotNull, ItemNotNull] params Action>[] modifiers) where TPixel : struct, IPixel - => BatchesCollection.From(data.Select<(String X, float[] Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, modify), xy.Y)), size); + { + return BatchesCollection.From(modifiers.Length > 0 + ? data.SelectMany(xy => modifiers.Select>, Func<(float[], float[])>>(f => () => (ImageLoader.Load(xy.X, normalization, f), xy.Y))) + : data.Select<(String X, float[] Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, normalization, null), xy.Y)), size); + } /// /// Creates a new instance to train a network from the input data, where each input sample is an image in a specified format /// - /// The type of image pixels. It must be either , or + /// The type of image pixels. 
It must be either , , or /// A list of items, where the first element is the image path and the second is a returning a vector with the expected outputs /// The desired dataset batch size - /// An optional to modify each sample image when loading the dataset + /// The desired image normalization mode to use when loading the images + /// The optional instances to use to modify the loaded image. If no modifiers are provided, each loaded image will not me tweaked. If one or more + /// modifiers are passed to the method, a different image will be added to the dataset for each given modifier. This can be used to easily expand an image dataset. [PublicAPI] [Pure, NotNull] [CollectionAccess(CollectionAccessType.Read)] - public static ITrainingDataset Training([NotNull] IEnumerable<(String X, Func Y)> data, int size, [CanBeNull] Action> modify = null) + public static ITrainingDataset Training([NotNull] IEnumerable<(String X, Func Y)> data, int size, ImageNormalizationMode normalization, [NotNull, ItemNotNull] params Action>[] modifiers) where TPixel : struct, IPixel - => BatchesCollection.From(data.Select<(String X, Func Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, modify), xy.Y())), size); + { + return BatchesCollection.From(modifiers.Length > 0 + ? data.SelectMany(xy => modifiers.Select>, Func<(float[], float[])>>(f => () => (ImageLoader.Load(xy.X, normalization, f), xy.Y()))) + : data.Select<(String X, Func Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, normalization, null), xy.Y())), size); + } #endregion @@ -120,32 +133,48 @@ public static IValidationDataset Validation([NotNull, ItemNotNull] IEnumerable /// Creates a new instance to validate a network accuracy from the input collection /// - /// The type of image pixels. It must be either , or + /// The type of image pixels. 
It must be either , , or /// A list of items, where the first element is the image path and the second is a vector with the expected outputs /// The desired tolerance to test the network for convergence /// The epochs interval to consider when testing the network for convergence - /// An optional to modify each sample image when loading the dataset + /// The desired image normalization mode to use when loading the images + /// The optional instances to use to modify the loaded image. If no modifiers are provided, each loaded image will not me tweaked. If one or more + /// modifiers are passed to the method, a different image will be added to the dataset for each given modifier. This can be used to easily expand an image dataset. [PublicAPI] [Pure, NotNull] [CollectionAccess(CollectionAccessType.Read)] - public static IValidationDataset Validation([NotNull] IEnumerable<(String X, float[] Y)> data, float tolerance = 1e-2f, int epochs = 5, [CanBeNull] Action> modify = null) + public static IValidationDataset Validation( + [NotNull] IEnumerable<(String X, float[] Y)> data, float tolerance = 1e-2f, int epochs = 5, + ImageNormalizationMode normalization = ImageNormalizationMode.Sigmoid, [NotNull, ItemNotNull] params Action>[] modifiers) where TPixel : struct, IPixel - => Validation(data.Select<(String X, float[] Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, modify), xy.Y)).AsParallel(), tolerance, epochs); + { + return Validation((modifiers.Length > 0 + ? data.SelectMany(xy => modifiers.Select>, Func<(float[], float[])>>(f => () => (ImageLoader.Load(xy.X, normalization, f), xy.Y))) + : data.Select<(String X, float[] Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, normalization, null), xy.Y))).AsParallel(), tolerance, epochs); + } /// /// Creates a new instance to validate a network accuracy from the input collection /// - /// The type of image pixels. It must be either , or + /// The type of image pixels. 
It must be either , , or /// A list of items, where the first element is the image path and the second is a returning a vector with the expected outputs /// The desired tolerance to test the network for convergence /// The epochs interval to consider when testing the network for convergence - /// An optional to modify each sample image when loading the dataset + /// The desired image normalization mode to use when loading the images + /// The optional instances to use to modify the loaded image. If no modifiers are provided, each loaded image will not me tweaked. If one or more + /// modifiers are passed to the method, a different image will be added to the dataset for each given modifier. This can be used to easily expand an image dataset. [PublicAPI] [Pure, NotNull] [CollectionAccess(CollectionAccessType.Read)] - public static IValidationDataset Validation([NotNull] IEnumerable<(String X, Func Y)> data, float tolerance = 1e-2f, int epochs = 5, [CanBeNull] Action> modify = null) + public static IValidationDataset Validation( + [NotNull] IEnumerable<(String X, Func Y)> data, float tolerance = 1e-2f, int epochs = 5, + ImageNormalizationMode normalization = ImageNormalizationMode.Sigmoid, [NotNull, ItemNotNull] params Action>[] modifiers) where TPixel : struct, IPixel - => Validation(data.Select<(String X, Func Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, modify), xy.Y())).AsParallel(), tolerance, epochs); + { + return Validation((modifiers.Length > 0 + ? 
data.SelectMany(xy => modifiers.Select>, Func<(float[], float[])>>(f => () => (ImageLoader.Load(xy.X, normalization, f), xy.Y()))) + : data.Select<(String X, Func Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, normalization, null), xy.Y()))).AsParallel(), tolerance, epochs); + } #endregion @@ -186,30 +215,46 @@ public static ITestDataset Test([NotNull, ItemNotNull] IEnumerable /// Creates a new instance to test a network from the input collection /// - /// The type of image pixels. It must be either , or + /// The type of image pixels. It must be either , , or /// A list of items, where the first element is the image path and the second is a vector with the expected outputs /// The optional progress callback to use - /// An optional to modify each sample image when loading the dataset + /// The desired image normalization mode to use when loading the images + /// The optional instances to use to modify the loaded image. If no modifiers are provided, each loaded image will not me tweaked. If one or more + /// modifiers are passed to the method, a different image will be added to the dataset for each given modifier. This can be used to easily expand an image dataset. [PublicAPI] [Pure, NotNull] [CollectionAccess(CollectionAccessType.Read)] - public static ITestDataset Test([NotNull] IEnumerable<(String X, float[] Y)> data, [CanBeNull] Action progress = null, [CanBeNull] Action> modify = null) + public static ITestDataset Test( + [NotNull] IEnumerable<(String X, float[] Y)> data, [CanBeNull] Action progress = null + , ImageNormalizationMode normalization = ImageNormalizationMode.Sigmoid, [NotNull, ItemNotNull] params Action>[] modifiers) where TPixel : struct, IPixel - => Test(data.Select<(String X, float[] Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, modify), xy.Y)).AsParallel(), progress); + { + return Test((modifiers.Length > 0 + ? 
data.SelectMany(xy => modifiers.Select>, Func<(float[], float[])>>(f => () => (ImageLoader.Load(xy.X, normalization, f), xy.Y))) + : data.Select<(String X, float[] Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, normalization, null), xy.Y))).AsParallel(), progress); + } /// /// Creates a new instance to test a network from the input collection /// - /// The type of image pixels. It must be either , or + /// The type of image pixels. It must be either , , or /// A list of items, where the first element is the image path and the second is a returning a vector with the expected outputs /// The optional progress callback to use - /// An optional to modify each sample image when loading the dataset + /// The desired image normalization mode to use when loading the images + /// The optional instances to use to modify the loaded image. If no modifiers are provided, each loaded image will not me tweaked. If one or more + /// modifiers are passed to the method, a different image will be added to the dataset for each given modifier. This can be used to easily expand an image dataset. [PublicAPI] [Pure, NotNull] [CollectionAccess(CollectionAccessType.Read)] - public static ITestDataset Test([NotNull] IEnumerable<(String X, Func Y)> data, [CanBeNull] Action progress = null, [CanBeNull] Action> modify = null) + public static ITestDataset Test( + [NotNull] IEnumerable<(String X, Func Y)> data, [CanBeNull] Action progress = null, + ImageNormalizationMode normalization = ImageNormalizationMode.Sigmoid, [NotNull, ItemNotNull] params Action>[] modifiers) where TPixel : struct, IPixel - => Test(data.Select<(String X, Func Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, modify), xy.Y())).AsParallel(), progress); + { + return Test((modifiers.Length > 0 + ? 
data.SelectMany(xy => modifiers.Select>, Func<(float[], float[])>>(f => () => (ImageLoader.Load(xy.X, normalization, f), xy.Y()))) + : data.Select<(String X, Func Y), Func<(float[], float[])>>(xy => () => (ImageLoader.Load(xy.X, normalization, null), xy.Y()))).AsParallel(), progress); + } #endregion } diff --git a/NeuralNetwork.NET/APIs/Datasets/Cifar10.cs b/NeuralNetwork.NET/APIs/Datasets/Cifar10.cs index f4bbb40..38fdd40 100644 --- a/NeuralNetwork.NET/APIs/Datasets/Cifar10.cs +++ b/NeuralNetwork.NET/APIs/Datasets/Cifar10.cs @@ -9,6 +9,9 @@ using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Helpers; using NeuralNetworkNET.SupervisedLearning.Progress; +using SixLabors.ImageSharp; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.PixelFormats; namespace NeuralNetworkNET.APIs.Datasets { @@ -25,11 +28,14 @@ public static class Cifar10 // 32*32 RGB images private const int SampleSize = 3072; + // A single 32*32 image + private const int ImageSize = 1024; + private const String DatasetURL = "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz"; [NotNull, ItemNotNull] private static readonly IReadOnlyList TrainingBinFilenames = Enumerable.Range(1, 5).Select(i => $"data_batch_{i}.bin").ToArray(); - + private const String TestBinFilename = "test_batch.bin"; #endregion @@ -38,12 +44,13 @@ public static class Cifar10 /// Downloads the CIFAR-10 training datasets and returns a new instance /// /// The desired dataset batch size + /// The optional progress calback /// An optional cancellation token for the operation [PublicAPI] [Pure, ItemCanBeNull] - public static async Task GetTrainingDatasetAsync(int size, CancellationToken token = default) + public static async Task GetTrainingDatasetAsync(int size, [CanBeNull] IProgress callback = null, CancellationToken token = default) { - IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, token); + IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, 
callback, token); if (map == null) return null; IReadOnlyList<(float[], float[])>[] data = new IReadOnlyList<(float[], float[])>[TrainingBinFilenames.Count]; Parallel.For(0, TrainingBinFilenames.Count, i => data[i] = ParseSamples(map[TrainingBinFilenames[i]], TrainingSamplesInBinFiles)).AssertCompleted(); @@ -54,17 +61,37 @@ public static async Task GetTrainingDatasetAsync(int size, Can /// Downloads the CIFAR-10 test datasets and returns a new instance /// /// The optional progress callback to use + /// The optional progress calback /// An optional cancellation token for the operation [PublicAPI] [Pure, ItemCanBeNull] - public static async Task GetTestDatasetAsync([CanBeNull] Action progress = null, CancellationToken token = default) + public static async Task GetTestDatasetAsync([CanBeNull] Action progress = null, [CanBeNull] IProgress callback = null, CancellationToken token = default) { - IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, token); + IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, callback, token); if (map == null) return null; IReadOnlyList<(float[], float[])> data = ParseSamples(map[TestBinFilename], TrainingSamplesInBinFiles); return DatasetLoader.Test(data, progress); } + /// + /// Downloads and exports the full CIFAR-10 dataset (both training and test samples) to the target directory + /// + /// The target directory + /// The cancellation token for the operation + [PublicAPI] + public static async Task ExportDatasetAsync([NotNull] DirectoryInfo directory, CancellationToken token = default) + { + IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, null, token); + if (map == null) return false; + if (!directory.Exists) directory.Create(); + ParallelLoopResult result = Parallel.ForEach(TrainingBinFilenames.Concat(new[] { TestBinFilename }), (name, state) => + { + ExportSamples(directory, (name, map[name]), TrainingSamplesInBinFiles, token); + if 
(token.IsCancellationRequested) state.Stop(); + }); + return result.IsCompleted && !token.IsCancellationRequested; + } + #region Tools /// @@ -72,7 +99,7 @@ public static async Task GetTestDatasetAsync([CanBeNull] Action /// A that returns the to read /// The number of samples to parse - private static unsafe IReadOnlyList<(float[], float[])> ParseSamples(Func factory, int count) + private static unsafe IReadOnlyList<(float[], float[])> ParseSamples([NotNull] Func factory, int count) { using (Stream stream = factory()) { @@ -89,8 +116,12 @@ public static async Task GetTestDatasetAsync([CanBeNull] Action GetTestDatasetAsync([CanBeNull] Action + /// Exports a CIFAR-10 .bin file + /// + /// The target folder to use to save the images + /// The source filename and a that returns the to read + /// The number of samples to parse + /// A token for the operation + private static unsafe void ExportSamples([NotNull] DirectoryInfo folder, (String Name, Func Factory) source, int count, CancellationToken token) + { + using (Stream stream = source.Factory()) + { + byte[] temp = new byte[SampleSize]; + fixed (byte* ptemp = temp) + { + for (int i = 0; i < count; i++) + { + if (token.IsCancellationRequested) return; + int label = stream.ReadByte(); + stream.Read(temp, 0, SampleSize); + using (Image image = new Image(32, 32)) + fixed (Rgb24* p0 = &image.DangerousGetPinnableReferenceToPixelBuffer()) + { + for (int j = 0; j < ImageSize; j++) + p0[j] = new Rgb24(ptemp[j], ptemp[j + ImageSize], ptemp[j + 2 * ImageSize]); + using (FileStream file = File.OpenWrite(Path.Combine(folder.FullName, $"[{source.Name}][{i}][{label}].bmp"))) + image.SaveAsBmp(file); + } + } + } + } + } + #endregion } } diff --git a/NeuralNetwork.NET/APIs/Datasets/Cifar100.cs b/NeuralNetwork.NET/APIs/Datasets/Cifar100.cs index 64a9a4f..8f18d9c 100644 --- a/NeuralNetwork.NET/APIs/Datasets/Cifar100.cs +++ b/NeuralNetwork.NET/APIs/Datasets/Cifar100.cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; 
using System.IO; -using System.Linq; using System.Threading; using System.Threading.Tasks; using JetBrains.Annotations; @@ -9,6 +8,9 @@ using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Helpers; using NeuralNetworkNET.SupervisedLearning.Progress; +using SixLabors.ImageSharp; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.PixelFormats; namespace NeuralNetworkNET.APIs.Datasets { @@ -19,22 +21,27 @@ public static class Cifar100 { #region Constants - // The number of training samples in each extracted .bin file - private const int TrainingSamplesInBinFiles = 10000; + // The number of training samples in the training .bin file + private const int TrainingSamplesInBinFile = 50000; + + // The number of test samples in the .bin file + private const int TestSamplesInBinFile = 10000; // 32*32 RGB images private const int SampleSize = 3072; + // A single 32*32 image + private const int ImageSize = 1024; + private const int CoarseLabels = 20; private const int FineLabels = 100; private const String DatasetURL = "https://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz"; - - [NotNull, ItemNotNull] - private static readonly IReadOnlyList TrainingBinFilenames = Enumerable.Range(1, 5).Select(i => $"data_batch_{i}.bin").ToArray(); - - private const String TestBinFilename = "test_batch.bin"; + + private const String TrainingBinFilename = "train.bin"; + + private const String TestBinFilename = "test.bin"; #endregion @@ -43,16 +50,16 @@ public static class Cifar100 /// /// The desired dataset batch size /// The desired output mode for the dataset classes + /// The optional progress calback /// An optional cancellation token for the operation [PublicAPI] [Pure, ItemCanBeNull] - public static async Task GetTrainingDatasetAsync(int size, Cifar100ClassificationMode mode = Cifar100ClassificationMode.Fine, CancellationToken token = default) + public static async Task GetTrainingDatasetAsync(int size, Cifar100ClassificationMode mode = 
Cifar100ClassificationMode.Fine, [CanBeNull] IProgress callback = null, CancellationToken token = default) { - IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, token); + IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, callback, token); if (map == null) return null; - IReadOnlyList<(float[], float[])>[] data = new IReadOnlyList<(float[], float[])>[TrainingBinFilenames.Count]; - Parallel.For(0, TrainingBinFilenames.Count, i => data[i] = ParseSamples(map[TrainingBinFilenames[i]], TrainingSamplesInBinFiles, mode)).AssertCompleted(); - return DatasetLoader.Training(data.Skip(1).Aggregate(data[0] as IEnumerable<(float[], float[])>, (s, l) => s.Concat(l)), size); + IReadOnlyList<(float[], float[])> data = ParseSamples(map[TrainingBinFilename], TrainingSamplesInBinFile, mode); + return DatasetLoader.Training(data, size); } /// @@ -60,17 +67,43 @@ public static async Task GetTrainingDatasetAsync(int size, Cif /// /// The optional progress callback to use /// The desired output mode for the dataset classes + /// The optional progress calback /// An optional cancellation token for the operation [PublicAPI] [Pure, ItemCanBeNull] - public static async Task GetTestDatasetAsync([CanBeNull] Action progress = null, Cifar100ClassificationMode mode = Cifar100ClassificationMode.Fine, CancellationToken token = default) + public static async Task GetTestDatasetAsync( + [CanBeNull] Action progress = null, Cifar100ClassificationMode mode = Cifar100ClassificationMode.Fine, + [CanBeNull] IProgress callback = null, CancellationToken token = default) { - IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, token); + IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, callback, token); if (map == null) return null; - IReadOnlyList<(float[], float[])> data = ParseSamples(map[TestBinFilename], TrainingSamplesInBinFiles, mode); + IReadOnlyList<(float[], float[])> data = 
ParseSamples(map[TestBinFilename], TestSamplesInBinFile, mode); return DatasetLoader.Test(data, progress); } + /// + /// Downloads and exports the full CIFAR-100 dataset (both training and test samples) to the target directory + /// + /// The target directory + /// The cancellation token for the operation + [PublicAPI] + public static async Task ExportDatasetAsync([NotNull] DirectoryInfo directory, CancellationToken token = default) + { + IReadOnlyDictionary> map = await DatasetsDownloader.GetArchiveAsync(DatasetURL, null, token); + if (map == null) return false; + if (!directory.Exists) directory.Create(); + ParallelLoopResult result = Parallel.ForEach(new (String Name, int Count)[] + { + (TrainingBinFilename, TrainingSamplesInBinFile), + (TestBinFilename, TestSamplesInBinFile) + }, (pair, state) => + { + ExportSamples(directory, (pair.Name, map[pair.Name]), pair.Count, token); + if (token.IsCancellationRequested) state.Stop(); + }); + return result.IsCompleted && !token.IsCancellationRequested; + } + #region Tools /// @@ -119,8 +152,12 @@ public static async Task GetTestDatasetAsync([CanBeNull] Action GetTestDatasetAsync([CanBeNull] Action + /// Exports a CIFAR-100 .bin file + /// + /// The target folder to use to save the images + /// The source filename and a that returns the to read + /// The number of samples to parse + /// A token for the operation + private static unsafe void ExportSamples([NotNull] DirectoryInfo folder, (String Name, Func Factory) source, int count, CancellationToken token) + { + using (Stream stream = source.Factory()) + { + byte[] temp = new byte[SampleSize]; + fixed (byte* ptemp = temp) + { + for (int i = 0; i < count; i++) + { + if (token.IsCancellationRequested) return; + int + coarse = stream.ReadByte(), + fine = stream.ReadByte(); + stream.Read(temp, 0, SampleSize); + using (Image image = new Image(32, 32)) + fixed (Rgb24* p0 = &image.DangerousGetPinnableReferenceToPixelBuffer()) + { + for (int j = 0; j < ImageSize; j++) + p0[j] = 
new Rgb24(ptemp[j], ptemp[j + ImageSize], ptemp[j + 2 * ImageSize]); + using (FileStream file = File.OpenWrite(Path.Combine(folder.FullName, $"[{source.Name}][{i}][{coarse}][{fine}].bmp"))) + image.SaveAsBmp(file); + } + } + } + } + } + #endregion /// diff --git a/NeuralNetwork.NET/APIs/Datasets/Mnist.cs b/NeuralNetwork.NET/APIs/Datasets/Mnist.cs index 94fdd41..042aceb 100644 --- a/NeuralNetwork.NET/APIs/Datasets/Mnist.cs +++ b/NeuralNetwork.NET/APIs/Datasets/Mnist.cs @@ -8,6 +8,9 @@ using NeuralNetworkNET.APIs.Interfaces.Data; using NeuralNetworkNET.Helpers; using NeuralNetworkNET.SupervisedLearning.Progress; +using SixLabors.ImageSharp; +using SixLabors.ImageSharp.Advanced; +using SixLabors.ImageSharp.PixelFormats; namespace NeuralNetworkNET.APIs.Datasets { @@ -25,15 +28,15 @@ public static class Mnist private const int TestSamples = 10000; private const int SampleSize = 784; - + private const String MnistHttpRootPath = "http://yann.lecun.com/exdb/mnist/"; - + private const String TrainingSetValuesFilename = "train-images-idx3-ubyte.gz"; - + private const String TrainingSetLabelsFilename = "train-labels-idx1-ubyte.gz"; - + private const String TestSetValuesFilename = "t10k-images-idx3-ubyte.gz"; - + private const String TestSetLabelsFilename = "t10k-labels-idx1-ubyte.gz"; #endregion @@ -48,8 +51,8 @@ public static class Mnist public static async Task GetTrainingDatasetAsync(int size, CancellationToken token = default) { Func[] factories = await Task.WhenAll( - DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetValuesFilename}", token), - DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetLabelsFilename}", token)); + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetValuesFilename}", null, token), + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetLabelsFilename}", null, token)); if (factories.Any(s => s == null)) return null; (float[,] X, float[,] Y) data = ParseSamples((factories[0], factories[1]), 
TrainingSamples); return data.X == null || data.Y == null @@ -67,8 +70,8 @@ public static async Task GetTrainingDatasetAsync(int size, Can public static async Task GetTestDatasetAsync([CanBeNull] Action progress = null, CancellationToken token = default) { Func[] factories = await Task.WhenAll( - DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetValuesFilename}", token), - DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetLabelsFilename}", token)); + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetValuesFilename}", null, token), + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetLabelsFilename}", null, token)); if (factories.Any(s => s == null)) return null; (float[,] X, float[,] Y) data = ParseSamples((factories[0], factories[1]), TestSamples); return data.X == null || data.Y == null @@ -76,6 +79,33 @@ public static async Task GetTestDatasetAsync([CanBeNull] Action + /// Downloads and exports the full MNIST dataset (both training and test samples) to the target directory + /// + /// The target directory + /// The cancellation token for the operation + [PublicAPI] + public static async Task ExportDatasetAsync([NotNull] DirectoryInfo directory, CancellationToken token = default) + { + Func[] factories = await Task.WhenAll( + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetValuesFilename}", null, token), + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TrainingSetLabelsFilename}", null, token), + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetValuesFilename}", null, token), + DatasetsDownloader.GetFileAsync($"{MnistHttpRootPath}{TestSetLabelsFilename}", null, token)); + if (factories.Any(s => s == null) || token.IsCancellationRequested) return false; + if (!directory.Exists) directory.Create(); + ParallelLoopResult result = Parallel.ForEach(new (String Name, Func X, Func Y, int Count)[] + { + (TrainingSetValuesFilename, factories[0], factories[1], TrainingSamples), + 
(TestSetValuesFilename, factories[2], factories[3], TestSamples) + }, (tuple, state) => + { + ExportSamples(directory, (tuple.Name, tuple.X, tuple.Y), tuple.Count, token); + if (token.IsCancellationRequested) state.Stop(); + }); + return result.IsCompleted && !token.IsCancellationRequested; + } + #region Tools /// @@ -87,32 +117,68 @@ private static unsafe (float[,], float[,]) ParseSamples((Func X, Func + /// Exports a MNIST dataset file + /// + /// The target folder to use to save the images + /// A pair of factories for the input instances to read + /// The number of samples to parse + /// A token for the operation + private static unsafe void ExportSamples([NotNull] DirectoryInfo folder, (String Name, Func X, Func Y) source, int count, CancellationToken token) + { + using (Stream inputs = source.X(), labels = source.Y()) + using (GZipStream + xGzip = new GZipStream(inputs, CompressionMode.Decompress), + yGzip = new GZipStream(labels, CompressionMode.Decompress)) + { + xGzip.Read(new byte[16], 0, 16); + yGzip.Read(new byte[8], 0, 8); + byte[] temp = new byte[SampleSize]; + fixed (byte* ptemp = temp) + { + if (token.IsCancellationRequested) return; + for (int i = 0; i < count; i++) + { + // Read the image pixel values + xGzip.Read(temp, 0, SampleSize); + int label = yGzip.ReadByte(); + using (Image image = new Image(28, 28)) + fixed (Rgb24* p0 = &image.DangerousGetPinnableReferenceToPixelBuffer()) + { + for (int j = 0; j < SampleSize; j++) + p0[j] = new Rgb24(ptemp[j], ptemp[j], ptemp[j]); + using (FileStream file = File.OpenWrite(Path.Combine(folder.FullName, $"[{source.Name}][{i}][{label}].bmp"))) + image.SaveAsBmp(file); + } } } } diff --git a/NeuralNetwork.NET/APIs/Enums/ImageNormalizationMode.cs b/NeuralNetwork.NET/APIs/Enums/ImageNormalizationMode.cs new file mode 100644 index 0000000..220e69c --- /dev/null +++ b/NeuralNetwork.NET/APIs/Enums/ImageNormalizationMode.cs @@ -0,0 +1,23 @@ +namespace NeuralNetworkNET.APIs.Enums +{ + /// + /// Indicates the type of 
normalization to apply to a loaded image + /// + public enum ImageNormalizationMode : byte + { + /// + /// The individual pixel values are mapped in the [0,1] range + /// + Sigmoid, + + /// + /// The individual pixel values are mapped in the [-1,1] range + /// + Normal, + + /// + /// No normalization is applied, and all the pixel values are loaded with their original value + /// + None + } +} \ No newline at end of file diff --git a/NeuralNetwork.NET/APIs/Enums/LayerType.cs b/NeuralNetwork.NET/APIs/Enums/LayerType.cs index 7b342e7..395cd82 100644 --- a/NeuralNetwork.NET/APIs/Enums/LayerType.cs +++ b/NeuralNetwork.NET/APIs/Enums/LayerType.cs @@ -30,6 +30,11 @@ public enum LayerType : byte /// Softmax, + /// + /// A batch normalization layer, used to scale the input batch into a 0-mean, 1-variance activations map + /// + BatchNormalization, + /// /// An inception module, combining different kinds of convolution with a pooling operation /// diff --git a/NeuralNetwork.NET/APIs/Enums/NormalizationMode.cs b/NeuralNetwork.NET/APIs/Enums/NormalizationMode.cs new file mode 100644 index 0000000..34ee8a1 --- /dev/null +++ b/NeuralNetwork.NET/APIs/Enums/NormalizationMode.cs @@ -0,0 +1,18 @@ +namespace NeuralNetworkNET.APIs.Enums +{ + /// + /// An indicating the normalization mode to apply to the input data of a layer + /// + public enum NormalizationMode : byte + { + /// + /// Activation-wise normalization, with a separate mean and variance value per activation + /// + PerActivation = 0, + + /// + /// Spatial normalization, with a single mean and variance value per input channel (feature map) + /// + Spatial = 1 + } +} \ No newline at end of file diff --git a/NeuralNetwork.NET/APIs/Interfaces/Data/ITrainingDataset.cs b/NeuralNetwork.NET/APIs/Interfaces/Data/ITrainingDataset.cs index 1eacf09..303ed65 100644 --- a/NeuralNetwork.NET/APIs/Interfaces/Data/ITrainingDataset.cs +++ b/NeuralNetwork.NET/APIs/Interfaces/Data/ITrainingDataset.cs @@ -1,8 +1,6 @@ using System; using 
JetBrains.Annotations; using NeuralNetworkNET.SupervisedLearning.Progress; -using SixLabors.ImageSharp; -using SixLabors.ImageSharp.PixelFormats; namespace NeuralNetworkNET.APIs.Interfaces.Data { @@ -27,15 +25,6 @@ public interface ITrainingDataset : IDataset /// The list of functions to use to generate new samples from each one in the dataset void Expand([NotNull, ItemNotNull] params Func[] factories); - /// - /// Artificially expands the current dataset by reinterpreting each sample as an image of the specified type and applying the input transformation to each sample to create new ones - /// - /// The type of image pixels. It must be either , or - /// The width of each sample image - /// The height of each sample image - /// The list of functions to use to process the images and generate new samples - void Expand(int width, int height, [NotNull, ItemNotNull] params Action>[] factories) where TPixel : struct, IPixel; - /// /// Removes a specified fraction of samples from the current instance and returns a new /// diff --git a/NeuralNetwork.NET/APIs/NetworkLayers.cs b/NeuralNetwork.NET/APIs/NetworkLayers.cs index 0a07808..961f26f 100644 --- a/NeuralNetwork.NET/APIs/NetworkLayers.cs +++ b/NeuralNetwork.NET/APIs/NetworkLayers.cs @@ -75,5 +75,15 @@ public static LayerFactory Convolutional( [PublicAPI] [Pure, NotNull] public static LayerFactory Pooling(ActivationType activation) => input => new PoolingLayer(input, PoolingInfo.Default, activation); + + /// + /// Creates a new batch normalization layer + /// + /// The normalization mode to use for the new layer + /// The desired activation function to use in the network layer + [PublicAPI] + [Pure, NotNull] + public static LayerFactory BatchNormalization(NormalizationMode mode, ActivationType activation) + => input => new BatchNormalizationLayer(input, mode, activation); } } diff --git a/NeuralNetwork.NET/APIs/NetworkLoader.cs b/NeuralNetwork.NET/APIs/NetworkLoader.cs index 7e22741..039d489 100644 --- 
a/NeuralNetwork.NET/APIs/NetworkLoader.cs +++ b/NeuralNetwork.NET/APIs/NetworkLoader.cs @@ -90,6 +90,7 @@ internal static INetworkLayer CpuLayerDeserialize([NotNull] Stream stream, Layer case LayerType.Pooling: return PoolingLayer.Deserialize(stream); case LayerType.Output: return OutputLayer.Deserialize(stream); case LayerType.Softmax: return SoftmaxLayer.Deserialize(stream); + case LayerType.BatchNormalization: return BatchNormalizationLayer.Deserialize(stream); default: throw new ArgumentOutOfRangeException(nameof(type), $"The {type} layer type is not supported by the default deserializer"); } } diff --git a/NeuralNetwork.NET/APIs/NetworkManager.cs b/NeuralNetwork.NET/APIs/NetworkManager.cs index 91a6914..8b2c554 100644 --- a/NeuralNetwork.NET/APIs/NetworkManager.cs +++ b/NeuralNetwork.NET/APIs/NetworkManager.cs @@ -59,6 +59,11 @@ public static INeuralNetwork NewGraph(TensorInfo input, [NotNull] Action + /// Gets whether or not a neural network is currently being trained + /// + public static bool TrainingInProgress { get; private set; } + /// /// Trains a neural network with the given parameters /// @@ -139,15 +144,26 @@ private static TrainingSessionResult TrainNetworkCore( // Preliminary checks if (epochs < 1) throw new ArgumentOutOfRangeException(nameof(epochs), "The number of epochs must at be at least equal to 1"); if (dropout < 0 || dropout >= 1) throw new ArgumentOutOfRangeException(nameof(dropout), "The dropout probability is invalid"); + if (validationDataset != null && (validationDataset.InputFeatures != dataset.InputFeatures || validationDataset.OutputFeatures != dataset.OutputFeatures)) + throw new ArgumentException("The validation dataset doesn't match the training dataset", nameof(validationDataset)); + if (testDataset != null && (testDataset.InputFeatures != dataset.InputFeatures || testDataset.OutputFeatures != dataset.OutputFeatures)) + throw new ArgumentException("The test dataset doesn't match the training dataset", nameof(testDataset)); + if 
(dataset.InputFeatures != network.InputInfo.Size || dataset.OutputFeatures != network.OutputInfo.Size) + throw new ArgumentException("The input dataset doesn't match the number of input and output features for the current network", nameof(dataset)); // Start the training - return NetworkTrainer.TrainNetwork( + TrainingInProgress = TrainingInProgress + ? throw new InvalidOperationException("Can't train two networks at the same time") // This would cause problems with cuDNN + : true; + TrainingSessionResult result = NetworkTrainer.TrainNetwork( network as NeuralNetworkBase ?? throw new ArgumentException("The input network instance isn't valid", nameof(network)), dataset as BatchesCollection ?? throw new ArgumentException("The input dataset instance isn't valid", nameof(dataset)), epochs, dropout, algorithm, batchProgress, trainingProgress, validationDataset as ValidationDataset, testDataset as TestDataset, token); + TrainingInProgress = false; + return result; } } } diff --git a/NeuralNetwork.NET/APIs/Structs/Tensor.cs b/NeuralNetwork.NET/APIs/Structs/Tensor.cs index a2d7030..24f9e19 100644 --- a/NeuralNetwork.NET/APIs/Structs/Tensor.cs +++ b/NeuralNetwork.NET/APIs/Structs/Tensor.cs @@ -95,8 +95,8 @@ private Tensor(IntPtr ptr, int entities, int length) /// /// Creates a new instance with the specified shape /// - /// The height of the matrix - /// The width of the matrix + /// The height of the + /// The width of the /// The resulting instance [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void New(int n, int chw, out Tensor tensor) @@ -108,8 +108,8 @@ public static void New(int n, int chw, out Tensor tensor) /// /// Creates a new instance with the specified shape and initializes the allocated memory to 0s /// - /// The height of the matrix - /// The width of the matrix + /// The height of the + /// The width of the /// The resulting instance [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void NewZeroed(int n, int chw, out 
Tensor tensor) @@ -124,8 +124,8 @@ public static unsafe void NewZeroed(int n, int chw, out Tensor tensor) /// Creates a new instance by wrapping the input pointer /// /// The target memory area - /// The height of the final matrix - /// The width of the final matrix + /// The height of the final + /// The width of the final /// The resulting instance [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void Reshape(float* p, int n, int chw, out Tensor tensor) @@ -152,8 +152,8 @@ public static unsafe void Reshape(float* p, int n, int chw, out Tensor tensor) /// Creates a new instance by copying the contents at the given memory location and reshaping it to the desired size /// /// The target memory area to copy - /// The height of the final matrix - /// The width of the final matrix + /// The height of the final + /// The width of the final /// The resulting instance [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void From(float* p, int n, int chw, out Tensor tensor) @@ -179,8 +179,8 @@ public static unsafe void From([NotNull] float[,] m, out Tensor tensor) /// Creates a new instance by copying the contents of the input vector and reshaping it to the desired size /// /// The input vector to copy - /// The height of the final matrix - /// The width of the final matrix + /// The height of the final + /// The width of the final /// The resulting instance [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe void From([NotNull] float[] v, int n, int chw, out Tensor tensor) @@ -197,8 +197,8 @@ public static unsafe void From([NotNull] float[] v, int n, int chw, out Tensor t /// /// Creates a new instance by wrapping the current memory area /// - /// The height of the final matrix - /// The width of the final matrix + /// The height of the final + /// The width of the final /// The resulting instance [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Reshape(int n, int chw, out Tensor tensor) @@ 
-223,19 +223,31 @@ public void Reshape(int n, int chw, out Tensor tensor) public bool MatchShape(int entities, int length) => Entities == entities && Length == length; /// - /// Overwrites the contents of the current matrix with the input matrix + /// Overwrites the contents of the current instance with the input /// /// The input to copy [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe void Overwrite(in Tensor tensor) { - if (tensor.Entities != Entities || tensor.Length != Length) throw new ArgumentException("The input matrix doesn't have the same size as the target"); + if (tensor.Entities != Entities || tensor.Length != Length) throw new ArgumentException("The input tensor doesn't have the same size as the target"); int bytes = sizeof(float) * Size; Buffer.MemoryCopy(tensor, this, bytes, bytes); } /// - /// Duplicates the current instance to an output matrix + /// Overwrites the contents of the current with the input array + /// + /// The input array to copy + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void Overwrite([NotNull] float[] array) + { + if (array.Length != Size) throw new ArgumentException("The input array doesn't have the same size as the target"); + int bytes = sizeof(float) * Size; + fixed (float* p = array) Buffer.MemoryCopy(p, this, bytes, bytes); + } + + /// + /// Duplicates the current instance to an output /// /// The output tensor [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -340,7 +352,7 @@ public static unsafe void TryFree([NotNull] params Tensor[] tensors) /// /// A proxy type to debug instances of the /// - private struct _TensorProxy + private readonly struct _TensorProxy { /// /// Gets a preview of the underlying memory area wrapped by this instance @@ -352,7 +364,7 @@ private struct _TensorProxy private const int MaximumRowsCount = 10; - private const int MaximumItemsCount = 40000; + private const int MaximumItemsCount = 30000; [SuppressMessage("ReSharper", "UnusedMember.Local")] public 
_TensorProxy(Tensor obj) diff --git a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs index f154ab7..2df440a 100644 --- a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs @@ -3,6 +3,7 @@ using System; using System.Diagnostics; using System.Runtime.CompilerServices; +using SixLabors.ImageSharp; using SixLabors.ImageSharp.PixelFormats; namespace NeuralNetworkNET.APIs.Structs @@ -95,7 +96,7 @@ public static TensorInfo Image(int height, int width) where TPixel : str { if (typeof(TPixel) == typeof(Alpha8)) return new TensorInfo(height, width, 1); if (typeof(TPixel) == typeof(Rgb24)) return new TensorInfo(height, width, 3); - if (typeof(TPixel) == typeof(Argb32)) return new TensorInfo(height, width, 4); + if (typeof(TPixel) == typeof(Argb32) || typeof(TPixel) == typeof(Rgba32)) return new TensorInfo(height, width, 4); throw new InvalidOperationException($"The {typeof(TPixel).Name} pixel format isn't currently supported"); } diff --git a/NeuralNetwork.NET/Extensions/DebugExtensions.cs b/NeuralNetwork.NET/Extensions/DebugExtensions.cs index f66293c..99f395b 100644 --- a/NeuralNetwork.NET/Extensions/DebugExtensions.cs +++ b/NeuralNetwork.NET/Extensions/DebugExtensions.cs @@ -22,7 +22,13 @@ public static unsafe bool ContentEquals(this Span x1, Span x2, flo fixed (float* p1 = &x1.DangerousGetPinnableReference(), p2 = &x2.DangerousGetPinnableReference()) { for (int i = 0; i < x1.Length; i++) - if (!p1[i].EqualsWithDelta(p2[i], absolute, relative)) return false; + if (!p1[i].EqualsWithDelta(p2[i], absolute, relative)) + { + #if DEBUG + System.Diagnostics.Debug.WriteLine($"[NO MATCH] {p1[i]} | {p2[i]} | diff: {(p1[i] - p2[i]).Abs()}"); + #endif + return false; + } } return true; } diff --git a/NeuralNetwork.NET/Extensions/HttpClientExtensions.cs b/NeuralNetwork.NET/Extensions/HttpClientExtensions.cs new file mode 100644 index 0000000..c918d87 --- /dev/null +++ 
b/NeuralNetwork.NET/Extensions/HttpClientExtensions.cs @@ -0,0 +1,86 @@ +using System; +using System.IO; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using JetBrains.Annotations; + +namespace NeuralNetworkNET.Extensions +{ + /// + /// A simple class with some extension methods for the class + /// + public static class HttpClientExtensions + { + /// + /// Downloads a from the given URL, and reports the download progress using the input callback + /// + /// The instance to use to download the data + /// The URL to download + /// The optional progress calback + /// The optional token for the download operation + [MustUseReturnValue, NotNull, ItemCanBeNull] + public static async Task GetAsync([NotNull] this HttpClient client, String url, [CanBeNull] IProgress callback, CancellationToken token = default) + { + using (HttpResponseMessage response = await client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, token)) + { + if (!response.IsSuccessStatusCode || token.IsCancellationRequested) return null; + using (Stream source = await response.Content.ReadAsStreamAsync()) + { + // Read and store the data + Stream result = new MemoryStream(); + long + totalRead = 0L, + totalReads = 0L, + length = response.Content.Headers.ContentLength ?? 0; + byte[] buffer = new byte[8192]; + bool isMoreToRead = true; + do + { + int read = await source.ReadAsync(buffer, 0, buffer.Length, token); + if (read == 0) isMoreToRead = false; + else + { + await result.WriteAsync(buffer, 0, read, token); + totalRead += read; + if (totalReads++ % 2000 == 0) + callback?.Report(new HttpProgress(totalRead, length > 0 ? 
(int)(totalRead * 100 / length) : 0)); + } + } + while (isMoreToRead && !token.IsCancellationRequested); + + // Return the result + if (token.IsCancellationRequested) + { + result.Dispose(); + return null; + } + result.Seek(0, SeekOrigin.Begin); // Move the content stream back to the start + return result; + } + } + } + } + + /// + /// A that contains info on a pending download + /// + public readonly struct HttpProgress + { + /// + /// Gets the total number of downloaded bytes + /// + public long DownloadedBytes { get; } + + /// + /// Gets the current download percentage + /// + public int Percentage { get; } + + internal HttpProgress(long bytes, int percentage) + { + DownloadedBytes = bytes; + Percentage = percentage; + } + } +} diff --git a/NeuralNetwork.NET/Extensions/MiscExtensions.cs b/NeuralNetwork.NET/Extensions/MiscExtensions.cs index 891bf53..e198d8e 100644 --- a/NeuralNetwork.NET/Extensions/MiscExtensions.cs +++ b/NeuralNetwork.NET/Extensions/MiscExtensions.cs @@ -79,6 +79,30 @@ public static ref T SwitchRef(this bool flag, ref T left, ref T right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float Abs(this float value) => value >= 0 ? 
value : -value; + /// + /// Returns the minimum possible upper approximation of the given value + /// + /// The value to approximate + public static unsafe float ToApproximatedFloat(this double value) + { + // Get the bit representation of the double value + ulong bits = *((ulong*)&value); + + // Extract and re-bias the exponent field + ulong exponent = ((bits >> 52) & 0x7FF) - 1023 + 127; + + // Extract the significand bits and truncate the excess + ulong significand = (bits >> 29) & 0x7FFFFF; + + // Assemble the result in 32-bit unsigned integer format, then add 1 + ulong converted = (((bits >> 32) & 0x80000000u) + | (exponent << 23) + | significand) + 1; + + // Reinterpret the bit pattern as a float + return *((float*)&converted); + } + /// /// Calculates if two values are within a given distance from one another /// diff --git a/NeuralNetwork.NET/Helpers/DatasetsDownloader.cs b/NeuralNetwork.NET/Helpers/DatasetsDownloader.cs index ef4b889..b90a16c 100644 --- a/NeuralNetwork.NET/Helpers/DatasetsDownloader.cs +++ b/NeuralNetwork.NET/Helpers/DatasetsDownloader.cs @@ -12,6 +12,7 @@ using ICSharpCode.SharpZipLib.GZip; using ICSharpCode.SharpZipLib.Tar; using JetBrains.Annotations; +using NeuralNetworkNET.Extensions; namespace NeuralNetworkNET.Helpers { @@ -44,7 +45,7 @@ private static String DatasetsPath // Local lazy instance of the singleton HttpClient in use [NotNull] - private static readonly Lazy _Client = new Lazy(() => new HttpClient()); + private static readonly Lazy _Client = new Lazy(() => new HttpClient { Timeout = TimeSpan.FromMinutes(10) }); // Large timeout to download .tar.gz archives /// /// Gets the singleton to use, since it is reentrant and thread-safe, see docs.microsoft.com/it-it/dotnet/api/system.net.http.httpclient @@ -68,9 +69,10 @@ private static HttpClient Client /// Gets a instance returning a with the contents of the input URL /// /// The target URL to use to download the resources + /// The optional progress calback /// A cancellation 
token for the operation [MustUseReturnValue, ItemCanBeNull] - public static async Task> GetFileAsync([NotNull] String url, CancellationToken token) + public static async Task> GetFileAsync([NotNull] String url, [CanBeNull] IProgress callback, CancellationToken token) { // Get the target filename String @@ -84,13 +86,17 @@ public static async Task> GetFileAsync([NotNull] String url, Cancel try { // Download from the input URL - HttpResponseMessage result = await Client.GetAsync(url, token); - if (!result.IsSuccessStatusCode || token.IsCancellationRequested) return null; - byte[] data = await result.Content.ReadAsByteArrayAsync(); - - // Write the HTTP content - using (FileStream stream = File.OpenWrite(path)) - await stream.WriteAsync(data, 0, data.Length, default); // Ensure the whole content is written to disk + using (Stream stream = await Client.GetAsync(url, callback, token)) + { + if (stream == null || token.IsCancellationRequested) return null; + byte[] data = new byte[stream.Length]; + if (await stream.ReadAsync(data, 0, data.Length, token) != data.Length || + token.IsCancellationRequested) return null; + + // Write the HTTP content + using (FileStream file = File.OpenWrite(path)) + await file.WriteAsync(data, 0, data.Length, default); // Ensure the whole content is written to disk + } } catch { @@ -105,9 +111,10 @@ public static async Task> GetFileAsync([NotNull] String url, Cancel /// Gets an with a collection of instances for each file in the tar.gz archive pointed by the input URL /// /// The target URL to use to download the archive + /// The optional progress calback /// A cancellation token for the operation [MustUseReturnValue, ItemCanBeNull] - public static async Task>> GetArchiveAsync([NotNull] String url, CancellationToken token) + public static async Task>> GetArchiveAsync([NotNull] String url, [CanBeNull] IProgress callback, CancellationToken token) { // Check if the archive is already present String folder = Path.Combine(DatasetsPath, 
GetFilename(url)); @@ -116,26 +123,25 @@ public static async Task>> GetArchiveAs { try { - // Download from the input URL - HttpResponseMessage result = await Client.GetAsync(url, token); - if (!result.IsSuccessStatusCode || token.IsCancellationRequested) return null; - - // Extract the .tar.gz archive - using (Stream stream = await result.Content.ReadAsStreamAsync()) - using (GZipInputStream gzip = new GZipInputStream(stream)) - using (TarArchive tar = TarArchive.CreateInputTarArchive(gzip)) + // Download and extract the .tar.gz archive + using (Stream stream = await Client.GetAsync(url, callback, token)) { - // Extract into the target dir (this will create a subfolder in this position) - Directory.CreateDirectory(folder); - tar.ExtractContents(folder); - - // Move all the contents in the root directory - foreach (String path in Directory.EnumerateFiles(folder, "*", SearchOption.AllDirectories)) - File.Move(path, Path.Combine(folder, Path.GetFileName(path))); - - // Delete the subfolders - foreach (String subdir in Directory.GetDirectories(folder)) - Directory.Delete(subdir); + if (stream == null || token.IsCancellationRequested) return null; + using (GZipInputStream gzip = new GZipInputStream(stream)) + using (TarArchive tar = TarArchive.CreateInputTarArchive(gzip)) + { + // Extract into the target dir (this will create a subfolder in this position) + Directory.CreateDirectory(folder); + tar.ExtractContents(folder); + + // Move all the contents in the root directory + foreach (String path in Directory.EnumerateFiles(folder, "*", SearchOption.AllDirectories)) + File.Move(path, Path.Combine(folder, Path.GetFileName(path))); + + // Delete the subfolders + foreach (String subdir in Directory.GetDirectories(folder)) + Directory.Delete(subdir); + } } } catch diff --git a/NeuralNetwork.NET/Helpers/ImageLoader.cs b/NeuralNetwork.NET/Helpers/ImageLoader.cs index df14ca7..8d48001 100644 --- a/NeuralNetwork.NET/Helpers/ImageLoader.cs +++ 
b/NeuralNetwork.NET/Helpers/ImageLoader.cs @@ -1,5 +1,8 @@ using System; +using System.Numerics; +using System.Runtime.CompilerServices; using JetBrains.Annotations; +using NeuralNetworkNET.APIs.Enums; using SixLabors.ImageSharp; using SixLabors.ImageSharp.Advanced; using SixLabors.ImageSharp.PixelFormats; @@ -15,25 +18,27 @@ internal static class ImageLoader /// Loads the target image and applies the requested changes, then converts it to a dataset sample /// /// The path of the image to load + /// The image normalization mode to apply /// The optional changes to apply to the image [Pure, NotNull] - public static float[] Load([NotNull] String path, [CanBeNull] Action> modify) where TPixel : struct, IPixel + public static float[] Load([NotNull] String path, ImageNormalizationMode normalization, [CanBeNull] Action> modify) where TPixel : struct, IPixel { using (Image image = Image.Load(path)) { if (modify != null) image.Mutate(modify); - if (typeof(TPixel) == typeof(Alpha8)) return Load(image as Image); - if (typeof(TPixel) == typeof(Rgb24)) return Load(image as Image); - if (typeof(TPixel) == typeof(Argb32)) return Load(image as Image); + if (typeof(TPixel) == typeof(Alpha8)) return Load(image as Image, normalization); + if (typeof(TPixel) == typeof(Rgb24)) return Load(image as Image, normalization); + if (typeof(TPixel) == typeof(Argb32)) return Load(image as Image, normalization); + if (typeof(TPixel) == typeof(Rgba32)) return Load(image as Image, normalization); throw new InvalidOperationException($"The {typeof(TPixel).Name} pixel format isn't currently supported"); } } #region Loaders - // Loads an RGBA32 image + // Loads an ARGB32 image [Pure, NotNull] - private static unsafe float[] Load(Image image) + private static unsafe float[] Load(Image image, ImageNormalizationMode normalization) { int resolution = image.Height * image.Width; float[] sample = new float[resolution * 4]; @@ -42,11 +47,32 @@ private static unsafe float[] Load(Image image) { for (int i = 
0; i < resolution; i++) { - Argb32* pxy = p0 + i; - psample[i] = pxy->A / 255f; - psample[i + resolution] = pxy->R / 255f; - psample[i + 2 * resolution] = pxy->G / 255f; - psample[i + 3 * resolution] = pxy->B / 255f; + Vector4 pixels = p0[i].Normalize(normalization); + psample[i] = pixels.W; + psample[i + resolution] = pixels.X; + psample[i + 2 * resolution] = pixels.Y; + psample[i + 3 * resolution] = pixels.Z; + } + } + return sample; + } + + // Loads an RGBA32 image + [Pure, NotNull] + private static unsafe float[] Load(Image image, ImageNormalizationMode normalization) + { + int resolution = image.Height * image.Width; + float[] sample = new float[resolution * 4]; + fixed (Rgba32* p0 = &image.DangerousGetPinnableReferenceToPixelBuffer()) + fixed (float* psample = sample) + { + for (int i = 0; i < resolution; i++) + { + Vector4 pixels = p0[i].Normalize(normalization); + psample[i] = pixels.X; + psample[i + resolution] = pixels.Y; + psample[i + 2 * resolution] = pixels.Z; + psample[i + 3 * resolution] = pixels.W; } } return sample; @@ -54,7 +80,7 @@ private static unsafe float[] Load(Image image) // Loads an RGBA24 image [Pure, NotNull] - private static unsafe float[] Load(Image image) + private static unsafe float[] Load(Image image, ImageNormalizationMode normalization) { int resolution = image.Height * image.Width; float[] sample = new float[resolution * 3]; @@ -63,10 +89,10 @@ private static unsafe float[] Load(Image image) { for (int i = 0; i < resolution; i++) { - Rgb24* pxy = p0 + i; - psample[i] = pxy->R / 255f; - psample[i + resolution] = pxy->G / 255f; - psample[i + 2 * resolution] = pxy->B / 255f; + Vector4 pixels = p0[i].Normalize(normalization); + psample[i] = pixels.X; + psample[i + resolution] = pixels.Y; + psample[i + 2 * resolution] = pixels.Z; } } return sample; @@ -74,39 +100,43 @@ private static unsafe float[] Load(Image image) // Loads an ALPHA8 image [Pure, NotNull] - private static unsafe float[] Load(Image image) + private static unsafe 
float[] Load(Image image, ImageNormalizationMode normalization) { int resolution = image.Height * image.Width; float[] sample = new float[resolution]; fixed (Alpha8* p0 = &image.DangerousGetPinnableReferenceToPixelBuffer()) fixed (float* psample = sample) for (int i = 0; i < resolution; i++) - psample[i] = p0[i].PackedValue / 255f; + { + switch (normalization) + { + case ImageNormalizationMode.Sigmoid: psample[i] = p0[i].PackedValue / 255f; break; + case ImageNormalizationMode.Normal: psample[i] = p0[i].PackedValue * 2 / 255f - 1; break; + case ImageNormalizationMode.None: psample[i] = p0[i].PackedValue; break; + default: throw new ArgumentOutOfRangeException(nameof(normalization), "Invalid normalization mode"); + } + } return sample; } #endregion - [Pure, NotNull] - public static unsafe float[] Process([NotNull] float[] data, int width, int height, [NotNull] Action> modify) where TPixel : struct, IPixel + /// + /// Normalizes the input value using the specified mode + /// + /// Tye input pixel type + /// The input pixel to normalize + /// The normalization mode to use + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector4 Normalize(this TPixel pixel, ImageNormalizationMode normalization) where TPixel : struct, IPixel { - // Reconstruct the original image - byte[] pixels = new byte[data.Length]; - fixed (float* pdata = data) - fixed (byte* px = pixels) - { - for (int i = 0; i < data.Length; i++) - px[i] = (byte)(pdata[i] * 255); - } - - // Edit the image and return the new processed sample - using (Image image = Image.LoadPixelData(pixels, width, height)) + switch (normalization) { - image.Mutate(modify); - if (typeof(TPixel) == typeof(Alpha8)) return Load(image as Image); - if (typeof(TPixel) == typeof(Rgb24)) return Load(image as Image); - if (typeof(TPixel) == typeof(Argb32)) return Load(image as Image); - throw new InvalidOperationException($"The {typeof(TPixel).Name} pixel format isn't currently supported"); + case 
ImageNormalizationMode.Sigmoid: return pixel.ToVector4(); // Already in the [0,1] range + case ImageNormalizationMode.Normal: return Vector4.Subtract(pixel.ToVector4() * 2, Vector4.One); + case ImageNormalizationMode.None: return pixel.ToVector4() * 255f; // Rescale in the [0,255] range + default: throw new ArgumentOutOfRangeException(nameof(normalization), "Invalid normalization mode"); } } } diff --git a/NeuralNetwork.NET/Helpers/Sha256.cs b/NeuralNetwork.NET/Helpers/Sha256.cs new file mode 100644 index 0000000..82d176f --- /dev/null +++ b/NeuralNetwork.NET/Helpers/Sha256.cs @@ -0,0 +1,69 @@ +using System; +using System.IO; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Security.Cryptography; +using System.Threading.Tasks; +using JetBrains.Annotations; +using NeuralNetworkNET.Extensions; + +namespace NeuralNetworkNET.Helpers +{ + /// + /// A static class that can be used to quickly calculate hashes from array of an arbitrary type + /// + public static class Sha256 + { + // The SHA256 hash bytes length + private const int HashLength = 32; + + /// + /// Calculates an hash for the input array + /// + /// The type of items in the input array + /// The input array to process + [PublicAPI] + [Pure, NotNull] + public static unsafe byte[] Hash([NotNull] T[] array) where T : struct + { + int size = Unsafe.SizeOf() * array.Length; + fixed (byte* p = &Unsafe.As(ref array[0])) + using (UnmanagedMemoryStream stream = new UnmanagedMemoryStream(p, size, size, FileAccess.Read)) + using (SHA256 provider = SHA256.Create()) + { + return provider.ComputeHash(stream); + } + } + + /// + /// Calculates an aggregate hash for the input arrays + /// + /// The type of items in the input arrays + /// The arrays to process + [PublicAPI] + [Pure, NotNull] + public static unsafe byte[] Hash([NotNull, ItemNotNull] params T[][] arrays) where T : struct + { + // Compute the hashes in parallel + if (arrays.Length == 0) return new byte[0]; + if (arrays.Any(v => v.Length 
== 0)) throw new ArgumentException("The input array can't contain empty vectors"); + byte[][] hashes = new byte[arrays.Length][]; + Parallel.For(0, arrays.Length, i => hashes[i] = Hash(arrays[i])).AssertCompleted(); + + // Merge the computed hashes into a single bytes array + unchecked + { + byte[] result = new byte[HashLength]; + fixed (byte* p = result) + for (int i = 0; i < HashLength; i++) + { + uint hash = 17; + for (int j = 0; j < hashes.Length; j++) + hash = hash * 31 + hashes[j][i]; + p[i] = (byte)(hash % byte.MaxValue); + } + return result; + } + } + } +} diff --git a/NeuralNetwork.NET/Networks/Graph/ComputationGraphJsonConverter.cs b/NeuralNetwork.NET/Networks/Graph/ComputationGraphJsonConverter.cs index 6c8f1f5..cf5f525 100644 --- a/NeuralNetwork.NET/Networks/Graph/ComputationGraphJsonConverter.cs +++ b/NeuralNetwork.NET/Networks/Graph/ComputationGraphJsonConverter.cs @@ -4,6 +4,7 @@ using NeuralNetworkNET.APIs.Interfaces; using NeuralNetworkNET.Networks.Graph.Nodes; using Newtonsoft.Json; +using Newtonsoft.Json.Converters; using Newtonsoft.Json.Linq; namespace NeuralNetworkNET.Networks.Graph @@ -34,7 +35,8 @@ public override void WriteJson(JsonWriter writer, object value, JsonSerializer s { case ProcessingNode processing: jNode.Add("Parent", map[processing.Parent]); - jNode.Add("Layer", JToken.FromObject(processing.Layer)); + IList converters = new List { new StringEnumConverter() }; + jNode.Add("Layer", JToken.FromObject(processing.Layer, JsonSerializer.CreateDefault(new JsonSerializerSettings { Converters = converters }))); break; case DepthConcatenationNode concatenation: jNode.Add("Parents", new JArray(concatenation.Parents.Select(child => map[child]).ToList())); diff --git a/NeuralNetwork.NET/Networks/Layers/Abstract/BatchNormalizationLayerBase.cs b/NeuralNetwork.NET/Networks/Layers/Abstract/BatchNormalizationLayerBase.cs new file mode 100644 index 0000000..dcf2291 --- /dev/null +++ 
b/NeuralNetwork.NET/Networks/Layers/Abstract/BatchNormalizationLayerBase.cs @@ -0,0 +1,147 @@ +using System; +using System.IO; +using System.Runtime.CompilerServices; +using JetBrains.Annotations; +using NeuralNetworkNET.APIs.Enums; +using NeuralNetworkNET.APIs.Interfaces; +using NeuralNetworkNET.APIs.Structs; +using NeuralNetworkNET.Extensions; +using NeuralNetworkNET.Helpers; +using NeuralNetworkNET.Networks.Layers.Initialization; +using NeuralNetworkNET.SupervisedLearning.Optimization; +using Newtonsoft.Json; + +namespace NeuralNetworkNET.Networks.Layers.Abstract +{ + /// + /// A base claass for a batch normalization layer + /// + internal abstract class BatchNormalizationLayerBase : WeightedLayerBase + { + #region Fields and parameters + + /// + /// The cached mu tensor + /// + [NotNull] + public float[] Mu { get; } + + /// + /// The cached sigma^2 tensor + /// + [NotNull] + public float[] Sigma2 { get; } + + /// + /// Gets the current iteration number (for the Cumulative Moving Average) + /// + public int Iteration { get; private set; } + + /// + /// Gets the current CMA factor used to update the and tensors + /// + [JsonProperty(nameof(CumulativeMovingAverageFactor), Order = 6)] + public float CumulativeMovingAverageFactor + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => 1f / (1 + Iteration); + } + + /// + public override String Hash => Convert.ToBase64String(Sha256.Hash(Weights, Biases, Mu, Sigma2)); + + /// + public override LayerType LayerType { get; } = LayerType.BatchNormalization; + + /// + /// Gets the current normalization mode used in the layer + /// + [JsonProperty(nameof(NormalizationMode), Order = 6)] + public NormalizationMode NormalizationMode { get; } + + #endregion + + protected BatchNormalizationLayerBase(in TensorInfo shape, NormalizationMode mode, ActivationType activation) + : base(shape, shape, + WeightsProvider.NewGammaParameters(shape, mode), + WeightsProvider.NewBetaParameters(shape, mode), activation) + { + switch 
(mode) + { + case NormalizationMode.Spatial: + Mu = new float[InputInfo.Channels]; + Sigma2 = new float[InputInfo.Channels]; + break; + case NormalizationMode.PerActivation: + Mu = new float[InputInfo.Size]; + Sigma2 = new float[InputInfo.Size]; + break; + default: throw new ArgumentOutOfRangeException("Invalid batch normalization mode"); + } + Sigma2.AsSpan().Fill(1); + NormalizationMode = mode; + } + + protected BatchNormalizationLayerBase(in TensorInfo shape, NormalizationMode mode, [NotNull] float[] w, [NotNull] float[] b, int iteration, [NotNull] float[] mu, [NotNull] float[] sigma2, ActivationType activation) + : base(shape, shape, w, b, activation) + { + if (w.Length != b.Length) throw new ArgumentException("The size for both gamme and beta paarameters must be the same"); + if (mode == NormalizationMode.Spatial && w.Length != shape.Channels || + mode == NormalizationMode.PerActivation && w.Length != shape.Size) + throw new ArgumentException("Invalid parameters size for the selected normalization mode"); + if (iteration < 0) throw new ArgumentOutOfRangeException(nameof(iteration), "The iteration value must be aat least equal to 0"); + if (mu.Length != w.Length || sigma2.Length != w.Length) + throw new ArgumentException("The mu and sigma2 parameters must match the shape of the gamma and beta parameters"); + NormalizationMode = mode; + Iteration = iteration; + Mu = mu; + Sigma2 = sigma2; + } + + /// + public override void Forward(in Tensor x, out Tensor z, out Tensor a) + { + if (NetworkTrainer.BackpropagationInProgress) ForwardTraining(1f / (1 + Iteration++), x, out z, out a); + else ForwardInference(x, out z, out a); + } + + /// + /// Forwards the inputs through the batch normalization layer during an inference pass + /// + /// The input to process + /// The output activity on the current layer + /// The output activation on the current layer + public abstract void ForwardInference(in Tensor x, out Tensor z, out Tensor a); + + /// + /// Forwards the inputs 
through the batch normalization layer during a training pass, updating the CMA mean and variance instances + /// + /// The factor to use to update the cumulative moving average + /// The input to process + /// The output activity on the current layer + /// The output activation on the current layer + public abstract void ForwardTraining(float factor, in Tensor x, out Tensor z, out Tensor a); + + /// + public override bool Equals(INetworkLayer other) + { + if (!base.Equals(other)) return false; + return other is BatchNormalizationLayerBase layer && + Iteration == layer.Iteration && + Mu.ContentEquals(layer.Mu) && + Sigma2.ContentEquals(layer.Sigma2); + } + + /// + public override void Serialize(Stream stream) + { + base.Serialize(stream); + stream.Write(NormalizationMode); + stream.Write(Iteration); + stream.Write(Mu.Length); + stream.WriteShuffled(Mu); + stream.Write(Sigma2.Length); + stream.WriteShuffled(Sigma2); + } + } +} diff --git a/NeuralNetwork.NET/Networks/Layers/Abstract/WeightedLayerBase.cs b/NeuralNetwork.NET/Networks/Layers/Abstract/WeightedLayerBase.cs index 4d4373d..976083e 100644 --- a/NeuralNetwork.NET/Networks/Layers/Abstract/WeightedLayerBase.cs +++ b/NeuralNetwork.NET/Networks/Layers/Abstract/WeightedLayerBase.cs @@ -1,11 +1,11 @@ using System; using System.IO; -using System.Security.Cryptography; using JetBrains.Annotations; using NeuralNetworkNET.APIs.Enums; using NeuralNetworkNET.APIs.Interfaces; using NeuralNetworkNET.APIs.Structs; using NeuralNetworkNET.Extensions; +using NeuralNetworkNET.Helpers; using Newtonsoft.Json; namespace NeuralNetworkNET.Networks.Layers.Abstract @@ -23,39 +23,7 @@ internal abstract class WeightedLayerBase : NetworkLayerBase /// [NotNull] [JsonProperty(nameof(Hash), Order = 5)] - public unsafe String Hash - { - [Pure] - get - { - fixed (float* pw = Weights, pb = Biases) - { - // Use unmanaged streams to avoid copying the weights and biases - int - weightsSize = sizeof(float) * Weights.Length, - biasesSize = 
sizeof(float) * Biases.Length; - using (UnmanagedMemoryStream - weightsStream = new UnmanagedMemoryStream((byte*)pw, weightsSize, weightsSize, FileAccess.Read), - biasesStream = new UnmanagedMemoryStream((byte*)pb, biasesSize, biasesSize, FileAccess.Read)) - using (SHA256 provider = SHA256.Create()) - { - // Compute the two SHA256 hashes and combine them (there isn't a way to concatenate two streams with the hash class) - byte[] - weightsHash = provider.ComputeHash(weightsStream), - biasesHash = provider.ComputeHash(biasesStream), - hash = new byte[32]; - unchecked - { - for (int i = 0; i < 32; i++) - hash[i] = (byte)(17 * 31 * weightsHash[i] * 31 * biasesHash[i] % byte.MaxValue); // Trust me - } - - // Convert the final hash to a base64 string - return Convert.ToBase64String(hash); - } - } - } - } + public virtual String Hash => Convert.ToBase64String(Sha256.Hash(Weights, Biases)); /// /// Gets the weights for the current network layer diff --git a/NeuralNetwork.NET/Networks/Layers/Cpu/BatchNormalizationLayer.cs b/NeuralNetwork.NET/Networks/Layers/Cpu/BatchNormalizationLayer.cs new file mode 100644 index 0000000..fc23087 --- /dev/null +++ b/NeuralNetwork.NET/Networks/Layers/Cpu/BatchNormalizationLayer.cs @@ -0,0 +1,112 @@ +using System; +using System.IO; +using JetBrains.Annotations; +using NeuralNetworkNET.APIs.Enums; +using NeuralNetworkNET.APIs.Interfaces; +using NeuralNetworkNET.APIs.Structs; +using NeuralNetworkNET.cpuDNN; +using NeuralNetworkNET.Extensions; +using NeuralNetworkNET.Networks.Layers.Abstract; + +namespace NeuralNetworkNET.Networks.Layers.Cpu +{ + /// + /// A batch normalization layer, used to improve the convergence speed of a neural network + /// + internal sealed class BatchNormalizationLayer : BatchNormalizationLayerBase + { + public BatchNormalizationLayer(in TensorInfo shape, NormalizationMode mode, ActivationType activation) + : base(shape, mode, activation) { } + + public BatchNormalizationLayer(in TensorInfo shape, NormalizationMode 
mode, [NotNull] float[] w, [NotNull] float[] b, int iteration, [NotNull] float[] mu, [NotNull] float[] sigma2, ActivationType activation) + : base(shape, mode, w, b, iteration, mu, sigma2, activation) { } + + #region Implementation + + /// + public override unsafe void ForwardInference(in Tensor x, out Tensor z, out Tensor a) + { + fixed (float* pw = Weights, pb = Biases, pmu = Mu, ps2 = Sigma2) + { + Tensor.Reshape(pw, 1, Mu.Length, out Tensor gamma); + Tensor.Reshape(pb, 1, Mu.Length, out Tensor beta); + Tensor.Reshape(pmu, 1, Mu.Length, out Tensor mu); + Tensor.Reshape(ps2, 1, Mu.Length, out Tensor sigma2); + Tensor.Like(x, out z); + CpuDnn.BatchNormalizationForward(NormalizationMode, InputInfo, x, mu, sigma2, gamma, beta, z); + Tensor.Like(z, out a); + CpuDnn.ActivationForward(z, ActivationFunctions.Activation, a); + } + } + + /// + public override unsafe void ForwardTraining(float factor, in Tensor x, out Tensor z, out Tensor a) + { + fixed (float* pw = Weights, pb = Biases, pmu = Mu, ps2 = Sigma2) + { + Tensor.Reshape(pw, 1, Mu.Length, out Tensor gamma); + Tensor.Reshape(pb, 1, Mu.Length, out Tensor beta); + Tensor.Reshape(pmu, 1, Mu.Length, out Tensor mu); + Tensor.Reshape(ps2, 1, Mu.Length, out Tensor sigma2); + Tensor.Like(x, out z); + CpuDnn.BatchNormalizationForward(NormalizationMode, InputInfo, x, factor, mu, sigma2, gamma, beta, z); + Tensor.Like(z, out a); + CpuDnn.ActivationForward(z, ActivationFunctions.Activation, a); + } + } + + /// + public override unsafe void Backpropagate(in Tensor x, in Tensor y, in Tensor dy, in Tensor dx, out Tensor dJdw, out Tensor dJdb) + { + // Activation backward + Tensor.Like(dy, out Tensor dy_copy); + CpuDnn.ActivationBackward(y, dy, ActivationFunctions.ActivationPrime, dy_copy); + + // Input error delta + fixed (float* pw = Weights, pmu = Mu, ps2 = Sigma2) + { + Tensor.Reshape(pw, 1, Mu.Length, out Tensor gamma); + Tensor.Reshape(pmu, 1, Mu.Length, out Tensor mu); + Tensor.Reshape(ps2, 1, Mu.Length, out Tensor 
sigma2); + CpuDnn.BatchNormalizationBackwardData(NormalizationMode, InputInfo, x, mu, sigma2, gamma, dy_copy, dx); + + // Gamma gradient + Tensor.New(1, Weights.Length, out dJdw); + CpuDnn.BatchNormalizationBackwardGamma(NormalizationMode, InputInfo, x, mu, sigma2, dy_copy, dJdw); + } + + // Beta gradient + Tensor.New(1, Biases.Length, out dJdb); + CpuDnn.BatchNormalizationBackwardBeta(NormalizationMode, InputInfo, dy_copy, dJdb); + dy_copy.Free(); + } + + #endregion + + /// + /// Tries to deserialize a new from the input + /// + /// The input to use to read the layer data + [MustUseReturnValue, CanBeNull] + public static INetworkLayer Deserialize([NotNull] Stream stream) + { + if (!stream.TryRead(out TensorInfo input)) return null; + if (!stream.TryRead(out TensorInfo output) || input != output) return null; + if (!stream.TryRead(out ActivationType activation)) return null; + if (!stream.TryRead(out int wLength)) return null; + float[] weights = stream.ReadUnshuffled(wLength); + if (!stream.TryRead(out int bLength)) return null; + float[] biases = stream.ReadUnshuffled(bLength); + if (!stream.TryRead(out NormalizationMode mode)) return null; + if (!stream.TryRead(out int iteration)) return null; + if (!stream.TryRead(out int mLength)) return null; + float[] mu = stream.ReadUnshuffled(mLength); + if (!stream.TryRead(out int sLength)) return null; + float[] sigma2 = stream.ReadUnshuffled(sLength); + return new BatchNormalizationLayer(input, mode, weights, biases, iteration, mu, sigma2, activation); + } + + /// + public override INetworkLayer Clone() => new BatchNormalizationLayer(InputInfo, NormalizationMode, Weights.AsSpan().Copy(), Biases.AsSpan().Copy(), Iteration, Mu.AsSpan().Copy(), Sigma2.AsSpan().Copy(), ActivationType); + } +} diff --git a/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnBatchNormalizationLayer.cs b/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnBatchNormalizationLayer.cs new file mode 100644 index 0000000..25b3265 --- /dev/null +++ 
b/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnBatchNormalizationLayer.cs @@ -0,0 +1,189 @@ +using System; +using System.Diagnostics.CodeAnalysis; +using Alea; +using Alea.cuDNN; +using JetBrains.Annotations; +using NeuralNetworkNET.APIs.Enums; +using NeuralNetworkNET.APIs.Interfaces; +using NeuralNetworkNET.APIs.Structs; +using NeuralNetworkNET.cpuDNN; +using NeuralNetworkNET.cuDNN; +using NeuralNetworkNET.Extensions; +using NeuralNetworkNET.Networks.Layers.Abstract; + +namespace NeuralNetworkNET.Networks.Layers.Cuda +{ + /// + /// A cuDNN-powered batch normalization layer + /// + internal sealed class CuDnnBatchNormalizationLayer : BatchNormalizationLayerBase, IDisposable + { + // The NCHW tensor info for the layer inputs and outputs + [NotNull] + private readonly TensorDescriptor DataDescription = new TensorDescriptor(); + + // The NCHW tensor info for the batch normalization parameters + [NotNull] + private readonly TensorDescriptor BatchNormalizationDescription = new TensorDescriptor(); + + // Cached mean tensor + private readonly Tensor SaveMean; + + // Cached variance tensor + private readonly Tensor SaveInvVariance; + + /// + /// Gets the instance for the current layer + /// + [NotNull] + private readonly Dnn DnnInstance = CuDnnService.Instance; + + // cuDNN fields setup + private void SetupCuDnnInfo() + { + BatchNormalizationDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, Mu.Length, 1, 1); + } + + public CuDnnBatchNormalizationLayer(in TensorInfo shape, NormalizationMode mode, ActivationType activation) + : base(shape, mode, activation) + { + Tensor.NewZeroed(1, Mu.Length, out SaveMean); + Tensor.NewZeroed(1, Mu.Length, out SaveInvVariance); + SetupCuDnnInfo(); + } + + public CuDnnBatchNormalizationLayer(in TensorInfo shape, NormalizationMode mode, [NotNull] float[] w, [NotNull] float[] b, int iteration, [NotNull] float[] mu, [NotNull] float[] sigma2, ActivationType activation) + : base(shape, mode, w, b, iteration, mu, sigma2, 
activation) + { + Tensor.NewZeroed(1, Mu.Length, out SaveMean); + Tensor.NewZeroed(1, Mu.Length, out SaveInvVariance); + SetupCuDnnInfo(); + } + + #region Implementation + + /// + public override void ForwardInference(in Tensor x, out Tensor z, out Tensor a) + { + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(x), + gamma_gpu = DnnInstance.Gpu.AllocateDevice(Weights), + beta_gpu = DnnInstance.Gpu.AllocateDevice(Biases), + mu_gpu = DnnInstance.Gpu.AllocateDevice(Mu), + sigma2_gpu = DnnInstance.Gpu.AllocateDevice(Sigma2), + y_gpu = DnnInstance.Gpu.AllocateDevice(x.Size)) + { + if (NormalizationMode == NormalizationMode.PerActivation) DataDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, x.Length, 1, 1); + DataDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); + DnnInstance.BatchNormalizationForwardInference( + (BatchNormMode)NormalizationMode, 1, 0, DataDescription, x_gpu.Ptr, DataDescription, y_gpu.Ptr, + BatchNormalizationDescription, gamma_gpu.Ptr, beta_gpu.Ptr, + mu_gpu.Ptr, sigma2_gpu.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON); + y_gpu.CopyToHost(x.Entities, x.Length, out z); + DnnInstance.ActivationForward(x.Entities, x.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); + y_gpu.CopyToHost(x.Entities, x.Length, out a); + } + } + + /// + public override void ForwardTraining(float factor, in Tensor x, out Tensor z, out Tensor a) + { + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(x), + gamma_gpu = DnnInstance.Gpu.AllocateDevice(Weights), + beta_gpu = DnnInstance.Gpu.AllocateDevice(Biases), + mu_gpu = DnnInstance.Gpu.AllocateDevice(Mu), + sigma2_gpu = DnnInstance.Gpu.AllocateDevice(Sigma2), + y_gpu = DnnInstance.Gpu.AllocateDevice(x.Size), + saveMean_gpu = DnnInstance.Gpu.AllocateDevice(SaveMean), + saveInvVariance_gpu = DnnInstance.Gpu.AllocateDevice(SaveInvVariance)) + { + if (NormalizationMode == 
NormalizationMode.PerActivation) DataDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, x.Length, 1, 1); + DataDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); + DnnInstance.BatchNormalizationForwardTraining( + (BatchNormMode)NormalizationMode, 1, 0, DataDescription, x_gpu.Ptr, DataDescription, y_gpu.Ptr, + BatchNormalizationDescription, gamma_gpu.Ptr, beta_gpu.Ptr, factor, mu_gpu.Ptr, sigma2_gpu.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON, + saveMean_gpu.Ptr, saveInvVariance_gpu.Ptr); + mu_gpu.CopyTo(Mu); + sigma2_gpu.CopyTo(Sigma2); + saveMean_gpu.CopyTo(SaveMean); + saveInvVariance_gpu.CopyTo(SaveInvVariance); + y_gpu.CopyToHost(x.Entities, x.Length, out z); + DnnInstance.ActivationForward(x.Entities, x.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); + y_gpu.CopyToHost(x.Entities, x.Length, out a); + } + } + + /// + public override void Backpropagate(in Tensor x, in Tensor y, in Tensor dy, in Tensor dx, out Tensor dJdw, out Tensor dJdb) + { + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(x), + y_gpu = DnnInstance.Gpu.AllocateDevice(y), + dy_gpu = DnnInstance.Gpu.AllocateDevice(dy), + dx_gpu = DnnInstance.Gpu.AllocateDevice(dx.Size), + gamma = DnnInstance.Gpu.AllocateDevice(Weights), + dgamma = DnnInstance.Gpu.AllocateDevice(Weights.Length), + dbeta = DnnInstance.Gpu.AllocateDevice(Biases.Length), + saveMean_gpu = DnnInstance.Gpu.AllocateDevice(SaveMean), + saveInvVariance_gpu = DnnInstance.Gpu.AllocateDevice(SaveInvVariance)) + { + // Backpropagation + DnnInstance.ActivationBackward(x.Entities, x.Length, y_gpu.Ptr, dy_gpu.Ptr, ActivationFunctions.ActivationPrime, dy_gpu.Ptr); + DnnInstance.BatchNormalizationBackward( + (BatchNormMode)NormalizationMode, 1, 0, 1, 0, + DataDescription, x_gpu.Ptr, DataDescription, dy_gpu.Ptr, DataDescription, dx_gpu.Ptr, + BatchNormalizationDescription, gamma.Ptr, dgamma.Ptr, dbeta.Ptr, + 
CpuDnn.CUDNN_BN_MIN_EPSILON, saveMean_gpu.Ptr, saveInvVariance_gpu.Ptr); + dx_gpu.CopyTo(dx); + dgamma.CopyToHost(1, Weights.Length, out dJdw); + dbeta.CopyToHost(1, Biases.Length, out dJdb); + } + } + + #endregion + + /// + /// Tries to deserialize a new from the input + /// + /// The input to use to read the layer data + [MustUseReturnValue, CanBeNull] + public static INetworkLayer Deserialize([NotNull] System.IO.Stream stream) + { + if (!stream.TryRead(out TensorInfo input)) return null; + if (!stream.TryRead(out TensorInfo output) || input != output) return null; + if (!stream.TryRead(out ActivationType activation)) return null; + if (!stream.TryRead(out int wLength)) return null; + float[] weights = stream.ReadUnshuffled(wLength); + if (!stream.TryRead(out int bLength)) return null; + float[] biases = stream.ReadUnshuffled(bLength); + if (!stream.TryRead(out NormalizationMode mode)) return null; + if (!stream.TryRead(out int iteration)) return null; + if (!stream.TryRead(out int mLength)) return null; + float[] mu = stream.ReadUnshuffled(mLength); + if (!stream.TryRead(out int sLength)) return null; + float[] sigma2 = stream.ReadUnshuffled(sLength); + return new CuDnnBatchNormalizationLayer(input, mode, weights, biases, iteration, mu, sigma2, activation); + } + + /// + public override INetworkLayer Clone() => new CuDnnBatchNormalizationLayer(InputInfo, NormalizationMode, Weights.AsSpan().Copy(), Biases.AsSpan().Copy(), Iteration, Mu.AsSpan().Copy(), Sigma2.AsSpan().Copy(), ActivationType); + + #region IDisposable + + ~CuDnnBatchNormalizationLayer() => Dispose(); + + /// + void IDisposable.Dispose() => Dispose(); + + // Disposes the temporary tensors + [SuppressMessage("ReSharper", "ImpureMethodCallOnReadonlyValueField")] + private void Dispose() + { + SaveMean.Free(); + SaveInvVariance.Free(); + } + + #endregion + } +} diff --git a/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnFullyConnectedLayer.cs 
b/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnFullyConnectedLayer.cs index 457f75d..d56bd54 100644 --- a/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnFullyConnectedLayer.cs +++ b/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnFullyConnectedLayer.cs @@ -11,6 +11,9 @@ namespace NeuralNetworkNET.Networks.Layers.Cuda { + /// + /// A cuDNN-powered fully connected layer + /// internal class CuDnnFullyConnectedLayer : FullyConnectedLayer { /// diff --git a/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnPoolingLayer.cs b/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnPoolingLayer.cs index 2ae7185..8e22f9a 100644 --- a/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnPoolingLayer.cs +++ b/NeuralNetwork.NET/Networks/Layers/Cuda/CuDnnPoolingLayer.cs @@ -7,14 +7,12 @@ using NeuralNetworkNET.cuDNN; using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Networks.Layers.Cpu; -using Newtonsoft.Json; namespace NeuralNetworkNET.Networks.Layers.Cuda { /// /// A pooling layer running on cuDNN, with a custom pooling mode /// - [JsonObject(MemberSerialization.OptIn)] internal sealed class CuDnnPoolingLayer : PoolingLayer { #region cuDNN fields diff --git a/NeuralNetwork.NET/Networks/Layers/Initialization/WeightsProvider.cs b/NeuralNetwork.NET/Networks/Layers/Initialization/WeightsProvider.cs index 59c24af..2235cb8 100644 --- a/NeuralNetwork.NET/Networks/Layers/Initialization/WeightsProvider.cs +++ b/NeuralNetwork.NET/Networks/Layers/Initialization/WeightsProvider.cs @@ -133,5 +133,40 @@ public static float[] NewBiases(int length, BiasInitializationMode mode) default: throw new ArgumentOutOfRangeException(nameof(mode), "Unsupported biases initialization mode"); } } + + /// + /// Creates a new weights vector for a batch normalization layer + /// + /// The layer inputs and ouputs + /// The normalization mode to use + [Pure, NotNull] + public static unsafe float[] NewGammaParameters(in TensorInfo shape, NormalizationMode mode) + { + int l; + if (mode == NormalizationMode.Spatial) l = shape.Channels; + 
else if (mode == NormalizationMode.PerActivation) l = shape.Size; + else throw new ArgumentOutOfRangeException(nameof(mode), "Invalid normalization mode"); + float[] weights = new float[l]; + fixed (float* pw = weights) + for (int i = 0; i < l; i++) + pw[i] = 1; + return weights; + } + + /// + /// Creates a new beta weights vector for a batch normalization layer + /// + /// The layer inputs and ouputs + /// The normalization mode to use + [Pure, NotNull] + public static float[] NewBetaParameters(in TensorInfo shape, NormalizationMode mode) + { + switch (mode) + { + case NormalizationMode.Spatial: return NewBiases(shape.Channels, BiasInitializationMode.Zero); + case NormalizationMode.PerActivation: return NewBiases(shape.Size, BiasInitializationMode.Zero); + default: throw new ArgumentOutOfRangeException(nameof(mode), "Invalid normalization mode"); + } + } } } diff --git a/NeuralNetwork.NET/NeuralNetwork.NET.csproj b/NeuralNetwork.NET/NeuralNetwork.NET.csproj index bf06969..2c6817c 100644 --- a/NeuralNetwork.NET/NeuralNetwork.NET.csproj +++ b/NeuralNetwork.NET/NeuralNetwork.NET.csproj @@ -3,7 +3,7 @@ netstandard2.0 NeuralNetworkNET - 2.0.0 + 2.1.0 Sergio Pedri Sergio Pedri A TensorFlow-inspired neural network library built from scratch in C# 7.2 for .NET Standard 2.0, with GPU support through cuDNN and native memory management @@ -14,9 +14,9 @@ cnn, neuralnetwork, deeplearning, ai, cuda, csharp, gpu, net, netstandard true true - • Added computation graph networks (eg. 
ResNet, Inception) -• New dataset manipulation APIs -• Bug fixes and code improvements + • Added batch normalization layers +• APIs refactoring and improvements +• Bug fixes @@ -35,11 +35,11 @@ PackageReference true true - 2.0.0.0 + 2.1.0.0 $(AssetTargetFallback);net45 - 2.0.0.0 + 2.1.0.0 diff --git a/NeuralNetwork.NET/SupervisedLearning/Data/BatchesCollection.cs b/NeuralNetwork.NET/SupervisedLearning/Data/BatchesCollection.cs index f152518..14011a2 100644 --- a/NeuralNetwork.NET/SupervisedLearning/Data/BatchesCollection.cs +++ b/NeuralNetwork.NET/SupervisedLearning/Data/BatchesCollection.cs @@ -11,8 +11,6 @@ using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Helpers; using NeuralNetworkNET.SupervisedLearning.Progress; -using SixLabors.ImageSharp; -using SixLabors.ImageSharp.PixelFormats; namespace NeuralNetworkNET.SupervisedLearning.Data { @@ -113,30 +111,6 @@ public void Expand(params Func[] factories) }), BatchSize).Batches; } - /// - public void Expand(int width, int height, params Action>[] factories) where TPixel : struct, IPixel - { - if (factories.Length < 1) throw new ArgumentException("There haas to be at least one input factory", nameof(factories)); - if (width * height != InputFeatures) throw new ArgumentException("The specified image resolution doesn't match the samples size"); - Batches = From(Batches.SelectMany(b => - { - IEnumerable> Expander() - { - int n = b.X.GetLength(0); - for (int i = 0; i < n; i++) - { - float[] - x = b.X.Slice(i).ToArray(), - y = b.Y.Slice(i).ToArray(); - yield return () => (x, y); - foreach (Action> f in factories) - yield return () => (ImageLoader.Process(x, width, height, f), y); - } - } - return Expander(); - }), BatchSize).Batches; - } - /// public (ITrainingDataset, ITestDataset) PartitionWithTest(float ratio, Action progress = null) { diff --git a/NeuralNetwork.NET/SupervisedLearning/Optimization/NetworkTrainer.cs b/NeuralNetwork.NET/SupervisedLearning/Optimization/NetworkTrainer.cs index a1380b7..569b973 
100644 --- a/NeuralNetwork.NET/SupervisedLearning/Optimization/NetworkTrainer.cs +++ b/NeuralNetwork.NET/SupervisedLearning/Optimization/NetworkTrainer.cs @@ -82,6 +82,11 @@ public static TrainingSessionResult TrainNetwork( return Optimize(network, batches, epochs, dropout, optimizer, batchProgress, trainingProgress, validationDataset, testDataset, token); } + /// + /// Gets whether or not a neural network is currently processing the training samples through backpropagation (as opposed to evaluating them) + /// + public static bool BackpropagationInProgress { get; private set; } + /// /// Trains the target using the input algorithm /// @@ -122,12 +127,18 @@ TrainingSessionResult PrepareResult(TrainingStopReason reason, int loops) miniBatches.CrossShuffle(); // Gradient descent over the current batches + BackpropagationInProgress = true; for (int j = 0; j < miniBatches.BatchesCount; j++) { - if (token.IsCancellationRequested) return PrepareResult(TrainingStopReason.TrainingCanceled, i); + if (token.IsCancellationRequested) + { + BackpropagationInProgress = false; + return PrepareResult(TrainingStopReason.TrainingCanceled, i); + } network.Backpropagate(miniBatches.Batches[j], dropout, updater); batchMonitor?.NotifyCompletedBatch(miniBatches.Batches[j].X.GetLength(0)); } + BackpropagationInProgress = false; batchMonitor?.Reset(); if (network.IsInNumericOverflow) return PrepareResult(TrainingStopReason.NumericOverflow, i); diff --git a/NeuralNetwork.NET/cpuDNN/CpuDnn{Normalization}.cs b/NeuralNetwork.NET/cpuDNN/CpuDnn{Normalization}.cs new file mode 100644 index 0000000..429e949 --- /dev/null +++ b/NeuralNetwork.NET/cpuDNN/CpuDnn{Normalization}.cs @@ -0,0 +1,500 @@ +using System; +using System.Threading.Tasks; +using NeuralNetworkNET.APIs.Enums; +using NeuralNetworkNET.APIs.Structs; +using NeuralNetworkNET.Extensions; + +namespace NeuralNetworkNET.cpuDNN +{ + public static partial class CpuDnn + { + /// + /// Gets the minimum epsilon allowed to be used in batch 
normalization methods + /// + internal static readonly float CUDNN_BN_MIN_EPSILON = 1e-5.ToApproximatedFloat(); + + /// + /// Executes the forward pass in a batch normalization layer + /// + /// The desired normalization mode to apply + /// The ifo on the input to process + /// The input to normalize + /// The factor for the cumulative moving average + /// A to use to store the temporary median values (used for backpropagation too) + /// A to use to store the temporary standard deviation values (used for backpropagation too) + /// The layer gamma parameters + /// The layer beta parameters + /// The output for the current layer + public static void BatchNormalizationForward( + NormalizationMode mode, in TensorInfo info, in Tensor x, + float factor, in Tensor mu, in Tensor sigma2, + in Tensor gamma, in Tensor beta, in Tensor y) + { + if (info.Size != x.Length) throw new ArgumentException("The tensor info doesn't match the length of the input tensor", nameof(x)); + if (!sigma2.MatchShape(mu)) throw new ArgumentException("Invalid standard deviation tensor shape", nameof(sigma2)); + if (!gamma.MatchShape(sigma2)) throw new ArgumentException("The gamma tensor doesn't have the right shape", nameof(gamma)); + if (!beta.MatchShape(gamma)) throw new ArgumentException("The beta tensor doesn't have the right shape", nameof(beta)); + if (!x.MatchShape(y)) throw new ArgumentException("The input and output tensors must have the same shape", nameof(y)); + switch (mode) + { + // A single mu and variance value per input channel + case NormalizationMode.Spatial: + BatchNormalizationForward(info, x, factor, mu, sigma2, gamma, beta, y); + break; + + // Each individual activation has its own median and variance + case NormalizationMode.PerActivation: + BatchNormalizationForward(x, factor, mu, sigma2, gamma, beta, y); + break; + default: throw new ArgumentOutOfRangeException(nameof(mode), "Invalid normalization mode"); + } + } + + /// + /// Executes the forward pass in a batch 
normalization layer in inference mode + /// + /// The desired normalization mode to apply + /// The ifo on the input to process + /// The input to normalize + /// A to use to store the temporary median values (used for backpropagation too) + /// A to use to store the temporary standard deviation values (used for backpropagation too) + /// The layer gamma parameters + /// The layer beta parameters + /// The output for the current layer + public static void BatchNormalizationForward( + NormalizationMode mode, in TensorInfo info, in Tensor x, + in Tensor mu, in Tensor sigma2, + in Tensor gamma, in Tensor beta, in Tensor y) + { + if (info.Size != x.Length) throw new ArgumentException("The tensor info doesn't match the length of the input tensor", nameof(x)); + if (!sigma2.MatchShape(mu)) throw new ArgumentException("Invalid standard deviation tensor shape", nameof(sigma2)); + if (!gamma.MatchShape(sigma2)) throw new ArgumentException("The gamma tensor doesn't have the right shape", nameof(gamma)); + if (!beta.MatchShape(gamma)) throw new ArgumentException("The beta tensor doesn't have the right shape", nameof(beta)); + if (!x.MatchShape(y)) throw new ArgumentException("The input and output tensors must have the same shape", nameof(y)); + switch (mode) + { + // A single mu and variance value per input channel + case NormalizationMode.Spatial: + BatchNormalizationForward(info, x, mu, sigma2, gamma, beta, y); + break; + + // Each individual activation has its own median and variance + case NormalizationMode.PerActivation: + BatchNormalizationForward(x, mu, sigma2, gamma, beta, y); + break; + default: throw new ArgumentOutOfRangeException(nameof(mode), "Invalid normalization mode"); + } + } + + /// + /// Executes the backward pass through a batch normalization layer + /// + /// The desired normalization mode to apply + /// The ifo on the input to process + /// The input to normalize + /// A with the temporary median values calculated in the forward pass + /// A with the 
temporary standard deviation values calculated in the forward pass + /// The layer gamma parameters + /// The output error delta + /// The resulting backpropagated error delta + public static void BatchNormalizationBackwardData( + NormalizationMode mode, in TensorInfo info, in Tensor x, + in Tensor mu, in Tensor sigma2, in Tensor gamma, + in Tensor dy, in Tensor dx) + { + // Checks + if (!sigma2.MatchShape(mu)) throw new ArgumentException("Invalid standard deviation tensor shape", nameof(sigma2)); + if (!gamma.MatchShape(sigma2)) throw new ArgumentException("The gamma tensor doesn't have the right shape", nameof(gamma)); + if (!x.MatchShape(dy)) throw new ArgumentException("The input and output tensors must have the same shape", nameof(dy)); + if (!x.MatchShape(dx)) throw new ArgumentException("The input the resulting error tensor must have the same shape", nameof(dx)); + switch (mode) + { + case NormalizationMode.Spatial: + BatchNormalizationBackwardData(info, x, mu, sigma2, gamma, dy, dx); + break; + case NormalizationMode.PerActivation: + BatchNormalizationBackwardData(x, mu, sigma2, gamma, dy, dx); + break; + default: throw new ArgumentOutOfRangeException(nameof(mode), "Invalid normalization mode"); + } + } + + /// + /// Calculates the gradient with respect to the gamma in a batch normalization layer + /// + /// The desired normalization mode to apply + /// The ifo on the input to process + /// The input used in the forward pass + /// A with the temporary median values calculated in the forward pass + /// A with the temporary standard deviation values calculated in the forward pass + /// The output error delta for the current layer + /// The resulting gamma gradient + public static void BatchNormalizationBackwardGamma( + NormalizationMode mode, in TensorInfo info, in Tensor x, + in Tensor mu, in Tensor sigma2, + in Tensor dy, in Tensor dgamma) + { + // Checks + if (!sigma2.MatchShape(mu)) throw new ArgumentException("Invalid standard deviation tensor shape", 
nameof(sigma2)); + if (!dgamma.MatchShape(sigma2)) throw new ArgumentException("Invalid gamma gradient tensor size", nameof(dgamma)); + if (!x.MatchShape(dy)) throw new ArgumentException("The input and output tensors must have the same shape", nameof(dy)); + switch (mode) + { + case NormalizationMode.Spatial: + BatchNormalizationBackwardGamma(info, x, mu, sigma2, dy, dgamma); + break; + case NormalizationMode.PerActivation: + BatchNormalizationBackwardGamma(x, mu, sigma2, dy, dgamma); + break; + default: throw new ArgumentOutOfRangeException(nameof(mode), "Invalid normalization mode"); + } + } + + /// + /// Calculates the gradient with respect to the beta in a batch normalization layer + /// + /// The desired normalization mode to apply + /// The ifo on the input to process + /// The output error delta for the current layer + /// The resulting beta gradient + public static void BatchNormalizationBackwardBeta( + NormalizationMode mode, in TensorInfo info, in Tensor dy, in Tensor dbeta) + { + if (info.Size != dy.Length) throw new ArgumentException("The tensor shape doesn't match the input info", nameof(dy)); + switch (mode) + { + case NormalizationMode.Spatial: + BatchNormalizationBackwardBeta(info, dy, dbeta); + break; + case NormalizationMode.PerActivation: + if (!dbeta.MatchShape(1, dy.Length)) throw new ArgumentException("The beta tensor must have a value for output feature", nameof(dbeta)); + FullyConnectedBackwardBias(dy, dbeta); // Vertical compression + break; + default: throw new ArgumentOutOfRangeException(nameof(mode), "Invalid normalization mode"); + } + } + + #region Spatial + + // Spatial forward training batch normalization + private static unsafe void BatchNormalizationForward( + in TensorInfo info, in Tensor x, + float factor, in Tensor mu, in Tensor sigma2, + in Tensor gamma, in Tensor beta, in Tensor y) + { + // Setup + if (!mu.MatchShape(1, info.Channels)) throw new ArgumentException("Invalid mu tensor size"); + int + n = x.Entities, + l = 
x.Length, + nhw = x.Entities * info.SliceSize, + slice = info.SliceSize; + float* px = x, pmu = mu, psigma2 = sigma2, py = y, pg = gamma, pb = beta; + + // Mean and variance + Parallel.For(0, info.Channels, c => + { + // Mu + float mc = 0; + float* start = px + slice * c; + for (int i = 0; i < n; i++) + { + float* offset = start + i * l; + for (int xy = 0; xy < slice; xy++) + mc += offset[xy]; + } + pmu[c] = mc = mc / nhw * factor + pmu[c] * (1 - factor); + + // Variance + float sc = 0; + for (int i = 0; i < n; i++) + { + float* offset = start + i * l; + for (int xy = 0; xy < slice; xy++) + { + float sq = offset[xy] - mc; + sc += sq * sq; + } + } + psigma2[c] = sc / nhw * factor + psigma2[c] * (1 - factor); + + }).AssertCompleted(); + + // Normalization + Parallel.For(0, info.Channels, c => + { + float + gc = pg[c], + bc = pb[c], + mc = pmu[c], + sqrt_1 = 1 / (float)Math.Sqrt(psigma2[c] + CUDNN_BN_MIN_EPSILON); + float* + start = px + slice * c, + end = py + slice * c; + for (int i = 0; i < n; i++) + { + float* + offset = start + i * l, + target = end + i * l; + for (int xy = 0; xy < slice; xy++) + { + float hat = (offset[xy] - mc) * sqrt_1; + target[xy] = gc * hat + bc; + } + } + }).AssertCompleted(); + } + + // Spatial forward inference batch normalization + private static unsafe void BatchNormalizationForward( + in TensorInfo info, in Tensor x, + in Tensor mu, in Tensor sigma2, + in Tensor gamma, in Tensor beta, in Tensor y) + { + // Setup + if (!mu.MatchShape(1, info.Channels)) throw new ArgumentException("Invalid mu tensor size"); + int + n = x.Entities, + l = x.Length, + slice = info.SliceSize; + float* px = x, pmu = mu, psigma2 = sigma2, py = y, pg = gamma, pb = beta; + Parallel.For(0, info.Channels, c => + { + float + gc = pg[c], + bc = pb[c], + mc = pmu[c], + sqrt_1 = 1 / (float)Math.Sqrt(psigma2[c] + CUDNN_BN_MIN_EPSILON); + float* + start = px + slice * c, + end = py + slice * c; + for (int i = 0; i < n; i++) + { + float* + offset = start + i * l, + target = 
end + i * l; + for (int xy = 0; xy < slice; xy++) + { + float hat = (offset[xy] - mc) * sqrt_1; + target[xy] = gc * hat + bc; + } + } + }).AssertCompleted(); + } + + // Spatial backward batch normalization + private static unsafe void BatchNormalizationBackwardData( + in TensorInfo info, in Tensor x, + in Tensor mu, in Tensor sigma2, in Tensor gamma, + in Tensor dy, in Tensor dx) + { + if (!mu.MatchShape(1, info.Channels)) throw new ArgumentException("Invalid mu tensor size"); + int + n = dx.Entities, + l = dx.Length, + nhw = x.Entities * info.SliceSize, + slice = info.SliceSize; + float* px = x, pg = gamma, pmu = mu, psigma2 = sigma2, pdy = dy, pdx = dx; + Parallel.For(0, info.Channels, c => + { + // Calculate the two summatories + float + mc = pmu[c], + sc = psigma2[c], + left = 1f / nhw * pg[c] / (float)Math.Sqrt(psigma2[c] + CUDNN_BN_MIN_EPSILON), + _2nd = 0, + _3rdRight = 0; + float* + startdy = pdy + slice * c, + startx = px + slice * c; + for (int i = 0; i < n; i++, startdy += l, startx += l) + for (int xy = 0; xy < slice; xy++) + { + float pdyicxy = startdy[xy]; + _2nd += pdyicxy; + _3rdRight += pdyicxy * (startx[xy] - mc); + } + + // Assign the backpropagated tensor + float* startdx = pdx + slice * c; + startdy = pdy + slice * c; + startx = px + slice * c; + for (int i = 0; i < n; i++, startdy += l, startx += l, startdx += l) + for (int xy = 0; xy < slice; xy++) + startdx[xy] = left * (nhw * startdy[xy] - _2nd - (startx[xy] - mc) / (sc + CUDNN_BN_MIN_EPSILON) * _3rdRight); + + }).AssertCompleted(); + } + + // Spatial batch normalization gamma gradient + private static unsafe void BatchNormalizationBackwardGamma( + in TensorInfo info, in Tensor x, + in Tensor mu, in Tensor sigma2, + in Tensor dy, in Tensor dgamma) + { + if (!mu.MatchShape(1, info.Channels)) throw new ArgumentException("Invalid mu tensor size"); + int + n = x.Entities, + l = x.Length, + slice = info.SliceSize; + float* px = x, pdy = dy, pdg = dgamma, pmu = mu, psigma2 = sigma2; + 
Parallel.For(0, info.Channels, c => + { + float gc = 0, mc = pmu[c], sc = (float)Math.Sqrt(psigma2[c] + CUDNN_BN_MIN_EPSILON); + int offset = slice * c; + for (int i = 0; i < n; i++, offset += l) + for (int xy = 0; xy < slice; xy++) + gc += pdy[offset + xy] * (px[offset + xy] - mc) / sc; + pdg[c] = gc; + }).AssertCompleted(); + } + + // Spatial batch normalization beta gradient + private static unsafe void BatchNormalizationBackwardBeta(in TensorInfo info, in Tensor dy, in Tensor dbeta) + { + // Setup + if (!dbeta.MatchShape(1, info.Channels)) throw new ArgumentException("The beta tensor must have a value for each input channel", nameof(dbeta)); + int + n = dy.Entities, + slice = info.SliceSize, + l = info.Size; + float* pdy = dy, pdbeta = dbeta; + + // Accumulate the output gradient + Parallel.For(0, info.Channels, c => + { + float bc = 0; + float* start = pdy + c * slice; + for (int i = 0; i < n; i++, start += l) + for (int xy = 0; xy < slice; xy++) + bc += start[xy]; + pdbeta[c] = bc; + }).AssertCompleted(); + } + + #endregion + + #region Per activation + + // Per-activation forward training batch normalization + private static unsafe void BatchNormalizationForward( + in Tensor x, + float factor, in Tensor mu, in Tensor sigma2, + in Tensor gamma, in Tensor beta, in Tensor y) + { + if (!mu.MatchShape(1, x.Length)) throw new ArgumentException("Invalid mu tensor size"); + int + n = x.Entities, + l = x.Length; + float* px = x, pmu = mu, psigma2 = sigma2, py = y, pg = gamma, pb = beta; + Parallel.For(0, l, j => + { + // Mean + float mi = 0; + for (int i = 0; i < n; i++) + mi += px[i * l + j]; + pmu[j] = mi = mi / n * factor + pmu[j] * (1 - factor); + + // Variance + float sl = 0; + for (int i = 0; i < n; i++) + { + float hm = px[i * l + j] - mi; + sl += hm * hm; + } + psigma2[j] = sl / n * factor + psigma2[j] * (1 - factor); + + }).AssertCompleted(); + + // Apply the batch normalization pass + Parallel.For(0, n, i => + { + int offset = i * l; + for (int j = 0; j < l; 
j++) + { + float hat = (px[offset + j] - pmu[j]) / (float)Math.Sqrt(psigma2[j] + CUDNN_BN_MIN_EPSILON); + py[offset + j] = pg[j] * hat + pb[j]; + } + }).AssertCompleted(); + } + + // Per-activation forward inference batch normalization + private static unsafe void BatchNormalizationForward( + in Tensor x, + in Tensor mu, in Tensor sigma2, + in Tensor gamma, in Tensor beta, in Tensor y) + { + if (!mu.MatchShape(1, x.Length)) throw new ArgumentException("Invalid mu tensor size"); + int + n = x.Entities, + l = x.Length; + float* px = x, pmu = mu, psigma2 = sigma2, py = y, pg = gamma, pb = beta; + Parallel.For(0, n, i => + { + int offset = i * l; + for (int j = 0; j < l; j++) + { + float hat = (px[offset + j] - pmu[j]) / (float)Math.Sqrt(psigma2[j] + CUDNN_BN_MIN_EPSILON); + py[offset + j] = pg[j] * hat + pb[j]; + } + }).AssertCompleted(); + } + + // Per-activation backward batch normalization + private static unsafe void BatchNormalizationBackwardData( + in Tensor x, + in Tensor mu, in Tensor sigma2, in Tensor gamma, + in Tensor dy, in Tensor dx) + { + if (!mu.MatchShape(1, x.Length)) throw new ArgumentException("Invalid mu tensor size"); + int + n = dx.Entities, + l = dx.Length; + float* px = x, pg = gamma, pmu = mu, psigma2 = sigma2, pdy = dy, pdx = dx; + Parallel.For(0, n, i => + { + for (int j = 0; j < l; j++) + { + float + left = 1f / n * pg[j] / (float)Math.Sqrt(psigma2[j] + CUDNN_BN_MIN_EPSILON), + _1st = n * pdy[i * l + j], + _2nd = 0, + _3rdLeft = (px[i * l + j] - pmu[j]) / (psigma2[j] + CUDNN_BN_MIN_EPSILON), + _3rdRight = 0; + for (int k = 0; k < n; k++) + { + float pdykj = pdy[k * l + j]; + _2nd += pdykj; + _3rdRight += pdykj * (px[k * l + j] - pmu[j]); + } + pdx[i * l + j] = left * (_1st - _2nd - _3rdLeft * _3rdRight); + } + }).AssertCompleted(); + } + + // Per-activation batch normalization gamma gradient + private static unsafe void BatchNormalizationBackwardGamma( + in Tensor x, + in Tensor mu, in Tensor sigma2, + in Tensor dy, in Tensor dgamma) + { + 
if (!mu.MatchShape(1, x.Length)) throw new ArgumentException("Invalid mu tensor size"); + int + n = x.Entities, + l = x.Length; + float* px = x, pdy = dy, pdg = dgamma, pmu = mu, psigma2 = sigma2; + Parallel.For(0, x.Length, j => + { + float sum = 0, sj = (float)Math.Sqrt(psigma2[j] + CUDNN_BN_MIN_EPSILON); + for (int i = 0; i < n; i++) + { + float hat = (px[i * l + j] - pmu[j]) / sj; + sum += pdy[i * l + j] * hat; + } + pdg[j] = sum; + }).AssertCompleted(); + } + + #endregion + } +} diff --git a/NeuralNetwork.NET/cuDNN/CuDnnService.cs b/NeuralNetwork.NET/cuDNN/CuDnnService.cs index 8016b18..95d78b3 100644 --- a/NeuralNetwork.NET/cuDNN/CuDnnService.cs +++ b/NeuralNetwork.NET/cuDNN/CuDnnService.cs @@ -1,8 +1,10 @@ using System; +using System.Linq; using System.Threading; using Alea; using Alea.cuDNN; using JetBrains.Annotations; +using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Services; namespace NeuralNetworkNET.cuDNN @@ -58,5 +60,62 @@ public static Dnn Instance } } } + + #region Availability check + + /// + /// Gets whether or not the cuDNN support is available on the current system + /// + public static bool IsAvailable + { + get + { + try + { + // Calling this directly could cause a crash in the loader due to the missing .dll files + return CuDnnSupportHelper.IsGpuAccelerationSupported(); + } + catch (TypeInitializationException) + { + // Missing .dll file + return false; + } + } + } + + /// + /// A private class that is used to create a new standalone type that contains the actual test method (decoupling is needed to <Module> loading crashes) + /// + private static class CuDnnSupportHelper + { + /// + /// Checks whether or not the Cuda features are currently supported + /// + public static bool IsGpuAccelerationSupported() + { + try + { + // CUDA test + Gpu gpu = Gpu.Default; + if (gpu == null) return false; + if (!Dnn.IsAvailable) return false; // cuDNN + using (DeviceMemory sample_gpu = gpu.AllocateDevice(1024)) + { + deviceptr ptr = 
sample_gpu.Ptr; + void Kernel(int i) => ptr[i] = i; + Alea.Parallel.GpuExtension.For(gpu, 0, 1024, Kernel); // JIT test + float[] sample = Gpu.CopyToHost(sample_gpu); + return Enumerable.Range(0, 1024).Select(i => i).ToArray().ContentEquals(sample); + } + } + catch + { + // Missing .dll or other errors + return false; + } + } + } + + #endregion } } diff --git a/NeuralNetwork.NET/cuDNN/GpuExtensions.cs b/NeuralNetwork.NET/cuDNN/GpuExtensions.cs index 6cc7d29..9014cf3 100644 --- a/NeuralNetwork.NET/cuDNN/GpuExtensions.cs +++ b/NeuralNetwork.NET/cuDNN/GpuExtensions.cs @@ -73,6 +73,22 @@ public static void CopyTo([NotNull] this DeviceMemory source, in Tensor d throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); } + /// + /// Copies the contents of the input instance to the target host array + /// + /// The area to read + /// The destination array to write on + public static unsafe void CopyTo([NotNull] this DeviceMemory source, [NotNull] float[] destination) + { + if (destination.Length != source.Length) throw new ArgumentException("The target array doesn't have the same size as the source GPU memory"); + fixed (void* p = destination) + { + CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy(new IntPtr(p), source.Handle, new IntPtr(sizeof(float) * destination.Length)); + if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS) + throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); + } + } + /// /// Copies the source data into the target , splitting each individual entry into its own row /// diff --git a/README.md b/README.md index ce0a650..2be27b8 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ Some complex network structures, like residual networks or inception modules , c Computation graph networks are created using the `NetworkManager.NewGraph` API, here's an example: ```C# -INeuralNetwork network = 
NetworkManager.NewGraph(TensorInfo.Image(28, 28), root => +INeuralNetwork network = NetworkManager.NewGraph(TensorInfo.Image(32,32), root => { var conv1 = root.Layer(CuDnnNetworkLayers.Convolutional((5, 5), 20, ActivationType.Identity)); var pool1 = conv1.Layer(CuDnnNetworkLayers.Pooling(ActivationType.ReLU)); diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs index 1e616cd..e886741 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs @@ -1,4 +1,6 @@ -using JetBrains.Annotations; +using System; +using System.Reflection; +using JetBrains.Annotations; using Microsoft.VisualStudio.TestTools.UnitTesting; using NeuralNetworkNET.APIs.Enums; using NeuralNetworkNET.APIs.Structs; @@ -8,6 +10,7 @@ using NeuralNetworkNET.Networks.Layers.Cpu; using NeuralNetworkNET.Networks.Layers.Cuda; using NeuralNetworkNET.Networks.Layers.Initialization; +using NeuralNetworkNET.SupervisedLearning.Optimization; namespace NeuralNetworkNET.Cuda.Unit { @@ -44,8 +47,17 @@ private static void TestForward(NetworkLayerBase cpu, NetworkLayerBase gpu, int Tensor.Free(x, z_cpu, a_cpu, z_gpu, a_gpu); } + // Sets the static property that signals whenever the backpropagation pass is being executed (needed for some layer types) + private static void SetBackpropagationProperty(bool value) + { + PropertyInfo property = typeof(NetworkTrainer).GetProperty(nameof(NetworkTrainer.BackpropagationInProgress), BindingFlags.Static | BindingFlags.Public); + if (property == null) throw new InvalidOperationException("Couldn't find the target property"); + property.SetValue(null, value); + } + private static void TestBackward(WeightedLayerBase cpu, WeightedLayerBase gpu, int samples) { + SetBackpropagationProperty(true); Tensor x = CreateRandomTensor(samples, cpu.InputInfo.Size), dy = CreateRandomTensor(samples, cpu.OutputInfo.Size); @@ -55,14 +67,16 @@ private static void 
TestBackward(WeightedLayerBase cpu, WeightedLayerBase gpu, i gpu.Forward(x, out Tensor z_gpu, out Tensor a_gpu); cpu.Backpropagate(x, z_cpu, dy, dx1, out Tensor dJdw_cpu, out Tensor dJdb_cpu); gpu.Backpropagate(x, z_gpu, dy, dx2, out Tensor dJdw_gpu, out Tensor dJdb_gpu); - Assert.IsTrue(dx1.ContentEquals(dx2)); - Assert.IsTrue(dJdw_cpu.ContentEquals(dJdw_gpu)); + Assert.IsTrue(dx1.ContentEquals(dx2, 1e-5f, 1e-5f)); + Assert.IsTrue(dJdw_cpu.ContentEquals(dJdw_gpu, 1e-4f, 1e-5f)); Assert.IsTrue(dJdb_cpu.ContentEquals(dJdb_gpu, 1e-4f, 1e-5f)); // The cuDNN ConvolutionBackwardBias is not always as precise as the CPU version Tensor.Free(x, dy, dx1, dx2, z_cpu, a_cpu, z_gpu, a_gpu, dJdw_cpu, dJdb_cpu, dJdw_gpu, dJdb_gpu); + SetBackpropagationProperty(false); } private static unsafe void TestBackward(OutputLayerBase cpu, OutputLayerBase gpu, float[,] y) { + SetBackpropagationProperty(true); int n = y.GetLength(0); fixed (float* p = y) { @@ -81,6 +95,7 @@ private static unsafe void TestBackward(OutputLayerBase cpu, OutputLayerBase gpu Assert.IsTrue(dJdb_cpu.ContentEquals(dJdb_gpu, 1e-4f, 1e-5f)); Tensor.Free(x, dy, dx1, dx2, z_cpu, a_cpu, z_gpu, a_gpu, dJdw_cpu, dJdw_gpu, dJdb_cpu, dJdb_gpu); } + SetBackpropagationProperty(false); } #endregion @@ -154,6 +169,46 @@ public void ConvolutionBackward() #endregion + #region Batch normalization + + [TestMethod] + public void PerActivationBatchNormalizationForward() + { + BatchNormalizationLayerBase + cpu = new BatchNormalizationLayer(TensorInfo.Linear(250), NormalizationMode.PerActivation, ActivationType.ReLU), + gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.PerActivation, cpu.Weights, cpu.Biases, cpu.Iteration, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType); + TestForward(cpu, gpu, 400); + } + + [TestMethod] + public void PerActivationBatchNormalizationBackward() + { + BatchNormalizationLayerBase + cpu = new BatchNormalizationLayer(TensorInfo.Linear(250), 
NormalizationMode.PerActivation, ActivationType.ReLU), + gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.PerActivation, cpu.Weights, cpu.Biases, cpu.Iteration, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType); + TestBackward(cpu, gpu, 400); + } + + [TestMethod] + public void SpatialBatchNormalizationForward() + { + BatchNormalizationLayerBase + cpu = new BatchNormalizationLayer(TensorInfo.Volume(12, 12, 13), NormalizationMode.Spatial, ActivationType.ReLU), + gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.Spatial, cpu.Weights, cpu.Biases, cpu.Iteration, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType); + TestForward(cpu, gpu, 400); + } + + [TestMethod] + public void SpatialBatchNormalizationBackward() + { + BatchNormalizationLayerBase + cpu = new BatchNormalizationLayer(TensorInfo.Volume(12, 12, 13), NormalizationMode.Spatial, ActivationType.ReLU), + gpu = new CuDnnBatchNormalizationLayer(cpu.InputInfo, NormalizationMode.Spatial, cpu.Weights, cpu.Biases, cpu.Iteration, cpu.Mu.AsSpan().Copy(), cpu.Sigma2.AsSpan().Copy(), cpu.ActivationType); + TestBackward(cpu, gpu, 400); + } + + #endregion + #region Pooling [TestMethod] diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnTest.cs index 77cc473..6ba8cb2 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnTest.cs @@ -7,6 +7,7 @@ using NeuralNetworkNET.cpuDNN; using NeuralNetworkNET.cuDNN; using NeuralNetworkNET.Extensions; +using NeuralNetworkNET.Helpers; using NeuralNetworkNET.Networks.Activations; using NeuralNetworkNET.Networks.Layers.Cpu; using NeuralNetworkNET.Networks.Layers.Initialization; @@ -153,5 +154,257 @@ public void FullyConnectedBackwardFilter() } #endregion + + #region Batch normalization + + [TestMethod] + public void PerActivationBatchNormalizationForward() + { + // Setup + Tensor x = CreateRandomTensor(400, 
250); + Tensor.NewZeroed(1, 250, out Tensor mu); + Tensor.LikeZeroed(mu, out Tensor sigma2); + Tensor.New(1, 250, out Tensor gamma); + Tensor.NewZeroed(1, 250, out Tensor beta); + for (int i = 0; i < 250; i++) gamma[i] = ThreadSafeRandom.NextFloat(); + + // Cpu + Tensor.Like(x, out Tensor y1); + CpuDnn.BatchNormalizationForward(NormalizationMode.PerActivation, TensorInfo.Linear(250), x, 1, mu, sigma2, gamma, beta, y1); + + // Gpu + Gpu gpu = Gpu.Default; + using (DeviceMemory + x_gpu = gpu.AllocateDevice(x), + y_gpu = gpu.AllocateDevice(x.Size), + gamma_gpu = gpu.AllocateDevice(gamma), + beta_gpu = gpu.AllocateDevice(beta), + run_mean = gpu.AllocateDevice(mu.Size), + run_var = gpu.AllocateDevice(mu.Size)) + { + TensorDescriptor desc = new TensorDescriptor(); + desc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, x.Length, 1, 1); + TensorDescriptor gammaBetadesc = new TensorDescriptor(); + gammaBetadesc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, x.Length, 1, 1); + Dnn.Get(gpu).BatchNormalizationForwardTraining( + BatchNormMode.PER_ACTIVATION, 1, 0, + desc, x_gpu.Ptr, desc, y_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr, + 1, run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON, + default, default); + y_gpu.CopyToHost(x.Entities, x.Length, out Tensor y2); + run_mean.CopyToHost(1, 250, out Tensor runmean); + run_var.CopyToHost(1, 250, out Tensor runvar); + + // Tests + Assert.IsTrue(y1.ContentEquals(y2, 1e-5f)); + Assert.IsTrue(mu.ContentEquals(runmean, 1e-5f)); + Assert.IsTrue(sigma2.ContentEquals(runvar, 1e-5f)); + + // Inference + CpuDnn.BatchNormalizationForward(NormalizationMode.PerActivation, TensorInfo.Linear(250), x, mu, sigma2, gamma, beta, y1); + Dnn.Get(gpu).BatchNormalizationForwardInference( + BatchNormMode.PER_ACTIVATION, 1, 0, + desc, x_gpu.Ptr, desc, y_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr, + run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON); + y_gpu.CopyTo(y2); + 
Assert.IsTrue(y1.ContentEquals(y2, 1e-3f, 1e-2f)); + Tensor.Free(mu, sigma2, gamma, beta, x, y1, y2, runmean, runvar); + } + } + + [TestMethod] + public void PerActivationBatchNormalizationBackwardBeta() + { + // Setup + Tensor + x = CreateRandomTensor(400, 250), + dy = CreateRandomTensor(400, 250); + Tensor.NewZeroed(1, 250, out Tensor mu); + Tensor.LikeZeroed(mu, out Tensor sigma2); + Tensor.Like(x, out Tensor dx1); + Tensor.New(1, 250, out Tensor dgamma1); + Tensor.Like(dgamma1, out Tensor dbeta1); + Tensor.New(1, 250, out Tensor gamma); + Tensor.NewZeroed(1, 250, out Tensor beta); + for (int i = 0; i < 250; i++) gamma[i] = ThreadSafeRandom.NextFloat(); + + // Cpu + Tensor.Like(x, out Tensor y1); + CpuDnn.BatchNormalizationForward(NormalizationMode.PerActivation, TensorInfo.Linear(250), x, 1, mu, sigma2, gamma, beta, y1); + CpuDnn.BatchNormalizationBackwardData(NormalizationMode.PerActivation, TensorInfo.Linear(250), x, mu, sigma2, gamma, dy, dx1); + CpuDnn.BatchNormalizationBackwardGamma(NormalizationMode.PerActivation, TensorInfo.Linear(250), x, mu, sigma2, dy, dgamma1); + CpuDnn.BatchNormalizationBackwardBeta(NormalizationMode.PerActivation, TensorInfo.Linear(250), dy, dbeta1); + + // Gpu + Gpu gpu = Gpu.Default; + using (DeviceMemory + x_gpu = gpu.AllocateDevice(x), + y_gpu = gpu.AllocateDevice(x.Size), + dy_gpu = gpu.AllocateDevice(dy), + dx_gpu = gpu.AllocateDevice(x.Size), + gamma_gpu = gpu.AllocateDevice(gamma), + beta_gpu = gpu.AllocateDevice(beta), + dgamma_gpu = gpu.AllocateDevice(gamma.Size), + dbeta_gpu = gpu.AllocateDevice(gamma.Size), + run_mean = gpu.AllocateDevice(mu.Size), + run_var = gpu.AllocateDevice(mu.Size)) + { + TensorDescriptor desc = new TensorDescriptor(); + desc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, x.Length, 1, 1); + TensorDescriptor gammaBetadesc = new TensorDescriptor(); + gammaBetadesc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, x.Length, 1, 1); + Dnn dnn = Dnn.Get(gpu); + 
dnn.BatchNormalizationForwardTraining( + BatchNormMode.PER_ACTIVATION, 1, 0, + desc, x_gpu.Ptr, desc, y_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr, + 1, run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON, + default, default); + dnn.BatchNormalizationBackward( + BatchNormMode.PER_ACTIVATION, 1, 0, 1, 0, + desc, x_gpu.Ptr, desc, dy_gpu.Ptr, desc, dx_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, dgamma_gpu.Ptr, dbeta_gpu.Ptr, + CpuDnn.CUDNN_BN_MIN_EPSILON, default, default); + + y_gpu.CopyToHost(x.Entities, x.Length, out Tensor y2); + dx_gpu.CopyToHost(x.Entities, x.Length, out Tensor dx2); + dgamma_gpu.CopyToHost(1, x.Length, out Tensor dgamma2); + dbeta_gpu.CopyToHost(1, x.Length, out Tensor dbeta2); + + Assert.IsTrue(y1.ContentEquals(y2, 1e-5f, 1e-4f)); + Assert.IsTrue(dx1.ContentEquals(dx2, 1e-5f, 1e-4f)); + Assert.IsTrue(dgamma1.ContentEquals(dgamma2, 1e-5f, 1e-4f)); + Assert.IsTrue(dbeta1.ContentEquals(dbeta2, 1e-5f, 1e-4f)); + Tensor.Free(x, dy, mu, sigma2, dx1, dgamma1, dbeta1, gamma, beta, y1, y2, dx2, dgamma2, dbeta2); + } + } + + [TestMethod] + public void SpatialBatchNormalizationForward() + { + // Setup + Tensor x = CreateRandomTensor(400, 12 * 12 * 13); + Tensor.NewZeroed(1, 13, out Tensor mu); + Tensor.LikeZeroed(mu, out Tensor sigma2); + Tensor.New(1, 13, out Tensor gamma); + Tensor.NewZeroed(1, 13, out Tensor beta); + for (int i = 0; i < 13; i++) gamma[i] = ThreadSafeRandom.NextFloat(); + + // Cpu + Tensor.Like(x, out Tensor y1); + CpuDnn.BatchNormalizationForward(NormalizationMode.Spatial, TensorInfo.Volume(12, 12, 13), x, 1, mu, sigma2, gamma, beta, y1); + + // Gpu + Gpu gpu = Gpu.Default; + using (DeviceMemory + x_gpu = gpu.AllocateDevice(x), + y_gpu = gpu.AllocateDevice(x.Size), + gamma_gpu = gpu.AllocateDevice(gamma), + beta_gpu = gpu.AllocateDevice(beta), + run_mean = gpu.AllocateDevice(mu.Size), + run_var = gpu.AllocateDevice(mu.Size)) + { + TensorDescriptor desc = new TensorDescriptor(); + desc.Set4D(DataType.FLOAT, 
TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, 13, 12, 12); + TensorDescriptor gammaBetadesc = new TensorDescriptor(); + gammaBetadesc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, 13, 1, 1); + Dnn.Get(gpu).BatchNormalizationForwardTraining( + BatchNormMode.SPATIAL, 1, 0, + desc, x_gpu.Ptr, desc, y_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr, + 1, run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON, + default, default); + y_gpu.CopyToHost(x.Entities, x.Length, out Tensor y2); + run_mean.CopyToHost(1, 13, out Tensor runmean); + run_var.CopyToHost(1, 13, out Tensor runvar); + + // Tests + Assert.IsTrue(y1.ContentEquals(y2, 1e-5f, 1e-5f)); + Assert.IsTrue(mu.ContentEquals(runmean, 1e-5f, 1e-5f)); + Assert.IsTrue(sigma2.ContentEquals(runvar, 1e-5f, 1e-5f)); + + // Inference + CpuDnn.BatchNormalizationForward(NormalizationMode.Spatial, TensorInfo.Volume(12, 12, 13), x, mu, sigma2, gamma, beta, y1); + Dnn.Get(gpu).BatchNormalizationForwardInference( + BatchNormMode.SPATIAL, 1, 0, + desc, x_gpu.Ptr, desc, y_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr, + run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON); + y_gpu.CopyTo(y2); + Assert.IsTrue(y1.ContentEquals(y2, 1e-3f, 1e-3f)); + Tensor.Free(mu, sigma2, gamma, beta, x, y1, y2, runmean, runvar); + } + } + + [TestMethod] + public void SpatialBatchNormalizationBackwardBeta() + { + // Setup + Tensor + x = CreateRandomTensor(400, 12 * 12 * 13), + dy = CreateRandomTensor(400, 12 * 12 * 13); + Tensor.NewZeroed(1, 13, out Tensor mu); + Tensor.LikeZeroed(mu, out Tensor sigma2); + Tensor.Like(x, out Tensor dx1); + Tensor.New(1, 13, out Tensor dgamma1); + Tensor.Like(dgamma1, out Tensor dbeta1); + Tensor.New(1, 13, out Tensor gamma); + Tensor.NewZeroed(1, 13, out Tensor beta); + for (int i = 0; i < 13; i++) gamma[i] = ThreadSafeRandom.NextFloat(); + + // Cpu + Tensor.Like(x, out Tensor y1); + CpuDnn.BatchNormalizationForward(NormalizationMode.Spatial, TensorInfo.Volume(12, 12, 13), x, 1, mu, sigma2, 
gamma, beta, y1); + CpuDnn.BatchNormalizationBackwardData(NormalizationMode.Spatial, TensorInfo.Volume(12, 12, 13), x, mu, sigma2, gamma, dy, dx1); + CpuDnn.BatchNormalizationBackwardGamma(NormalizationMode.Spatial, TensorInfo.Volume(12, 12, 13), x, mu, sigma2, dy, dgamma1); + CpuDnn.BatchNormalizationBackwardBeta(NormalizationMode.Spatial, TensorInfo.Volume(12, 12, 13), dy, dbeta1); + + // Gpu + Gpu gpu = Gpu.Default; + using (DeviceMemory + x_gpu = gpu.AllocateDevice(x), + y_gpu = gpu.AllocateDevice(x.Size), + dy_gpu = gpu.AllocateDevice(dy), + dx_gpu = gpu.AllocateDevice(x.Size), + gamma_gpu = gpu.AllocateDevice(gamma), + beta_gpu = gpu.AllocateDevice(beta), + dgamma_gpu = gpu.AllocateDevice(gamma.Size), + dbeta_gpu = gpu.AllocateDevice(gamma.Size), + run_mean = gpu.AllocateDevice(mu.Size), + run_var = gpu.AllocateDevice(mu.Size)) + { + TensorDescriptor desc = new TensorDescriptor(); + desc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, 13, 12, 12); + TensorDescriptor gammaBetadesc = new TensorDescriptor(); + gammaBetadesc.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, 13, 1, 1); + Dnn dnn = Dnn.Get(gpu); + dnn.BatchNormalizationForwardTraining( + BatchNormMode.SPATIAL, 1, 0, + desc, x_gpu.Ptr, desc, y_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, beta_gpu.Ptr, + 1, run_mean.Ptr, run_var.Ptr, CpuDnn.CUDNN_BN_MIN_EPSILON, + default, default); + dnn.BatchNormalizationBackward( + BatchNormMode.SPATIAL, 1, 0, 1, 0, + desc, x_gpu.Ptr, desc, dy_gpu.Ptr, desc, dx_gpu.Ptr, + gammaBetadesc, gamma_gpu.Ptr, dgamma_gpu.Ptr, dbeta_gpu.Ptr, + CpuDnn.CUDNN_BN_MIN_EPSILON, default, default); + + y_gpu.CopyToHost(x.Entities, x.Length, out Tensor y2); + dx_gpu.CopyToHost(x.Entities, x.Length, out Tensor dx2); + dgamma_gpu.CopyToHost(1, 13, out Tensor dgamma2); + dbeta_gpu.CopyToHost(1, 13, out Tensor dbeta2); + + Assert.IsTrue(y1.ContentEquals(y2, 1e-5f, 1e-4f)); + Assert.IsTrue(dx1.ContentEquals(dx2, 1e-5f, 1e-4f)); + 
Assert.IsTrue(dgamma1.ContentEquals(dgamma2, 1e-4f, 1e-4f)); + Assert.IsTrue(dbeta1.ContentEquals(dbeta2, 1e-5f, 1e-4f)); + Tensor.Free(x, dy, mu, sigma2, dx1, dgamma1, dbeta1, gamma, beta, y1, y2, dx2, dgamma2, dbeta2); + } + } + + #endregion } } diff --git a/Unit/NeuralNetwork.NET.Unit/GraphNetworkTest.cs b/Unit/NeuralNetwork.NET.Unit/GraphNetworkTest.cs index bbddda5..ebdd8ef 100644 --- a/Unit/NeuralNetwork.NET.Unit/GraphNetworkTest.cs +++ b/Unit/NeuralNetwork.NET.Unit/GraphNetworkTest.cs @@ -328,16 +328,17 @@ public void JsonMetadataSerialization1() var conv1 = root.Layer(NetworkLayers.Convolutional((5, 5), 10, ActivationType.ReLU)); var pool1 = conv1.Layer(NetworkLayers.Pooling(ActivationType.Sigmoid)); - var _1x1 = pool1.Layer(NetworkLayers.Convolutional((1, 1), 20, ActivationType.ReLU)); + var _1x1 = pool1.Layer(NetworkLayers.Convolutional((1, 1), 20, ActivationType.Identity)); var _3x3reduce1x1 = pool1.Layer(NetworkLayers.Convolutional((1, 1), 20, ActivationType.ReLU)); - var _3x3 = _3x3reduce1x1.Layer(NetworkLayers.Convolutional((1, 1), 20, ActivationType.ReLU)); + var _3x3 = _3x3reduce1x1.Layer(NetworkLayers.Convolutional((1, 1), 20, ActivationType.Identity)); var split = _3x3.TrainingBranch(); var fct = split.Layer(NetworkLayers.FullyConnected(100, ActivationType.LeCunTanh)); _ = fct.Layer(NetworkLayers.Softmax(10)); var stack = _1x1.DepthConcatenation(_3x3); - var fc1 = stack.Layer(NetworkLayers.FullyConnected(100, ActivationType.Sigmoid)); + var bn = stack.Layer(NetworkLayers.BatchNormalization(NormalizationMode.Spatial, ActivationType.ReLU)); + var fc1 = bn.Layer(NetworkLayers.FullyConnected(100, ActivationType.Sigmoid)); _ = fc1.Layer(NetworkLayers.Softmax(10)); }); String json = network.SerializeMetadataAsJson(); diff --git a/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs b/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs index e79d88f..5235ff4 100644 --- a/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs +++ 
b/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs @@ -69,7 +69,8 @@ public void NetworkSerialization() NetworkLayers.Convolutional((10, 10), 20, ActivationType.Identity), NetworkLayers.Pooling(ActivationType.ReLU), NetworkLayers.Convolutional((10, 10), 20, ActivationType.Identity), - NetworkLayers.Pooling(ActivationType.ReLU), + NetworkLayers.Pooling(ActivationType.Identity), + NetworkLayers.BatchNormalization(NormalizationMode.Spatial, ActivationType.ReLU), NetworkLayers.FullyConnected(125, ActivationType.Tanh), NetworkLayers.Softmax(133)); using (MemoryStream stream = new MemoryStream()) @@ -90,7 +91,8 @@ public void JsonMetadataSerialization() NetworkLayers.Convolutional((10, 10), 20, ActivationType.Identity), NetworkLayers.Pooling(ActivationType.ReLU), NetworkLayers.Convolutional((10, 10), 20, ActivationType.Identity), - NetworkLayers.Pooling(ActivationType.ReLU), + NetworkLayers.Pooling(ActivationType.Identity), + NetworkLayers.BatchNormalization(NormalizationMode.Spatial, ActivationType.ReLU), NetworkLayers.FullyConnected(125, ActivationType.Tanh), NetworkLayers.Softmax(133)); String metadata1 = network.SerializeMetadataAsJson();