diff --git a/src/ocr_captcha/components/data_ingestion.py b/src/ocr_captcha/components/data_ingestion.py index 0515603..6f6b486 100644 --- a/src/ocr_captcha/components/data_ingestion.py +++ b/src/ocr_captcha/components/data_ingestion.py @@ -19,13 +19,13 @@ class DataIngestionConfig: test_data_path_x: str = os.path.join("artifact", "test_x.csv") test_data_path_y: str = os.path.join("artifact", "test_y.csv") unique_charachters: str = os.path.join("artifact", "unique_char.csv") - + class DataIngestion: def __init__(self): self.ingestion_config = DataIngestionConfig() - def initiate_data_ingestion (self): + def initiate_data_ingestion(self): logging.info("Starting data ingestion") try: logging.info("Reading The data from the folders...") @@ -87,6 +87,7 @@ def initiate_data_ingestion (self): logging.info() raise customexception(e, sys) + if __name__ == '__main__': - obj=DataIngestion() + obj = DataIngestion() obj.initiate_data_ingestion() \ No newline at end of file diff --git a/src/ocr_captcha/components/model_trainer.py b/src/ocr_captcha/components/model_trainer.py index 8a3c1d2..a840fff 100644 --- a/src/ocr_captcha/components/model_trainer.py +++ b/src/ocr_captcha/components/model_trainer.py @@ -1,10 +1,7 @@ import pandas as pd -import numpy as np from src.ocr_captcha.logger.logging import logging from src.ocr_captcha.exception.exception import customexception import os -os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' -import shutil import sys from pathlib import Path from dataclasses import dataclass @@ -16,26 +13,25 @@ from src.ocr_captcha.utils.utils import build_model import keras from tensorflow.keras.callbacks import ModelCheckpoint -from keras.models import load_model import json -from src.ocr_captcha.utils.utils import save_object - - - +os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' @dataclass class ModelTrainerConfig: - trained_model_file_path: str = os.path.join('artifact','model.weights.h5') - char_to_num:str = os.path.join("artifact", "char_to_num.json") - num_to_char:str = os.path.join("artifact", "num_to_char.json") + trained_model_file_path: str = os.path.join('artifact', 'model.weights.h5') + char_to_num: str = os.path.join("artifact", "char_to_num.json") + num_to_char: str = os.path.join("artifact", "num_to_char.json") + + class ModelTrainer: def __init__(self, img_height, img_width, batch_size): self.model_trainer_config = ModelTrainerConfig() self.img_height = img_height self.img_width = img_width self.batch_size = batch_size - def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_path_y, unique_chars, pre_trained: bool = False, model_path:str = None): + + def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_path_y, unique_chars, pre_trained: bool = False, model_path: str = None): try: logging.info("Getting the train-test | feature-labels and unique charachters from the artifacts...") train_x = pd.read_csv(train_path_x, header=None)[0] @@ -51,12 +47,6 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p ) logging.info("Mappings created") logging.info("storing the mapping...") - - - - - - os.makedirs(os.path.dirname(os.path.join(self.model_trainer_config.char_to_num)), exist_ok=True) saved_data = {'config': char_to_num.get_config(), 'weights': char_to_num.get_weights()} with open(self.model_trainer_config.char_to_num, "w") as json_file: @@ -65,15 +55,7 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p os.makedirs(os.path.dirname(os.path.join(self.model_trainer_config.num_to_char)), exist_ok=True) saved_data = {'config': num_to_char.get_config(), 'weights': num_to_char.get_weights()} with open(self.model_trainer_config.num_to_char, "w") as json_file: - json.dump(saved_data, json_file) - - - - - - - - + json.dump(saved_data, json_file) logging.info("saved the mappings") partial_encode_single_sample_training = partial(encode_single_sample_training, img_height=self.img_height, img_width=self.img_width, char_to_num=char_to_num) logging.info("Creating the training Dataset") @@ -98,7 +80,7 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p else: logging.info("GPU not found, proceding with CPU...") model = build_model(self.img_width, self.img_height, char_to_num) - if pre_trained==True: + if pre_trained: model.load_weights(model_path) logging.info("Starting with pre trained model") else: @@ -140,7 +122,7 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p if __name__ == '__main__': trainer = ModelTrainer(100, 200, 32) train_path_x = Path("artifact/train_x.csv") - train_path_y= Path("artifact/train_y.csv") + train_path_y = Path("artifact/train_y.csv") test_path_x = Path("artifact/test_x.csv") test_path_y = Path("artifact/test_y.csv") unique_chars = Path("artifact/unique_char.csv") diff --git a/src/ocr_captcha/exception/exception.py b/src/ocr_captcha/exception/exception.py index 555fbc7..46c5b48 100644 --- a/src/ocr_captcha/exception/exception.py +++ b/src/ocr_captcha/exception/exception.py @@ -1,8 +1,9 @@ import sys + class customexception(Exception): - def __init__(self, error_message, error_details:sys): + def __init__(self, error_message, error_details: sys): self.error_message = error_message _, _, exc_tb = error_details.exc_info() diff --git a/src/ocr_captcha/logger/logging.py b/src/ocr_captcha/logger/logging.py index d5326ec..71a55d0 100644 --- a/src/ocr_captcha/logger/logging.py +++ b/src/ocr_captcha/logger/logging.py @@ -1,9 +1,6 @@ import logging import os from datetime import datetime - - - LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log" log_path = os.path.join(os.getcwd(), "logs") os.makedirs(log_path, exist_ok=True) diff --git a/src/ocr_captcha/pipeline/prediction_pipeline.py b/src/ocr_captcha/pipeline/prediction_pipeline.py index 588122c..7ffb8d4 100644 --- a/src/ocr_captcha/pipeline/prediction_pipeline.py +++ b/src/ocr_captcha/pipeline/prediction_pipeline.py @@ -1,20 +1,21 @@ import os -os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' +import numpy as np +from pathlib import Path import sys import json +import keras from src.ocr_captcha.logger.logging import logging from src.ocr_captcha.exception.exception import customexception from keras.models import load_model -import keras from keras.src.layers.preprocessing.string_lookup import StringLookup from src.ocr_captcha.utils.utils import build_model, generate_image_id, encode_single_sample_testing, decode_batch_predictions -import numpy as np -from pathlib import Path +os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' class PredictionPipline: def __init__(self, model_path): self.model_path = model_path + def predict(self, img): try: with open(Path("artifact/char_to_num.json"), "r") as json_file: diff --git a/src/ocr_captcha/pipeline/training_pipeline.py b/src/ocr_captcha/pipeline/training_pipeline.py index 4bf2139..e77c6af 100644 --- a/src/ocr_captcha/pipeline/training_pipeline.py +++ b/src/ocr_captcha/pipeline/training_pipeline.py @@ -14,11 +14,10 @@ class TrainingPipeline: - def __init__(self, pretrained:bool, model_path: str): + def __init__(self, pretrained: bool, model_path: str): self.pre_trained = pretrained self.model_path = os.path.join("artifact", model_path) - def start_data_ingestion(self): try: obj = DataIngestion() @@ -32,7 +31,6 @@ def start_data_ingestion(self): except Exception as e: raise customexception(e, sys) - def initiate_training(self): try: with mlflow.start_run() as run: @@ -49,16 +47,15 @@ def initiate_training(self): except Exception as e: raise customexception(e, sys) - def train(self, train_data_path_x, train_data_path_y, test_data_path_x, test_data_path_y, unique_charachters): try: - trainer = ModelTrainer(self.img_height, self.img_width, self.batch_size) - model_path, history = trainer.initate_model_training(train_data_path_x, train_data_path_y, test_data_path_x, test_data_path_y, unique_charachters, self.pre_trained, self.model_path) - mlflow.log_artifact(model_path) - for epoch, val_loss_value in enumerate(history.history["val_loss"]): - mlflow.log_metric("val_loss_epoch_" + str(epoch), val_loss_value) - for epoch, loss_value in enumerate(history.history["loss"]): - mlflow.log_metric("loss_epoch_" + str(epoch), loss_value) + trainer = ModelTrainer(self.img_height, self.img_width, self.batch_size) + model_path, history = trainer.initate_model_training(train_data_path_x, train_data_path_y, test_data_path_x, test_data_path_y, unique_charachters, self.pre_trained, self.model_path) + mlflow.log_artifact(model_path) + for epoch, val_loss_value in enumerate(history.history["val_loss"]): + mlflow.log_metric("val_loss_epoch_" + str(epoch), val_loss_value) + for epoch, loss_value in enumerate(history.history["loss"]): + mlflow.log_metric("loss_epoch_" + str(epoch), loss_value) except Exception as e: logging.info(e) raise customexception(e, sys) diff --git a/src/ocr_captcha/utils/utils.py b/src/ocr_captcha/utils/utils.py index a1de60d..3ea2cbb 100644 --- a/src/ocr_captcha/utils/utils.py +++ b/src/ocr_captcha/utils/utils.py @@ -3,7 +3,7 @@ from keras.src.layers.preprocessing.string_lookup import StringLookup from keras import layers import keras -from tensorflow.python.framework.ops import SymbolicTensor, EagerTensor +from tensorflow.python.framework.ops import SymbolicTensor from tensorflow.python.framework.sparse_tensor import SparseTensor from typing import Dict, Union from keras.src.models.functional import Functional @@ -21,7 +21,7 @@ def generate_image_id(): return str(image_id) -def split_data(images: np.ndarray, labels: np.ndarray, train_size: float = 0.8, shuffle: bool =True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +def split_data(images: np.ndarray, labels: np.ndarray, train_size: float = 0.8, shuffle: bool = True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: # 1. Get the total size of the dataset size = len(images) # 2. Make an indices array and shuffle it, if required @@ -35,7 +35,8 @@ def split_data(images: np.ndarray, labels: np.ndarray, train_size: float = 0.8, x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]] return x_train, x_valid, y_train, y_valid -def encode_single_sample_training(img_path: str, label: str, img_height: int, img_width: int, char_to_num:StringLookup) -> Dict[str, Union[tf.Tensor, tf.Tensor]]: + +def encode_single_sample_training(img_path: str, label: str, img_height: int, img_width: int, char_to_num: StringLookup) -> Dict[str, Union[tf.Tensor, tf.Tensor]]: # 1. Read image img = tf.io.read_file(img_path) # 2. Decode and convert to grayscale @@ -53,14 +54,16 @@ def encode_single_sample_training(img_path: str, label: str, img_height: int, im # 7. Return a dict as our model is expecting two inputs return {"image": img, "label": label} + def ctc_label_dense_to_sparse(labels: SymbolicTensor, label_lengths: SymbolicTensor) -> SparseTensor: - label_shape = tf.shape(labels) # B, T, C + label_shape = tf.shape(labels) # B, T, C # print("label_shape:", label_shape) num_batches = tf.stack([label_shape[0]]) # print("num_batches:", num_batches) max_num_labels = tf.stack([label_shape[1]]) # print("max_num_labels:", max_num_labels) - def range_less_than(old_input, current_input): + + def range_less_than(old_input, current_input): ''' Creates a boolean mask for the label_lengths we need to pay attention to ''' @@ -93,6 +96,7 @@ def range_less_than(old_input, current_input): tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64) ) + def ctc_batch_cost(y_true: SymbolicTensor, y_pred: SymbolicTensor, input_length: SymbolicTensor, label_length: SymbolicTensor) -> SymbolicTensor: label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32) # print(f"label_length: {label_length}") @@ -111,7 +115,7 @@ def ctc_batch_cost(y_true: SymbolicTensor, y_pred: SymbolicTensor, input_length: ''' Generating a concentrated sparse matrix ''' - y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0 ,2]) + keras.backend.epsilon()) + y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon()) # print(f"y_pred: {y_pred}") ''' Add a small value of epsilon before taking log...so as to not take the log of 0 by mistake @@ -120,13 +124,12 @@ def ctc_batch_cost(y_true: SymbolicTensor, y_pred: SymbolicTensor, input_length: returns the actual loss value add a singleton dim to the output...so as to represent the batch size ''' - return tf.expand_dims(tf.compat.v1.nn.ctc_loss( - inputs=y_pred, labels=sparse_labels, sequence_length=input_length - ), 1, - ) + return tf.expand_dims(tf.compat.v1.nn.ctc_loss(inputs=y_pred, labels=sparse_labels, sequence_length=input_length), 1, + ) + class CTCLayer(layers.Layer): - def __init__(self, trainable=True, name: str=None, dtype=None): + def __init__(self, trainable = True, name: str = None): super().__init__(name=name) self.trainable = trainable self.loss_fn = ctc_batch_cost @@ -143,7 +146,7 @@ def call(self, y_true: SymbolicTensor, y_pred: SymbolicTensor) -> SymbolicTensor self.add_loss(loss) # At test time, just return the computed predictions return y_pred - + def get_config(self): config = super().get_config() config.update({'trainable': self.trainable}) @@ -161,7 +164,7 @@ def build_model(img_width: int, img_height: int, char_to_num: StringLookup) -> F x = layers.Conv2D(128, (3, 3), activation="relu", kernel_initializer="he_normal", padding="same", name="conv2",)(x) x = layers.MaxPooling2D((2, 2), name="pool2")(x) # reshaping just in case - new_shape = ((img_width//4), (img_height//4) * 128) # each spatial location will be denoted by 64 values + new_shape = ((img_width // 4), (img_height // 4) * 128) # each spatial location will be denoted by 64 values x = layers.Reshape(target_shape=new_shape, name="reshape")(x) x = layers.Dense(128, activation="relu", name="dense1")(x) x = layers.Dropout(0.2)(x) @@ -201,6 +204,7 @@ def build_model(img_width: int, img_height: int, char_to_num: StringLookup) -> F # with open("artifact\optimizer.json", "w") as opt_json_file: # opt_json_file.write(optimizer.get_config()) + def encode_single_sample_testing(img_path: str, img_height: int, img_width: int, ids: str) -> Dict[str, Union[tf.Tensor, tf.Tensor]]: # 1. Read image img = tf.io.read_file(img_path) @@ -218,6 +222,7 @@ def encode_single_sample_testing(img_path: str, img_height: int, img_width: int, # 7. Return a dict as our model is expecting two inputs return {"image": img, "ids": ids} + def ctc_decode(y_pred, input_length, greedy, beam_width=100, top_paths=1) -> Tuple[list, tf.Tensor]: input_shape = tf.shape(y_pred) num_samples, num_steps = input_shape[0], input_shape[1] @@ -241,7 +246,8 @@ def ctc_decode(y_pred, input_length, greedy, beam_width=100, top_paths=1) -> Tup decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1)) return (decoded_dense, log_prob) -def decode_batch_predictions(pred, max_length:int, num_to_char: StringLookup) -> List: + +def decode_batch_predictions(pred, max_length: int, num_to_char: StringLookup) -> List: input_len = np.ones(pred.shape[0]) * pred.shape[1] # results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :max_length] results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][ @@ -262,7 +268,7 @@ def save_object(file_path, obj): pickle.dump(obj, file_obj) except Exception as e: raise customexception(e, sys) - + def load_object(file_path): try: @@ -271,20 +277,3 @@ def load_object(file_path): except Exception as e: logging.info("Exception Error occured in loading object") raise customexception(e, sys) - - - - - - - - - - - - - - - - - diff --git a/tox.ini b/tox.ini index fb2e0cd..32a29ef 100644 --- a/tox.ini +++ b/tox.ini @@ -14,7 +14,7 @@ commands = # stop the build if there are Python syntax errors or undefined names flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 src --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 src --count --exit-zero --max-complexity=10 --max-line-length=150 --select=E --ignore=E501,E251 --statistics # type linting mypy src/ # pytest unit