update
Azazel0203 committed Mar 31, 2024
1 parent bcc798a commit 3a36fae
Showing 8 changed files with 52 additions and 84 deletions.
7 changes: 4 additions & 3 deletions src/ocr_captcha/components/data_ingestion.py
@@ -19,13 +19,13 @@ class DataIngestionConfig:
test_data_path_x: str = os.path.join("artifact", "test_x.csv")
test_data_path_y: str = os.path.join("artifact", "test_y.csv")
unique_charachters: str = os.path.join("artifact", "unique_char.csv")


class DataIngestion:
def __init__(self):
self.ingestion_config = DataIngestionConfig()

def initiate_data_ingestion (self):
def initiate_data_ingestion(self):
logging.info("Starting data ingestion")
try:
logging.info("Reading The data from the folders...")
@@ -87,6 +87,7 @@ def initiate_data_ingestion (self):
logging.info()
raise customexception(e, sys)


if __name__ == '__main__':
obj=DataIngestion()
obj = DataIngestion()
obj.initiate_data_ingestion()
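
For readers skimming the diff: the ingestion component follows a dataclass-as-config pattern, with each artifact path declared as a field default built via os.path.join. A minimal, runnable sketch of that pattern is below; the test/unique fields mirror the hunk above, while the train_* field names and the makedirs call are assumptions inferred from the rest of the pipeline, not lines shown in this commit.

```python
import os
from dataclasses import dataclass


@dataclass
class DataIngestionConfig:
    # Artifact locations, built with os.path.join so they stay OS-agnostic.
    train_data_path_x: str = os.path.join("artifact", "train_x.csv")   # assumed name
    train_data_path_y: str = os.path.join("artifact", "train_y.csv")   # assumed name
    test_data_path_x: str = os.path.join("artifact", "test_x.csv")
    test_data_path_y: str = os.path.join("artifact", "test_y.csv")
    unique_charachters: str = os.path.join("artifact", "unique_char.csv")


config = DataIngestionConfig()
# Components typically create the artifact folder up front, then write CSVs into it.
os.makedirs(os.path.dirname(config.train_data_path_x), exist_ok=True)
print(config.train_data_path_x)  # e.g. "artifact/train_x.csv" on POSIX systems
```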
40 changes: 11 additions & 29 deletions src/ocr_captcha/components/model_trainer.py
@@ -1,10 +1,7 @@
import pandas as pd
import numpy as np
from src.ocr_captcha.logger.logging import logging
from src.ocr_captcha.exception.exception import customexception
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import shutil
import sys
from pathlib import Path
from dataclasses import dataclass
@@ -16,26 +13,25 @@
from src.ocr_captcha.utils.utils import build_model
import keras
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.models import load_model
import json
from src.ocr_captcha.utils.utils import save_object



os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'


@dataclass
class ModelTrainerConfig:
trained_model_file_path: str = os.path.join('artifact','model.weights.h5')
char_to_num:str = os.path.join("artifact", "char_to_num.json")
num_to_char:str = os.path.join("artifact", "num_to_char.json")
trained_model_file_path: str = os.path.join('artifact', 'model.weights.h5')
char_to_num: str = os.path.join("artifact", "char_to_num.json")
num_to_char: str = os.path.join("artifact", "num_to_char.json")


class ModelTrainer:
def __init__(self, img_height, img_width, batch_size):
self.model_trainer_config = ModelTrainerConfig()
self.img_height = img_height
self.img_width = img_width
self.batch_size = batch_size
def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_path_y, unique_chars, pre_trained: bool = False, model_path:str = None):

def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_path_y, unique_chars, pre_trained: bool = False, model_path: str = None):
try:
logging.info("Getting the train-test | feature-labels and unique charachters from the artifacts...")
train_x = pd.read_csv(train_path_x, header=None)[0]
@@ -51,12 +47,6 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p
)
logging.info("Mappings created")
logging.info("storing the mapping...")






os.makedirs(os.path.dirname(os.path.join(self.model_trainer_config.char_to_num)), exist_ok=True)
saved_data = {'config': char_to_num.get_config(), 'weights': char_to_num.get_weights()}
with open(self.model_trainer_config.char_to_num, "w") as json_file:
@@ -65,15 +55,7 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p
os.makedirs(os.path.dirname(os.path.join(self.model_trainer_config.num_to_char)), exist_ok=True)
saved_data = {'config': num_to_char.get_config(), 'weights': num_to_char.get_weights()}
with open(self.model_trainer_config.num_to_char, "w") as json_file:
json.dump(saved_data, json_file)








json.dump(saved_data, json_file)
logging.info("saved the mappings")
partial_encode_single_sample_training = partial(encode_single_sample_training, img_height=self.img_height, img_width=self.img_width, char_to_num=char_to_num)
logging.info("Creating the training Dataset")
@@ -98,7 +80,7 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p
else:
logging.info("GPU not found, proceding with CPU...")
model = build_model(self.img_width, self.img_height, char_to_num)
if pre_trained==True:
if pre_trained:
model.load_weights(model_path)
logging.info("Starting with pre trained model")
else:
@@ -140,7 +122,7 @@ def initate_model_training(self, train_path_x, train_path_y, test_path_x, test_p
if __name__ == '__main__':
trainer = ModelTrainer(100, 200, 32)
train_path_x = Path("artifact/train_x.csv")
train_path_y= Path("artifact/train_y.csv")
train_path_y = Path("artifact/train_y.csv")
test_path_x = Path("artifact/test_x.csv")
test_path_y = Path("artifact/test_y.csv")
unique_chars = Path("artifact/unique_char.csv")
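
Context for the hunk above: the trainer persists the char_to_num / num_to_char StringLookup layers as a {'config': ..., 'weights': ...} JSON blob so the prediction pipeline can rebuild the same character mappings later. How much of a preprocessing layer's state get_config()/get_weights() actually capture varies across Keras versions, so a common, more robust alternative is to persist the raw vocabulary itself. The following is a minimal sketch of that round trip, not the repo's code; the file name and toy vocabulary are illustrative.

```python
import json
import keras

# Forward mapping (char -> index) and its inverse, as the trainer builds them.
vocab = sorted(set("abcd1234"))  # toy character set, not the repo's
char_to_num = keras.layers.StringLookup(vocabulary=vocab, mask_token=None)
num_to_char = keras.layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)

# Persist only the plain vocabulary; it is enough to rebuild both layers later.
with open("char_vocab.json", "w") as f:        # illustrative path
    json.dump(vocab, f)

# ...later, e.g. inside the prediction pipeline:
with open("char_vocab.json") as f:
    restored_vocab = json.load(f)
restored_char_to_num = keras.layers.StringLookup(
    vocabulary=restored_vocab, mask_token=None
)
restored_num_to_char = keras.layers.StringLookup(
    vocabulary=restored_vocab, mask_token=None, invert=True
)
```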
3 changes: 2 additions & 1 deletion src/ocr_captcha/exception/exception.py
@@ -1,8 +1,9 @@
import sys


class customexception(Exception):

def __init__(self, error_message, error_details:sys):
def __init__(self, error_message, error_details: sys):
self.error_message = error_message
_, _, exc_tb = error_details.exc_info()

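
The hunk only shows the head of customexception. The usual shape of this pattern is to pull the offending file name and line number off the traceback and fold them into the message; the body below the visible lines is an assumption about how such a class is typically completed, not necessarily the repo's exact code.

```python
import sys


class customexception(Exception):
    """Exception wrapper that records where the original error occurred."""

    def __init__(self, error_message, error_details: sys):
        super().__init__(str(error_message))
        _, _, exc_tb = error_details.exc_info()
        self.error_message = error_message
        self.lineno = exc_tb.tb_lineno                       # line that raised
        self.file_name = exc_tb.tb_frame.f_code.co_filename  # file that raised

    def __str__(self):
        return (
            f"Error occurred in [{self.file_name}] "
            f"at line [{self.lineno}]: {self.error_message}"
        )


# Usage sketch:
try:
    1 / 0
except Exception as e:
    wrapped = customexception(e, sys)
    print(wrapped)  # Error occurred in [...] at line [...]: division by zero
```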
3 changes: 0 additions & 3 deletions src/ocr_captcha/logger/logging.py
@@ -1,9 +1,6 @@
import logging
import os
from datetime import datetime



LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
log_path = os.path.join(os.getcwd(), "logs")
os.makedirs(log_path, exist_ok=True)
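
The hunk shows only the log-file naming and directory setup; the part that wires it into logging.basicConfig sits below the visible lines. A sketch of how such a module is typically completed follows; the format string and level are assumptions, not the repo's exact values.

```python
import logging
import os
from datetime import datetime

# Timestamped log file inside a ./logs directory, as in the hunk above.
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
log_path = os.path.join(os.getcwd(), "logs")
os.makedirs(log_path, exist_ok=True)

LOG_FILEPATH = os.path.join(log_path, LOG_FILE)

logging.basicConfig(
    filename=LOG_FILEPATH,
    level=logging.INFO,
    format="[%(asctime)s] %(lineno)d %(name)s - %(levelname)s - %(message)s",
)

logging.info("logger initialised")  # lands in logs/<timestamp>.log
```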
9 changes: 5 additions & 4 deletions src/ocr_captcha/pipeline/prediction_pipeline.py
@@ -1,20 +1,21 @@
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import numpy as np
from pathlib import Path
import sys
import json
import keras
from src.ocr_captcha.logger.logging import logging
from src.ocr_captcha.exception.exception import customexception
from keras.models import load_model
import keras
from keras.src.layers.preprocessing.string_lookup import StringLookup
from src.ocr_captcha.utils.utils import build_model, generate_image_id, encode_single_sample_testing, decode_batch_predictions
import numpy as np
from pathlib import Path
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'


class PredictionPipline:
def __init__(self, model_path):
self.model_path = model_path

def predict(self, img):
try:
with open(Path("artifact/char_to_num.json"), "r") as json_file:
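
PredictionPipline rebuilds the character mapping from artifact/char_to_num.json before decoding. Assuming the file holds the {'config': ..., 'weights': ...} dict written by the trainer, the reload typically looks like the sketch below; the guard around set_weights is an assumption, since preprocessing layers often have no weights to restore.

```python
import json
from pathlib import Path

import keras

# Rebuild char_to_num exactly as it was configured at training time.
with open(Path("artifact/char_to_num.json"), "r") as json_file:
    saved = json.load(json_file)

char_to_num = keras.layers.StringLookup.from_config(saved["config"])
if saved.get("weights"):
    char_to_num.set_weights(saved["weights"])

# Inverse mapping for turning predicted indices back into characters.
num_to_char = keras.layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
```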
19 changes: 8 additions & 11 deletions src/ocr_captcha/pipeline/training_pipeline.py
@@ -14,11 +14,10 @@


class TrainingPipeline:
def __init__(self, pretrained:bool, model_path: str):
def __init__(self, pretrained: bool, model_path: str):
self.pre_trained = pretrained
self.model_path = os.path.join("artifact", model_path)


def start_data_ingestion(self):
try:
obj = DataIngestion()
@@ -32,7 +31,6 @@ def start_data_ingestion(self):
except Exception as e:
raise customexception(e, sys)


def initiate_training(self):
try:
with mlflow.start_run() as run:
@@ -49,16 +47,15 @@ def initiate_training(self):
except Exception as e:
raise customexception(e, sys)


def train(self, train_data_path_x, train_data_path_y, test_data_path_x, test_data_path_y, unique_charachters):
try:
trainer = ModelTrainer(self.img_height, self.img_width, self.batch_size)
model_path, history = trainer.initate_model_training(train_data_path_x, train_data_path_y, test_data_path_x, test_data_path_y, unique_charachters, self.pre_trained, self.model_path)
mlflow.log_artifact(model_path)
for epoch, val_loss_value in enumerate(history.history["val_loss"]):
mlflow.log_metric("val_loss_epoch_" + str(epoch), val_loss_value)
for epoch, loss_value in enumerate(history.history["loss"]):
mlflow.log_metric("loss_epoch_" + str(epoch), loss_value)
trainer = ModelTrainer(self.img_height, self.img_width, self.batch_size)
model_path, history = trainer.initate_model_training(train_data_path_x, train_data_path_y, test_data_path_x, test_data_path_y, unique_charachters, self.pre_trained, self.model_path)
mlflow.log_artifact(model_path)
for epoch, val_loss_value in enumerate(history.history["val_loss"]):
mlflow.log_metric("val_loss_epoch_" + str(epoch), val_loss_value)
for epoch, loss_value in enumerate(history.history["loss"]):
mlflow.log_metric("loss_epoch_" + str(epoch), loss_value)
except Exception as e:
logging.info(e)
raise customexception(e, sys)
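
The training pipeline logs each epoch's loss under a distinct metric name (loss_epoch_0, loss_epoch_1, ...). That works, but mlflow.log_metric also accepts a step argument, which keeps one named series per metric and lets the MLflow UI plot it as a curve. The sketch below shows that alternative; it is not the repo's code, and the FakeHistory class merely stands in for the Keras History object returned by model.fit().

```python
import mlflow


class FakeHistory:
    """Stand-in for the keras History object returned by model.fit()."""
    history = {"loss": [2.1, 1.4, 0.9], "val_loss": [2.3, 1.6, 1.1]}


history = FakeHistory()

with mlflow.start_run():
    # One metric name per series, stepped by epoch number.
    for epoch, value in enumerate(history.history["loss"]):
        mlflow.log_metric("loss", value, step=epoch)
    for epoch, value in enumerate(history.history["val_loss"]):
        mlflow.log_metric("val_loss", value, step=epoch)
```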
53 changes: 21 additions & 32 deletions src/ocr_captcha/utils/utils.py
@@ -3,7 +3,7 @@
from keras.src.layers.preprocessing.string_lookup import StringLookup
from keras import layers
import keras
from tensorflow.python.framework.ops import SymbolicTensor, EagerTensor
from tensorflow.python.framework.ops import SymbolicTensor
from tensorflow.python.framework.sparse_tensor import SparseTensor
from typing import Dict, Union
from keras.src.models.functional import Functional
@@ -21,7 +21,7 @@ def generate_image_id():
return str(image_id)


def split_data(images: np.ndarray, labels: np.ndarray, train_size: float = 0.8, shuffle: bool =True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
def split_data(images: np.ndarray, labels: np.ndarray, train_size: float = 0.8, shuffle: bool = True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
# 1. Get the total size of the dataset
size = len(images)
# 2. Make an indices array and shuffle it, if required
@@ -35,7 +35,8 @@ def split_data(images: np.ndarray, labels: np.ndarray, train_size: float = 0.8,
x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]]
return x_train, x_valid, y_train, y_valid

def encode_single_sample_training(img_path: str, label: str, img_height: int, img_width: int, char_to_num:StringLookup) -> Dict[str, Union[tf.Tensor, tf.Tensor]]:

def encode_single_sample_training(img_path: str, label: str, img_height: int, img_width: int, char_to_num: StringLookup) -> Dict[str, Union[tf.Tensor, tf.Tensor]]:
# 1. Read image
img = tf.io.read_file(img_path)
# 2. Decode and convert to grayscale
@@ -53,14 +54,16 @@ def encode_single_sample_training(img_path: str, label: str, img_height: int, im
# 7. Return a dict as our model is expecting two inputs
return {"image": img, "label": label}


def ctc_label_dense_to_sparse(labels: SymbolicTensor, label_lengths: SymbolicTensor) -> SparseTensor:
label_shape = tf.shape(labels) # B, T, C
label_shape = tf.shape(labels) # B, T, C
# print("label_shape:", label_shape)
num_batches = tf.stack([label_shape[0]])
# print("num_batches:", num_batches)
max_num_labels = tf.stack([label_shape[1]])
# print("max_num_labels:", max_num_labels)
def range_less_than(old_input, current_input):

def range_less_than(old_input, current_input):
'''
Creates a boolean mask for the label_lengths we need to pay attention to
'''
@@ -93,6 +96,7 @@ def range_less_than(old_input, current_input):
tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
)


def ctc_batch_cost(y_true: SymbolicTensor, y_pred: SymbolicTensor, input_length: SymbolicTensor, label_length: SymbolicTensor) -> SymbolicTensor:
label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
# print(f"label_length: {label_length}")
@@ -111,7 +115,7 @@ def ctc_batch_cost(y_true: SymbolicTensor, y_pred: SymbolicTensor, input_length:
'''
Generating a concentrated sparse matrix
'''
y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0 ,2]) + keras.backend.epsilon())
y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())
# print(f"y_pred: {y_pred}")
'''
Add a small value of epsilon before taking log...so as to not take the log of 0 by mistake
@@ -120,13 +124,12 @@ def ctc_batch_cost(y_true: SymbolicTensor, y_pred: SymbolicTensor, input_length:
returns the actual loss value
add a singleton dim to the output...so as to represent the batch size
'''
return tf.expand_dims(tf.compat.v1.nn.ctc_loss(
inputs=y_pred, labels=sparse_labels, sequence_length=input_length
), 1,
)
return tf.expand_dims(tf.compat.v1.nn.ctc_loss(inputs=y_pred, labels=sparse_labels, sequence_length=input_length), 1,
)


class CTCLayer(layers.Layer):
def __init__(self, trainable=True, name: str=None, dtype=None):
def __init__(self, trainable = True, name: str = None):
super().__init__(name=name)
self.trainable = trainable
self.loss_fn = ctc_batch_cost
@@ -143,7 +146,7 @@ def call(self, y_true: SymbolicTensor, y_pred: SymbolicTensor) -> SymbolicTensor
self.add_loss(loss)
# At test time, just return the computed predictions
return y_pred

def get_config(self):
config = super().get_config()
config.update({'trainable': self.trainable})
@@ -161,7 +164,7 @@ def build_model(img_width: int, img_height: int, char_to_num: StringLookup) -> F
x = layers.Conv2D(128, (3, 3), activation="relu", kernel_initializer="he_normal", padding="same", name="conv2",)(x)
x = layers.MaxPooling2D((2, 2), name="pool2")(x)
# reshaping just in case
new_shape = ((img_width//4), (img_height//4) * 128) # each spatial location will be denoted by 64 values
new_shape = ((img_width // 4), (img_height // 4) * 128) # each spatial location will be denoted by 64 values
x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
x = layers.Dense(128, activation="relu", name="dense1")(x)
x = layers.Dropout(0.2)(x)
@@ -201,6 +204,7 @@ def build_model(img_width: int, img_height: int, char_to_num: StringLookup) -> F
# with open("artifact\optimizer.json", "w") as opt_json_file:
# opt_json_file.write(optimizer.get_config())


def encode_single_sample_testing(img_path: str, img_height: int, img_width: int, ids: str) -> Dict[str, Union[tf.Tensor, tf.Tensor]]:
# 1. Read image
img = tf.io.read_file(img_path)
@@ -218,6 +222,7 @@ def encode_single_sample_testing(img_path: str, img_height: int, img_width: int,
# 7. Return a dict as our model is expecting two inputs
return {"image": img, "ids": ids}


def ctc_decode(y_pred, input_length, greedy, beam_width=100, top_paths=1) -> Tuple[list, tf.Tensor]:
input_shape = tf.shape(y_pred)
num_samples, num_steps = input_shape[0], input_shape[1]
@@ -241,7 +246,8 @@ def ctc_decode(y_pred, input_length, greedy, beam_width=100, top_paths=1) -> Tup
decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))
return (decoded_dense, log_prob)

def decode_batch_predictions(pred, max_length:int, num_to_char: StringLookup) -> List:

def decode_batch_predictions(pred, max_length: int, num_to_char: StringLookup) -> List:
input_len = np.ones(pred.shape[0]) * pred.shape[1]
# results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :max_length]
results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
@@ -262,7 +268,7 @@ def save_object(file_path, obj):
pickle.dump(obj, file_obj)
except Exception as e:
raise customexception(e, sys)


def load_object(file_path):
try:
@@ -271,20 +277,3 @@ def load_object(file_path):
except Exception as e:
logging.info("Exception Error occured in loading object")
raise customexception(e, sys)
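
Taken together, decode_batch_predictions ties the utilities above into one flow: greedy CTC decoding of the model's softmax output, then an inverse StringLookup and a string join. Below is a self-contained sketch of that flow on toy data, using TensorFlow's built-in tf.nn.ctc_greedy_decoder rather than the hand-rolled ctc_decode; the vocabulary, shapes, and the -1 padding convention are illustrative, not the repo's exact helper.

```python
import numpy as np
import tensorflow as tf
import keras

# Toy inverse mapping, standing in for num_to_char.
vocab = list("abc123")
num_to_char = keras.layers.StringLookup(
    vocabulary=vocab, mask_token=None, invert=True
)

# Fake softmax output with shape (batch, time, classes); the last class index
# is treated as the CTC blank by tf.nn.ctc_greedy_decoder.
batch, time, classes = 2, 8, len(vocab) + 2
pred = np.random.rand(batch, time, classes).astype("float32")

# The greedy decoder expects time-major log-probabilities and per-sample lengths.
log_probs = tf.math.log(tf.transpose(pred, perm=[1, 0, 2]) + 1e-7)  # avoid log(0)
seq_len = tf.fill([batch], time)
decoded_list, _ = tf.nn.ctc_greedy_decoder(log_probs, tf.cast(seq_len, tf.int32))
dense = tf.sparse.to_dense(decoded_list[0], default_value=-1)

# Map index sequences back to text, dropping the -1 padding.
texts = [
    tf.strings.reduce_join(
        num_to_char(tf.boolean_mask(seq, tf.not_equal(seq, -1)))
    ).numpy().decode("utf-8")
    for seq in dense
]
print(texts)
```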

















2 changes: 1 addition & 1 deletion tox.ini
@@ -14,7 +14,7 @@ commands =
# stop the build if there are Python syntax errors or undefined names
flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 src --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
flake8 src --count --exit-zero --max-complexity=10 --max-line-length=150 --select=E --ignore=E501,E251 --statistics
# type linting
mypy src/
# pytest unit
