Integrated W&B
sagnik1511 committed Jul 10, 2022
1 parent 92f91f5 commit a1046e6
Showing 4 changed files with 64 additions and 27 deletions.
41 changes: 29 additions & 12 deletions README.md
@@ -4,13 +4,14 @@
<img src="https://forthebadge.com/images/badges/built-with-love.svg">
<img src="https://forthebadge.com/images/badges/made-with-python.svg">
<img src="https://forthebadge.com/images/badges/built-with-science.svg">
</div>
<h2>GOALS of the Project</h2>
<i>1. Reduces implementation time up to 50%.</i><br>
<i>2. Presents Eye-catching Training Job Monitor</i><br>
<i>2. Presents Eye-catching Training Job Monitor.</i><br>
<i>3. Stores training data efficiently.</i><br>
<b>Visit at <a href="https://pypi.org/project/torch-tutor/0.0.1/#description">PyPI</a></b><br>
<b>Primary Release</b>
</div>
<i>4. Integrated with W&B.</i>
<h4>Visit at <a href="https://pypi.org/project/torch-tutor/0.0.4/#description">PyPI</a></h4>
<h4>Primary Release</h4>

## Installation

Expand All @@ -21,9 +22,11 @@
b) Torch >= 1.11.0 + cu113

Visit [Custom Installation PyTorch](https://pytorch.org/) to install the latest version (Date: 07-09-2022).


2. Install the `torch_tutor` package.

Procedures :
#### Package Installation

a) From PyPI :
```shell
@@ -38,7 +41,10 @@
git clone https://github.com/sagnik1511/Torch-Tutor.git
Go to the directory.
```shell
cd Torch_Tutor
pip -m install -v -e .
```
Install from the local repository.
```shell
pip install -v -e .
```
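
To verify the install, a quick import check (assumes the package installed into the active environment):
```shell
python -c "import torch_tutor"
```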

## Usage
@@ -51,8 +57,7 @@
callback = CallBack(tracker="accuracy",
                    stop_epoch=5,
                    save_weights=True,
                    on="training",
                    save_directory="../weight_directory"
                    )
                    save_directory="../weight_directory")

trainer = Trainer(train_dataset="<add your train_dataset_here>",
                  model="<add your model here>",
@@ -73,7 +78,9 @@
trainer.train(batch_size=32,
              logging_index=10,
              shuffle=True,
              drop_last_batches=True,
              callback=callback)
              callback=callback,
              connect_wandb=True,
              exp_name="New Experiment")

```
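
To make the snippet concrete, here is a minimal sketch of what the two placeholders could be — a toy dataset and model in plain PyTorch (the names and shapes below are illustrative, not part of torch_tutor):

```python
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset

# Toy stand-ins for the placeholders above (illustrative shapes, not part of torch_tutor)
features = torch.randn(256, 10)           # 256 samples, 10 features
labels = torch.randint(0, 2, (256,))      # binary class labels
train_dataset = TensorDataset(features, labels)

model = nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 2))
```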

@@ -145,8 +152,21 @@
from torch_tutor.core.trainer import Trainer

`callback` [type: torch_tutor.core.callback.CallBack] : CallBack function.

`connect_wandb` [type: bool] : Flag to connect W&B loggers.

`exp_name` [type: str] : Name of the experiment.
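
For reference, enabling `connect_wandb` drives a standard W&B run lifecycle, as the trainer.py diff below shows. A minimal standalone sketch, assuming offline mode is acceptable for a dry run:

```python
import torch.nn as nn
import wandb

model = nn.Linear(10, 2)               # placeholder model
loss_fn = nn.CrossEntropyLoss()

# mode="offline" avoids needing a W&B account for a dry run; drop it to sync online
run = wandb.init(project="New Experiment", mode="offline")
run.watch(model, criterion=loss_fn, log_freq=100, log_graph=True)
run.log({"training_loss": 0.1234})     # the trainer emits one such dict per step
run.finish()
```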

---

## Module Functionalities Achieved

- [x] Training through CPU / GPU / TPU.
- [x] Basic Logging & Reports.
- [x] W&B Integration.
- [ ] MLFlow Integration.
- [ ] Distributed Training.
- [ ] Multi-Model Training.
- [ ] Effective Web-platform UI.

<div align = "center">
<h3>If you get any errors while running the code, please make a PR.</h3>
@@ -155,6 +175,3 @@
</div>

<div align = "center"><h1>Also follow me on <a href="https://github.com/sagnik1511">GitHub</a> , <a href="https://kaggle.com/sagnik1511">Kaggle</a> , <a href="https://in.linkedin.com/in/sagnik1511">LinkedIn</a></h1></div>



15 changes: 12 additions & 3 deletions setup.py
@@ -1,7 +1,15 @@
from setuptools import setup, find_packages
import codecs
import os

VERSION = '0.0.4'
DESCRIPTION = 'Simplified PyTorch Trainer'


here = os.path.abspath(os.path.dirname(__file__))
with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh:
    long_description = "\n" + fh.read()

VERSION = '0.0.2'
DESCRIPTION = 'Simple & Efficient PyTorch Trainer'

# Setting up
setup(
@@ -10,9 +18,10 @@
    author="sagnik1511 (Sagnik Roy)",
    author_email="<[email protected]>",
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    install_requires=["numpy", "pandas"],
    install_requires=["numpy", "pandas", "wandb"],
    keywords=['python', 'pytorch', 'training', 'logging'],
    classifiers=[
        "Development Status :: 1 - Planning",
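Since `long_description` is now read from README.md, one way to confirm the metadata renders is a local build (assumes the `build` package is available):

```shell
pip install build
python -m build
```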
10 changes: 6 additions & 4 deletions torch_tutor/core/engine/loops.py
@@ -31,7 +31,7 @@
def train_single_epoch(training_loader: DataLoader,
                       model: nn.Module, loss_fn: nn.Module,
                       optimizer: Any, training_steps: int,
                       metrics: Dict[str, Metric], logging_index: int,
                       device: str = "cpu") -> Tuple[Tuple[nn.Module, Any], Dict[str, float]]:
                       device: str = "cpu", wandb_runner=None) -> Tuple[Tuple[nn.Module, Any], Dict[str, float]]:

    train_metric_arr = {k: [] for k, _ in metrics.items()}
    train_metric_arr["loss"] = []
@@ -42,6 +42,8 @@ def train_single_epoch(training_loader: DataLoader,
        loss.backward()
        optimizer.step()
        metric_arr = {k: round(v, 6) for k, v in metric_arr.items()}
        if wandb_runner:
            wandb_runner.log({f"training_{k}": round(v, 6) for k, v in metric_arr.items()})
        for k in train_metric_arr.keys():
            train_metric_arr[k].append(metric_arr[k])
        if index % logging_index == 0:
@@ -73,18 +75,18 @@ def validate_single_epoch(val_loader: DataLoader, model: nn.Module,
def run_single_epoch(train_loader: DataLoader, model: nn.Module, loss_fn: nn.Module, optimizer: Any,
                     training_steps: int, metrics: Dict[str, Metric],
                     val_loader: Any, val_steps: int, logging_index: int,
                     device: str = 'cpu') -> Tuple[Tuple[nn.Module, Any], List[Dict[str, float]]]:
                     device: str = 'cpu', wandb_runner=None) -> Tuple[Tuple[nn.Module, Any], List[Dict[str, float]]]:
    init = time.time()
    res_arr = []
    (model, optimizer), train_res = train_single_epoch(train_loader, model, loss_fn,
                                                       optimizer, training_steps,
                                                       metrics, logging_index, device)
                                                       metrics, logging_index, device, wandb_runner)
    train_res = {k: round(v, 6) for k, v in train_res.items()}
    res_arr.append(train_res)
    if val_loader:
        val_res = validate_single_epoch(val_loader, model, loss_fn, val_steps, metrics, device)
        val_res = {k: round(v, 6) for k, v in val_res.items()}
        res_arr.append(val_res)

    print(f"Execution Time : {round(time.time() - init, 6)} seconds")
    print(f"Execution Time : {round(time.time() - init, 4)} seconds.\n")
    return (model, optimizer), res_arr
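
The hook above prefixes each per-step training metric with `training_` before logging it to the run. The same pattern in isolation, with made-up metric values and an offline run:

```python
import wandb

run = wandb.init(project="demo", mode="offline")   # arbitrary project name, offline dry run
metric_arr = {"accuracy": 0.8125, "loss": 0.4321}  # dummy per-step metrics
# Same pattern as train_single_epoch: prefix keys so W&B charts group training metrics
run.log({f"training_{k}": round(v, 6) for k, v in metric_arr.items()})
run.finish()
```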
25 changes: 17 additions & 8 deletions torch_tutor/core/trainer.py
Expand Up @@ -8,6 +8,7 @@
from torch_tutor.metrics.generals import *
import pandas as pd
import warnings
import wandb
warnings.filterwarnings("ignore")


@@ -60,45 +61,53 @@ def _load_metrics(self) -> Dict[str, Metric]:
    def train(self, batch_size: int, num_epochs: int, training_steps: int = -1,
              validation_set: Any = None, logging_index: int = 10,
              validation_steps: int = -1, shuffle: bool = True,
              drop_last_batches: bool = True, callback: CallBack = None):

              drop_last_batches: bool = True, callback: CallBack = None,
              connect_wandb: bool = True, exp_name: str = "sample_experiment"):
        print(f"Launching Experiment : {exp_name}.")
        train_dl, val_dl = self._prepare_data(self.train_ds, validation_set,
                                              batch_size, shuffle, drop_last_batches)
        device = torch.device(self.device)
        self.model.to(device)
        print("Model loaded to device...\n")
        init = time.time()
        run = wandb.init(project=exp_name) if connect_wandb else None
        if connect_wandb:
            run.watch(self.model, criterion=self.loss_fn, log_freq=100, log_graph=True)
        for epoch in range(num_epochs):
            print(f"Epoch : {epoch + 1} :")
            print(f"Epoch : {epoch + 1}\n")
            (self.model, self.optim), res_arr = run_single_epoch(train_dl, self.model, self.loss_fn, self.optim,
                                                                 training_steps, self.metrics, val_dl, validation_steps,
                                                                 logging_index, self.device)
            print(f"Training scores : \n{pd.DataFrame(res_arr[0], index=[0])}")
                                                                 logging_index, self.device, run)
            print(f"\nTraining scores : \n{pd.DataFrame(res_arr[0], index=[0])}")
            for k in self.train_scores.keys():
                self.train_scores[k].append(res_arr[0][k])
            if validation_set:
                print(f"Validation scores : \n{pd.DataFrame(res_arr[1], index=[0])}")
                for k in self.validation_scores.keys():
                    self.validation_scores[k].append(res_arr[1][k])
                print("\n")
            if callback:
                is_continue_run = callback.update([self.train_scores, self.validation_scores], self.model)
                if is_continue_run:
                    break
            print("\n")
        print(f"Training Completed...")
        print(f"Training Completed.")
        print(f"Executed in {round(time.time() - init, 4)} seconds.\n")
        self._prepare_training_report()
        if connect_wandb:
            run.finish()

    def _prepare_training_report(self):
        self.train_report = pd.DataFrame(self.train_scores)
        self.val_report = pd.DataFrame(self.validation_scores) \
            if len(self.validation_scores[next(iter(self.validation_scores.keys()))]) > 0 else pd.DataFrame()
        print("Prepared training reports...")
        print("Prepared training reports.")

    @staticmethod
    def _prepare_data(train_set: Dataset, val_set: Any,
                      batch_size: int, shuffle: bool = True,
                      drop_last: bool = False) -> Tuple[DataLoader, Any]:
        train_dl = DataLoader(train_set, batch_size, shuffle, drop_last=drop_last)
        val_dl = DataLoader(val_set, batch_size, shuffle, drop_last=drop_last) if val_set else None

        print(f"Data Prepared.")
        return train_dl, val_dl
