WIP: use different LayerOps trait requirements per framework #122

Draft · wants to merge 1 commit into base: master
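What this draft changes, in short: layer and solver code stops requiring a single blanket `LayerOps<f32>` bound and instead requires `LayerOps<<B as IBackend>::F, f32>`, so the set of required layer operations can differ per framework behind the backend's associated type. A minimal sketch of the shape of that change, using placeholder traits rather than the real coaster/juice definitions (all names and bodies here are illustrative assumptions):

```rust
use std::rc::Rc;

// Placeholder stand-ins for the coaster traits; only the generics mirror the diff.
trait IFramework {}
struct Native;
impl IFramework for Native {}

trait IBackend {
    type F: IFramework; // the framework this backend runs on
}

// New-style bound used throughout this PR: requirements are keyed by the
// framework as well as the precision, so LayerOps<Native, f32> and a
// hypothetical LayerOps<Cuda, f32> can demand different operation sets.
trait LayerOps<Framework: IFramework, Precision> {}

// Call sites then constrain the net backend the way the diff does:
fn build_net<B>(_backend: Rc<B>)
where
    B: IBackend + LayerOps<<B as IBackend>::F, f32>,
{
    // construct layers here
}
```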
355 changes: 349 additions & 6 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion coaster/src/plugin.rs
@@ -22,7 +22,7 @@
//!
//! Extending the Backend with your own Plugin is a straight forward process.
//! For now we recommend that you take a look at the general code structure of [Coaster-BLAS][coaster-blas]
//! or its documentation. Let us now about your Plugin on the Gitter chat, we are happy to feature
//! or its documentation. Let us know about your Plugin on the Gitter chat, we are happy to feature
//! your Coaster Plugin on the README.
//!
//! [program]: ../program/index.html
35 changes: 5 additions & 30 deletions juice/src/layer.rs
@@ -762,10 +762,10 @@ impl<B: IBackend> Layer<B> {
/// # }
/// # }
/// ```
pub fn load<LB: IBackend + LayerOps<f32> + 'static, P: AsRef<Path>>(
backend: Rc<LB>,
pub fn load<NetB: IBackend + LayerOps<<NetB as IBackend>::F, f32> + 'static, P: AsRef<Path>>(
backend: Rc<NetB>,
path: P,
) -> io::Result<Layer<LB>> {
) -> io::Result<Layer<NetB>> {
let path = path.as_ref();
let ref mut file = File::open(path)?;
let mut reader = BufReader::new(file);
@@ -951,7 +951,7 @@ impl<'a, B: IBackend> CapnpWrite<'a> for Layer<B> {
}
}

impl<B: IBackend + LayerOps<f32> + crate::coblas::plugin::Copy<f32> + 'static> Layer<B> {
impl<B> Layer<B> where B: IBackend + LayerOps<<B as IBackend>::F,f32> + crate::coblas::plugin::Copy<f32> + 'static {
/// Creates a new Layer from a [LayerConfig][1].
/// [1]: ./struct.LayerConfig.html
pub fn from_config(backend: Rc<B>, config: &LayerConfig) -> Layer<B> {
@@ -984,39 +984,14 @@ impl<B: IBackend + LayerOps<f32> + crate::coblas::plugin::Copy<f32> + 'static> L

backend: backend.clone(),

worker: Layer::<B>::worker_from_config(backend, &cfg),
worker: <B as LayerOps::<<B as IBackend>::F, f32>>::layer_from_config::<B>(backend, &cfg),
config: cfg,
};
layer.expose_inputs();
layer.expose_outputs();

layer
}

/// Helper for [from_config] to match a [LayerType][2] to its [implementation][3].
/// [1]: #method.from_config
/// [2]: ./enum.LayerType.html
/// [3]: ../layers/index.html
fn worker_from_config(backend: Rc<B>, config: &LayerConfig) -> Box<dyn ILayer<B>> {
match config.layer_type.clone() {
LayerType::Convolution(layer_config) => Box::new(Convolution::from_config(&layer_config)),
LayerType::Rnn(layer_config) => Box::new(Rnn::from_config(&layer_config)),
LayerType::Linear(layer_config) => Box::new(Linear::from_config(&layer_config)),
LayerType::LogSoftmax => Box::new(LogSoftmax::default()),
LayerType::Pooling(layer_config) => Box::new(Pooling::from_config(&layer_config)),
LayerType::Sequential(layer_config) => Box::new(Sequential::from_config(backend, &layer_config)),
LayerType::Softmax => Box::new(Softmax::default()),
LayerType::ReLU => Box::new(ReLU),
LayerType::TanH => Box::new(TanH),
LayerType::Sigmoid => Box::new(Sigmoid),
LayerType::NegativeLogLikelihood(layer_config) => {
Box::new(NegativeLogLikelihood::from_config(&layer_config))
}
LayerType::MeanSquaredError => Box::new(MeanSquaredError),
LayerType::Reshape(layer_config) => Box::new(Reshape::from_config(&layer_config)),
LayerType::Dropout(layer_config) => Box::new(Dropout::from_config(&layer_config)),
}
}
}

/// A Layer in a Neural Network that can handle forward and backward of a computation step.
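The 30 deleted lines above are `Layer::worker_from_config`, the big `match` that picked a layer implementation for every `LayerType`. The call site now goes through the trait instead: `<B as LayerOps<<B as IBackend>::F, f32>>::layer_from_config::<B>(backend, &cfg)`, so each framework can provide its own set of constructors. The trait method itself is not part of this diff; a rough, self-contained sketch of what such an associated function could look like, with stub types standing in for the juice ones:

```rust
use std::rc::Rc;

// Stub stand-ins for juice types (assumption; the real definitions are not in this diff).
struct LayerConfig;
trait IBackend {}
trait ILayer<B: IBackend> {}

// Hypothetical shape of the function the new call site invokes as
// `<B as LayerOps<<B as IBackend>::F, f32>>::layer_from_config::<B>(backend, &cfg)`.
trait LayerOps<Framework, Precision> {
    /// Build the worker for `config`, limited to layers this framework supports.
    fn layer_from_config<B: IBackend>(backend: Rc<B>, config: &LayerConfig) -> Box<dyn ILayer<B>>;
}
```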
16 changes: 11 additions & 5 deletions juice/src/layers/common/linear.rs
@@ -22,12 +22,18 @@
use crate::capnp_util::*;
use crate::co::backend::IBackend;
use crate::co::tensor::SharedTensor;
use crate::coblas::plugin::*;
use crate::coblas::transpose::Transpose;
use crate::juice_capnp::linear_config as capnp_config;
use crate::layer::*;
use crate::util::{native_scalar, ArcLock, LayerOps};
use crate::util::{native_scalar, ArcLock, LayerOps, Axpby};
use crate::weight::FillerType;


trait ILinearCalc<F>: Gemm<F> + Axpby<F> + Copy<F> {}

impl<T,F> ILinearCalc<F> for T where T: Gemm<F> + Axpby<F> + Copy<F> {}

#[derive(Debug)]
/// Linear Layer
pub struct Linear {
@@ -67,7 +73,7 @@ impl Linear {
}
}

impl<B: IBackend + LayerOps<f32>> ILayer<B> for Linear {
impl<B: IBackend + ILinearCalc<f32>> ILayer<B> for Linear {
fn auto_weight_blobs(&self) -> bool {
true
}
@@ -123,7 +129,7 @@ impl<B: IBackend + LayerOps<f32>> ILayer<B> for Linear {
}
}

impl<B: IBackend + LayerOps<f32>> ComputeOutput<f32, B> for Linear {
impl<B: IBackend + ILinearCalc<f32>> ComputeOutput<f32, B> for Linear {
/// Basically, x has the shape (k, n) where k is the batch size. Given W with shape (m, n) where
/// m is output vector length, we compute the output with the formula xW^T which will give us a
/// matrix of size (k, m) with the outputs.
@@ -162,7 +168,7 @@ impl<B: IBackend + LayerOps<f32>> ComputeOutput<f32, B> for Linear {
}
}

impl<B: IBackend + LayerOps<f32>> ComputeInputGradient<f32, B> for Linear {
impl<B: IBackend + ILinearCalc<f32>> ComputeInputGradient<f32, B> for Linear {
/// Since we have row vectors instead of columns, xW^T = (Wx^T)^T. Take the derivative with
/// respect to x^T (gives us a column vector of dimension (n, 1)), we get d((Wx^T)^T)/d(x^T) =
/// W^T of dims (n, m). In backpropagation with column vectors, we would take W^T * output_grad,
@@ -192,7 +198,7 @@ impl<B: IBackend + LayerOps<f32>> ComputeInputGradient<f32, B> for Linear {
}
}

impl<B: IBackend + LayerOps<f32>> ComputeParametersGradient<f32, B> for Linear {
impl<B: IBackend + ILinearCalc<f32>> ComputeParametersGradient<f32, B> for Linear {
fn compute_parameters_gradient(
&self,
backend: &B,
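A note on the `ILinearCalc` trait added at the top of this file: it is the standard trait-alias idiom in Rust, an empty supertrait plus a blanket impl, so the four `impl` blocks for `Linear` can spell one bound instead of repeating `Gemm<F> + Axpby<F> + Copy<F>`. The idiom in isolation, with placeholder traits so it stands on its own:

```rust
// Trait-alias-via-blanket-impl, as used by ILinearCalc (placeholder traits).
trait Gemm {}
trait Axpby {}

// Empty "alias" trait: anything implementing Gemm + Axpby is automatically Calc.
trait Calc: Gemm + Axpby {}
impl<T: Gemm + Axpby> Calc for T {}

// Bounds can now name Calc instead of listing every operation.
fn forward<B: Calc>(_backend: &B) {}
```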
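For reference, the shape bookkeeping that the `ComputeOutput` and `ComputeInputGradient` doc comments above walk through, written as equations (row-major batches: k samples, n inputs, m outputs):

```latex
% Forward pass (ComputeOutput): X is (k x n), W is (m x n)
Y = X W^{\top}, \qquad (k \times n)(n \times m) = (k \times m)

% Backward pass w.r.t. the input (ComputeInputGradient):
\frac{\partial L}{\partial X} = \frac{\partial L}{\partial Y}\, W,
\qquad (k \times m)(m \times n) = (k \times n)
```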
12 changes: 6 additions & 6 deletions juice/src/layers/container/sequential.rs
@@ -13,7 +13,7 @@ use std::sync::{Arc, RwLock};

#[derive(Debug)]
/// Sequential Layer
pub struct Sequential<B: IBackend + LayerOps<f32>> {
pub struct Sequential<B: IBackend + LayerOps<<B as IBackend>::F,f32>> {
layers: Vec<RefCell<Layer<B>>>,

input_tensor_names: Vec<String>,
@@ -26,7 +26,7 @@ pub struct Sequential<B: IBackend + LayerOps<f32>> {
registry: HashMap<String, (ArcLock<SharedTensor<f32>>, ArcLock<SharedTensor<f32>>)>,
}

impl<B: IBackend + LayerOps<f32> + 'static> Sequential<B> {
impl<B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static> Sequential<B> {
/// Create a empty Sequential container layer.
pub fn empty() -> Sequential<B> {
Sequential {
@@ -219,7 +219,7 @@ impl<B: IBackend + LayerOps<f32> + 'static> Sequential<B> {
}
}

impl<B: IBackend + LayerOps<f32> + 'static> ILayer<B> for Sequential<B> {
impl<B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static> ILayer<B> for Sequential<B> {
fn is_container(&self) -> bool {
true
}
@@ -344,7 +344,7 @@ impl<B: IBackend + LayerOps<f32> + 'static> ILayer<B> for Sequential<B> {
}
}

impl<B: IBackend + LayerOps<f32> + 'static> ComputeOutput<f32, B> for Sequential<B> {
impl<B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static> ComputeOutput<f32, B> for Sequential<B> {
// we are overriding `forward` and not calling `compute_output`
fn compute_output(
&self,
@@ -356,7 +356,7 @@ impl<B: IBackend + LayerOps<f32> + 'static> ComputeOutput<f32, B> for Sequential
}
}

impl<B: IBackend + LayerOps<f32> + 'static> ComputeInputGradient<f32, B> for Sequential<B> {
impl<B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static> ComputeInputGradient<f32, B> for Sequential<B> {
// we are overriding `backward_input` and not calling `compute_input_gradient`
fn compute_input_gradient(
&self,
@@ -370,7 +370,7 @@ impl<B: IBackend + LayerOps<f32> + 'static> ComputeInputGradient<f32, B> for Seq
}
}

impl<B: IBackend + LayerOps<f32> + 'static> ComputeParametersGradient<f32, B> for Sequential<B> {
impl<B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static> ComputeParametersGradient<f32, B> for Sequential<B> {
// we are overriding `backward_parameters` and not calling `compute_parameters_gradient`
fn compute_parameters_gradient(
&self,
4 changes: 2 additions & 2 deletions juice/src/layers/loss/mean_squared_error.rs
@@ -23,7 +23,7 @@ impl MeanSquaredError {
}
}

impl<B: IBackend + LayerOps<f32> + Axpby<f32>> ILayer<B> for MeanSquaredError {
impl<B: IBackend + LayerOps<<B as IBackend>::F,f32> + Axpby<f32>> ILayer<B> for MeanSquaredError {
fn reshape(
&mut self,
backend: ::std::rc::Rc<B>,
@@ -70,7 +70,7 @@ impl<B: IBackend> ComputeOutput<f32, B> for MeanSquaredError {
}

// Calculate a Gradient for Mean Squared Error
impl<B: IBackend + LayerOps<f32>> ComputeInputGradient<f32, B> for MeanSquaredError {
impl<B: IBackend + LayerOps<<B as IBackend>::F,f32>> ComputeInputGradient<f32, B> for MeanSquaredError {
fn compute_input_gradient(
&self,
backend: &B,
22 changes: 11 additions & 11 deletions juice/src/solver/mod.rs
@@ -20,7 +20,7 @@ use std::rc::Rc;
#[derive(Debug)]
/// Solver that optimizes a [Layer][1] with a given objective.
/// [1]: ../layer/index.html
pub struct Solver<SolverB: IBackend + SolverOps<f32>, B: IBackend + LayerOps<f32>> {
pub struct Solver<SolverB: IBackend + SolverOps<f32>, B: IBackend + LayerOps<<B as IBackend>::F,f32>> {
net: Layer<B>,
objective: Layer<SolverB>,
/// The implementation of the Solver
@@ -34,7 +34,7 @@ pub struct Solver<SolverB: IBackend + SolverOps<f32>, B: IBackend + LayerOps<f32
solver_backend: PhantomData<SolverB>,
}

impl<SolverB: IBackend + SolverOps<f32> + 'static, B: IBackend + LayerOps<f32> + 'static> Solver<SolverB, B> {
impl<SolverB: IBackend + SolverOps<f32> + 'static, B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static> Solver<SolverB, B> {
/// Create Solver from [SolverConfig][1]
/// [1]: ./struct.SolverConfig.html
///
@@ -56,7 +56,7 @@ impl<SolverB: IBackend + SolverOps<f32> + 'static, B: IBackend + LayerOps<f32> +
}
}

impl<SolverB: IBackend + SolverOps<f32> + 'static, B: IBackend + LayerOps<f32> + 'static> Solver<SolverB, B> {
impl<SolverB: IBackend + SolverOps<f32> + 'static, B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static> Solver<SolverB, B> {
fn init(&mut self, backend: Rc<B>) {
info!("Initializing solver from configuration");

@@ -112,7 +112,7 @@ impl<SolverB: IBackend + SolverOps<f32> + 'static, B: IBackend + LayerOps<f32> +
///
/// See [Solvers][1]
/// [1]: ../solvers/index.html
pub trait ISolver<SolverB, B: IBackend + LayerOps<f32>> {
pub trait ISolver<SolverB, B: IBackend + LayerOps<<B as IBackend>::F,f32>> {
/// Initialize the solver, setting up any network related data.
fn init(&mut self, net: &Layer<B>) {}

@@ -133,7 +133,7 @@ pub trait ISolver<SolverB, B: IBackend + LayerOps<f32>> {
fn backend(&self) -> &SolverB;
}

impl<SolverB, B: IBackend + LayerOps<f32>> ::std::fmt::Debug for dyn ISolver<SolverB, B> {
impl<SolverB, B: IBackend + LayerOps<<B as IBackend>::F,f32>> ::std::fmt::Debug for dyn ISolver<SolverB, B> {
fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
write!(f, "({})", "ILayer")
}
@@ -216,7 +216,7 @@ pub struct SolverConfig {
/// The value should always be between 0 and 1 and dictates how much of the previous
/// gradient update will be added to the current one.
///
/// Default: 0
/// Default: 0.0
pub momentum: f32,
}

@@ -240,7 +240,7 @@ impl Default for SolverConfig {
weight_decay: None,
regularization_method: None,

momentum: 0f32,
momentum: 0.0f32,
}
}
}
@@ -338,11 +338,11 @@ pub enum SolverKind {

impl SolverKind {
/// Create a Solver of the specified kind with the supplied SolverConfig.
pub fn with_config<B: IBackend + SolverOps<f32> + 'static, NetB: IBackend + LayerOps<f32> + 'static>(
pub fn with_config<SolverB: IBackend + SolverOps<f32> + 'static, NetB: IBackend + LayerOps<<NetB as IBackend>::F,f32> + 'static>(
&self,
backend: Rc<B>,
backend: Rc<SolverB>,
config: &SolverConfig,
) -> Box<dyn ISolver<B, NetB>> {
) -> Box<dyn ISolver<SolverB, NetB>> {
match *self {
SolverKind::SGD(sgd) => sgd.with_config(backend, config),
}
@@ -359,7 +359,7 @@ pub enum SGDKind {

impl SGDKind {
/// Create a Solver of the specified kind with the supplied SolverConfig.
pub fn with_config<B: IBackend + SolverOps<f32> + 'static, NetB: IBackend + LayerOps<f32> + 'static>(
pub fn with_config<B: IBackend + SolverOps<f32> + 'static, NetB: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static>(
&self,
backend: Rc<B>,
config: &SolverConfig,
4 changes: 2 additions & 2 deletions juice/src/solvers/mod.rs
@@ -36,7 +36,7 @@ use crate::layer::*;
use crate::solver::*;
use crate::util::*;

trait SGDSolver<SolverB: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<f32>>: ISolver<SolverB, NetB> {
trait SGDSolver<SolverB: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<<NetB as IBackend>::F,f32>>: ISolver<SolverB, NetB> {
fn compute_update_value(
&mut self,
config: &SolverConfig,
@@ -59,7 +59,7 @@ trait SGDSolver<SolverB: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<f3
/// [3]: https://en.wikipedia.org/wiki/Recurrent_neural_network
/// [4]: https://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm
#[allow(unused_must_use)]
fn clip_gradients<B: IBackend + LayerOps<f32> + 'static>(&self, config: &SolverConfig, net: &mut Layer<B>) {
fn clip_gradients<B: IBackend + LayerOps<<B as IBackend>::F,f32> + 'static>(&self, config: &SolverConfig, net: &mut Layer<B>) {
// skip clipping gradients if SolverConfig.clip_gradients is set to None
if let Some(clip_threshold) = config.clip_gradients {
let native = native_backend();
2 changes: 1 addition & 1 deletion juice/src/solvers/sgd/mod.rs
@@ -26,7 +26,7 @@ pub use self::momentum::Momentum;
#[macro_export]
macro_rules! impl_isolver_sgd {
($t:ty) => {
impl<SolverB: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<f32> + 'static> ISolver<SolverB, NetB>
impl<SolverB: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<<NetB as IBackend>::F,f32> + 'static> ISolver<SolverB, NetB>
for $t
{
/// Initialize the SGD Momentum solver, allocating memory for its history.
2 changes: 1 addition & 1 deletion juice/src/solvers/sgd/momentum.rs
@@ -56,7 +56,7 @@ impl<SolverB: IBackend + SolverOps<f32>> Momentum<SolverB> {
}
}

impl<B: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<f32> + 'static> SGDSolver<B, NetB> for Momentum<B> {
impl<B: IBackend + SolverOps<f32>, NetB: IBackend + LayerOps<<NetB as IBackend>::F,f32> + 'static> SGDSolver<B, NetB> for Momentum<B> {
fn compute_update_value(
&mut self,
config: &SolverConfig,
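One small tidy-up in `solver/mod.rs` above changes the documented momentum default from `0` to `0.0` to match the `f32` field. For context on what that knob does in this file: the momentum solver keeps a history tensor per weight and blends it into each update. The actual `compute_update_value` body is not shown in this diff, so the following is the textbook SGD-with-momentum rule, not a copy of the juice implementation:

```rust
// Textbook SGD-with-momentum update (assumption about what Momentum computes;
// the real compute_update_value in momentum.rs is not part of this diff).
fn momentum_update(history: &mut [f32], gradient: &[f32], weights: &mut [f32], lr: f32, momentum: f32) {
    for ((v, g), w) in history.iter_mut().zip(gradient).zip(weights.iter_mut()) {
        // v <- momentum * v + lr * g   (momentum in [0, 1], per the SolverConfig docs)
        *v = momentum * *v + lr * g;
        // w <- w - v
        *w -= *v;
    }
}
```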