Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ROCK-NN] - POC - SynapseML integration #40

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 240 additions & 0 deletions rocket-ml-extensions/rocket-3.0.0-SDK/synapseML/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.stratio.rocket.poc</groupId>
<artifactId>rocket-synapseml</artifactId>
<version>1.0-SNAPSHOT</version>

<properties>
  <!-- Encoding used for sources, reports and the compiler -->
  <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
  <maven.compiler.encoding>UTF-8</maven.compiler.encoding>

  <!-- Java language level -->
  <maven.compiler.source>1.8</maven.compiler.source>
  <maven.compiler.target>1.8</maven.compiler.target>

  <!-- Scala / Spark line; the Scala binary version is used as the artifactId suffix -->
  <scala.binary.version>2.12</scala.binary.version>
  <apache.spark.version>3.1.1</apache.spark.version>

  <!-- Stratio platform versions -->
  <rocket.version>3.0.0-SNAPSHOT</rocket.version>
  <crossdata.version>3.4.0-M6</crossdata.version>

  <!-- Test library versions; NOTE(review): not referenced by any dependency visible in this POM -->
  <junit.version>4.13.1</junit.version>
  <scalatest.version>2.2.5</scalatest.version>
</properties>

<repositories>
  <!--
    Extra repositories needed to resolve the SynapseML artifacts.
    Repository ids are used as local cache/settings keys and must not contain
    path characters such as '/'; Maven reports a model warning for them, so the
    trailing slash was dropped from the SynapseML repository id.
    NOTE(review): any settings.xml mirror or server entry keyed on the old id
    "mmlspark.azureedge.net/" must be updated to the new id.
  -->
  <repository>
    <id>mmlspark.azureedge.net</id>
    <name>mmlspark.azureedge.net/</name>
    <url>https://mmlspark.azureedge.net/maven</url>
  </repository>
  <repository>
    <id>spark-packages</id>
    <name>spark-packages</name>
    <url>https://repos.spark-packages.org</url>
  </repository>
</repositories>


<dependencies>

  <!-- ********************************************** -->
  <!-- Spark dependencies -->
  <!-- All Spark artifacts are "provided": they are on the classpath at runtime -->
  <!-- and must not be bundled into the shaded jar. -->
  <!-- ********************************************** -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_${scala.binary.version}</artifactId>
    <version>${apache.spark.version}</version>
    <exclusions>
      <exclusion>
        <groupId>org.apache.zookeeper</groupId>
        <artifactId>zookeeper</artifactId>
      </exclusion>
      <exclusion>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
      </exclusion>
      <exclusion>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
      </exclusion>
    </exclusions>
    <scope>provided</scope>
  </dependency>

  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_${scala.binary.version}</artifactId>
    <version>${apache.spark.version}</version>
    <scope>provided</scope>
  </dependency>

  <!-- Scala suffix now comes from scala.binary.version, like the other Spark artifacts -->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_${scala.binary.version}</artifactId>
    <version>${apache.spark.version}</version>
    <scope>provided</scope>
  </dependency>

  <!-- ********************************************** -->
  <!-- Rocket dependencies -->
  <!-- ********************************************** -->

  <dependency>
    <groupId>com.stratio.sparta</groupId>
    <artifactId>sdk-lite-xd</artifactId>
    <version>${rocket.version}</version>
    <scope>provided</scope>
  </dependency>

  <dependency>
    <groupId>com.stratio.crossdata</groupId>
    <artifactId>crossdata-core_${scala.binary.version}</artifactId>
    <version>${crossdata.version}</version>
    <scope>provided</scope>
  </dependency>

  <!-- NOTE(review): the two ML artifacts below are pinned to 3.1.0-SNAPSHOT while -->
  <!-- rocket.version is 3.0.0-SNAPSHOT; confirm the mismatch is intentional. -->
  <dependency>
    <groupId>com.stratio.sparta</groupId>
    <artifactId>rocket-ml-client</artifactId>
    <version>3.1.0-SNAPSHOT</version>
    <scope>provided</scope>
  </dependency>

  <dependency>
    <groupId>com.stratio.sparta</groupId>
    <artifactId>ml-pipeline-core</artifactId>
    <version>3.1.0-SNAPSHOT</version>
    <scope>provided</scope>
  </dependency>

  <!-- Uses rocket.version (same value as the literal it replaces) like sdk-lite-xd -->
  <dependency>
    <groupId>com.stratio.sparta</groupId>
    <artifactId>core</artifactId>
    <version>${rocket.version}</version>
    <scope>provided</scope>
  </dependency>

  <dependency>
    <groupId>org.json4s</groupId>
    <artifactId>json4s-core_${scala.binary.version}</artifactId>
    <version>3.6.10</version>
    <scope>provided</scope>
  </dependency>

  <!-- ********************************************** -->
  <!-- Microsoft SynapseMl -->
  <!-- The only compile-scope dependency: it is what ends up in the shaded jar. -->
  <!-- Scala and Spark artifacts it pulls in transitively are excluded here. -->
  <!-- ********************************************** -->

  <dependency>
    <groupId>com.microsoft.azure</groupId>
    <artifactId>synapseml_${scala.binary.version}</artifactId>
    <version>0.9.5-13-d1b51517-SNAPSHOT</version>
    <exclusions>
      <exclusion>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-reflect</artifactId>
      </exclusion>
      <exclusion>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-tags_${scala.binary.version}</artifactId>
      </exclusion>
      <exclusion>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-avro_${scala.binary.version}</artifactId>
      </exclusion>
      <exclusion>
        <groupId>org.scalatest</groupId>
        <artifactId>scalatest_${scala.binary.version}</artifactId>
      </exclusion>
      <exclusion>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
      </exclusion>
    </exclusions>
  </dependency>

  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-azure</artifactId>
    <version>3.3.1</version>
    <scope>provided</scope>
  </dependency>

</dependencies>

<build>
  <sourceDirectory>src/main/scala</sourceDirectory>
  <testSourceDirectory>src/test/scala</testSourceDirectory>
  <plugins>
    <!-- Compiles the Scala sources and produces scaladoc. -->
    <plugin>
      <groupId>net.alchim31.maven</groupId>
      <artifactId>scala-maven-plugin</artifactId>
      <!-- Pinned explicitly: this POM has no parent or pluginManagement, and an
           unversioned plugin makes Maven warn and resolve whatever is latest. -->
      <version>4.5.6</version>
      <configuration>
        <checkMultipleScalaVersions>false</checkMultipleScalaVersions>
        <recompileMode>incremental</recompileMode>
      </configuration>
      <executions>
        <execution>
          <goals>
            <goal>add-source</goal>
            <goal>compile</goal>
            <goal>testCompile</goal>
            <goal>doc</goal>
            <goal>doc-jar</goal>
          </goals>
        </execution>
      </executions>
    </plugin>

    <!-- Builds a single jar (rocket-synapseml.jar) that replaces the main artifact and
         bundles every non-provided dependency. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-shade-plugin</artifactId>
      <version>3.3.0</version>
      <executions>
        <execution>
          <phase>package</phase>
          <goals>
            <goal>shade</goal>
          </goals>
        </execution>
      </executions>
      <configuration>
        <finalName>rocket-synapseml</finalName>
        <shadedArtifactAttached>false</shadedArtifactAttached>
        <minimizeJar>false</minimizeJar>
        <createDependencyReducedPom>false</createDependencyReducedPom>
        <transformers>
          <!-- Merge META-INF/services descriptors instead of keeping only the first one
               found, so service providers from every bundled jar stay registered. -->
          <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
        </transformers>
        <filters>
          <filter>
            <artifact>*:*</artifact>
            <excludes>
              <!-- Signature files of signed jars are invalid once classes are repackaged -->
              <exclude>META-INF/*.SF</exclude>
              <exclude>META-INF/*.DSA</exclude>
              <exclude>META-INF/*.RSA</exclude>
              <!-- Logging configuration files are left out of the shaded jar -->
              <exclude>log4j2.xml</exclude>
              <exclude>log4j.properties</exclude>
              <exclude>logback.xml</exclude>
              <exclude>**/Log4j2Plugins.dat</exclude>
              <exclude>**/models/*.json</exclude>
            </excludes>
          </filter>
        </filters>
        <artifactSet>
          <excludes>
            <exclude>classworlds:classworlds</exclude>
            <exclude>junit:junit</exclude>
            <exclude>jmock:*</exclude>
            <exclude>*:xml-apis</exclude>
            <exclude>org.apache.maven:lib:tests</exclude>
            <exclude>com.fasterxml.jackson.*</exclude>
            <exclude>org.scala-lang</exclude>
            <exclude>org.slf4j</exclude>
          </excludes>
        </artifactSet>

      </configuration>
    </plugin>
  </plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
© 2017 Stratio Big Data Inc., Sucursal en España. All rights reserved.
This software – including all its source code – contains proprietary information of Stratio Big Data Inc., Sucursal en España and may not be revealed, sold, transferred, modified, distributed or otherwise made available, licensed or sublicensed to third parties; nor reverse engineered, disassembled or decompiled, without express written authorization from Stratio Big Data Inc., Sucursal en España.
Usage:
In order to use this XML file properly, the Log4j 2 Mapped Diagnostic Context (MDC) key placeholders must be replaced with the actual key names (the ones the project uses to set MDC property values)
Example:
Considering a project that sets MDC properties using these keys: [user, audit, process, data-json], these replacements must be executed over xml file
- sed -i -e 's/\[MDC_USER_KEY\]/user/g' log4j2.xml
- sed -i -e 's/\[MDC_AUDIT_KEY\]/audit/g' log4j2.xml
- sed -i -e 's/\[MDC_PROCESS_KEY\]/process/g' log4j2.xml
- sed -i -e 's/\[MDC_DATA-JSON_KEY\]/data-json/g' log4j2.xml

-->
<Configuration monitorInterval="30">
<Properties>
<!--
Reusable layout patterns referenced by the appenders through ${name} lookups.
The three "CentralizedLogging" patterns share the same prefix: timestamp, zone offset
(a bare "Z" is rewritten to "+00:00"), level, the three MDC values (an empty value is
replaced by "-", "0" and "-" respectively) and the logger name.
The [MDC_..._KEY] tokens are placeholders to be substituted before the file is used.
-->
<!-- Prefix followed by the message itself, with CR/LF characters encoded. -->
<Property name="formattedJsonPatternCentralizedLogging">
%d{yyyy-MM-dd'T'HH:mm:ss.SSS}%replace{%d{XXX}}{^Z$}{+00:00} %level %replace{%X{[MDC_USER_KEY]}}{^.{0}$}{-} %replace{%X{[MDC_AUDIT_KEY]}}{^.{0}$}{0} %replace{%X{[MDC_PROCESS_KEY]}}{^.{0}$}{-} %c %encode{%m}{CRLF}%n
</Property>
<!-- Prefix followed by a JSON envelope: fixed @message text, the message placed under @data, and the JSON-encoded throwable under @exception. -->
<Property name="unformattedJsonPatternCentralizedLogging">
%d{yyyy-MM-dd'T'HH:mm:ss.SSS}%replace{%d{XXX}}{^Z$}{+00:00} %level %replace{%X{[MDC_USER_KEY]}}{^.{0}$}{-} %replace{%X{[MDC_AUDIT_KEY]}}{^.{0}$}{0} %replace{%X{[MDC_PROCESS_KEY]}}{^.{0}$}{-} %c {"@message":"Unformatted message. Provided log has been added to @data","@data": %encode{%m}{CRLF},"@exception":"%enc{%throwable}{JSON}"}%n
</Property>
<!-- Prefix followed by a JSON envelope: the JSON-encoded message under @message, the data-json MDC value (or {} when empty) under @data, and the JSON-encoded throwable under @exception. -->
<Property name="commonPatternCentralizedLogging">
%d{yyyy-MM-dd'T'HH:mm:ss.SSS}%replace{%d{XXX}}{^Z$}{+00:00} %level %replace{%X{[MDC_USER_KEY]}}{^.{0}$}{-} %replace{%X{[MDC_AUDIT_KEY]}}{^.{0}$}{0} %replace{%X{[MDC_PROCESS_KEY]}}{^.{0}$}{-} %c {"@message":"%enc{%m}{JSON}","@data":%replace{%X{[MDC_DATA-JSON_KEY]}}{^.{0}$}{{}},"@exception":"%enc{%throwable}{JSON}"}%n
</Property>
<!-- Pipe-separated pattern used by the "Development" appenders: level, date, logger name, message with CR/LF encoded. -->
<Property name="basePattern">
%level | %d{dd-MM-yyyy HH:mm:ss,SSS} | %c | %encode{%m}{CRLF}%n
</Property>
</Properties>
<Scripts>
<!--
Pattern selector script referenced by the JSON console appenders.
It evaluates to one of three keys, matched against the PatternMatch entries:
  "FormattedJSON"   : the formatted message parses as a JSON object (non-null) that has an "@message" property
  "UnformattedJSON" : the message parses to any other non-null JSON object (arrays included, since typeof reports "object")
  "Common"          : the message is not valid JSON, parses to a primitive or null, or any error is thrown
The last expression ("result;") is the value of the script.
NOTE(review): this needs a JavaScript script engine at runtime, and recent Log4j versions
may require script languages to be enabled explicitly; confirm for the target runtime.
-->
<Script name="messageSelectorCentralizedLogging" language="javascript"><![CDATA[
var result = "Common";
try {
var originalMessage = logEvent.getMessage().getFormattedMessage();
var stringMessage = typeof originalMessage !== "string" ? JSON.stringify(originalMessage) : originalMessage;
var message = JSON.parse(stringMessage);
if (typeof message === "object" && message !== null) {
if (message.hasOwnProperty("@message")) {
result = "FormattedJSON";
} else {
result = "UnformattedJSON";
}
}
} catch (e) {
result = "Common";
}
result;
]]>
</Script>
</Scripts>
<Appenders>
  <!--
    Console appenders. Each stream has a ThresholdFilter so that ERROR and above
    go to stderr and everything below ERROR goes to stdout.
    When a layout uses a pattern selector, the selector builds the formatters, so
    alwaysWriteExceptions has to be set on the ScriptPatternSelector to take effect.
  -->

  <!-- JSON layout on stderr: ERROR and above only -->
  <Console name="stderrCentralizedJSONLogging" target="SYSTEM_ERR">
    <PatternLayout>
      <ScriptPatternSelector alwaysWriteExceptions="false">
        <ScriptRef ref="messageSelectorCentralizedLogging"/>
        <PatternMatch key="FormattedJSON">
          <Pattern>${formattedJsonPatternCentralizedLogging}</Pattern>
        </PatternMatch>
        <PatternMatch key="UnformattedJSON">
          <Pattern>${unformattedJsonPatternCentralizedLogging}</Pattern>
        </PatternMatch>
        <PatternMatch key="Common">
          <Pattern>${commonPatternCentralizedLogging}</Pattern>
        </PatternMatch>
      </ScriptPatternSelector>
    </PatternLayout>
    <ThresholdFilter level="ERROR" onMatch="ACCEPT" onMismatch="DENY"/>
  </Console>

  <!-- JSON layout on stdout: everything below ERROR.
       alwaysWriteExceptions was moved from PatternLayout to the selector so that it
       is honoured, matching the stderr appender above. -->
  <Console name="stdoutCentralizedJSONLogging" target="SYSTEM_OUT">
    <PatternLayout>
      <ScriptPatternSelector alwaysWriteExceptions="false">
        <ScriptRef ref="messageSelectorCentralizedLogging"/>
        <PatternMatch key="FormattedJSON">
          <Pattern>${formattedJsonPatternCentralizedLogging}</Pattern>
        </PatternMatch>
        <PatternMatch key="UnformattedJSON">
          <Pattern>${unformattedJsonPatternCentralizedLogging}</Pattern>
        </PatternMatch>
        <PatternMatch key="Common">
          <Pattern>${commonPatternCentralizedLogging}</Pattern>
        </PatternMatch>
      </ScriptPatternSelector>
    </PatternLayout>
    <ThresholdFilter level="ERROR" onMatch="DENY" onMismatch="ACCEPT"/>
  </Console>

  <!-- Fixed (non-scripted) pattern on stdout; not referenced by any logger in this file -->
  <Console name="stdoutCentralizedLogging" target="SYSTEM_OUT">
    <PatternLayout alwaysWriteExceptions="false">
      <Pattern>${formattedJsonPatternCentralizedLogging}</Pattern>
    </PatternLayout>
    <ThresholdFilter level="ERROR" onMatch="DENY" onMismatch="ACCEPT"/>
  </Console>

  <!-- Plain pipe-separated layout for development; not referenced by any logger in this file -->
  <Console name="stderrDevelopmentLogging" target="SYSTEM_ERR">
    <PatternLayout alwaysWriteExceptions="true">
      <Pattern>${basePattern}</Pattern>
    </PatternLayout>
    <ThresholdFilter level="ERROR" onMatch="ACCEPT" onMismatch="DENY"/>
  </Console>
  <Console name="stdoutDevelopmentLogging" target="SYSTEM_OUT">
    <PatternLayout alwaysWriteExceptions="true">
      <Pattern>${basePattern}</Pattern>
    </PatternLayout>
    <ThresholdFilter level="ERROR" onMatch="DENY" onMismatch="ACCEPT"/>
  </Console>

  <!-- Asynchronous wrapper around the stdout JSON appender -->
  <Async name="stdoutAsync">
    <AppenderRef ref="stdoutCentralizedJSONLogging"/>
  </Async>
</Appenders>
<Loggers>
  <!-- Stratio packages: level taken from SPARTA_LOG_LEVEL, INFO when the variable is unset -->
  <Logger level="${env:SPARTA_LOG_LEVEL:-INFO}" name="com.stratio.sparta"/>
  <Logger level="${env:SPARTA_LOG_LEVEL:-INFO}" name="com.stratio.rocket"/>

  <!-- Everything else: level taken from SERVICE_LOG_LEVEL, INFO when unset.
       Output goes to the async stdout appender and to the stderr JSON appender. -->
  <Root level="${env:SERVICE_LOG_LEVEL:-INFO}">
    <AppenderRef ref="stdoutAsync"/>
    <AppenderRef ref="stderrCentralizedJSONLogging"/>
  </Root>
</Loggers>
</Configuration>
Loading