Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/sina/hdf5 output support #1480

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 220 additions & 41 deletions src/axom/sina/core/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#include <utility>
#include <sstream>
#include <stdexcept>
#include <algorithm>
#include "conduit.hpp"
#include "conduit_relay.hpp"
#include "conduit_relay_io.hpp"

namespace axom
{
Expand All @@ -33,7 +37,124 @@ namespace
char const RECORDS_KEY[] = "records";
char const RELATIONSHIPS_KEY[] = "relationships";
char const SAVE_TMP_FILE_EXTENSION[] = ".sina.tmp";
} // namespace
}

void protocol_warn(std::string protocol, std::string const &name) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function name should be camel case protocolWarn instead (see here). Also, protocol can probably be a const reference here like name is.

size_t pos = name.rfind('.');

if (pos != std::string::npos) {
std::string found = name.substr(pos+1);

if (("." + found) != protocol && protocol == ".json") {
std::cout << ".json extension not found, did you mean to save to this format?";
} else if (("." + found) != protocol && protocol == ".hdf5") {
std::cout << ".hdf5 extension not found, did you use one of its other supported types? (h5, hdf, ...)";
} else {
return;
}
} else {
std::cout << "No file extension found, did you mean to use one of " << protocol << "'s supported types?";
}
}
Comment on lines +43 to +58
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can reduce redundancy in if/else chain if we use a map in this function instead:

// Define a mapping of protocols to their warning messages
std::unordered_map<std::string, std::string> protocolMessages = {
    {".json", ".json extension not found, did you mean to save to this format?"},
    {".hdf5", ".hdf5 extension not found, did you use one of its other supported types? (h5, hdf, ...)"}
};

// Check that the file name has a file extension
size_t pos = name.rfind('.');
if (pos != std::string::npos) {
    std::string found = name.substr(pos);
    
    // Check if the found extension matches the expected protocol
    if (found != protocol) {
        auto messageIt = protocolMessages.find(protocol);
        if (messageIt != protocolMessages.end()) {
            std::cout << messageIt->second << std::endl;
        }
    }
} else {
    std::cout << "No file extension found, did you mean to use one of " 
              << protocol << "'s supported types?" << std::endl;
}

This might make it easier to add support for new file extensions in the future as well.


void removeSlashes(const conduit::Node& originalNode, conduit::Node& modifiedNode)
{
for (auto it = originalNode.children(); it.has_next();)
{
it.next();
std::string key = it.name();
std::string modifiedKey = key;

std::string toReplace = "/";

size_t pos = 0;
// Find and replace all occurrences of "/"
while ((pos = modifiedKey.find(toReplace, pos)) != std::string::npos) {
modifiedKey.replace(pos, toReplace.length(), slashSubstitute);
pos += slashSubstitute.length(); // Move past the replaced substring
}

modifiedNode[modifiedKey] = it.node();

if (it.node().dtype().is_object())
{
conduit::Node nestedNode;
removeSlashes(it.node(), nestedNode);
modifiedNode[modifiedKey].set(nestedNode);
}
}
}

void restoreSlashes(const conduit::Node& modifiedNode, conduit::Node& restoredNode)
{
// Check if List or Object, if its a list the else statement would turn it into an object
// which breaks the Document

if (modifiedNode.dtype().is_list())
{
// If its empty with no children it's the end of a tree

for (auto it = modifiedNode.children(); it.has_next();)
{
it.next();
conduit::Node& newChild = restoredNode.append();

// Leaves empty nodes empty, if null data is set the
// Document breaks

if (it.node().dtype().is_string() || it.node().dtype().is_number())
{
newChild.set(it.node()); // Lists need .set
}

// Recursive Call
if (it.node().number_of_children() > 0)
{
restoreSlashes(it.node(), newChild);
}
}
}
else
{
for (auto it = modifiedNode.children(); it.has_next();)
{
it.next();
std::string key = it.name();
std::string restoredKey = key;
std::string replacement = "/";

size_t pos = 0;
// Find and replace all occurrences of "__SLASH__"

while ((pos = restoredKey.find(slashSubstitute, pos)) != std::string::npos) {
restoredKey.replace(pos, toReplace.length(), replacement);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

toReplace does not exist within the scope of this function. Should this be slashSubstitute instead?

pos += replacement.length();
}


// Initialize a new node for the restored key
conduit::Node& newChild = restoredNode.add_child(restoredKey);

// Leaves empty keys empty but continues recursive call if its a list
if (it.node().dtype().is_string() || it.node().dtype().is_number() || it.node().dtype().is_object())
{
newChild.set(it.node());
}
else if (it.node().dtype().is_list())
{
restoreSlashes(it.node(), newChild); // Handle nested lists
}

// If the node has children, recursively restore them
if (it.node().number_of_children() > 0)
{
conduit::Node nestedNode;
restoreSlashes(it.node(), nestedNode);
newChild.set(nestedNode);
}
}
}
}

void Document::add(std::unique_ptr<Record> record)
{
Expand Down Expand Up @@ -87,26 +208,27 @@ void Document::createFromNode(conduit::Node const &asNode,
}
}

if(asNode.has_child(RELATIONSHIPS_KEY))
{
conduit::Node relationship_nodes = asNode[RELATIONSHIPS_KEY];
if(relationship_nodes.dtype().is_list())
{
auto relationshipsIter = relationship_nodes.children();
while(relationshipsIter.has_next())
{
auto &relationship = relationshipsIter.next();
add(Relationship {relationship});
}
}
else
if (asNode.has_child(RELATIONSHIPS_KEY))
{
std::ostringstream message;
message << "The '" << RELATIONSHIPS_KEY
<< "' element of a document must be an array";
throw std::invalid_argument(message.str());
conduit::Node relationship_nodes = asNode[RELATIONSHIPS_KEY];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be a ref to avoid a copy

if (relationship_nodes.number_of_children() == 0)
{
relationship_nodes.set(conduit::DataType::list());
}
Comment on lines +214 to +217
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be adding in this same number of children check for Records as well? If so, maybe a refactor of this entire method would help reduce redundant code:

void Document::createFromNode(conduit::Node const &asNode,
                              RecordLoader const &recordLoader)
{
    auto processChildNodes = [&](const char* key, std::function<void(conduit::Node&)> addFunc) {
        if (asNode.has_child(key)) {
            conduit::Node& childNodes = asNode[key];
            if (childNodes.number_of_children() == 0) {
                childNodes.set(conduit::DataType::list());
            }
            if (!childNodes.dtype().is_list()) {
                std::ostringstream message;
                message << "The '" << key << "' element of a document must be an array";
                throw std::invalid_argument(message.str());
            }

            auto childIter = childNodes.children();
            while (childIter.has_next()) {
                auto child = childIter.next();
                addFunc(child);
            }
        }
    };

    processChildNodes(RECORDS_KEY, [&](conduit::Node& record) {
        add(recordLoader.load(record));
    });

    processChildNodes(RELATIONSHIPS_KEY, [&](conduit::Node& relationship) {
        add(Relationship{relationship});
    });
}

(This is untested so I'm not sure if it'd actually compile).

else if (!relationship_nodes.dtype().is_list())
{
std::ostringstream message;
message << "The '" << RELATIONSHIPS_KEY << "' element of a document must be an array";
throw std::invalid_argument(message.str());
}

auto relationshipsIter = relationship_nodes.children();
while (relationshipsIter.has_next())
{
auto &relationship = relationshipsIter.next();
add(Relationship{relationship});
}
}
}
}

Document::Document(conduit::Node const &asNode, RecordLoader const &recordLoader)
Expand All @@ -121,6 +243,37 @@ Document::Document(std::string const &asJson, RecordLoader const &recordLoader)
this->createFromNode(asNode, recordLoader);
}

void Document::toHDF5(const std::string &filename) const
{
conduit::Node node;
conduit::Node &recordsNode = node["records"];
conduit::Node &relationshipsNode = node["relationships"];

for (const auto& record : getRecords())
{
conduit::Node recordNode = record->toNode();
conduit::Node modifiedRecordNode;

removeSlashes(recordNode, modifiedRecordNode);

recordsNode.append() = modifiedRecordNode;
}

// Process relationships
for (const auto& relationship : getRelationships())
{
conduit::Node relationshipNode = relationship.toNode();
conduit::Node modifiedRelationshipsNode;

removeSlashes(relationshipNode, modifiedRelationshipsNode);
relationshipsNode.append() = modifiedRelationshipsNode;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could pass ref to append result into removeSlashes to avoid one extra copy.

}

conduit::relay::io::save(node, filename, "hdf5");
}

//

std::string Document::toJson(conduit::index_t indent,
conduit::index_t depth,
const std::string &pad,
Expand All @@ -129,7 +282,7 @@ std::string Document::toJson(conduit::index_t indent,
return this->toNode().to_json("json", indent, depth, pad, eoe);
}

void saveDocument(Document const &document, std::string const &fileName)
void saveDocument(Document const &document, std::string const &fileName, Protocol protocol)
{
// It is a common use case for users to want to overwrite their files as
// the simulation progresses. However, this operation should be atomic so
Expand All @@ -138,37 +291,63 @@ void saveDocument(Document const &document, std::string const &fileName)
// file is in the same directory to ensure that it is part of the same
// file system as the destination file so that the move operation is
// atomic.

std::string tmpFileName = fileName + SAVE_TMP_FILE_EXTENSION;
auto asJson = document.toJson();
std::ofstream fout {tmpFileName};
fout.exceptions(std::ostream::failbit | std::ostream::badbit);
fout << asJson;
fout.close();

if(rename(tmpFileName.c_str(), fileName.c_str()) != 0)
if (protocol == Protocol::JSON)
{
std::string message {"Could not save to '"};
message += fileName;
message += "'";
throw std::ios::failure {message};
protocol_warn(".json", fileName);
auto asJson = document.toJson();
std::ofstream fout {tmpFileName};
fout.exceptions(std::ostream::failbit | std::ostream::badbit);
fout << asJson;
fout.close();
}
else if (protocol == Protocol::HDF5)
{
protocol_warn(".hdf5", fileName);
document.toHDF5(tmpFileName);
}
else
{
throw std::invalid_argument("Invalid format choice. Please enter 'json' or 'hdf5'.");
}
Comment on lines +297 to +314
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might consider porting this to switch rather than if/else


if (rename(tmpFileName.c_str(), fileName.c_str()) != 0)
{
std::string message {"Could not save to '"};
message += fileName;
message += "'";
throw std::ios::failure {message};
}
}

Document loadDocument(std::string const &path)
Document loadDocument(std::string const &path, Protocol protocol)
{
return loadDocument(path, createRecordLoaderWithAllKnownTypes());
return loadDocument(path, createRecordLoaderWithAllKnownTypes(), protocol);
}

Document loadDocument(std::string const &path, RecordLoader const &recordLoader)
Document loadDocument(std::string const &path, RecordLoader const &recordLoader, Protocol protocol)
{
conduit::Node nodeFromJson;
std::ifstream file_in {path};
std::ostringstream file_contents;
file_contents << file_in.rdbuf();
file_in.close();
nodeFromJson.parse(file_contents.str(), "json");
return Document {nodeFromJson, recordLoader};
conduit::Node node;

// Load the file depending on the protocol
if (protocol == Protocol::JSON) {
std::ifstream file_in {path};
std::ostringstream file_contents;
file_contents << file_in.rdbuf();
file_in.close();
node.parse(file_contents.str(), "json");
return Document {node, recordLoader};
} else if (protocol == Protocol::HDF5) {
conduit::Node modifiedNode;
conduit::relay::io::load(path, "hdf5", node);
restoreSlashes(node, modifiedNode);
return Document {modifiedNode, recordLoader};
}
Comment on lines +335 to +347
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to saveDocument we might consider using a switch statement here as well.


// Finally, use the node to create the Document object (existing logic)
}

} // namespace sina
} // namespace axom
} // namespace axom
Loading
Loading