Skip to content

Commit

Permalink
[wasm-metadata] parse OCI author custom section (bytecodealliance#1925)
Browse files Browse the repository at this point in the history
* init OCI author parsing

* add serialization

* fix tests

* fix json formatting test

* add author parsing
  • Loading branch information
yoshuawuyts authored Dec 4, 2024
1 parent 48cc636 commit b2e621d
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 10 deletions.
2 changes: 1 addition & 1 deletion crates/wasm-encoder/src/core/custom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::borrow::Cow;
use crate::{encoding_size, Encode, Section, SectionId};

/// A custom section holding arbitrary data.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub struct CustomSection<'a> {
/// The name of this custom section.
pub name: Cow<'a, str>,
Expand Down
8 changes: 7 additions & 1 deletion crates/wasm-metadata/src/add_metadata.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{rewrite_wasm, Producers, RegistryMetadata};
use crate::{rewrite_wasm, Author, Producers, RegistryMetadata};

use anyhow::Result;

Expand All @@ -25,6 +25,11 @@ pub struct AddMetadata {
#[cfg_attr(feature="clap", clap(long, value_parser = parse_key_value, value_name="NAME=VERSION"))]
pub sdk: Vec<(String, String)>,

/// Contact details of the people or organization responsible,
/// encoded as a freeform string.
#[cfg_attr(feature = "clap", clap(long, value_name = "NAME"))]
pub author: Option<Author>,

/// Add registry metadata to the registry-metadata section
#[cfg_attr(feature="clap", clap(long, value_parser = parse_registry_metadata_value, value_name="PATH"))]
pub registry_metadata: Option<RegistryMetadata>,
Expand Down Expand Up @@ -54,6 +59,7 @@ impl AddMetadata {
rewrite_wasm(
&self.name,
&Producers::from_meta(self),
&self.author,
self.registry_metadata.as_ref(),
input,
)
Expand Down
2 changes: 2 additions & 0 deletions crates/wasm-metadata/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
pub use add_metadata::AddMetadata;
pub use metadata::Metadata;
pub use names::{ComponentNames, ModuleNames};
pub use oci_annotations::Author;
pub use producers::{Producers, ProducersField};
pub use registry::{CustomLicense, Link, LinkType, RegistryMetadata};

Expand All @@ -13,6 +14,7 @@ pub(crate) use rewrite::rewrite_wasm;
mod add_metadata;
mod metadata;
mod names;
mod oci_annotations;
mod producers;
mod registry;
mod rewrite;
Expand Down
15 changes: 14 additions & 1 deletion crates/wasm-metadata/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::fmt;
use std::ops::Range;
use wasmparser::{KnownCustom, Parser, Payload::*};

use crate::{ComponentNames, ModuleNames, Producers, RegistryMetadata};
use crate::{Author, ComponentNames, ModuleNames, Producers, RegistryMetadata};

/// A tree of the metadata found in a WebAssembly binary.
#[derive(Debug, Serialize)]
Expand All @@ -18,6 +18,8 @@ pub enum Metadata {
producers: Option<Producers>,
/// The component's registry metadata section, if any.
registry_metadata: Option<RegistryMetadata>,
/// The component's author section, if any.
author: Option<Author>,
/// All child modules and components inside the component.
children: Vec<Box<Metadata>>,
/// Byte range of the module in the parent binary
Expand All @@ -31,6 +33,8 @@ pub enum Metadata {
producers: Option<Producers>,
/// The module's registry metadata section, if any.
registry_metadata: Option<RegistryMetadata>,
/// The module's author section, if any.
author: Option<Author>,
/// Byte range of the module in the parent binary
range: Range<usize>,
},
Expand Down Expand Up @@ -106,6 +110,13 @@ impl Metadata {
.expect("non-empty metadata stack")
.set_registry_metadata(registry);
}
KnownCustom::Unknown if c.name() == "author" => {
let a = Author::parse_custom_section(&c)?;
match metadata.last_mut().expect("non-empty metadata stack") {
Metadata::Module { author, .. } => *author = Some(a),
Metadata::Component { author, .. } => *author = Some(a),
}
}
_ => {}
},
_ => {}
Expand All @@ -120,6 +131,7 @@ impl Metadata {
Metadata::Component {
name: None,
producers: None,
author: None,
registry_metadata: None,
children: Vec::new(),
range,
Expand All @@ -130,6 +142,7 @@ impl Metadata {
Metadata::Module {
name: None,
producers: None,
author: None,
registry_metadata: None,
range,
}
Expand Down
111 changes: 111 additions & 0 deletions crates/wasm-metadata/src/oci_annotations/author.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
use std::borrow::Cow;
use std::fmt::{self, Display};
use std::str::FromStr;

use anyhow::{ensure, Error, Result};
use serde::Serialize;
use wasm_encoder::{ComponentSection, CustomSection, Encode, Section};
use wasmparser::CustomSectionReader;

/// Contact details of the people or organization responsible,
/// encoded as a freeform string.
///
/// This is a newtype around a [`CustomSection`] whose name is `"author"` and
/// whose payload is the bytes of the author string. The wrapped section is
/// private; values are built through [`Author::new`], which only accepts
/// string input.
#[derive(Debug, Clone, PartialEq)]
pub struct Author(CustomSection<'static>);

impl Author {
    /// Create a new instance of `Author`.
    ///
    /// Accepts anything convertible into a `Cow<'static, str>`. A borrowed
    /// `&'static str` is stored without copying; an owned `String` is moved
    /// into the section payload as its underlying bytes.
    pub fn new<S: Into<Cow<'static, str>>>(s: S) -> Self {
        let data: Cow<'static, [u8]> = match s.into() {
            Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
            Cow::Owned(text) => Cow::Owned(text.into_bytes()),
        };
        let name = Cow::Borrowed("author");
        Self(CustomSection { name, data })
    }

    /// Parse an `author` custom section from a wasm binary.
    ///
    /// # Errors
    ///
    /// Returns an error if the section is not named `author`, or if its
    /// payload is not valid UTF-8.
    pub(crate) fn parse_custom_section(reader: &CustomSectionReader<'_>) -> Result<Self> {
        ensure!(
            reader.name() == "author",
            "The `author` custom section should have a name of 'author'"
        );
        // Copy the payload out of the reader so the result can be `'static`.
        let bytes = reader.data().to_vec();
        let text = String::from_utf8(bytes)?;
        Ok(Self::new(text))
    }
}

impl FromStr for Author {
type Err = Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self::new(s.to_owned()))
}
}

impl Serialize for Author {
    /// Serialize the author as a plain string, using its `Display` output.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // `collect_str` lets the serializer consume the `Display` output
        // directly instead of us building a temporary `String` first.
        serializer.collect_str(self)
    }
}

impl Display for Author {
    /// Write the author string verbatim.
    ///
    /// # Panics
    ///
    /// Panics if the stored bytes are not valid UTF-8. This cannot happen for
    /// values built through `Author::new`, which only accepts string input.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Borrow the payload as `&str` rather than copying it into a fresh
        // `String`; the data is stored as `[u8]` but always originates from a
        // string.
        let data = std::str::from_utf8(&self.0.data)
            .expect("`Author` data is always constructed from a valid UTF-8 string");
        f.write_str(data)
    }
}

impl ComponentSection for Author {
    /// Report the component section id of the wrapped custom section.
    fn id(&self) -> u8 {
        let Self(inner) = self;
        ComponentSection::id(inner)
    }
}

impl Section for Author {
    /// Report the module section id of the wrapped custom section.
    fn id(&self) -> u8 {
        let Self(inner) = self;
        Section::id(inner)
    }
}

impl Encode for Author {
    /// Encode the wrapped custom section into `sink`.
    fn encode(&self, sink: &mut Vec<u8>) {
        let Self(inner) = self;
        inner.encode(sink);
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use wasm_encoder::Component;
    use wasmparser::{Parser, Payload};

    /// An `Author` written into a component can be parsed back from the
    /// encoded bytes with its text intact.
    #[test]
    fn roundtrip() {
        let mut builder = Component::new();
        builder.section(&Author::new("Nori Cat"));
        let bytes = builder.finish();

        // Tracks whether at least one custom section was seen and checked.
        let mut found = false;
        for payload in Parser::new(0).parse_all(&bytes) {
            if let Payload::CustomSection(reader) = payload.unwrap() {
                let decoded = Author::parse_custom_section(&reader).unwrap();
                assert_eq!(decoded.to_string(), "Nori Cat");
                found = true;
            }
        }
        assert!(found);
    }

    /// An `Author` serializes to a bare JSON string.
    #[test]
    fn serialize() {
        let cat = Author::new("Chashu Cat");
        let encoded = serde_json::to_string(&cat).unwrap();
        assert_eq!(r#""Chashu Cat""#, encoded);
    }
}
20 changes: 20 additions & 0 deletions crates/wasm-metadata/src/oci_annotations/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//! Annotations following the [OCI Annotations Spec].
//!
//! The fields of these annotations are encoded into custom sections of
//! component binaries, and are explicitly compatible with the OCI Annotations
//! Spec. That enables Components to be encoded to OCI and back without needing
//! to perform any additional parsing. This greatly simplifies adding metadata to
//! component registries, since language-native component toolchains can encode them
//! directly into components. Component-to-OCI tooling can in turn pick up those
//! annotations and display them in a way that registries can understand.
//!
//! For the files in this submodule that means we want to be explicitly
//! compatible with the OCI Annotations specification. Any deviation in our
//! parsing rules from the spec should be considered a bug we have to fix.
//!
//! [OCI Annotations Spec]: https://specs.opencontainers.org/image-spec/annotations/
pub use author::Author;

mod author;
2 changes: 1 addition & 1 deletion crates/wasm-metadata/src/producers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ impl Producers {
/// Merge into an existing wasm module. Rewrites the module with this producers section
/// merged into its existing one, or adds this producers section if none is present.
pub fn add_to_wasm(&self, input: &[u8]) -> Result<Vec<u8>> {
rewrite_wasm(&None, self, None, input)
rewrite_wasm(&None, self, &None, None, input)
}

pub(crate) fn display(&self, f: &mut fmt::Formatter, indent: usize) -> fmt::Result {
Expand Down
2 changes: 1 addition & 1 deletion crates/wasm-metadata/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl RegistryMetadata {
/// Merge into an existing wasm module. Rewrites the module with this registry-metadata section
/// overwriting its existing one, or adds this registry-metadata section if none is present.
pub fn add_to_wasm(&self, input: &[u8]) -> Result<Vec<u8>> {
rewrite_wasm(&None, &Producers::empty(), Some(&self), input)
rewrite_wasm(&None, &Producers::empty(), &None, Some(&self), input)
}

/// Parse a Wasm binary and extract the `Registry` section, if there is any.
Expand Down
13 changes: 12 additions & 1 deletion crates/wasm-metadata/src/rewrite.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{ComponentNames, ModuleNames, Producers, RegistryMetadata};
use crate::{Author, ComponentNames, ModuleNames, Producers, RegistryMetadata};
use anyhow::Result;
use std::borrow::Cow;
use std::mem;
Expand All @@ -9,6 +9,7 @@ use wasmparser::{KnownCustom, Parser, Payload::*};
pub(crate) fn rewrite_wasm(
add_name: &Option<String>,
add_producers: &Producers,
add_author: &Option<Author>,
add_registry_metadata: Option<&RegistryMetadata>,
input: &[u8],
) -> Result<Vec<u8>> {
Expand Down Expand Up @@ -90,6 +91,13 @@ pub(crate) fn rewrite_wasm(
continue;
}
}
KnownCustom::Unknown if c.name() == "author" => {
if add_author.is_none() {
let author = Author::parse_custom_section(c)?;
author.append_to(&mut output);
continue;
}
}
_ => {}
}
}
Expand Down Expand Up @@ -119,6 +127,9 @@ pub(crate) fn rewrite_wasm(
// Encode into output:
producers.section().append_to(&mut output);
}
if let Some(author) = add_author {
author.append_to(&mut output);
}
if add_registry_metadata.is_some() {
let registry_metadata = wasm_encoder::CustomSection {
name: Cow::Borrowed("registry-metadata"),
Expand Down
14 changes: 11 additions & 3 deletions crates/wasm-metadata/tests/component.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ fn add_to_empty_component() {
language: vec![("bar".to_owned(), "1.0".to_owned())],
processed_by: vec![("baz".to_owned(), "1.0".to_owned())],
sdk: vec![],
author: Some(Author::new("Chashu Cat")),
registry_metadata: Some(RegistryMetadata {
authors: Some(vec!["foo".to_owned()]),
description: Some("foo bar baz".to_owned()),
Expand Down Expand Up @@ -42,6 +43,7 @@ fn add_to_empty_component() {
name,
producers,
registry_metadata,
author,
children,
range,
} => {
Expand All @@ -57,6 +59,8 @@ fn add_to_empty_component() {
"1.0"
);

assert_eq!(author.unwrap(), Author::new("Chashu Cat"));

let registry_metadata = registry_metadata.unwrap();

assert!(registry_metadata.validate().is_ok());
Expand Down Expand Up @@ -99,7 +103,7 @@ fn add_to_empty_component() {
);

assert_eq!(range.start, 0);
assert_eq!(range.end, 435);
assert_eq!(range.end, 454);
}
_ => panic!("metadata should be component"),
}
Expand All @@ -114,6 +118,7 @@ fn add_to_nested_component() {
language: vec![("bar".to_owned(), "1.0".to_owned())],
processed_by: vec![("baz".to_owned(), "1.0".to_owned())],
sdk: vec![],
author: Some(Author::new("Chashu Cat")),
registry_metadata: Some(RegistryMetadata {
authors: Some(vec!["Foo".to_owned()]),
..Default::default()
Expand Down Expand Up @@ -159,6 +164,7 @@ fn add_to_nested_component() {
Metadata::Module {
name,
producers,
author,
registry_metadata,
range,
} => {
Expand All @@ -173,14 +179,16 @@ fn add_to_nested_component() {
"1.0"
);

assert_eq!(author, &Some(Author::new("Chashu Cat")));

let registry_metadata = registry_metadata.as_ref().unwrap();
assert_eq!(
registry_metadata.authors.as_ref().unwrap(),
&["Foo".to_owned()]
);

assert_eq!(range.start, 10);
assert_eq!(range.end, 123);
assert_eq!(range.start, 11);
assert_eq!(range.end, 143);
}
_ => panic!("child is a module"),
}
Expand Down
6 changes: 5 additions & 1 deletion crates/wasm-metadata/tests/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ fn add_to_empty_module() {
language: vec![("bar".to_owned(), "1.0".to_owned())],
processed_by: vec![("baz".to_owned(), "1.0".to_owned())],
sdk: vec![],
author: Some(Author::new("Chashu Cat")),
registry_metadata: Some(RegistryMetadata {
authors: Some(vec!["foo".to_owned()]),
description: Some("foo bar baz".to_owned()),
Expand Down Expand Up @@ -41,6 +42,7 @@ fn add_to_empty_module() {
Metadata::Module {
name,
producers,
author,
registry_metadata,
range,
} => {
Expand All @@ -55,6 +57,8 @@ fn add_to_empty_module() {
"1.0"
);

assert_eq!(author.unwrap(), Author::new("Chashu Cat"));

let registry_metadata = registry_metadata.unwrap();

assert!(registry_metadata.validate().is_ok());
Expand Down Expand Up @@ -97,7 +101,7 @@ fn add_to_empty_module() {
);

assert_eq!(range.start, 0);
assert_eq!(range.end, 425);
assert_eq!(range.end, 444);
}
_ => panic!("metadata should be module"),
}
Expand Down

0 comments on commit b2e621d

Please sign in to comment.