Skip to content

Commit

Permalink
fix: 392: PBJ codecs to provide access to raw field bytes during pars…
Browse files Browse the repository at this point in the history
…ing (#396)

Fixes: #392
Reviewed-by: Anthony Petrov <[email protected]>, Ivan Malygin <[email protected]>
Signed-off-by: Artem Ananev <[email protected]>
  • Loading branch information
artemananiev authored Feb 18, 2025
1 parent c189321 commit 2ed3662
Show file tree
Hide file tree
Showing 3 changed files with 252 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import com.hedera.pbj.runtime.io.ReadableSequentialData;
import com.hedera.pbj.runtime.io.buffer.Bytes;
import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
Expand All @@ -15,6 +17,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
* This class is full of parse helper methods, they depend on a DataInput as input with position and limit set
Expand Down Expand Up @@ -305,6 +308,64 @@ public static Bytes readBytes(final ReadableSequentialData input, final long max
return bytes;
}

/**
 * Reads a requested length-delimited protobuf field from the input and returns it as a
 * {@link Bytes} object.
 *
 * <p>The input must contain valid protobuf encoded bytes. If the field is not found in
 * the input, {@code null} is returned. If the field occurs multiple times in the input,
 * bytes for the first occurrence are returned.
 *
 * <p>The returned Bytes object, if not null, will not contain the tag or the length.
 *
 * @param input The input to read from
 * @param field Field definition to extract bytes for
 * @return Field bytes without tag or length, or {@code null} if the field is not found
 *      in the input
 * @throws NullPointerException If {@code input} or {@code field} is {@code null}
 * @throws IllegalArgumentException If the requested field is repeated or is not
 *      length-delimited
 * @throws IOException If an I/O error occurred
 * @throws ParseException If there is a mismatch between the requested field and the field
 *      in the input with the same field ID
 */
@Nullable
public static Bytes extractFieldBytes(
        @NonNull final ReadableSequentialData input, @NonNull final FieldDefinition field)
        throws IOException, ParseException {
    Objects.requireNonNull(input, "input");
    Objects.requireNonNull(field, "field");
    if (field.repeated()) {
        throw new IllegalArgumentException("Cannot extract field bytes for a repeated field: " + field);
    }
    if (ProtoWriterTools.wireType(field) != ProtoConstants.WIRE_TYPE_DELIMITED) {
        throw new IllegalArgumentException("Cannot extract field bytes for a non-length-delimited field: " + field);
    }
    while (input.hasRemaining()) {
        final int tag;
        // hasRemaining() doesn't work very well for streaming data, it returns false only when
        // the end of input is already reached using a read operation. Let's catch an underflow
        // (actually, EOF) exception here and exit cleanly. Underflow exception in any other
        // place means malformed input and should be rethrown
        try {
            tag = input.readVarInt(false);
        } catch (final BufferUnderflowException e) {
            // No more fields
            break;
        }
        // Unsigned shift: a tag whose top bit is set would otherwise be sign-extended into a
        // negative field number that can never match the requested field
        final int fieldNum = tag >>> TAG_FIELD_OFFSET;
        final ProtoConstants wireType = ProtoConstants.get(tag & ProtoConstants.TAG_WIRE_TYPE_MASK);
        if (fieldNum == field.number()) {
            if (wireType != ProtoConstants.WIRE_TYPE_DELIMITED) {
                // Report the decoded wire type (not just the raw tag) so the mismatch is readable
                throw new ParseException(
                        "Unexpected wire type: " + wireType + " for field " + fieldNum + " (tag " + tag + ")");
            }
            // The length prefix precedes the payload; only the payload is returned
            final int length = input.readVarInt(false);
            return input.readBytes(length);
        } else {
            // Not the requested field: step over its value and keep scanning
            skipField(input, wireType);
        }
    }
    // Reached the end of the input without seeing the requested field
    return null;
}

/**
* Skip over the bytes in a stream for a given wire type. Assumes you have already read tag.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: Apache-2.0
package com.hedera.pbj.runtime;

import static com.hedera.pbj.runtime.FieldType.BYTES;
import static com.hedera.pbj.runtime.FieldType.FIXED32;
import static com.hedera.pbj.runtime.FieldType.FIXED64;
import static com.hedera.pbj.runtime.FieldType.INT32;
import static com.hedera.pbj.runtime.FieldType.MESSAGE;
import static com.hedera.pbj.runtime.FieldType.STRING;
import static com.hedera.pbj.runtime.ProtoConstants.TAG_WIRE_TYPE_MASK;
import static com.hedera.pbj.runtime.ProtoConstants.WIRE_TYPE_DELIMITED;
import static com.hedera.pbj.runtime.ProtoConstants.WIRE_TYPE_FIXED_32_BIT;
import static com.hedera.pbj.runtime.ProtoConstants.WIRE_TYPE_FIXED_64_BIT;
Expand All @@ -20,16 +22,25 @@
import static com.hedera.pbj.runtime.ProtoWriterToolsTest.createFieldDefinition;
import static com.hedera.pbj.runtime.ProtoWriterToolsTest.randomVarSizeString;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;

import com.hedera.pbj.runtime.io.ReadableSequentialData;
import com.hedera.pbj.runtime.io.WritableSequentialData;
import com.hedera.pbj.runtime.io.buffer.BufferedData;
import com.hedera.pbj.runtime.io.buffer.Bytes;
import com.hedera.pbj.runtime.io.stream.ReadableStreamingData;
import com.hedera.pbj.runtime.io.stream.WritableStreamingData;
import com.hedera.pbj.runtime.test.UncheckedThrowingFunction;
import edu.umd.cs.findbugs.annotations.NonNull;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.Supplier;
Expand Down Expand Up @@ -327,6 +338,98 @@ void testSkipUnsupported(ProtoConstants unsupportedType) {
assertThrows(IOException.class, () -> skipField(data, unsupportedType));
}

/** A {@code null} input passed to {@code extractFieldBytes} must raise a {@link NullPointerException}. */
@Test
void testExtractBytesNullInput() {
    final FieldDefinition bytesField = createFieldDefinition(BYTES);
    assertThrows(
            NullPointerException.class,
            () -> ProtoParserTools.extractFieldBytes(null, bytesField));
}

/** A {@code null} field definition passed to {@code extractFieldBytes} must raise a {@link NullPointerException}. */
@Test
void testExtractBytesNullField() {
    final ReadableSequentialData emptyInput = Bytes.EMPTY.toReadableSequentialData();
    assertThrows(
            NullPointerException.class,
            () -> ProtoParserTools.extractFieldBytes(emptyInput, null));
}

/** Requesting bytes for a repeated field must be rejected with an {@link IllegalArgumentException}. */
@Test
void testExtractBytesRepeatedField() {
    // Third constructor argument is true here and false in the other fixtures of this
    // test - presumably the "repeated" flag; confirm against FieldDefinition
    final FieldDefinition repeatedField = new FieldDefinition("field", FieldType.BYTES, true, true, false, 1);
    final ReadableSequentialData emptyInput = Bytes.EMPTY.toReadableSequentialData();
    assertThrows(
            IllegalArgumentException.class,
            () -> ProtoParserTools.extractFieldBytes(emptyInput, repeatedField));
}

// Fixtures for the extractFieldBytes tests: one field definition per field number 1..6 together
// with the value that prepareExtractBytesTestInput() writes for it, plus a definition (number 10)
// that is never written.

// Field 1: int32
private static final FieldDefinition INT32_F = new FieldDefinition("int32field", INT32, false, true, false, 1);
private static final int INT32_V = 101;

// Field 2: fixed32
private static final FieldDefinition FIXED_F = new FieldDefinition("fixed32field", FIXED32, false, true, false, 2);
private static final int FIXED32_V = 102;

// Field 3: string
private static final FieldDefinition STRING_F = new FieldDefinition("stringfield", STRING, false, true, false, 3);
private static final String STRING_V = "StringValue";

// Field 4: bytes, carrying the UTF-8 encoding of STRING_V
private static final FieldDefinition BYTES_F = new FieldDefinition("bytesfield", BYTES, false, true, false, 4);
private static final Bytes BYTES_V = Bytes.wrap(STRING_V.getBytes(StandardCharsets.UTF_8));

// Field 5: nested message wrapping STRING_V
private static final FieldDefinition MESSAGE_F = new FieldDefinition("messagefield", MESSAGE, false, true, false, 5);
private static final TestMessage MESSAGE_V = new TestMessage(STRING_V);

// Field 6: double
private static final FieldDefinition DOUBLE_F =
        new FieldDefinition("doublefield", FieldType.DOUBLE, false, true, false, 6);
private static final double DOUBLE32_V = 103.0;

// Field 10: defined but never written to the test input
private static final FieldDefinition UNKNOWN_F = new FieldDefinition("nofield", BYTES, false, true, false, 10);

/**
 * Builds the shared input for the extractFieldBytes tests by writing the int32, fixed32,
 * string, bytes, message and double fixture fields, in that order, to an in-memory stream.
 *
 * @return the written bytes wrapped in a {@link Bytes} object
 * @throws IOException if writing to the stream fails
 */
private static Bytes prepareExtractBytesTestInput() throws IOException {
    try (final ByteArrayOutputStream sink = new ByteArrayOutputStream();
            final WritableStreamingData writer = new WritableStreamingData(sink)) {
        ProtoWriterTools.writeInteger(writer, INT32_F, INT32_V);
        ProtoWriterTools.writeInteger(writer, FIXED_F, FIXED32_V);
        ProtoWriterTools.writeString(writer, STRING_F, STRING_V);
        ProtoWriterTools.writeBytes(writer, BYTES_F, BYTES_V);
        ProtoWriterTools.writeMessage(writer, MESSAGE_F, MESSAGE_V, TestMessageCodec.INSTANCE);
        ProtoWriterTools.writeDouble(writer, DOUBLE_F, DOUBLE32_V);
        final byte[] encoded = sink.toByteArray();
        return Bytes.wrap(encoded);
    }
}

/** Extracting the string field must yield the UTF-8 bytes of {@code STRING_V}. */
@Test
void testExtractBytesStringField() throws IOException, ParseException {
    final Bytes testInput = prepareExtractBytesTestInput();
    final Bytes extracted = ProtoParserTools.extractFieldBytes(testInput.toReadableSequentialData(), STRING_F);
    assertNotNull(extracted);
    final String decoded = new String(extracted.toByteArray(), StandardCharsets.UTF_8);
    assertEquals(STRING_V, decoded);
}

/** Extracting the bytes field must yield exactly {@code BYTES_V}. */
@Test
void testExtractBytesBytesField() throws IOException, ParseException {
    final Bytes testInput = prepareExtractBytesTestInput();
    final Bytes extracted = ProtoParserTools.extractFieldBytes(testInput.toReadableSequentialData(), BYTES_F);
    assertNotNull(extracted);
    assertEquals(BYTES_V, extracted);
}

/** Extracting the message field must yield bytes that parse back into {@code MESSAGE_V}. */
@Test
void testExtractBytesMessageField() throws IOException, ParseException {
    final Bytes testInput = prepareExtractBytesTestInput();
    final Bytes extracted = ProtoParserTools.extractFieldBytes(testInput.toReadableSequentialData(), MESSAGE_F);
    assertNotNull(extracted);
    // The extracted bytes carry no tag or length, so they can be fed straight to the codec
    final TestMessage parsed = TestMessageCodec.INSTANCE.parse(extracted.toReadableSequentialData());
    assertNotNull(parsed);
    assertEquals(MESSAGE_V, parsed);
}

/** Requesting a field that is absent from the input must return {@code null}. */
@Test
void testExtractBytesUnknownField() throws IOException, ParseException {
    final Bytes testInput = prepareExtractBytesTestInput();
    assertNull(ProtoParserTools.extractFieldBytes(testInput.toReadableSequentialData(), UNKNOWN_F));
}

/**
 * Consumes one varint from the buffer and discards it, moving the read position past a tag.
 *
 * @param data the buffer to advance
 */
private static void skipTag(final BufferedData data) {
    data.readVarInt(false);
}
Expand All @@ -342,4 +445,91 @@ private static <T> void testRead(
data.flip();
assertEquals(value, reader.apply(data));
}

/** Minimal message type for the tests: wraps a single, possibly {@code null}, string value. */
private static final class TestMessage {

    private final String value;

    /**
     * @param value the wrapped string
     */
    public TestMessage(final String value) {
        this.value = value;
    }

    /**
     * @return the wrapped string
     */
    public String getValue() {
        return value;
    }

    /** Two messages are equal when their wrapped values are equal (both {@code null} counts as equal). */
    @Override
    public boolean equals(Object obj) {
        return (obj instanceof TestMessage that) && Objects.equals(value, that.value);
    }

    /** Hash of the wrapped value, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hashCode(value);
    }
}

/**
 * Hand-written codec for {@link TestMessage}. The message is encoded as a single string
 * field ({@link #VALUE_FIELD}); a {@code null} value is encoded as nothing at all.
 * {@code measure} and {@code fastEquals} are not supported.
 */
private static final class TestMessageCodec implements Codec<TestMessage> {

    public static final TestMessageCodec INSTANCE = new TestMessageCodec();

    /** The only field of the message: the string value, with field number 1. */
    public static final FieldDefinition VALUE_FIELD =
            new FieldDefinition("value", FieldType.STRING, false, true, false, 1);

    /**
     * Parses a message from the input until no bytes remain. Any tag other than the one for
     * {@link #VALUE_FIELD} is rejected. If the field occurs more than once, the last
     * occurrence wins; if it never occurs, the message wraps {@code null}.
     * The {@code strictMode} and {@code maxDepth} arguments are not used.
     *
     * @throws ParseException on an unexpected tag, or if fewer value bytes than announced
     *      could be read
     */
    @NonNull
    @Override
    public TestMessage parse(@NonNull final ReadableSequentialData in, final boolean strictMode, final int maxDepth)
            throws ParseException {
        String parsed = null;
        while (in.hasRemaining()) {
            final int tag = in.readVarInt(false);
            final int fieldNumber = tag >> ProtoParserTools.TAG_FIELD_OFFSET;
            final int wireTypeBits = tag & TAG_WIRE_TYPE_MASK;
            // Guard clause: only the value field with its expected wire type is accepted
            if ((fieldNumber != VALUE_FIELD.number())
                    || (wireTypeBits != ProtoWriterTools.wireType(VALUE_FIELD).ordinal())) {
                throw new ParseException("Unknown field: " + tag);
            }
            final int size = in.readVarInt(false);
            final byte[] raw = new byte[size];
            if (in.readBytes(raw) != size) {
                throw new ParseException("Failed to read value bytes");
            }
            parsed = new String(raw, StandardCharsets.UTF_8);
        }
        return new TestMessage(parsed);
    }

    /** Writes the value as a string field; writes nothing when the value is {@code null}. */
    @Override
    public void write(@NonNull final TestMessage item, @NonNull final WritableSequentialData out)
            throws IOException {
        final String text = item.getValue();
        if (text == null) {
            return;
        }
        ProtoWriterTools.writeString(out, VALUE_FIELD, text);
    }

    /** Not supported by this test codec. */
    @Override
    public int measure(@NonNull ReadableSequentialData input) throws ParseException {
        throw new UnsupportedOperationException();
    }

    /**
     * @return the size reported by {@code ProtoWriterTools.sizeOfString} for the value field,
     *      or 0 when the value is {@code null}
     */
    @Override
    public int measureRecord(@NonNull final TestMessage item) {
        final String text = item.getValue();
        return (text == null) ? 0 : ProtoWriterTools.sizeOfString(VALUE_FIELD, text);
    }

    /** Not supported by this test codec. */
    @Override
    public boolean fastEquals(@NonNull TestMessage item, @NonNull ReadableSequentialData input)
            throws ParseException {
        throw new UnsupportedOperationException();
    }
}
}
2 changes: 1 addition & 1 deletion pbj-core/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.17-SNAPSHOT
0.9.18-SNAPSHOT

0 comments on commit 2ed3662

Please sign in to comment.