Skip to content

Commit

Permalink
Merge pull request #18629 from MathiasVP/fix-more-fps-in-buffer-overflow
Browse files Browse the repository at this point in the history
C++: Fix more FPs in `cpp/overflow-buffer`
  • Loading branch information
MathiasVP authored Jan 30, 2025
2 parents 0a8b76c + 02cf458 commit f35fea3
Show file tree
Hide file tree
Showing 7 changed files with 303 additions and 85 deletions.
4 changes: 4 additions & 0 deletions cpp/ql/lib/change-notes/2025-01-30-getOffsetInClass.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: feature
---
* A new predicate `getOffsetInClass` was added to the `Field` class, which computes the byte offset of a field relative to a given `Class`.
45 changes: 45 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/Field.qll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,30 @@
import semmle.code.cpp.Variable
import semmle.code.cpp.Enum

private predicate hasAFieldWithOffset(Class c, Field f, int offset) {
// Base case: `f` is a field in `c`.
f = c.getAField() and
offset = f.getByteOffset() and
not f.getUnspecifiedType().(Class).hasDefinition()
or
// Otherwise, we find the struct that is a field of `c` which then has
// the field `f` as a member.
exists(Field g |
g = c.getAField() and
// Find the field with the largest offset that's less than or equal to
// offset. That's the struct we need to search recursively.
g =
max(Field cand, int candOffset |
cand = c.getAField() and
candOffset = cand.getByteOffset() and
offset >= candOffset
|
cand order by candOffset
) and
hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
)
}

/**
* A C structure member or C++ non-static member variable. For example the
* member variable `m` in the following code (but not `s`):
Expand Down Expand Up @@ -76,6 +100,27 @@ class Field extends MemberVariable {
rank[result + 1](int index | cls.getCanonicalMember(index).(Field).isInitializable())
)
}

/**
* Gets the offset (in bytes) of this field starting at `c`.
*
* For example, consider:
* ```cpp
* struct S1 {
* int a;
* void* b;
* };
*
* struct S2 {
* S1 s1;
* char c;
* };
* ```
* If `f` represents the field `s1` and `c` represents the class `S2` then
* `f.getOffsetInClass(S2) = 0` holds. Likewise, if `f` represents the
* field `a`, then `f.getOffsetInClass(c) = 0` holds.
*/
int getOffsetInClass(Class c) { hasAFieldWithOffset(c, this, result) }
}

/**
Expand Down
98 changes: 70 additions & 28 deletions cpp/ql/lib/semmle/code/cpp/commons/Buffer.qll
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,74 @@ predicate memberMayBeVarSize(Class c, MemberVariable v) {
exists(ArrayType t | t = v.getUnspecifiedType() | not t.getArraySize() > 1)
}

/**
* Given a chain of accesses of the form `x.f1.f2...fn` this
* predicate gives the type of `x`. Note that `x` may be an implicit
* `this` expression.
*/
private Class getRootType(FieldAccess fa) {
// If the object is accessed inside a member function then the root will
// be a(n implicit) `this`. And the root type will be the type of `this`.
exists(VariableAccess root |
root = fa.getQualifier*() and
result =
root.getQualifier()
.(ThisExpr)
.getUnspecifiedType()
.(PointerType)
.getBaseType()
.getUnspecifiedType()
)
or
// Otherwise, if this is not inside a member function there will not be
// a(n implicit) `this`. And the root type is the type of the outermost
// access.
exists(VariableAccess root |
root = fa.getQualifier+() and
not exists(root.getQualifier()) and
result = root.getUnspecifiedType()
)
}

/**
* Gets the size of the buffer access at `va`.
*/
private int getSize(VariableAccess va) {
exists(Variable v | va.getTarget() = v |
// If `v` is not a field then the size of the buffer is just
// the size of the type of `v`.
exists(Type t |
t = v.getUnspecifiedType() and
not v instanceof Field and
not t instanceof ReferenceType and
result = t.getSize()
)
or
exists(Class c |
// Otherwise, we find the "outermost" object and compute the size
// as the difference between the size of the type of the "outermost
// object" and the offset of the field relative to that type.
// For example, consider the following structs:
// ```
// struct S {
// uint32_t x;
// uint32_t y;
// };
// struct S2 {
// S s;
// uint32_t z;
// };
// ```
// Given an object `S2 s2` the size of the buffer `&s2.s.y`
// is the size of the base object type (i.e., `S2`) minutes the offset
// of `y` relative to the type `S2` (i.e., `4`). So the size of the
// buffer is `12 - 4 = 8`.
c = getRootType(va) and
result = c.getSize() - v.(Field).getOffsetInClass(c)
)
)
}

/**
* Holds if `bufferExpr` is an allocation-like expression.
*
Expand Down Expand Up @@ -54,37 +122,11 @@ private int isSource(Expr bufferExpr, Element why) {
result = bufferExpr.(AllocationExpr).getSizeBytes() and
why = bufferExpr
or
exists(Type bufferType, Variable v |
exists(Variable v |
v = why and
// buffer is the address of a variable
why = bufferExpr.(AddressOfExpr).getAddressable() and
bufferType = v.getUnspecifiedType() and
not bufferType instanceof ReferenceType and
not any(Union u).getAMemberVariable() = why
|
not v instanceof Field and
result = bufferType.getSize()
or
// If it's an address of a field (i.e., a non-static member variable)
// then it's okay to use that address to access the other member variables.
// For example, this is okay:
// ```
// struct S { uint8_t a, b, c; };
// S s;
// memset(&s.a, 0, sizeof(S) - offsetof(S, a));
exists(Field f |
v = f and
result = f.getDeclaringType().getSize() - f.getByteOffset()
)
)
or
exists(Union bufferType |
// buffer is the address of a union member; in this case, we
// take the size of the union itself rather the union member, since
// it's usually OK to access that amount (e.g. clearing with memset).
why = bufferExpr.(AddressOfExpr).getAddressable() and
bufferType.getAMemberVariable() = why and
result = bufferType.getSize()
result = getSize(bufferExpr.(AddressOfExpr).getOperand())
)
}

Expand Down
48 changes: 3 additions & 45 deletions cpp/ql/src/Security/CWE/CWE-843/TypeConfusion.ql
Original file line number Diff line number Diff line change
Expand Up @@ -14,48 +14,6 @@ import cpp
import semmle.code.cpp.dataflow.new.DataFlow
import Flow::PathGraph

/**
* Holds if `f` is a field located at byte offset `offset` in `c`.
*
* Note that predicate is recursive, so that given the following:
* ```cpp
* struct S1 {
* int a;
* void* b;
* };
*
* struct S2 {
* S1 s1;
* char c;
* };
* ```
* both `hasAFieldWithOffset(S2, s1, 0)` and `hasAFieldWithOffset(S2, a, 0)`
* holds.
*/
predicate hasAFieldWithOffset(Class c, Field f, int offset) {
// Base case: `f` is a field in `c`.
f = c.getAField() and
offset = f.getByteOffset() and
not f.getUnspecifiedType().(Class).hasDefinition()
or
// Otherwise, we find the struct that is a field of `c` which then has
// the field `f` as a member.
exists(Field g |
g = c.getAField() and
// Find the field with the largest offset that's less than or equal to
// offset. That's the struct we need to search recursively.
g =
max(Field cand, int candOffset |
cand = c.getAField() and
candOffset = cand.getByteOffset() and
offset >= candOffset
|
cand order by candOffset
) and
hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
)
}

/** Holds if `f` is the last field of its declaring class. */
predicate lastField(Field f) {
exists(Class c | c = f.getDeclaringType() |
Expand All @@ -75,7 +33,7 @@ predicate lastField(Field f) {
bindingset[f1, offset, c2]
pragma[inline_late]
predicate hasCompatibleFieldAtOffset(Field f1, int offset, Class c2) {
exists(Field f2 | hasAFieldWithOffset(c2, f2, offset) |
exists(Field f2 | offset = f2.getOffsetInClass(c2) |
// Let's not deal with bit-fields for now.
f2 instanceof BitField
or
Expand All @@ -100,15 +58,15 @@ predicate prefix(Class c1, Class c2) {
exists(Field f1, int offset |
// Let's not deal with bit-fields for now.
not f1 instanceof BitField and
hasAFieldWithOffset(c1, f1, offset)
offset = f1.getOffsetInClass(c1)
|
hasCompatibleFieldAtOffset(f1, offset, c2)
)
else
forall(Field f1, int offset |
// Let's not deal with bit-fields for now.
not f1 instanceof BitField and
hasAFieldWithOffset(c1, f1, offset)
offset = f1.getOffsetInClass(c1)
|
hasCompatibleFieldAtOffset(f1, offset, c2)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,33 @@
| tests.cpp:712:3:712:8 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:693:16:693:16 | c | destination buffer |
| tests.cpp:716:3:716:8 | call to memset | This 'memset' operation accesses 24 bytes but the $@ is only 16 bytes. | tests.cpp:692:16:692:16 | b | destination buffer |
| tests.cpp:727:2:727:7 | call to memset | This 'memset' operation accesses 24 bytes but the $@ is only 8 bytes. | tests.cpp:693:16:693:16 | c | destination buffer |
| tests.cpp:753:5:753:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:735:20:735:22 | b_1 | destination buffer |
| tests.cpp:756:5:756:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:735:20:735:22 | b_1 | destination buffer |
| tests.cpp:760:5:760:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
| tests.cpp:761:5:761:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
| tests.cpp:763:5:763:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
| tests.cpp:764:5:764:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:736:20:736:22 | c_1 | destination buffer |
| tests.cpp:774:5:774:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:740:20:740:22 | b_2 | destination buffer |
| tests.cpp:777:5:777:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:740:20:740:22 | b_2 | destination buffer |
| tests.cpp:795:5:795:10 | call to memset | This 'memset' operation accesses 8 bytes but the $@ is only 4 bytes. | tests.cpp:790:16:790:16 | b | destination buffer |
| tests.cpp:822:5:822:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
| tests.cpp:825:5:825:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
| tests.cpp:827:5:827:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 16 bytes. | tests.cpp:801:16:801:16 | b | destination buffer |
| tests.cpp:830:5:830:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
| tests.cpp:831:5:831:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
| tests.cpp:833:5:833:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
| tests.cpp:835:5:835:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 12 bytes. | tests.cpp:802:16:802:16 | c | destination buffer |
| tests.cpp:846:5:846:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
| tests.cpp:847:5:847:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
| tests.cpp:848:5:848:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
| tests.cpp:849:5:849:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
| tests.cpp:851:5:851:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 8 bytes. | tests.cpp:807:16:807:16 | x | destination buffer |
| tests.cpp:862:5:862:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
| tests.cpp:863:5:863:10 | call to memset | This 'memset' operation accesses 16 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
| tests.cpp:864:5:864:10 | call to memset | This 'memset' operation accesses 12 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
| tests.cpp:865:5:865:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
| tests.cpp:866:5:866:10 | call to memset | This 'memset' operation accesses 8 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
| tests.cpp:867:5:867:10 | call to memset | This 'memset' operation accesses 20 bytes but the $@ is only 4 bytes. | tests.cpp:812:12:812:12 | u | destination buffer |
| tests_restrict.c:12:2:12:7 | call to memcpy | This 'memcpy' operation accesses 2 bytes but the $@ is only 1 byte. | tests_restrict.c:7:6:7:13 | smallbuf | source buffer |
| unions.cpp:26:2:26:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:21:10:21:11 | mu | destination buffer |
| unions.cpp:30:2:30:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:15:7:15:11 | small | destination buffer |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ edges
| main.cpp:9:29:9:32 | *argv | tests_restrict.c:15:41:15:44 | *argv | provenance | |
| main.cpp:9:29:9:32 | tests_restrict_main output argument | main.cpp:10:20:10:23 | **argv | provenance | |
| main.cpp:9:29:9:32 | tests_restrict_main output argument | main.cpp:10:20:10:23 | *argv | provenance | |
| main.cpp:10:20:10:23 | **argv | tests.cpp:730:32:730:35 | **argv | provenance | |
| main.cpp:10:20:10:23 | *argv | tests.cpp:730:32:730:35 | *argv | provenance | |
| main.cpp:10:20:10:23 | **argv | tests.cpp:872:32:872:35 | **argv | provenance | |
| main.cpp:10:20:10:23 | *argv | tests.cpp:872:32:872:35 | *argv | provenance | |
| overflowdestination.cpp:23:45:23:48 | **argv | overflowdestination.cpp:23:45:23:48 | **argv | provenance | |
| overflowdestination.cpp:23:45:23:48 | **argv | overflowdestination.cpp:23:45:23:48 | *argv | provenance | |
| test_buffer_overrun.cpp:32:46:32:49 | **argv | test_buffer_overrun.cpp:32:46:32:49 | **argv | provenance | |
Expand All @@ -41,12 +41,12 @@ edges
| tests.cpp:628:14:628:14 | *s [*home] | tests.cpp:628:14:628:19 | *home | provenance | |
| tests.cpp:628:14:628:14 | *s [*home] | tests.cpp:628:16:628:19 | *home | provenance | |
| tests.cpp:628:16:628:19 | *home | tests.cpp:628:14:628:19 | *home | provenance | |
| tests.cpp:730:32:730:35 | **argv | tests.cpp:755:9:755:15 | *access to array | provenance | |
| tests.cpp:730:32:730:35 | **argv | tests.cpp:756:9:756:15 | *access to array | provenance | |
| tests.cpp:730:32:730:35 | *argv | tests.cpp:755:9:755:15 | *access to array | provenance | |
| tests.cpp:730:32:730:35 | *argv | tests.cpp:756:9:756:15 | *access to array | provenance | |
| tests.cpp:755:9:755:15 | *access to array | tests.cpp:613:19:613:24 | *source | provenance | |
| tests.cpp:756:9:756:15 | *access to array | tests.cpp:622:19:622:24 | *source | provenance | |
| tests.cpp:872:32:872:35 | **argv | tests.cpp:897:9:897:15 | *access to array | provenance | |
| tests.cpp:872:32:872:35 | **argv | tests.cpp:898:9:898:15 | *access to array | provenance | |
| tests.cpp:872:32:872:35 | *argv | tests.cpp:897:9:897:15 | *access to array | provenance | |
| tests.cpp:872:32:872:35 | *argv | tests.cpp:898:9:898:15 | *access to array | provenance | |
| tests.cpp:897:9:897:15 | *access to array | tests.cpp:613:19:613:24 | *source | provenance | |
| tests.cpp:898:9:898:15 | *access to array | tests.cpp:622:19:622:24 | *source | provenance | |
| tests_restrict.c:15:41:15:44 | **argv | tests_restrict.c:15:41:15:44 | **argv | provenance | |
| tests_restrict.c:15:41:15:44 | *argv | tests_restrict.c:15:41:15:44 | *argv | provenance | |
nodes
Expand Down Expand Up @@ -80,10 +80,10 @@ nodes
| tests.cpp:628:14:628:14 | *s [*home] | semmle.label | *s [*home] |
| tests.cpp:628:14:628:19 | *home | semmle.label | *home |
| tests.cpp:628:16:628:19 | *home | semmle.label | *home |
| tests.cpp:730:32:730:35 | **argv | semmle.label | **argv |
| tests.cpp:730:32:730:35 | *argv | semmle.label | *argv |
| tests.cpp:755:9:755:15 | *access to array | semmle.label | *access to array |
| tests.cpp:756:9:756:15 | *access to array | semmle.label | *access to array |
| tests.cpp:872:32:872:35 | **argv | semmle.label | **argv |
| tests.cpp:872:32:872:35 | *argv | semmle.label | *argv |
| tests.cpp:897:9:897:15 | *access to array | semmle.label | *access to array |
| tests.cpp:898:9:898:15 | *access to array | semmle.label | *access to array |
| tests_restrict.c:15:41:15:44 | **argv | semmle.label | **argv |
| tests_restrict.c:15:41:15:44 | **argv | semmle.label | **argv |
| tests_restrict.c:15:41:15:44 | *argv | semmle.label | *argv |
Expand Down
Loading

0 comments on commit f35fea3

Please sign in to comment.