-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SandboxVec] Tag insts in a Region with metadata. #109353
Conversation
For each region, we create a metadata node with the Region ID. Then when an instruction is added to the Region, it gets tagged with the metadata node for that region. In the following example, we have a Region that contains only the `%t0` instruction. ``` define i8 @foo(i8 %v0, i8 %v1) { %t0 = add i8 %v0, 1, !sbvec !0 %t1 = add i8 %t0, %v1 ret i8 %t1 } !0 = !{!"region", i32 0} ``` This commit also adds a function to create regions from metadata already present in a Function. This metadata can be used for debugging: if we dump IR before a Region pass, the IR will contain enough info to re-create the Region and run the pass by itself in a later invocation.
@llvm/pr-subscribers-llvm-transforms Author: Jorge Gorbe Moya (slackito) ChangesFor each region, we create a metadata node with the Region ID. Then when an instruction is added to the Region, it gets tagged with the metadata node for that region. In the following example, we have a Region that contains only the
This commit also adds a function to create regions from metadata already present in a Function. This metadata can be used for debugging: if we dump IR before a Region pass, the IR will contain enough info to re-create the Region and run the pass by itself in a later invocation. Full diff: https://github.com/llvm/llvm-project/pull/109353.diff 4 Files Affected:
diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h
index c01516aa9d31ac..a16ba6832f42fc 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIR.h
+++ b/llvm/include/llvm/SandboxIR/SandboxIR.h
@@ -338,6 +338,9 @@ class Value {
friend class GlobalIFunc; // For `Val`.
friend class GlobalVariable; // For `Val`.
friend class GlobalAlias; // For `Val`.
+ // Region needs to manipulate metadata in the underlying LLVM Value, we don't
+ // expose metadata in sandboxir.
+ friend class Region;
/// All values point to the context.
Context &Ctx;
@@ -4337,6 +4340,7 @@ class Context {
friend class IntegerType; // For LLVMCtx.
friend class StructType; // For LLVMCtx.
friend class TargetExtType; // For LLVMCtx.
+ friend class Region; // For LLVMCtx.
Tracker IRTracker;
/// Maps LLVM Value to the corresponding sandboxir::Value. Owns all
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h
index 2f893bac213a01..2663de823188ad 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h
@@ -9,6 +9,8 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H
#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H
+#include <memory>
+
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/SandboxIR/SandboxIR.h"
@@ -58,6 +60,11 @@ class Region {
/// A unique ID, used for debugging.
unsigned RegionID = 0;
+ /// MDNode that we'll use to mark instructions as being part of the region.
+ MDNode *RegionMDN;
+ static constexpr const char *MDKind = "sbvec";
+ static constexpr const char *RegionStr = "region";
+
Context &Ctx;
// TODO: Add cost modeling.
@@ -85,6 +92,8 @@ class Region {
iterator end() { return Insts.end(); }
iterator_range<iterator> insts() { return make_range(begin(), end()); }
+ static SmallVector<std::unique_ptr<Region>> createRegionsFromMD(Function &F);
+
#ifndef NDEBUG
/// This is an expensive check, meant for testing.
bool operator==(const Region &Other) const;
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp
index 34aa9f3786f34c..4d26b6c4f7c552 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp
@@ -13,13 +13,25 @@ namespace llvm::sandboxir {
Region::Region(Context &Ctx) : Ctx(Ctx) {
static unsigned StaticRegionID;
RegionID = StaticRegionID++;
+
+ LLVMContext &LLVMCtx = Ctx.LLVMCtx;
+ auto *RegionStrMD = MDString::get(LLVMCtx, RegionStr);
+ auto *RegionIDMD = llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(LLVMCtx, APInt(sizeof(RegionID) * 8, RegionID)));
+ RegionMDN = MDNode::get(LLVMCtx, {RegionStrMD, RegionIDMD});
}
Region::~Region() {}
-void Region::add(Instruction *I) { Insts.insert(I); }
+void Region::add(Instruction *I) {
+ Insts.insert(I);
+ cast<llvm::Instruction>(I->Val)->setMetadata(MDKind, RegionMDN);
+}
-void Region::remove(Instruction *I) { Insts.remove(I); }
+void Region::remove(Instruction *I) {
+ Insts.remove(I);
+ cast<llvm::Instruction>(I->Val)->setMetadata(MDKind, nullptr);
+}
#ifndef NDEBUG
bool Region::operator==(const Region &Other) const {
@@ -42,4 +54,27 @@ void Region::dump() const {
}
#endif // NDEBUG
+SmallVector<std::unique_ptr<Region>> Region::createRegionsFromMD(Function &F) {
+ SmallVector<std::unique_ptr<Region>> Regions;
+ DenseMap<MDNode *, Region *> MDNToRegion;
+ auto &Ctx = F.getContext();
+ for (BasicBlock &BB : F) {
+ for (Instruction &Inst : BB) {
+ if (auto *MDN = cast<llvm::Instruction>(Inst.Val)->getMetadata(MDKind)) {
+ Region *R = nullptr;
+ auto It = MDNToRegion.find(MDN);
+ if (It == MDNToRegion.end()) {
+ Regions.push_back(std::make_unique<Region>(Ctx));
+ R = Regions.back().get();
+ MDNToRegion[MDN] = R;
+ } else {
+ R = It->second;
+ }
+ R->add(&Inst);
+ }
+ }
+ }
+ return Regions;
+}
+
} // namespace llvm::sandboxir
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp
index 2c7390c515f114..dcd3bd0975014f 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp
@@ -45,9 +45,8 @@ define i8 @foo(i8 %v0, i8 %v1) {
auto *Ret = cast<sandboxir::Instruction>(&*It++);
sandboxir::Region Rgn(Ctx);
- // Check getters
+ // Check getContext.
EXPECT_EQ(&Ctx, &Rgn.getContext());
- EXPECT_EQ(0U, Rgn.getID());
// Check add / remove / empty.
EXPECT_TRUE(Rgn.empty());
@@ -79,3 +78,31 @@ define i8 @foo(i8 %v0, i8 %v1) {
EXPECT_EQ(Rgn, Other);
#endif
}
+
+TEST_F(RegionTest, MetadataRoundTrip) {
+ parseIR(C, R"IR(
+define i8 @foo(i8 %v0, i8 %v1) {
+ %t0 = add i8 %v0, 1
+ %t1 = add i8 %t0, %v1
+ ret i8 %t1
+}
+)IR");
+ llvm::Function *LLVMF = &*M->getFunction("foo");
+ sandboxir::Context Ctx(C);
+ auto *F = Ctx.createFunction(LLVMF);
+ auto *BB = &*F->begin();
+ auto It = BB->begin();
+ auto *T0 = cast<sandboxir::Instruction>(&*It++);
+ auto *T1 = cast<sandboxir::Instruction>(&*It++);
+
+ sandboxir::Region Rgn(Ctx);
+ Rgn.add(T0);
+ Rgn.add(T1);
+
+ SmallVector<std::unique_ptr<sandboxir::Region>> Regions =
+ sandboxir::Region::createRegionsFromMD(*F);
+ ASSERT_EQ(1U, Regions.size());
+#ifndef NDEBUG
+ EXPECT_EQ(Rgn, *Regions[0].get());
+#endif
+}
|
for (Instruction &Inst : BB) { | ||
if (auto *MDN = cast<llvm::Instruction>(Inst.Val)->getMetadata(MDKind)) { | ||
Region *R = nullptr; | ||
auto It = MDNToRegion.find(MDN); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps move this code into a lambda?
auto GetOrCreateNode = [&MDNToRegion](MDNode *MDN, std::unique_ptr<Region> &Regions) {
Region *R = nullptr;
auto It = MDNToRegion.find(MDN);
if (It == MDNToRegion.end()) {
Regions.push_back(std::make_unique<Region>(Ctx));
R = Regions.back().get();
MDNToRegion[MDN] = R;
} else {
R = It->second;
}
return R;
};
Region *R = GetOrCreateNode(MDN, Regions);
R->add(&Inst);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think the lambda wins us any clarity here. It needs to access both Regions
and MDNToRegion
from the surrounding scope, so it's not very self-contained anyway, and this block is just 10 lines of code.
#ifndef NDEBUG | ||
EXPECT_EQ(Rgn, *Regions[0].get()); | ||
#endif | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The round-trip test is very useful, but it doesn't actually test that the right type of metadata is being created. I would add some checks that the metadata nodes are actually being created.
A separate test that just builds a region out of metadata IR would also be helpful as a reference of how a region looks like in IR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 Another test with explicit metadata being converted to regions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More tests.
#ifndef NDEBUG | ||
EXPECT_EQ(Rgn, *Regions[0].get()); | ||
#endif | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 Another test with explicit metadata being converted to regions?
void Region::add(Instruction *I) { Insts.insert(I); } | ||
void Region::add(Instruction *I) { | ||
Insts.insert(I); | ||
cast<llvm::Instruction>(I->Val)->setMetadata(MDKind, RegionMDN); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's also add a TODO about a potential redesign where we tag instructions lazily, perhaps at the end of the region pass manager, to save compile time. It might be worth exploring in the future.
- Added new test case that creates regions from explicit metadata. - As suggested offline by @aeubanks, removed region IDs in favor of using `distinct` metadata nodes to identify each region. Also removed a stale include for "InstructionCost" left over from the original version of PR108899. All cost-related stuff was removed.
✅ With the latest revision this PR passed the C/C++ code formatter. |
ASSERT_EQ(1U, Regions.size()); | ||
#ifndef NDEBUG | ||
EXPECT_EQ(Rgn, *Regions[0].get()); | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about dumping the IR and explicitly check for required number of "distinct !{!"region"}" and create more than one region?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think that is any better than doing a roundtrip and checking the Regions created from metadata in the dumped IR, but done.
sandboxir::Region Rgn(Ctx); | ||
Rgn.add(T0); | ||
Rgn.add(T1); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's also add a check here to test that the metadata has been created. Or otherwise a separate test Region to IR that checks the IR dump? Wdyt ?
/*IsForDebug=*/true); | ||
|
||
// Check that the dumped IR contains two `distinct !{!"region"}` nodes. | ||
std::string RegionString = "distinct !{!\"region\"}"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Maybe use a string literal to avoid escaping the double quotes
R"(distinct !{!"region"})"
- The IR is quite small, so why not compare the whole dump? If we change the dumps it would be relatively easy to update it.
|
||
// Check that the dumped IR contains two `distinct !{!"region"}` nodes. | ||
std::string RegionString = "distinct !{!\"region\"}"; | ||
auto Pos = output.find(RegionString); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think llvm::StringRef
implements a contains()
function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see a contains()
variant that takes a start position, so then to find the second occurrence I should create a substr()
and use contains()
on that? :|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Never mind, I could use count
for that. I'm going to follow your suggestion in the other comment and compare the whole dump anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good to me, thanks!
llvm::raw_string_ostream RSO(output); | ||
M->print(RSO, nullptr, /*ShouldPreserveUseListOrder=*/true, | ||
/*IsForDebug=*/true); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a TODO that we should replace this with a lit test, because lit tests are designed for this type of IR comparison.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: I'd suggest renaming mdnodes for clarity, in case the region name is used elsewhere.
llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h
Outdated
Show resolved
Hide resolved
llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h
Outdated
Show resolved
Hide resolved
Co-authored-by: Alina Sbirlea <[email protected]>
Co-authored-by: Alina Sbirlea <[email protected]>
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/6846 Here is the relevant piece of the build log for the reference
|
For each region, we create a metadata node. Then when an instruction is added to the Region, it gets tagged with the metadata node for that region. In the following example, we have a Region that contains only the `%t0` instruction. ``` define i8 @foo(i8 %v0, i8 %v1) { %t0 = add i8 %v0, 1, !sbvec !0 %t1 = add i8 %t0, %v1 ret i8 %t1 } !0 = distinct !{!"region"} ``` This commit also adds a function to create regions from metadata already present in a Function. This metadata can be used for debugging: if we dump IR before a Region pass, the IR will contain enough info to re-create the Region and run the pass by itself in a later invocation. --------- Co-authored-by: Alina Sbirlea <[email protected]>
For each region, we create a metadata node. Then when an instruction is added to the Region, it gets tagged with the metadata node for that region. In the following example, we have a Region that contains only the `%t0` instruction. ``` define i8 @foo(i8 %v0, i8 %v1) { %t0 = add i8 %v0, 1, !sbvec !0 %t1 = add i8 %t0, %v1 ret i8 %t1 } !0 = distinct !{!"region"} ``` This commit also adds a function to create regions from metadata already present in a Function. This metadata can be used for debugging: if we dump IR before a Region pass, the IR will contain enough info to re-create the Region and run the pass by itself in a later invocation. --------- Co-authored-by: Alina Sbirlea <[email protected]>
For each region, we create a metadata node. Then when an instruction is added to the Region, it gets tagged with the metadata node for that region. In the following example, we have a Region that contains only the
%t0
instruction.This commit also adds a function to create regions from metadata already present in a Function.
This metadata can be used for debugging: if we dump IR before a Region pass, the IR will contain enough info to re-create the Region and run the pass by itself in a later invocation.