Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallelize the binary parsing of function bodies #7134

Draft
wants to merge 1 commit into
base: binary-reader-pass-pos
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ class Pass {
// This method is used to create instances per function for a
// function-parallel pass. You may need to override this if you subclass a
// Walker, as otherwise this will create the parent class.
virtual std::unique_ptr<Pass> create() { WASM_UNREACHABLE("unimplenented"); }
virtual std::unique_ptr<Pass> create() { WASM_UNREACHABLE("unimplemented"); }

// Whether this pass modifies the Binaryen IR in the module. This is true for
// most passes, except for passes that have no side effects, or passes that
Expand Down
15 changes: 7 additions & 8 deletions src/wasm-binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -1524,16 +1524,15 @@ class WasmBinaryReader {
std::unordered_map<Index, Name> dataNames;
std::unordered_map<Index, Name> elemNames;

Function* currFunction = nullptr;
// before we see a function (like global init expressions), there is no end of
// function to check
Index endOfFunction = -1;

void readFunctions(size_t& pos);
void readVars(size_t& pos);
void setLocalNames(Function& func, Index i);
void readVars(size_t& pos, Function* func);
void setLocalNames(Function* func, Index i);

Result<> readInst(size_t& pos);
Result<>
readInst(size_t& pos, IRBuilder& builder, SourceMapReader& sourceMapReader);
Result<> readInst(size_t& pos) {
return readInst(pos, builder, sourceMapReader);
}

void readExports(size_t& pos);

Expand Down
136 changes: 85 additions & 51 deletions src/wasm/wasm-binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2583,7 +2583,7 @@ void WasmBinaryReader::readImports(size_t& pos) {
curr->hasExplicitName = isExplicit;
curr->module = module;
curr->base = base;
setLocalNames(*curr, wasm.functions.size());
setLocalNames(curr.get(), wasm.functions.size());
wasm.addFunction(std::move(curr));
break;
}
Expand Down Expand Up @@ -2676,16 +2676,16 @@ void WasmBinaryReader::readImports(size_t& pos) {
numFuncImports = wasm.functions.size();
}

void WasmBinaryReader::setLocalNames(Function& func, Index i) {
void WasmBinaryReader::setLocalNames(Function* func, Index i) {
if (auto it = localNames.find(i); it != localNames.end()) {
for (auto& [local, name] : it->second) {
if (local >= func.getNumLocals()) {
if (local >= func->getNumLocals()) {
std::cerr << "warning: local index out of bounds in name section: "
<< name << " at index " << local << " in function " << i
<< '\n';
continue;
}
func.setLocalName(local, name);
func->setLocalName(local, name);
}
}
}
Expand Down Expand Up @@ -2762,74 +2762,106 @@ void WasmBinaryReader::readFunctions(size_t& pos) {
if (numFuncBodies + numFuncImports != wasm.functions.size()) {
throwError(pos, "invalid function section size, must equal types");
}
if (DWARF) {
builder.setBinaryLocation(&pos, codeSectionLocation);
}
for (size_t i = 0; i < numFuncBodies; i++) {
auto sizePos = pos;

using FuncPositions =
std::unordered_map<Function*,
std::tuple<Index, size_t, size_t, SourceMapReader>>;
FuncPositions funcPositions;

for (Index i = 0; i < numFuncBodies; i++) {
auto funcIndex = numFuncImports + i;
auto start = pos;
size_t size = getU32LEB(pos);
if (size == 0) {
throwError(pos, "empty function size");
}
Index endOfFunction = pos + size;
auto end = pos + size;

auto& func = wasm.functions[numFuncImports + i];
currFunction = func.get();
auto* func = wasm.functions[funcIndex].get();

if (DWARF) {
func->funcLocation = BinaryLocations::FunctionLocations{
BinaryLocation(sizePos - codeSectionLocation),
BinaryLocation(start - codeSectionLocation),
BinaryLocation(pos - codeSectionLocation),
BinaryLocation(pos - codeSectionLocation + size)};
}

func->prologLocation = sourceMapReader.readDebugLocationAt(pos);

readVars(pos);
setLocalNames(*func, numFuncImports + i);
{
// Process the function body. Even if we are skipping function bodies we
// need to not skip the start function. That contains important code for
// wasm-emscripten-finalize in the form of pthread-related segment
// initializations. As this is just one function, it doesn't add
// significant time, so the optimization of skipping bodies is still very
// useful.
auto currFunctionIndex = wasm.functions.size();
bool isStart = startIndex == currFunctionIndex;
if (skipFunctionBodies && !isStart) {
// When skipping the function body we need to put something valid in
// their place so we validate. An unreachable is always acceptable
// there.
func->body = Builder(wasm).makeUnreachable();
// Skip reading the contents.
pos = endOfFunction;
} else {
auto start = builder.visitFunctionStart(func.get());
funcPositions.insert({func, {funcIndex, pos, end, sourceMapReader}});
pos = end;
sourceMapReader.finishFunction();
}

// Parse the function bodies in parallel. This obviously modifies the IR, but
// we say it's Immutable because we don't need to perform any cleanup actions
// after parsing the IR.
ModuleUtils::ParallelFunctionAnalysis<std::optional<ParseException>,
Immutable,
InsertOrderedMap>
bodyParser(wasm, [&](Function* func, std::optional<ParseException>& error) {
if (func->imported()) {
return;
}
auto [funcIndex, pos, end, sourceMapReader] =
funcPositions.find(func)->second;

IRBuilder builder(wasm);
if (DWARF) {
builder.setBinaryLocation(&pos, codeSectionLocation);
}

try {
readVars(pos, func);
setLocalNames(func, funcIndex);

// Process the function body. Even if we are skipping function bodies we
// need to not skip the start function. That contains important code for
// wasm-emscripten-finalize in the form of pthread-related segment
// initializations. As this is just one function, it doesn't add
// significant time, so the optimization of skipping bodies is still
// very useful.
if (skipFunctionBodies && startIndex != funcIndex) {
func->body = Builder(wasm).makeUnreachable();
return;
}

auto start = builder.visitFunctionStart(func);
if (auto* err = start.getErr()) {
throwError(pos, err->msg);
error = ParseException(err->msg, 0, pos);
return;
}
while (pos < endOfFunction) {
auto inst = readInst(pos);
while (pos < end) {
auto inst = readInst(pos, builder, sourceMapReader);
if (auto* err = inst.getErr()) {
throwError(pos, err->msg);
error = ParseException(err->msg, 0, pos);
return;
}
}
if (pos != endOfFunction) {
throwError(pos, "function overflowed its bounds");
}
if (!builder.empty()) {
throwError(pos, "expected function end");
}
} catch (ParseException e) {
error = e;
return;
}
}

sourceMapReader.finishFunction();
TypeUpdating::handleNonDefaultableLocals(func.get(), wasm);
currFunction = nullptr;
if (pos != end) {
error = ParseException("function overflowed its bounds", 0, pos);
return;
}
if (!builder.empty()) {
error = ParseException("expected function end", 0, pos);
return;
}
TypeUpdating::handleNonDefaultableLocals(func, wasm);
});

for (auto& [_, err] : bodyParser.map) {
if (err) {
throw *err;
}
}
}

void WasmBinaryReader::readVars(size_t& pos) {
void WasmBinaryReader::readVars(size_t& pos, Function* func) {
uint32_t totalVars = 0;
size_t numLocalTypes = getU32LEB(pos);
// Use a SmallVector as in the common (MVP) case there are only 4 possible
Expand All @@ -2844,16 +2876,18 @@ void WasmBinaryReader::readVars(size_t& pos) {
auto type = getConcreteType(pos);
decodedVars.emplace_back(num, type);
}
currFunction->vars.reserve(totalVars);
func->vars.reserve(totalVars);
for (auto [num, type] : decodedVars) {
while (num > 0) {
currFunction->vars.push_back(type);
func->vars.push_back(type);
num--;
}
}
}

Result<> WasmBinaryReader::readInst(size_t& pos) {
Result<> WasmBinaryReader::readInst(size_t& pos,
IRBuilder& builder,
SourceMapReader& sourceMapReader) {
if (auto loc = sourceMapReader.readDebugLocationAt(pos)) {
builder.setDebugLocation(loc);
}
Expand Down
5 changes: 5 additions & 0 deletions test/lit/binary/debug-bad-binary.test
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ RUN: not wasm-opt --debug %s.wasm 2>&1 | filecheck %s
;; CHECK-NEXT: (i32.const 10)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (func $2
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (i32.const 30)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
Loading