-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add kmp_* wrapper for gomp environment #79
Changes from 9 commits
34d10ea
0b4332b
c43f481
382171b
ef75da8
f1fd0ae
a773ea6
84933c2
4cca4df
70c5e97
1e06c98
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
add_subdirectory(CPURuntime) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
find_package(OpenMP REQUIRED) | ||
|
||
if ("iomp" IN_LIST OpenMP_C_LIB_NAMES OR "omp" IN_LIST OpenMP_C_LIB_NAMES OR "omp5" IN_LIST OpenMP_C_LIB_NAMES) | ||
else() | ||
add_definitions("-DGC_NEEDS_OMP_WRAPPER=1") | ||
endif() | ||
|
||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp") | ||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") | ||
add_mlir_library(GCCpuRuntime | ||
SHARED | ||
Parallel.cpp | ||
|
||
EXCLUDE_FROM_LIBMLIR | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
//===-- Parallel.cpp - parallel ---------------------------------*- C++ -*-===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <assert.h> | ||
#include <atomic> | ||
#include <chrono> | ||
#include <immintrin.h> | ||
#include <omp.h> | ||
#include <stdarg.h> | ||
|
||
#define likely(x) __builtin_expect(!!(x), 1) | ||
#define unlikely(x) __builtin_expect(!!(x), 0) | ||
|
||
#define WEAK_SYMBOL __attribute__((weak)) | ||
|
||
namespace { | ||
struct barrier_t { | ||
alignas(64) std::atomic<int32_t> pending_; | ||
std::atomic<int32_t> rounds_; | ||
uint64_t total_; | ||
// pad barrier to size of cacheline to avoid false sharing | ||
char padding_[64 - 4 * sizeof(int32_t)]; | ||
}; | ||
|
||
using barrier_idle_func = uint64_t (*)(std::atomic<int32_t> *remaining, | ||
int32_t expected_remain, int32_t tid, | ||
void *args); | ||
} // namespace | ||
|
||
extern "C" { | ||
int gc_runtime_keep_alive = 0; | ||
void gc_arrive_at_barrier(barrier_t *b, barrier_idle_func idle_func, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the usage scenario for the barrier? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is not currently used for now. But we will introduce cpu barrier ops in CPURuntime in near future. |
||
void *idle_args) { | ||
auto cur_round = b->rounds_.load(std::memory_order_acquire); | ||
auto cnt = --b->pending_; | ||
assert(cnt >= 0); | ||
if (cnt == 0) { | ||
b->pending_.store(b->total_); | ||
b->rounds_.store(cur_round + 1); | ||
} else { | ||
if (idle_func) { | ||
if (cur_round != b->rounds_.load()) { | ||
return; | ||
} | ||
idle_func(&b->rounds_, cur_round + 1, -1, idle_args); | ||
ciyongch marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
while (cur_round == b->rounds_.load()) { | ||
_mm_pause(); | ||
} | ||
} | ||
} | ||
|
||
static_assert(sizeof(barrier_t) == 64, "size of barrier_t should be 64-byte"); | ||
|
||
void gc_init_barrier(barrier_t *b, int num_barriers, uint64_t thread_count) { | ||
for (int i = 0; i < num_barriers; i++) { | ||
b[i].total_ = thread_count; | ||
b[i].pending_.store(thread_count); | ||
b[i].rounds_.store(0); | ||
} | ||
} | ||
|
||
#if GC_NEEDS_OMP_WRAPPER | ||
void WEAK_SYMBOL __kmpc_barrier(void *loc, int32_t global_tid) { | ||
#pragma omp barrier | ||
} | ||
|
||
int WEAK_SYMBOL __kmpc_global_thread_num(void *loc) { | ||
return omp_get_thread_num(); | ||
} | ||
|
||
// The implementation was extracted and simplified from LLVM libomp | ||
// at openmp/runtime/src/kmp_sched.cpp | ||
void WEAK_SYMBOL __kmpc_for_static_init_8u(void *loc, int32_t gtid, | ||
int32_t schedtype, | ||
int32_t *plastiter, uint64_t *plower, | ||
uint64_t *pupper, int64_t *pstride, | ||
int64_t incr, int64_t chunk) { | ||
if (unlikely(schedtype != 34)) { | ||
std::abort(); | ||
} | ||
const int32_t FALSE = 0; | ||
const int32_t TRUE = 1; | ||
using UT = uint64_t; | ||
// using ST = int64_t; | ||
/* this all has to be changed back to TID and such.. */ | ||
uint32_t tid = gtid; | ||
uint32_t nth = omp_get_num_threads(); | ||
UT trip_count; | ||
|
||
/* special handling for zero-trip loops */ | ||
if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { | ||
if (plastiter != nullptr) | ||
*plastiter = FALSE; | ||
/* leave pupper and plower set to entire iteration space */ | ||
*pstride = incr; /* value should never be used */ | ||
return; | ||
} | ||
|
||
if (nth == 1) { | ||
if (plastiter != nullptr) | ||
*plastiter = TRUE; | ||
*pstride = | ||
(incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); | ||
return; | ||
} | ||
|
||
/* compute trip count */ | ||
if (incr == 1) { | ||
trip_count = *pupper - *plower + 1; | ||
} else if (incr == -1) { | ||
trip_count = *plower - *pupper + 1; | ||
} else if (incr > 0) { | ||
// upper-lower can exceed the limit of signed type | ||
trip_count = (UT)(*pupper - *plower) / incr + 1; | ||
} else { | ||
trip_count = (UT)(*plower - *pupper) / (-incr) + 1; | ||
Comment on lines
+114
to
+122
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems we can combine all these into a single statement There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is copied and simplified from libomp of LLVM. I would like to keep the original code if possible. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, then it would be better to add a reference link here? |
||
} | ||
if (trip_count < nth) { | ||
if (tid < trip_count) { | ||
*pupper = *plower = *plower + tid * incr; | ||
} else { | ||
// set bounds so non-active threads execute no iterations | ||
*plower = *pupper + (incr > 0 ? 1 : -1); | ||
} | ||
if (plastiter != nullptr) | ||
*plastiter = (tid == trip_count - 1); | ||
} else { | ||
UT small_chunk = trip_count / nth; | ||
UT extras = trip_count % nth; | ||
*plower += incr * (tid * small_chunk + (tid < extras ? tid : extras)); | ||
*pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr); | ||
if (plastiter != nullptr) | ||
*plastiter = (tid == nth - 1); | ||
} | ||
*pstride = trip_count; | ||
} | ||
|
||
void WEAK_SYMBOL __kmpc_for_static_fini(void *ptr, int32_t v) {} | ||
|
||
static thread_local int next_num_threads = 0; | ||
|
||
/*! | ||
@ingroup PARALLEL | ||
The type for a microtask which gets passed to @ref __kmpc_fork_call(). | ||
The arguments to the outlined function are | ||
@param global_tid the global thread identity of the thread executing the | ||
function. | ||
@param bound_tid the local identity of the thread executing the function | ||
@param ... pointers to shared variables accessed by the function. | ||
*/ | ||
using kmpc_micro = void (*)(int32_t *global_tid, int32_t *bound_tid, ...); | ||
void WEAK_SYMBOL __kmpc_fork_call(void *loc, int32_t argc, void *pfunc, ...) { | ||
if (unlikely(argc != 1 && argc != 0)) { | ||
std::abort(); | ||
} | ||
va_list ap; | ||
va_start(ap, pfunc); | ||
void *c = va_arg(ap, void *); | ||
int32_t global_tid = 0; | ||
if (unlikely(next_num_threads)) { | ||
#pragma omp parallel num_threads(next_num_threads) | ||
{ | ||
kmpc_micro func = (kmpc_micro)(pfunc); | ||
func(&global_tid, nullptr, c); | ||
} | ||
next_num_threads = 0; | ||
} else { | ||
#pragma omp parallel | ||
{ | ||
kmpc_micro func = (kmpc_micro)(pfunc); | ||
func(&global_tid, nullptr, c); | ||
} | ||
} | ||
va_end(ap); | ||
} | ||
|
||
void WEAK_SYMBOL __kmpc_push_num_threads(void *loc, int32_t global_tid, | ||
int32_t num_threads) { | ||
next_num_threads = num_threads; | ||
} | ||
#endif | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// RUN: gc-opt %s --convert-cpuruntime-to-llvm --convert-openmp-to-llvm --convert-func-to-llvm --convert-arith-to-llvm --convert-cf-to-llvm --reconcile-unrealized-casts | gc-cpu-runner -e main -entry-point-result=void | FileCheck %s | ||
module { | ||
func.func private @omp_get_thread_num() -> i32 | ||
|
||
func.func @check_parallel() { | ||
%c64 = arith.constant 64 : index | ||
%c1 = arith.constant 1 : index | ||
%c0 = arith.constant 0 : index | ||
%c8 = arith.constant 8 : index | ||
%0 = llvm.mlir.constant(1 : i64) : i64 | ||
omp.parallel num_threads(%c8: index) { | ||
omp.wsloop { | ||
omp.loop_nest (%arg1, %arg2) : index = (%c0, %c0) to (%c1, %c64) step (%c1, %c1) { | ||
cpuruntime.printf "ITR %zu\n" %arg2 : index | ||
omp.yield | ||
} | ||
omp.terminator | ||
} | ||
%tid = func.call @omp_get_thread_num() : () -> i32 | ||
cpuruntime.printf "EXIT %d\n" %tid : i32 | ||
omp.terminator | ||
} | ||
return | ||
} | ||
|
||
func.func @main() { | ||
%0 = func.call @omp_get_thread_num() : () -> i32 | ||
cpuruntime.printf "TID %d\n" %0 : i32 | ||
call @check_parallel() : ()->() | ||
return | ||
} | ||
// CHECK: TID 0 | ||
// CHECK-COUNT-64: ITR {{[0-9]+}} | ||
// CHECK-NOT: ITR | ||
// CHECK-COUNT-8: EXIT {{[0-9]+}} | ||
// CHECK-NOT: EXIT | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this match "libiomp", "libiomp5", "libomp", "libomp5"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it should. :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If so, then the
"omp" IN_LIST OpenMP_C_LIB_NAMES
will also matchlibgomp
? And this is not what we want?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I suppose. libgomp has the name
gomp
inOpenMP_C_LIB_NAMES
.IN_LIST
of cmake performs a full-name search, instead of finding substr.It should only match
iomp omp omp5