kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23#define MAX_MESSAGE 512
24
25// flags will be used in the future, e.g., to implement the openmp_strict
26// library restrictions
27
36void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
43 } else if (__kmp_ignore_mppbeg() == FALSE) {
44 // By default __kmp_ignore_mppbeg() returns TRUE.
45 __kmp_internal_begin();
46 KC_TRACE(10, ("__kmpc_begin: called\n"));
47 }
48}
49
58void __kmpc_end(ident_t *loc) {
59 // By default, __kmp_ignore_mppend() returns TRUE, which makes the
60 // __kmpc_end() call a no-op. However, this can be overridden with the
61 // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
62 // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
63 // root (which can cause library shutdown).
64 if (__kmp_ignore_mppend() == FALSE) {
65 KC_TRACE(10, ("__kmpc_end: called\n"));
66 KA_TRACE(30, ("__kmpc_end\n"));
67
68 __kmp_internal_end_thread(-1);
69 }
70#if KMP_OS_WINDOWS && OMPT_SUPPORT
71 // Normal exit process on Windows does not allow worker threads of the final
72 // parallel region to finish reporting their events, so shutting down the
73 // library here fixes the issue at least for the cases where __kmpc_end() is
74 // placed properly.
75 if (ompt_enabled.enabled)
76 __kmp_internal_end_library(__kmp_gtid_get_specific());
77#endif
78}
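
/* Illustrative sketch (not part of the runtime): a compiler that emits the
   begin/end bracketing wraps the sequential program roughly as below. The
   ident_t initializer and its location string are hypothetical; real
   compilers emit their own location records.

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};

     int main(void) {
       __kmpc_begin(&loc, 0); // honors KMP_INITIAL_THREAD_BIND / KMP_IGNORE_MPPBEG
       // ... user code, possibly containing parallel regions ...
       __kmpc_end(&loc); // honors KMP_IGNORE_MPPEND
       return 0;
     }
*/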
79
98kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
99 kmp_int32 gtid = __kmp_entry_gtid();
100
101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
102
103 return gtid;
104}
105
120kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
121 KC_TRACE(10,
122 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
123
124 return TCR_4(__kmp_all_nth);
125}
126
133kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
135 return __kmp_tid_from_gtid(__kmp_entry_gtid());
136}
137
143kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
145
146 return __kmp_entry_thread()->th.th_team->t.t_nproc;
147}
148
155kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
156#ifndef KMP_DEBUG
157
158 return TRUE;
159
160#else
161
162 const char *semi2;
163 const char *semi3;
164 int line_no;
165
166 if (__kmp_par_range == 0) {
167 return TRUE;
168 }
169 semi2 = loc->psource;
170 if (semi2 == NULL) {
171 return TRUE;
172 }
173 semi2 = strchr(semi2, ';');
174 if (semi2 == NULL) {
175 return TRUE;
176 }
177 semi2 = strchr(semi2 + 1, ';');
178 if (semi2 == NULL) {
179 return TRUE;
180 }
181 if (__kmp_par_range_filename[0]) {
182 const char *name = semi2 - 1;
183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
184 name--;
185 }
186 if ((*name == '/') || (*name == ';')) {
187 name++;
188 }
189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
190 return __kmp_par_range < 0;
191 }
192 }
193 semi3 = strchr(semi2 + 1, ';');
194 if (__kmp_par_range_routine[0]) {
195 if ((semi3 != NULL) && (semi3 > semi2) &&
196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
197 return __kmp_par_range < 0;
198 }
199 }
200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
202 return __kmp_par_range > 0;
203 }
204 return __kmp_par_range < 0;
205 }
206 return TRUE;
207
208#endif /* KMP_DEBUG */
209}
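
/* Illustrative note (not part of the runtime): the filtering above assumes the
   ident_t::psource string uses the usual ";file;routine;line;column;;" layout,
   e.g. a hypothetical location record

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";foo.c;compute;42;1;;"};

   In debug builds with the parallel-range filter enabled (__kmp_par_range set
   from the KMP_PAR_RANGE environment variable), __kmpc_ok_to_fork() matches
   the file name ending at semi2, the routine name between semi2 and semi3, and
   the line number parsed after semi3 against the configured filters.
*/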
210
217kmp_int32 __kmpc_in_parallel(ident_t *loc) {
218 return __kmp_entry_thread()->th.th_root->r.r_active;
219}
220
230void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
231 kmp_int32 num_threads) {
232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
233 global_tid, num_threads));
234
235 __kmp_push_num_threads(loc, global_tid, num_threads);
236}
237
238void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
240
241 /* the num_threads are automatically popped */
242}
243
244void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250}
251
262void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265#if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280#endif
281
282 // maybe saving thr_state would be enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287#if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 kmp_team_t *parent_team = master_th->th.th_team;
292 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
293 if (lwt)
294 ompt_frame = &(lwt->ompt_task_info.frame);
295 else {
296 int tid = __kmp_tid_from_gtid(gtid);
297 ompt_frame = &(
298 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
299 }
300 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
301 OMPT_STORE_RETURN_ADDRESS(gtid);
302 }
303#endif
304
305#if INCLUDE_SSC_MARKS
306 SSC_MARK_FORKING();
307#endif
308 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
309 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
310 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
311/* TODO: revert workaround for Intel(R) 64 tracker #96 */
312#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
313 &ap
314#else
315 ap
316#endif
317 );
318#if INCLUDE_SSC_MARKS
319 SSC_MARK_JOINING();
320#endif
321 __kmp_join_call(loc, gtid
322#if OMPT_SUPPORT
323 ,
324 fork_context_intel
325#endif
326 );
327
328 va_end(ap);
329 }
330
331#if KMP_STATS_ENABLED
332 if (previous_state == stats_state_e::SERIAL_REGION) {
333 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
334 } else {
335 KMP_POP_PARTITIONED_TIMER();
336 }
337#endif // KMP_STATS_ENABLED
338}
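
/* Illustrative sketch (not part of the runtime): roughly how a compiler lowers
   "#pragma omp parallel" onto __kmpc_fork_call(). The outlined function name,
   the location string, and the way the shared variable is passed are
   hypothetical; real compilers generate their own conventions.

     // outlined parallel region body; matches the kmpc_micro signature
     static void outlined_parallel(kmp_int32 *gtid, kmp_int32 *btid, int *n) {
       printf("thread %d sees n = %d\n", *btid, *n); // assumes <stdio.h>
     }

     void run(void) {
       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file.c;run;10;1;;"};
       int n = 42;
       // one trailing argument (argc = 1), forwarded to every thread's
       // invocation of the microtask by the runtime
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_parallel, &n);
     }
*/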
339
351void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
352 kmp_int32 num_teams, kmp_int32 num_threads) {
353 KA_TRACE(20,
354 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
355 global_tid, num_teams, num_threads));
356
357 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
358}
359
370void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
371 ...) {
372 int gtid = __kmp_entry_gtid();
373 kmp_info_t *this_thr = __kmp_threads[gtid];
374 va_list ap;
375 va_start(ap, microtask);
376
377#if KMP_STATS_ENABLED
378 KMP_COUNT_BLOCK(OMP_TEAMS);
379 stats_state_e previous_state = KMP_GET_THREAD_STATE();
380 if (previous_state == stats_state_e::SERIAL_REGION) {
381 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
382 } else {
383 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
384 }
385#endif
386
387 // remember teams entry point and nesting level
388 this_thr->th.th_teams_microtask = microtask;
389 this_thr->th.th_teams_level =
390 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
391
392#if OMPT_SUPPORT
393 kmp_team_t *parent_team = this_thr->th.th_team;
394 int tid = __kmp_tid_from_gtid(gtid);
395 if (ompt_enabled.enabled) {
396 parent_team->t.t_implicit_task_taskdata[tid]
397 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
398 }
399 OMPT_STORE_RETURN_ADDRESS(gtid);
400#endif
401
402 // check if __kmpc_push_num_teams was called; if not, set the default
403 // number of teams
404 if (this_thr->th.th_teams_size.nteams == 0) {
405 __kmp_push_num_teams(loc, gtid, 0, 0);
406 }
407 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
408 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
409 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
410
411 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
412 VOLATILE_CAST(microtask_t)
413 __kmp_teams_master, // "wrapped" task
414 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
415#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
416 &ap
417#else
418 ap
419#endif
420 );
421 __kmp_join_call(loc, gtid
422#if OMPT_SUPPORT
423 ,
424 fork_context_intel
425#endif
426 );
427
428 // Pop current CG root off list
429 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
430 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
431 this_thr->th.th_cg_roots = tmp->up;
432 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
433 " to node %p. cg_nthreads was %d\n",
434 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
435 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
436 int i = tmp->cg_nthreads--;
437 if (i == 1) { // check if we are the last thread in the CG (not always the case)
438 __kmp_free(tmp);
439 }
440 // Restore current task's thread_limit from CG root
441 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
442 this_thr->th.th_current_task->td_icvs.thread_limit =
443 this_thr->th.th_cg_roots->cg_thread_limit;
444
445 this_thr->th.th_teams_microtask = NULL;
446 this_thr->th.th_teams_level = 0;
447 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
448 va_end(ap);
449#if KMP_STATS_ENABLED
450 if (previous_state == stats_state_e::SERIAL_REGION) {
451 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
452 } else {
453 KMP_POP_PARTITIONED_TIMER();
454 }
455#endif // KMP_STATS_ENABLED
456}
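
/* Illustrative sketch (not part of the runtime): a possible lowering of
   "#pragma omp teams num_teams(4) thread_limit(8)". All names below are
   hypothetical.

     static void outlined_teams(kmp_int32 *gtid, kmp_int32 *btid) {
       // body of the teams region; the master thread of each team runs this
     }

     void run_teams(void) {
       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file.c;run_teams;20;1;;"};
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_teams(&loc, gtid, 4, 8); // num_teams = 4, thread_limit = 8
       __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams);
     }
*/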
457
458// I don't think this function should ever have been exported.
459// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
460// openmp code ever called it, but it's been exported from the RTL for so
461// long that I'm afraid to remove the definition.
462int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
463
476void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
477// The implementation is now in kmp_runtime.cpp so that it can share static
478// functions with kmp_fork_call since the tasks to be done are similar in
479// each case.
480#if OMPT_SUPPORT
481 OMPT_STORE_RETURN_ADDRESS(global_tid);
482#endif
483 __kmp_serialized_parallel(loc, global_tid);
484}
485
493void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
494 kmp_internal_control_t *top;
495 kmp_info_t *this_thr;
496 kmp_team_t *serial_team;
497
498 KC_TRACE(10,
499 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
500
501 /* skip all this code for autopar serialized loops since it results in
502 unacceptable overhead */
503 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
504 return;
505
506 // Not autopar code
507 if (!TCR_4(__kmp_init_parallel))
508 __kmp_parallel_initialize();
509
510 __kmp_resume_if_soft_paused();
511
512 this_thr = __kmp_threads[global_tid];
513 serial_team = this_thr->th.th_serial_team;
514
515 kmp_task_team_t *task_team = this_thr->th.th_task_team;
516 // we need to wait for the proxy tasks before finishing the thread
517 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
518 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
519
520 KMP_MB();
521 KMP_DEBUG_ASSERT(serial_team);
522 KMP_ASSERT(serial_team->t.t_serialized);
523 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
524 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
525 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
526 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
527
528#if OMPT_SUPPORT
529 if (ompt_enabled.enabled &&
530 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
531 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
532 if (ompt_enabled.ompt_callback_implicit_task) {
533 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
534 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
535 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
536 }
537
538 // clear the task id only after unlinking the task
539 ompt_data_t *parent_task_data;
540 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
541
542 if (ompt_enabled.ompt_callback_parallel_end) {
543 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
544 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
545 ompt_parallel_invoker_program | ompt_parallel_team,
546 OMPT_LOAD_RETURN_ADDRESS(global_tid));
547 }
548 __ompt_lw_taskteam_unlink(this_thr);
549 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
550 }
551#endif
552
553 /* If necessary, pop the internal control stack values and replace the team
554 * values */
555 top = serial_team->t.t_control_stack_top;
556 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
557 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
558 serial_team->t.t_control_stack_top = top->next;
559 __kmp_free(top);
560 }
561
562 // if( serial_team -> t.t_serialized > 1 )
563 serial_team->t.t_level--;
564
565 /* pop dispatch buffers stack */
566 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
567 {
568 dispatch_private_info_t *disp_buffer =
569 serial_team->t.t_dispatch->th_disp_buffer;
570 serial_team->t.t_dispatch->th_disp_buffer =
571 serial_team->t.t_dispatch->th_disp_buffer->next;
572 __kmp_free(disp_buffer);
573 }
574 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
575
576 --serial_team->t.t_serialized;
577 if (serial_team->t.t_serialized == 0) {
578
579/* return to the parallel section */
580
581#if KMP_ARCH_X86 || KMP_ARCH_X86_64
582 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
583 __kmp_clear_x87_fpu_status_word();
584 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
585 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
586 }
587#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
588
589 this_thr->th.th_team = serial_team->t.t_parent;
590 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
591
592 /* restore values cached in the thread */
593 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
594 this_thr->th.th_team_master =
595 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
596 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
597
598 /* TODO the below shouldn't need to be adjusted for serialized teams */
599 this_thr->th.th_dispatch =
600 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
601
602 __kmp_pop_current_task_from_thread(this_thr);
603
604 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
605 this_thr->th.th_current_task->td_flags.executing = 1;
606
607 if (__kmp_tasking_mode != tskm_immediate_exec) {
608 // Copy the task team from the new child / old parent team to the thread.
609 this_thr->th.th_task_team =
610 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
611 KA_TRACE(20,
612 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
613 "team %p\n",
614 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
615 }
616 } else {
617 if (__kmp_tasking_mode != tskm_immediate_exec) {
618 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
619 "depth of serial team %p to %d\n",
620 global_tid, serial_team, serial_team->t.t_serialized));
621 }
622 }
623
624 if (__kmp_env_consistency_check)
625 __kmp_pop_parallel(global_tid, NULL);
626#if OMPT_SUPPORT
627 if (ompt_enabled.enabled)
628 this_thr->th.ompt_thread_info.state =
629 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
630 : ompt_state_work_parallel);
631#endif
632}
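
/* Illustrative sketch (not part of the runtime): when a parallel region is
   known to execute serialized (for example "#pragma omp parallel if(0)"), a
   compiler can bracket the region body directly instead of forking. Names
   below are hypothetical.

     void run_serialized(void) {
       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file.c;run_serialized;5;1;;"};
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_serialized_parallel(&loc, gtid);
       // ... region body, executed only by the encountering thread ...
       __kmpc_end_serialized_parallel(&loc, gtid);
     }
*/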
633
642void __kmpc_flush(ident_t *loc) {
643 KC_TRACE(10, ("__kmpc_flush: called\n"));
644
645 /* need an explicit __mf() here since the library uses volatile instead */
646 KMP_MB(); /* Flush all pending memory write invalidates. */
647
648#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
649#if KMP_MIC
650// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
651// We shouldn't need it, though, since the ABI rules require that
652// * If the compiler generates NGO stores it also generates the fence
653// * If users hand-code NGO stores they should insert the fence
654// therefore no incomplete unordered stores should be visible.
655#else
656 // C74404
657 // This is to handle non-temporal store instructions (sfence needed) and
658 // the clflush instruction (mfence needed).
659 // The non-temporal load movntdqa should probably also be
660 // handled.
661 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
662 if (!__kmp_cpuinfo.initialized) {
663 __kmp_query_cpuid(&__kmp_cpuinfo);
664 }
665 if (!__kmp_cpuinfo.sse2) {
666 // CPU cannot execute SSE2 instructions.
667 } else {
668#if KMP_COMPILER_ICC
669 _mm_mfence();
670#elif KMP_COMPILER_MSVC
671 MemoryBarrier();
672#else
673 __sync_synchronize();
674#endif // KMP_COMPILER_ICC
675 }
676#endif // KMP_MIC
677#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
678 KMP_ARCH_RISCV64)
679// Nothing to see here; move along.
680#elif KMP_ARCH_PPC64
681// Nothing needed here (we have a real MB above).
682#if KMP_OS_CNK
683 // The flushing thread needs to yield here; this prevents a
684 // busy-waiting thread from saturating the pipeline. flush is
685 // often used in loops like this:
686 // while (!flag) {
687 // #pragma omp flush(flag)
688 // }
689 // and adding the yield here is good for at least a 10x speedup
690 // when running >2 threads per core (on the NAS LU benchmark).
691 __kmp_yield();
692#endif
693#else
694#error Unknown or unsupported architecture
695#endif
696
697#if OMPT_SUPPORT && OMPT_OPTIONAL
698 if (ompt_enabled.ompt_callback_flush) {
699 ompt_callbacks.ompt_callback(ompt_callback_flush)(
700 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
701 }
702#endif
703}
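
/* Illustrative sketch (not part of the runtime): "#pragma omp flush" lowers to
   a single call; the location record below is hypothetical.

     static ident_t loc_flush = {0, KMP_IDENT_KMPC, 0, 0, ";file.c;func;30;1;;"};
     // ...
     __kmpc_flush(&loc_flush);
*/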
704
705/* -------------------------------------------------------------------------- */
713void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
714 KMP_COUNT_BLOCK(OMP_BARRIER);
715 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
716
717 if (!TCR_4(__kmp_init_parallel))
718 __kmp_parallel_initialize();
719
720 __kmp_resume_if_soft_paused();
721
722 if (__kmp_env_consistency_check) {
723 if (loc == 0) {
724 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
725 }
726 __kmp_check_barrier(global_tid, ct_barrier, loc);
727 }
728
729#if OMPT_SUPPORT
730 ompt_frame_t *ompt_frame;
731 if (ompt_enabled.enabled) {
732 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
733 if (ompt_frame->enter_frame.ptr == NULL)
734 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
735 OMPT_STORE_RETURN_ADDRESS(global_tid);
736 }
737#endif
738 __kmp_threads[global_tid]->th.th_ident = loc;
739 // TODO: explicit barrier_wait_id:
740 // this function is called when the 'barrier' directive is present or at
741 // the implicit barrier at the end of a worksharing construct.
742 // 1) better to add a per-thread barrier counter to a thread data structure
743 // 2) set it to 0 when a new team is created
744 // 3) no sync is required
745
746 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
747#if OMPT_SUPPORT && OMPT_OPTIONAL
748 if (ompt_enabled.enabled) {
749 ompt_frame->enter_frame = ompt_data_none;
750 }
751#endif
752}
753
754/* The BARRIER for a MASTER section is always explicit */
761kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
762 int status = 0;
763
764 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
765
766 if (!TCR_4(__kmp_init_parallel))
767 __kmp_parallel_initialize();
768
769 __kmp_resume_if_soft_paused();
770
771 if (KMP_MASTER_GTID(global_tid)) {
772 KMP_COUNT_BLOCK(OMP_MASTER);
773 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
774 status = 1;
775 }
776
777#if OMPT_SUPPORT && OMPT_OPTIONAL
778 if (status) {
779 if (ompt_enabled.ompt_callback_master) {
780 kmp_info_t *this_thr = __kmp_threads[global_tid];
781 kmp_team_t *team = this_thr->th.th_team;
782
783 int tid = __kmp_tid_from_gtid(global_tid);
784 ompt_callbacks.ompt_callback(ompt_callback_master)(
785 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
786 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
787 OMPT_GET_RETURN_ADDRESS(0));
788 }
789 }
790#endif
791
792 if (__kmp_env_consistency_check) {
793#if KMP_USE_DYNAMIC_LOCK
794 if (status)
795 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
796 else
797 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
798#else
799 if (status)
800 __kmp_push_sync(global_tid, ct_master, loc, NULL);
801 else
802 __kmp_check_sync(global_tid, ct_master, loc, NULL);
803#endif
804 }
805
806 return status;
807}
808
817void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
818 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
819
820 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
821 KMP_POP_PARTITIONED_TIMER();
822
823#if OMPT_SUPPORT && OMPT_OPTIONAL
824 kmp_info_t *this_thr = __kmp_threads[global_tid];
825 kmp_team_t *team = this_thr->th.th_team;
826 if (ompt_enabled.ompt_callback_master) {
827 int tid = __kmp_tid_from_gtid(global_tid);
828 ompt_callbacks.ompt_callback(ompt_callback_master)(
829 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
830 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
831 OMPT_GET_RETURN_ADDRESS(0));
832 }
833#endif
834
835 if (__kmp_env_consistency_check) {
836 if (global_tid < 0)
837 KMP_WARNING(ThreadIdentInvalid);
838
839 if (KMP_MASTER_GTID(global_tid))
840 __kmp_pop_sync(global_tid, ct_master, loc);
841 }
842}
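
/* Illustrative sketch (not part of the runtime): typical lowering of
   "#pragma omp master". Only the thread for which __kmpc_master() returns 1
   executes the body and the matching __kmpc_end_master() call.

     if (__kmpc_master(&loc, gtid)) {
       // ... master region body ...
       __kmpc_end_master(&loc, gtid);
     }
*/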
843
851void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
852 int cid = 0;
853 kmp_info_t *th;
854 KMP_DEBUG_ASSERT(__kmp_init_serial);
855
856 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
857
858 if (!TCR_4(__kmp_init_parallel))
859 __kmp_parallel_initialize();
860
861 __kmp_resume_if_soft_paused();
862
863#if USE_ITT_BUILD
864 __kmp_itt_ordered_prep(gtid);
865// TODO: ordered_wait_id
866#endif /* USE_ITT_BUILD */
867
868 th = __kmp_threads[gtid];
869
870#if OMPT_SUPPORT && OMPT_OPTIONAL
871 kmp_team_t *team;
872 ompt_wait_id_t lck;
873 void *codeptr_ra;
874 if (ompt_enabled.enabled) {
875 OMPT_STORE_RETURN_ADDRESS(gtid);
876 team = __kmp_team_from_gtid(gtid);
877 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
878 /* OMPT state update */
879 th->th.ompt_thread_info.wait_id = lck;
880 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
881
882 /* OMPT event callback */
883 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
884 if (ompt_enabled.ompt_callback_mutex_acquire) {
885 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
886 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
887 codeptr_ra);
888 }
889 }
890#endif
891
892 if (th->th.th_dispatch->th_deo_fcn != 0)
893 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
894 else
895 __kmp_parallel_deo(&gtid, &cid, loc);
896
897#if OMPT_SUPPORT && OMPT_OPTIONAL
898 if (ompt_enabled.enabled) {
899 /* OMPT state update */
900 th->th.ompt_thread_info.state = ompt_state_work_parallel;
901 th->th.ompt_thread_info.wait_id = 0;
902
903 /* OMPT event callback */
904 if (ompt_enabled.ompt_callback_mutex_acquired) {
905 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
906 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
907 }
908 }
909#endif
910
911#if USE_ITT_BUILD
912 __kmp_itt_ordered_start(gtid);
913#endif /* USE_ITT_BUILD */
914}
915
923void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
924 int cid = 0;
925 kmp_info_t *th;
926
927 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
928
929#if USE_ITT_BUILD
930 __kmp_itt_ordered_end(gtid);
931// TODO: ordered_wait_id
932#endif /* USE_ITT_BUILD */
933
934 th = __kmp_threads[gtid];
935
936 if (th->th.th_dispatch->th_dxo_fcn != 0)
937 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
938 else
939 __kmp_parallel_dxo(&gtid, &cid, loc);
940
941#if OMPT_SUPPORT && OMPT_OPTIONAL
942 OMPT_STORE_RETURN_ADDRESS(gtid);
943 if (ompt_enabled.ompt_callback_mutex_released) {
944 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
945 ompt_mutex_ordered,
946 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
947 ->t.t_ordered.dt.t_value,
948 OMPT_LOAD_RETURN_ADDRESS(gtid));
949 }
950#endif
951}
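
/* Illustrative sketch (not part of the runtime): inside an ordered worksharing
   loop ("#pragma omp for ordered" with a nested "#pragma omp ordered"), the
   compiler brackets the ordered block with these calls:

     for (i = lower; i <= upper; ++i) { // chunk handed out by the dispatcher
       // ... part of the iteration that may run out of order ...
       __kmpc_ordered(&loc, gtid);
       // ... part that must execute in iteration order ...
       __kmpc_end_ordered(&loc, gtid);
     }
*/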
952
953#if KMP_USE_DYNAMIC_LOCK
954
955static __forceinline void
956__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
957 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
958 // Pointer to the allocated indirect lock is written to crit, while indexing
959 // is ignored.
960 void *idx;
961 kmp_indirect_lock_t **lck;
962 lck = (kmp_indirect_lock_t **)crit;
963 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
964 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
965 KMP_SET_I_LOCK_LOCATION(ilk, loc);
966 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
967 KA_TRACE(20,
968 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
969#if USE_ITT_BUILD
970 __kmp_itt_critical_creating(ilk->lock, loc);
971#endif
972 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
973 if (status == 0) {
974#if USE_ITT_BUILD
975 __kmp_itt_critical_destroyed(ilk->lock);
976#endif
977 // We don't really need to destroy the unclaimed lock here since it will be
978 // cleaned up at program exit.
979 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
980 }
981 KMP_DEBUG_ASSERT(*lck != NULL);
982}
983
984// Fast-path acquire tas lock
985#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
986 { \
987 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
988 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
989 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
990 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
991 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
992 kmp_uint32 spins; \
993 KMP_FSYNC_PREPARE(l); \
994 KMP_INIT_YIELD(spins); \
995 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
996 do { \
997 if (TCR_4(__kmp_nth) > \
998 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
999 KMP_YIELD(TRUE); \
1000 } else { \
1001 KMP_YIELD_SPIN(spins); \
1002 } \
1003 __kmp_spin_backoff(&backoff); \
1004 } while ( \
1005 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1006 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1007 } \
1008 KMP_FSYNC_ACQUIRED(l); \
1009 }
1010
1011// Fast-path test tas lock
1012#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1013 { \
1014 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1015 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1016 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1017 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1018 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1019 }
1020
1021// Fast-path release tas lock
1022#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1023 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
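
/* Simplified analogue (not part of the runtime) of the test-and-set fast path
   implemented by the macros above, written with C11 atomics. It keeps the
   "check, then compare-and-swap with acquire ordering" shape but omits the
   yield, backoff and FSYNC bookkeeping the real macros perform.

     #include <stdatomic.h>

     typedef struct { atomic_int poll; } toy_tas_lock_t; // 0 means free

     static void toy_tas_acquire(toy_tas_lock_t *l, int gtid) {
       int expected = 0, busy = gtid + 1;
       while (atomic_load_explicit(&l->poll, memory_order_relaxed) != 0 ||
              !atomic_compare_exchange_weak_explicit(
                  &l->poll, &expected, busy, memory_order_acquire,
                  memory_order_relaxed)) {
         expected = 0; // a failed CAS overwrites the expected value
       }
     }

     static void toy_tas_release(toy_tas_lock_t *l) {
       atomic_store_explicit(&l->poll, 0, memory_order_release);
     }
*/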
1024
1025#if KMP_USE_FUTEX
1026
1027#include <sys/syscall.h>
1028#include <unistd.h>
1029#ifndef FUTEX_WAIT
1030#define FUTEX_WAIT 0
1031#endif
1032#ifndef FUTEX_WAKE
1033#define FUTEX_WAKE 1
1034#endif
1035
1036// Fast-path acquire futex lock
1037#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1038 { \
1039 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1040 kmp_int32 gtid_code = (gtid + 1) << 1; \
1041 KMP_MB(); \
1042 KMP_FSYNC_PREPARE(ftx); \
1043 kmp_int32 poll_val; \
1044 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1045 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1046 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1047 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1048 if (!cond) { \
1049 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1050 poll_val | \
1051 KMP_LOCK_BUSY(1, futex))) { \
1052 continue; \
1053 } \
1054 poll_val |= KMP_LOCK_BUSY(1, futex); \
1055 } \
1056 kmp_int32 rc; \
1057 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1058 NULL, NULL, 0)) != 0) { \
1059 continue; \
1060 } \
1061 gtid_code |= 1; \
1062 } \
1063 KMP_FSYNC_ACQUIRED(ftx); \
1064 }
1065
1066// Fast-path test futex lock
1067#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1068 { \
1069 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1070 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1071 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1072 KMP_FSYNC_ACQUIRED(ftx); \
1073 rc = TRUE; \
1074 } else { \
1075 rc = FALSE; \
1076 } \
1077 }
1078
1079// Fast-path release futex lock
1080#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1081 { \
1082 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1083 KMP_MB(); \
1084 KMP_FSYNC_RELEASING(ftx); \
1085 kmp_int32 poll_val = \
1086 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1087 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1088 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1089 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1090 } \
1091 KMP_MB(); \
1092 KMP_YIELD_OVERSUB(); \
1093 }
1094
1095#endif // KMP_USE_FUTEX
1096
1097#else // KMP_USE_DYNAMIC_LOCK
1098
1099static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1100 ident_t const *loc,
1101 kmp_int32 gtid) {
1102 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1103
1104 // Because of the double-check, the following load doesn't need to be volatile
1105 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1106
1107 if (lck == NULL) {
1108 void *idx;
1109
1110 // Allocate & initialize the lock.
1111 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1112 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1113 __kmp_init_user_lock_with_checks(lck);
1114 __kmp_set_user_lock_location(lck, loc);
1115#if USE_ITT_BUILD
1116 __kmp_itt_critical_creating(lck);
1117// __kmp_itt_critical_creating() should be called *before* the first usage
1118// of the underlying lock. It is the only place where we can guarantee it.
1119// There is a chance the lock will be destroyed without ever being used, but
1120// that is not a problem, because this is not a real event seen by the user;
1121// it merely sets a name for the object (lock). See kmp_itt.h for details.
1122#endif /* USE_ITT_BUILD */
1123
1124 // Use a cmpxchg instruction to slam the start of the critical section with
1125 // the lock pointer. If another thread beat us to it, deallocate the lock,
1126 // and use the lock that the other thread allocated.
1127 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1128
1129 if (status == 0) {
1130// Deallocate the lock and reload the value.
1131#if USE_ITT_BUILD
1132 __kmp_itt_critical_destroyed(lck);
1133// Let ITT know the lock is destroyed and the same memory location may be reused
1134// for another purpose.
1135#endif /* USE_ITT_BUILD */
1136 __kmp_destroy_user_lock_with_checks(lck);
1137 __kmp_user_lock_free(&idx, gtid, lck);
1138 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1139 KMP_DEBUG_ASSERT(lck != NULL);
1140 }
1141 }
1142 return lck;
1143}
1144
1145#endif // KMP_USE_DYNAMIC_LOCK
1146
1157void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1158 kmp_critical_name *crit) {
1159#if KMP_USE_DYNAMIC_LOCK
1160#if OMPT_SUPPORT && OMPT_OPTIONAL
1161 OMPT_STORE_RETURN_ADDRESS(global_tid);
1162#endif // OMPT_SUPPORT
1163 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1164#else
1165 KMP_COUNT_BLOCK(OMP_CRITICAL);
1166#if OMPT_SUPPORT && OMPT_OPTIONAL
1167 ompt_state_t prev_state = ompt_state_undefined;
1168 ompt_thread_info_t ti;
1169#endif
1170 kmp_user_lock_p lck;
1171
1172 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1173
1174 // TODO: add THR_OVHD_STATE
1175
1176 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1177 KMP_CHECK_USER_LOCK_INIT();
1178
1179 if ((__kmp_user_lock_kind == lk_tas) &&
1180 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1181 lck = (kmp_user_lock_p)crit;
1182 }
1183#if KMP_USE_FUTEX
1184 else if ((__kmp_user_lock_kind == lk_futex) &&
1185 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1186 lck = (kmp_user_lock_p)crit;
1187 }
1188#endif
1189 else { // ticket, queuing or drdpa
1190 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1191 }
1192
1193 if (__kmp_env_consistency_check)
1194 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1195
1196// Since the critical directive binds to all threads, not just the current
1197// team, we have to check this even if we are in a serialized team.
1198// Also, even if we are the uber thread, we still have to acquire the lock,
1199// as we have to contend with sibling threads.
1200
1201#if USE_ITT_BUILD
1202 __kmp_itt_critical_acquiring(lck);
1203#endif /* USE_ITT_BUILD */
1204#if OMPT_SUPPORT && OMPT_OPTIONAL
1205 OMPT_STORE_RETURN_ADDRESS(global_tid);
1206 void *codeptr_ra = NULL;
1207 if (ompt_enabled.enabled) {
1208 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1209 /* OMPT state update */
1210 prev_state = ti.state;
1211 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1212 ti.state = ompt_state_wait_critical;
1213
1214 /* OMPT event callback */
1215 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1216 if (ompt_enabled.ompt_callback_mutex_acquire) {
1217 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1218 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1219 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1220 }
1221 }
1222#endif
1223 // The value of 'crit' should be usable as the critical_id of the critical
1224 // section directive.
1225 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1226
1227#if USE_ITT_BUILD
1228 __kmp_itt_critical_acquired(lck);
1229#endif /* USE_ITT_BUILD */
1230#if OMPT_SUPPORT && OMPT_OPTIONAL
1231 if (ompt_enabled.enabled) {
1232 /* OMPT state update */
1233 ti.state = prev_state;
1234 ti.wait_id = 0;
1235
1236 /* OMPT event callback */
1237 if (ompt_enabled.ompt_callback_mutex_acquired) {
1238 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1239 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1240 }
1241 }
1242#endif
1243 KMP_POP_PARTITIONED_TIMER();
1244
1245 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1246 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1247#endif // KMP_USE_DYNAMIC_LOCK
1248}
1249
1250#if KMP_USE_DYNAMIC_LOCK
1251
1252// Converts the given hint to an internal lock implementation
1253static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1254#if KMP_USE_TSX
1255#define KMP_TSX_LOCK(seq) lockseq_##seq
1256#else
1257#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1258#endif
1259
1260#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1261#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1262#else
1263#define KMP_CPUINFO_RTM 0
1264#endif
1265
1266 // Hints that do not require further logic
1267 if (hint & kmp_lock_hint_hle)
1268 return KMP_TSX_LOCK(hle);
1269 if (hint & kmp_lock_hint_rtm)
1270 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1271 if (hint & kmp_lock_hint_adaptive)
1272 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1273
1274 // Rule out conflicting hints first by returning the default lock
1275 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1276 return __kmp_user_lock_seq;
1277 if ((hint & omp_lock_hint_speculative) &&
1278 (hint & omp_lock_hint_nonspeculative))
1279 return __kmp_user_lock_seq;
1280
1281 // Do not even consider speculation when it appears to be contended
1282 if (hint & omp_lock_hint_contended)
1283 return lockseq_queuing;
1284
1285 // Uncontended lock without speculation
1286 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1287 return lockseq_tas;
1288
1289 // HLE lock for speculation
1290 if (hint & omp_lock_hint_speculative)
1291 return KMP_TSX_LOCK(hle);
1292
1293 return __kmp_user_lock_seq;
1294}
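
/* Illustrative note (not part of the runtime): user-visible hints reach this
   mapping through omp_init_lock_with_hint() and the critical hint clause, e.g.

     omp_lock_t lk;
     omp_init_lock_with_hint(&lk, omp_lock_hint_contended);    // -> queuing lock
     // omp_lock_hint_uncontended, non-speculative              -> TAS lock
     // omp_lock_hint_speculative, on TSX-capable hardware      -> HLE path

   The exact mapping is whatever the function above computes and can vary with
   architecture and build configuration.
*/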
1295
1296#if OMPT_SUPPORT && OMPT_OPTIONAL
1297#if KMP_USE_DYNAMIC_LOCK
1298static kmp_mutex_impl_t
1299__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1300 if (user_lock) {
1301 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1302 case 0:
1303 break;
1304#if KMP_USE_FUTEX
1305 case locktag_futex:
1306 return kmp_mutex_impl_queuing;
1307#endif
1308 case locktag_tas:
1309 return kmp_mutex_impl_spin;
1310#if KMP_USE_TSX
1311 case locktag_hle:
1312 return kmp_mutex_impl_speculative;
1313#endif
1314 default:
1315 return kmp_mutex_impl_none;
1316 }
1317 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1318 }
1319 KMP_ASSERT(ilock);
1320 switch (ilock->type) {
1321#if KMP_USE_TSX
1322 case locktag_adaptive:
1323 case locktag_rtm:
1324 return kmp_mutex_impl_speculative;
1325#endif
1326 case locktag_nested_tas:
1327 return kmp_mutex_impl_spin;
1328#if KMP_USE_FUTEX
1329 case locktag_nested_futex:
1330#endif
1331 case locktag_ticket:
1332 case locktag_queuing:
1333 case locktag_drdpa:
1334 case locktag_nested_ticket:
1335 case locktag_nested_queuing:
1336 case locktag_nested_drdpa:
1337 return kmp_mutex_impl_queuing;
1338 default:
1339 return kmp_mutex_impl_none;
1340 }
1341}
1342#else
1343// For locks without dynamic binding
1344static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1345 switch (__kmp_user_lock_kind) {
1346 case lk_tas:
1347 return kmp_mutex_impl_spin;
1348#if KMP_USE_FUTEX
1349 case lk_futex:
1350#endif
1351 case lk_ticket:
1352 case lk_queuing:
1353 case lk_drdpa:
1354 return kmp_mutex_impl_queuing;
1355#if KMP_USE_TSX
1356 case lk_hle:
1357 case lk_rtm:
1358 case lk_adaptive:
1359 return kmp_mutex_impl_speculative;
1360#endif
1361 default:
1362 return kmp_mutex_impl_none;
1363 }
1364}
1365#endif // KMP_USE_DYNAMIC_LOCK
1366#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1367
1381void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1382 kmp_critical_name *crit, uint32_t hint) {
1383 KMP_COUNT_BLOCK(OMP_CRITICAL);
1384 kmp_user_lock_p lck;
1385#if OMPT_SUPPORT && OMPT_OPTIONAL
1386 ompt_state_t prev_state = ompt_state_undefined;
1387 ompt_thread_info_t ti;
1388 // This is the case if called from __kmpc_critical:
1389 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1390 if (!codeptr)
1391 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1392#endif
1393
1394 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1395
1396 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1397 // Check if it is initialized.
1398 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1399 if (*lk == 0) {
1400 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1401 if (KMP_IS_D_LOCK(lckseq)) {
1402 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1403 KMP_GET_D_TAG(lckseq));
1404 } else {
1405 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1406 }
1407 }
1408 // Branch for accessing the actual lock object and set operation. This
1409 // branching is inevitable since this lock initialization does not follow the
1410 // normal dispatch path (lock table is not used).
1411 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1412 lck = (kmp_user_lock_p)lk;
1413 if (__kmp_env_consistency_check) {
1414 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1415 __kmp_map_hint_to_lock(hint));
1416 }
1417#if USE_ITT_BUILD
1418 __kmp_itt_critical_acquiring(lck);
1419#endif
1420#if OMPT_SUPPORT && OMPT_OPTIONAL
1421 if (ompt_enabled.enabled) {
1422 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1423 /* OMPT state update */
1424 prev_state = ti.state;
1425 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1426 ti.state = ompt_state_wait_critical;
1427
1428 /* OMPT event callback */
1429 if (ompt_enabled.ompt_callback_mutex_acquire) {
1430 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1431 ompt_mutex_critical, (unsigned int)hint,
1432 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1433 codeptr);
1434 }
1435 }
1436#endif
1437#if KMP_USE_INLINED_TAS
1438 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1439 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1440 } else
1441#elif KMP_USE_INLINED_FUTEX
1442 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1443 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1444 } else
1445#endif
1446 {
1447 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1448 }
1449 } else {
1450 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1451 lck = ilk->lock;
1452 if (__kmp_env_consistency_check) {
1453 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1454 __kmp_map_hint_to_lock(hint));
1455 }
1456#if USE_ITT_BUILD
1457 __kmp_itt_critical_acquiring(lck);
1458#endif
1459#if OMPT_SUPPORT && OMPT_OPTIONAL
1460 if (ompt_enabled.enabled) {
1461 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1462 /* OMPT state update */
1463 prev_state = ti.state;
1464 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1465 ti.state = ompt_state_wait_critical;
1466
1467 /* OMPT event callback */
1468 if (ompt_enabled.ompt_callback_mutex_acquire) {
1469 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1470 ompt_mutex_critical, (unsigned int)hint,
1471 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1472 codeptr);
1473 }
1474 }
1475#endif
1476 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1477 }
1478 KMP_POP_PARTITIONED_TIMER();
1479
1480#if USE_ITT_BUILD
1481 __kmp_itt_critical_acquired(lck);
1482#endif /* USE_ITT_BUILD */
1483#if OMPT_SUPPORT && OMPT_OPTIONAL
1484 if (ompt_enabled.enabled) {
1485 /* OMPT state update */
1486 ti.state = prev_state;
1487 ti.wait_id = 0;
1488
1489 /* OMPT event callback */
1490 if (ompt_enabled.ompt_callback_mutex_acquired) {
1491 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1492 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1493 }
1494 }
1495#endif
1496
1497 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1498 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1499} // __kmpc_critical_with_hint
1500
1501#endif // KMP_USE_DYNAMIC_LOCK
1502
1512void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1513 kmp_critical_name *crit) {
1514 kmp_user_lock_p lck;
1515
1516 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1517
1518#if KMP_USE_DYNAMIC_LOCK
1519 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1520 lck = (kmp_user_lock_p)crit;
1521 KMP_ASSERT(lck != NULL);
1522 if (__kmp_env_consistency_check) {
1523 __kmp_pop_sync(global_tid, ct_critical, loc);
1524 }
1525#if USE_ITT_BUILD
1526 __kmp_itt_critical_releasing(lck);
1527#endif
1528#if KMP_USE_INLINED_TAS
1529 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1530 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1531 } else
1532#elif KMP_USE_INLINED_FUTEX
1533 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1534 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1535 } else
1536#endif
1537 {
1538 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1539 }
1540 } else {
1541 kmp_indirect_lock_t *ilk =
1542 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1543 KMP_ASSERT(ilk != NULL);
1544 lck = ilk->lock;
1545 if (__kmp_env_consistency_check) {
1546 __kmp_pop_sync(global_tid, ct_critical, loc);
1547 }
1548#if USE_ITT_BUILD
1549 __kmp_itt_critical_releasing(lck);
1550#endif
1551 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1552 }
1553
1554#else // KMP_USE_DYNAMIC_LOCK
1555
1556 if ((__kmp_user_lock_kind == lk_tas) &&
1557 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1558 lck = (kmp_user_lock_p)crit;
1559 }
1560#if KMP_USE_FUTEX
1561 else if ((__kmp_user_lock_kind == lk_futex) &&
1562 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1563 lck = (kmp_user_lock_p)crit;
1564 }
1565#endif
1566 else { // ticket, queuing or drdpa
1567 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1568 }
1569
1570 KMP_ASSERT(lck != NULL);
1571
1572 if (__kmp_env_consistency_check)
1573 __kmp_pop_sync(global_tid, ct_critical, loc);
1574
1575#if USE_ITT_BUILD
1576 __kmp_itt_critical_releasing(lck);
1577#endif /* USE_ITT_BUILD */
1578 // The value of 'crit' should be usable as the critical_id of the critical
1579 // section directive.
1580 __kmp_release_user_lock_with_checks(lck, global_tid);
1581
1582#endif // KMP_USE_DYNAMIC_LOCK
1583
1584#if OMPT_SUPPORT && OMPT_OPTIONAL
1585 /* OMPT release event triggers after lock is released; place here to trigger
1586 * for all #if branches */
1587 OMPT_STORE_RETURN_ADDRESS(global_tid);
1588 if (ompt_enabled.ompt_callback_mutex_released) {
1589 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1590 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1591 OMPT_LOAD_RETURN_ADDRESS(0));
1592 }
1593#endif
1594
1595 KMP_POP_PARTITIONED_TIMER();
1596 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1597}
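
/* Illustrative sketch (not part of the runtime): lowering of a named critical
   construct. The compiler emits one kmp_critical_name cache per critical name;
   the variable name below is hypothetical.

     static kmp_critical_name crit_cache_foo = {0}; // shared by all call sites

     // "#pragma omp critical (foo)"
     __kmpc_critical(&loc, gtid, &crit_cache_foo);
     // ... critical section body ...
     __kmpc_end_critical(&loc, gtid, &crit_cache_foo);
*/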
1598
1608kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1609 int status;
1610
1611 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1612
1613 if (!TCR_4(__kmp_init_parallel))
1614 __kmp_parallel_initialize();
1615
1616 __kmp_resume_if_soft_paused();
1617
1618 if (__kmp_env_consistency_check)
1619 __kmp_check_barrier(global_tid, ct_barrier, loc);
1620
1621#if OMPT_SUPPORT
1622 ompt_frame_t *ompt_frame;
1623 if (ompt_enabled.enabled) {
1624 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1625 if (ompt_frame->enter_frame.ptr == NULL)
1626 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1627 OMPT_STORE_RETURN_ADDRESS(global_tid);
1628 }
1629#endif
1630#if USE_ITT_NOTIFY
1631 __kmp_threads[global_tid]->th.th_ident = loc;
1632#endif
1633 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1634#if OMPT_SUPPORT && OMPT_OPTIONAL
1635 if (ompt_enabled.enabled) {
1636 ompt_frame->enter_frame = ompt_data_none;
1637 }
1638#endif
1639
1640 return (status != 0) ? 0 : 1;
1641}
1642
1652void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1653 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1654
1655 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1656}
1657
1668kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1669 kmp_int32 ret;
1670
1671 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1672
1673 if (!TCR_4(__kmp_init_parallel))
1674 __kmp_parallel_initialize();
1675
1676 __kmp_resume_if_soft_paused();
1677
1678 if (__kmp_env_consistency_check) {
1679 if (loc == 0) {
1680 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1681 }
1682 __kmp_check_barrier(global_tid, ct_barrier, loc);
1683 }
1684
1685#if OMPT_SUPPORT
1686 ompt_frame_t *ompt_frame;
1687 if (ompt_enabled.enabled) {
1688 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1689 if (ompt_frame->enter_frame.ptr == NULL)
1690 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1691 OMPT_STORE_RETURN_ADDRESS(global_tid);
1692 }
1693#endif
1694#if USE_ITT_NOTIFY
1695 __kmp_threads[global_tid]->th.th_ident = loc;
1696#endif
1697 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1698#if OMPT_SUPPORT && OMPT_OPTIONAL
1699 if (ompt_enabled.enabled) {
1700 ompt_frame->enter_frame = ompt_data_none;
1701 }
1702#endif
1703
1704 ret = __kmpc_master(loc, global_tid);
1705
1706 if (__kmp_env_consistency_check) {
1707 /* there's no __kmpc_end_master called; so the (stats) */
1708 /* actions of __kmpc_end_master are done here */
1709
1710 if (global_tid < 0) {
1711 KMP_WARNING(ThreadIdentInvalid);
1712 }
1713 if (ret) {
1714 /* only one thread should do the pop since only */
1715 /* one did the push (see __kmpc_master()) */
1716
1717 __kmp_pop_sync(global_tid, ct_master, loc);
1718 }
1719 }
1720
1721 return (ret);
1722}
1723
1724/* The BARRIER for a SINGLE process section is always explicit */
1736kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1737 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1738
1739 if (rc) {
1740 // We are going to execute the single statement, so we should count it.
1741 KMP_COUNT_BLOCK(OMP_SINGLE);
1742 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1743 }
1744
1745#if OMPT_SUPPORT && OMPT_OPTIONAL
1746 kmp_info_t *this_thr = __kmp_threads[global_tid];
1747 kmp_team_t *team = this_thr->th.th_team;
1748 int tid = __kmp_tid_from_gtid(global_tid);
1749
1750 if (ompt_enabled.enabled) {
1751 if (rc) {
1752 if (ompt_enabled.ompt_callback_work) {
1753 ompt_callbacks.ompt_callback(ompt_callback_work)(
1754 ompt_work_single_executor, ompt_scope_begin,
1755 &(team->t.ompt_team_info.parallel_data),
1756 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1757 1, OMPT_GET_RETURN_ADDRESS(0));
1758 }
1759 } else {
1760 if (ompt_enabled.ompt_callback_work) {
1761 ompt_callbacks.ompt_callback(ompt_callback_work)(
1762 ompt_work_single_other, ompt_scope_begin,
1763 &(team->t.ompt_team_info.parallel_data),
1764 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1765 1, OMPT_GET_RETURN_ADDRESS(0));
1766 ompt_callbacks.ompt_callback(ompt_callback_work)(
1767 ompt_work_single_other, ompt_scope_end,
1768 &(team->t.ompt_team_info.parallel_data),
1769 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1770 1, OMPT_GET_RETURN_ADDRESS(0));
1771 }
1772 }
1773 }
1774#endif
1775
1776 return rc;
1777}
1778
1788void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1789 __kmp_exit_single(global_tid);
1790 KMP_POP_PARTITIONED_TIMER();
1791
1792#if OMPT_SUPPORT && OMPT_OPTIONAL
1793 kmp_info_t *this_thr = __kmp_threads[global_tid];
1794 kmp_team_t *team = this_thr->th.th_team;
1795 int tid = __kmp_tid_from_gtid(global_tid);
1796
1797 if (ompt_enabled.ompt_callback_work) {
1798 ompt_callbacks.ompt_callback(ompt_callback_work)(
1799 ompt_work_single_executor, ompt_scope_end,
1800 &(team->t.ompt_team_info.parallel_data),
1801 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1802 OMPT_GET_RETURN_ADDRESS(0));
1803 }
1804#endif
1805}
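
/* Illustrative sketch (not part of the runtime): typical lowering of
   "#pragma omp single" without a copyprivate clause. The construct's implicit
   barrier is emitted separately and dropped when nowait is specified.

     if (__kmpc_single(&loc, gtid)) {
       // ... single region body, executed by exactly one thread ...
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid); // omitted for "single nowait"
*/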
1806
1814void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1815 KMP_POP_PARTITIONED_TIMER();
1816 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1817
1818#if OMPT_SUPPORT && OMPT_OPTIONAL
1819 if (ompt_enabled.ompt_callback_work) {
1820 ompt_work_t ompt_work_type = ompt_work_loop;
1821 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1822 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1823 // Determine workshare type
1824 if (loc != NULL) {
1825 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1826 ompt_work_type = ompt_work_loop;
1827 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1828 ompt_work_type = ompt_work_sections;
1829 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1830 ompt_work_type = ompt_work_distribute;
1831 } else {
1832 // use default set above.
1833 // a warning about this case is provided in __kmpc_for_static_init
1834 }
1835 KMP_DEBUG_ASSERT(ompt_work_type);
1836 }
1837 ompt_callbacks.ompt_callback(ompt_callback_work)(
1838 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1839 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1840 }
1841#endif
1842 if (__kmp_env_consistency_check)
1843 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1844}
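
/* Illustrative sketch (not part of the runtime): __kmpc_for_static_fini()
   pairs with the static-init entry points defined in kmp_sched.cpp. A possible
   lowering of "#pragma omp for schedule(static)" over i in [0, n):

     kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower, &upper,
                              &stride, 1, 1); // incr = 1, chunk ignored here
     for (kmp_int32 i = lower; i <= upper; ++i) {
       // ... loop body ...
     }
     __kmpc_for_static_fini(&loc, gtid);
     __kmpc_barrier(&loc, gtid); // implicit barrier unless nowait
*/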
1845
1846// User routines which take C-style arguments (call by value)
1847// different from the Fortran equivalent routines
1848
1849void ompc_set_num_threads(int arg) {
1850 // !!!!! TODO: check the per-task binding
1851 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1852}
1853
1854void ompc_set_dynamic(int flag) {
1855 kmp_info_t *thread;
1856
1857 /* For the thread-private implementation of the internal controls */
1858 thread = __kmp_entry_thread();
1859
1860 __kmp_save_internal_controls(thread);
1861
1862 set__dynamic(thread, flag ? TRUE : FALSE);
1863}
1864
1865void ompc_set_nested(int flag) {
1866 kmp_info_t *thread;
1867
1868 /* For the thread-private internal controls implementation */
1869 thread = __kmp_entry_thread();
1870
1871 __kmp_save_internal_controls(thread);
1872
1873 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1874}
1875
1876void ompc_set_max_active_levels(int max_active_levels) {
1877 /* TO DO */
1878 /* we want per-task implementation of this internal control */
1879
1880 /* For the per-thread internal controls implementation */
1881 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1882}
1883
1884void ompc_set_schedule(omp_sched_t kind, int modifier) {
1885 // !!!!! TODO: check the per-task binding
1886 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1887}
1888
1889int ompc_get_ancestor_thread_num(int level) {
1890 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1891}
1892
1893int ompc_get_team_size(int level) {
1894 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1895}
1896
1897/* OpenMP 5.0 Affinity Format API */
1898
1899void ompc_set_affinity_format(char const *format) {
1900 if (!__kmp_init_serial) {
1901 __kmp_serial_initialize();
1902 }
1903 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1904 format, KMP_STRLEN(format) + 1);
1905}
1906
1907size_t ompc_get_affinity_format(char *buffer, size_t size) {
1908 size_t format_size;
1909 if (!__kmp_init_serial) {
1910 __kmp_serial_initialize();
1911 }
1912 format_size = KMP_STRLEN(__kmp_affinity_format);
1913 if (buffer && size) {
1914 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1915 format_size + 1);
1916 }
1917 return format_size;
1918}
1919
1920void ompc_display_affinity(char const *format) {
1921 int gtid;
1922 if (!TCR_4(__kmp_init_middle)) {
1923 __kmp_middle_initialize();
1924 }
1925 gtid = __kmp_get_gtid();
1926 __kmp_aux_display_affinity(gtid, format);
1927}
1928
1929size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1930 char const *format) {
1931 int gtid;
1932 size_t num_required;
1933 kmp_str_buf_t capture_buf;
1934 if (!TCR_4(__kmp_init_middle)) {
1935 __kmp_middle_initialize();
1936 }
1937 gtid = __kmp_get_gtid();
1938 __kmp_str_buf_init(&capture_buf);
1939 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1940 if (buffer && buf_size) {
1941 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1942 capture_buf.used + 1);
1943 }
1944 __kmp_str_buf_free(&capture_buf);
1945 return num_required;
1946}
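
/* Illustrative usage (not part of the runtime) of the OpenMP 5.0 affinity
   format API; the omp_* routines declared in omp.h expose the functionality
   implemented by the ompc_* entry points above.

     omp_set_affinity_format("thread %{thread_num} bound to %{thread_affinity}");
     #pragma omp parallel
     {
       char buf[128];
       size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL); // NULL: current format
       if (needed < sizeof(buf))
         omp_display_affinity(NULL); // print using the current format
     }
*/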
1947
1948void kmpc_set_stacksize(int arg) {
1949 // __kmp_aux_set_stacksize initializes the library if needed
1950 __kmp_aux_set_stacksize(arg);
1951}
1952
1953void kmpc_set_stacksize_s(size_t arg) {
1954 // __kmp_aux_set_stacksize initializes the library if needed
1955 __kmp_aux_set_stacksize(arg);
1956}
1957
1958void kmpc_set_blocktime(int arg) {
1959 int gtid, tid;
1960 kmp_info_t *thread;
1961
1962 gtid = __kmp_entry_gtid();
1963 tid = __kmp_tid_from_gtid(gtid);
1964 thread = __kmp_thread_from_gtid(gtid);
1965
1966 __kmp_aux_set_blocktime(arg, thread, tid);
1967}
1968
1969void kmpc_set_library(int arg) {
1970 // __kmp_user_set_library initializes the library if needed
1971 __kmp_user_set_library((enum library_type)arg);
1972}
1973
1974void kmpc_set_defaults(char const *str) {
1975 // __kmp_aux_set_defaults initializes the library if needed
1976 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1977}
1978
1979void kmpc_set_disp_num_buffers(int arg) {
1980 // ignore after initialization because some teams have already
1981 // allocated dispatch buffers
1982 if (__kmp_init_serial == 0 && arg > 0)
1983 __kmp_dispatch_num_buffers = arg;
1984}
1985
1986int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1987#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1988 return -1;
1989#else
1990 if (!TCR_4(__kmp_init_middle)) {
1991 __kmp_middle_initialize();
1992 }
1993 return __kmp_aux_set_affinity_mask_proc(proc, mask);
1994#endif
1995}
1996
1997int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1998#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1999 return -1;
2000#else
2001 if (!TCR_4(__kmp_init_middle)) {
2002 __kmp_middle_initialize();
2003 }
2004 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2005#endif
2006}
2007
2008int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2009#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2010 return -1;
2011#else
2012 if (!TCR_4(__kmp_init_middle)) {
2013 __kmp_middle_initialize();
2014 }
2015 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2016#endif
2017}
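/* [Editorial example] A hedged sketch of the thread-affinity mask extension that
 * sits on top of the kmpc_*_affinity_mask_proc entries above. The kmp_* names
 * follow the documented Intel/LLVM extension API in omp.h; treat the exact
 * return-value conventions shown in the comments as an assumption. */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>

/* Pin the calling thread to logical processor 2. */
static int pin_to_proc2(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(2, &mask) != 0)
    return -1; /* proc unavailable or affinity unsupported */
  int rc = kmp_set_affinity(&mask); /* 0 on success (assumed convention) */
  kmp_destroy_affinity_mask(&mask);
  return rc;
}
#endif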
2018
2019/* -------------------------------------------------------------------------- */
2064void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2065 void *cpy_data, void (*cpy_func)(void *, void *),
2066 kmp_int32 didit) {
2067 void **data_ptr;
2068
2069 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2070
2071 KMP_MB();
2072
2073 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2074
2075 if (__kmp_env_consistency_check) {
2076 if (loc == 0) {
2077 KMP_WARNING(ConstructIdentInvalid);
2078 }
2079 }
2080
2081  // TODO: Optimize the following two barriers into some kind of split barrier
2082
2083 if (didit)
2084 *data_ptr = cpy_data;
2085
2086#if OMPT_SUPPORT
2087 ompt_frame_t *ompt_frame;
2088 if (ompt_enabled.enabled) {
2089 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2090 if (ompt_frame->enter_frame.ptr == NULL)
2091 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2092 OMPT_STORE_RETURN_ADDRESS(gtid);
2093 }
2094#endif
2095/* This barrier is not a barrier region boundary */
2096#if USE_ITT_NOTIFY
2097 __kmp_threads[gtid]->th.th_ident = loc;
2098#endif
2099 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2100
2101 if (!didit)
2102 (*cpy_func)(cpy_data, *data_ptr);
2103
2104// Consider next barrier a user-visible barrier for barrier region boundaries
2105// Nesting checks are already handled by the single construct checks
2106
2107#if OMPT_SUPPORT
2108 if (ompt_enabled.enabled) {
2109 OMPT_STORE_RETURN_ADDRESS(gtid);
2110 }
2111#endif
2112#if USE_ITT_NOTIFY
2113  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2114  // tasks can overwrite the location)
2115#endif
2116 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2117#if OMPT_SUPPORT && OMPT_OPTIONAL
2118 if (ompt_enabled.enabled) {
2119 ompt_frame->enter_frame = ompt_data_none;
2120 }
2121#endif
2122}
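/* [Editorial example] __kmpc_copyprivate() is the runtime half of
 * '#pragma omp single copyprivate(...)': the thread that executed the single
 * block passes didit = 1 and a pointer to its data, the other threads pass
 * didit = 0 and receive the data through the compiler-generated cpy_func
 * between the two barriers above. A user-level sketch; the exact lowering is
 * compiler-specific. */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>
#include <stdio.h>

int main(void) {
  int token;
#pragma omp parallel private(token)
  {
    /* Typically lowered to __kmpc_single()/__kmpc_end_single() plus one
       __kmpc_copyprivate() call that broadcasts &token to the team. */
#pragma omp single copyprivate(token)
    { token = 42; }

    /* Every thread's private copy now holds 42. */
    printf("thread %d: token = %d\n", omp_get_thread_num(), token);
  }
  return 0;
}
#endif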
2123
2124/* -------------------------------------------------------------------------- */
2125
2126#define INIT_LOCK __kmp_init_user_lock_with_checks
2127#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2128#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2129#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2130#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2131#define ACQUIRE_NESTED_LOCK_TIMED \
2132 __kmp_acquire_nested_user_lock_with_checks_timed
2133#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2134#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2135#define TEST_LOCK __kmp_test_user_lock_with_checks
2136#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2137#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2138#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2139
2140// TODO: Make check abort messages use location info & pass it into
2141// with_checks routines
2142
2143#if KMP_USE_DYNAMIC_LOCK
2144
2145// internal lock initializer
2146static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2147 kmp_dyna_lockseq_t seq) {
2148 if (KMP_IS_D_LOCK(seq)) {
2149 KMP_INIT_D_LOCK(lock, seq);
2150#if USE_ITT_BUILD
2151 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2152#endif
2153 } else {
2154 KMP_INIT_I_LOCK(lock, seq);
2155#if USE_ITT_BUILD
2156 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2157 __kmp_itt_lock_creating(ilk->lock, loc);
2158#endif
2159 }
2160}
2161
2162// internal nest lock initializer
2163static __forceinline void
2164__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2165 kmp_dyna_lockseq_t seq) {
2166#if KMP_USE_TSX
2167 // Don't have nested lock implementation for speculative locks
2168 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2169 seq = __kmp_user_lock_seq;
2170#endif
2171 switch (seq) {
2172 case lockseq_tas:
2173 seq = lockseq_nested_tas;
2174 break;
2175#if KMP_USE_FUTEX
2176 case lockseq_futex:
2177 seq = lockseq_nested_futex;
2178 break;
2179#endif
2180 case lockseq_ticket:
2181 seq = lockseq_nested_ticket;
2182 break;
2183 case lockseq_queuing:
2184 seq = lockseq_nested_queuing;
2185 break;
2186 case lockseq_drdpa:
2187 seq = lockseq_nested_drdpa;
2188 break;
2189 default:
2190 seq = lockseq_nested_queuing;
2191 }
2192 KMP_INIT_I_LOCK(lock, seq);
2193#if USE_ITT_BUILD
2194 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2195 __kmp_itt_lock_creating(ilk->lock, loc);
2196#endif
2197}
2198
2199/* initialize the lock with a hint */
2200void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2201 uintptr_t hint) {
2202 KMP_DEBUG_ASSERT(__kmp_init_serial);
2203 if (__kmp_env_consistency_check && user_lock == NULL) {
2204 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2205 }
2206
2207 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2208
2209#if OMPT_SUPPORT && OMPT_OPTIONAL
2210 // This is the case, if called from omp_init_lock_with_hint:
2211 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2212 if (!codeptr)
2213 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2214 if (ompt_enabled.ompt_callback_lock_init) {
2215 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2216 ompt_mutex_lock, (omp_lock_hint_t)hint,
2217 __ompt_get_mutex_impl_type(user_lock),
2218 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2219 }
2220#endif
2221}
2222
2223/* initialize the nestable lock with a hint */
2224void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2225 void **user_lock, uintptr_t hint) {
2226 KMP_DEBUG_ASSERT(__kmp_init_serial);
2227 if (__kmp_env_consistency_check && user_lock == NULL) {
2228 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2229 }
2230
2231 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2232
2233#if OMPT_SUPPORT && OMPT_OPTIONAL
2234 // This is the case, if called from omp_init_lock_with_hint:
2235 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2236 if (!codeptr)
2237 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2238 if (ompt_enabled.ompt_callback_lock_init) {
2239 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2240 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2241 __ompt_get_mutex_impl_type(user_lock),
2242 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2243 }
2244#endif
2245}
2246
2247#endif // KMP_USE_DYNAMIC_LOCK
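/* [Editorial example] The __kmpc_*_with_hint entries above are reached from the
 * standard OpenMP 4.5/5.0 hinted-lock API; __kmp_map_hint_to_lock() then picks
 * an implementation (a speculative hint may select an HLE/RTM-based lock on TSX
 * hardware and quietly fall back elsewhere). A user-level sketch with arbitrary
 * illustrative names: */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>

omp_lock_t history_lock;

void init_locks(void) {
  omp_init_lock_with_hint(&history_lock, omp_lock_hint_speculative);
}

void bump(long *shared_counter) {
  omp_set_lock(&history_lock);
  ++*shared_counter;
  omp_unset_lock(&history_lock);
}
#endif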
2248
2249/* initialize the lock */
2250void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2251#if KMP_USE_DYNAMIC_LOCK
2252
2253 KMP_DEBUG_ASSERT(__kmp_init_serial);
2254 if (__kmp_env_consistency_check && user_lock == NULL) {
2255 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2256 }
2257 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2258
2259#if OMPT_SUPPORT && OMPT_OPTIONAL
2260 // This is the case, if called from omp_init_lock_with_hint:
2261 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2262 if (!codeptr)
2263 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2264 if (ompt_enabled.ompt_callback_lock_init) {
2265 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2266 ompt_mutex_lock, omp_lock_hint_none,
2267 __ompt_get_mutex_impl_type(user_lock),
2268 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2269 }
2270#endif
2271
2272#else // KMP_USE_DYNAMIC_LOCK
2273
2274 static char const *const func = "omp_init_lock";
2275 kmp_user_lock_p lck;
2276 KMP_DEBUG_ASSERT(__kmp_init_serial);
2277
2278 if (__kmp_env_consistency_check) {
2279 if (user_lock == NULL) {
2280 KMP_FATAL(LockIsUninitialized, func);
2281 }
2282 }
2283
2284 KMP_CHECK_USER_LOCK_INIT();
2285
2286 if ((__kmp_user_lock_kind == lk_tas) &&
2287 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2288 lck = (kmp_user_lock_p)user_lock;
2289 }
2290#if KMP_USE_FUTEX
2291 else if ((__kmp_user_lock_kind == lk_futex) &&
2292 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2293 lck = (kmp_user_lock_p)user_lock;
2294 }
2295#endif
2296 else {
2297 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2298 }
2299 INIT_LOCK(lck);
2300 __kmp_set_user_lock_location(lck, loc);
2301
2302#if OMPT_SUPPORT && OMPT_OPTIONAL
2303 // This is the case, if called from omp_init_lock_with_hint:
2304 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2305 if (!codeptr)
2306 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2307 if (ompt_enabled.ompt_callback_lock_init) {
2308 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2309 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2310 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2311 }
2312#endif
2313
2314#if USE_ITT_BUILD
2315 __kmp_itt_lock_creating(lck);
2316#endif /* USE_ITT_BUILD */
2317
2318#endif // KMP_USE_DYNAMIC_LOCK
2319} // __kmpc_init_lock
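/* [Editorial example] In this runtime the plain omp_* lock API reaches these
 * __kmpc_* entries (with loc == NULL) through thin wrappers, so the usual
 * user-level lifecycle below exercises __kmpc_init_lock, __kmpc_set_lock,
 * __kmpc_unset_lock and __kmpc_destroy_lock; the wrapper path is a runtime
 * detail and is stated here as an assumption. */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>

long count_matches(const int *v, int n, int key) {
  long hits = 0;
  omp_lock_t lk;
  omp_init_lock(&lk);
#pragma omp parallel for
  for (int i = 0; i < n; ++i) {
    if (v[i] == key) {
      omp_set_lock(&lk); /* serialize the shared update */
      ++hits;
      omp_unset_lock(&lk);
    }
  }
  omp_destroy_lock(&lk);
  return hits;
}
#endif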
2320
2321/* initialize the nestable lock */
2322void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2323#if KMP_USE_DYNAMIC_LOCK
2324
2325 KMP_DEBUG_ASSERT(__kmp_init_serial);
2326 if (__kmp_env_consistency_check && user_lock == NULL) {
2327 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2328 }
2329 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2330
2331#if OMPT_SUPPORT && OMPT_OPTIONAL
2332 // This is the case, if called from omp_init_lock_with_hint:
2333 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2334 if (!codeptr)
2335 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2336 if (ompt_enabled.ompt_callback_lock_init) {
2337 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2338 ompt_mutex_nest_lock, omp_lock_hint_none,
2339 __ompt_get_mutex_impl_type(user_lock),
2340 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2341 }
2342#endif
2343
2344#else // KMP_USE_DYNAMIC_LOCK
2345
2346 static char const *const func = "omp_init_nest_lock";
2347 kmp_user_lock_p lck;
2348 KMP_DEBUG_ASSERT(__kmp_init_serial);
2349
2350 if (__kmp_env_consistency_check) {
2351 if (user_lock == NULL) {
2352 KMP_FATAL(LockIsUninitialized, func);
2353 }
2354 }
2355
2356 KMP_CHECK_USER_LOCK_INIT();
2357
2358 if ((__kmp_user_lock_kind == lk_tas) &&
2359 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2360 OMP_NEST_LOCK_T_SIZE)) {
2361 lck = (kmp_user_lock_p)user_lock;
2362 }
2363#if KMP_USE_FUTEX
2364 else if ((__kmp_user_lock_kind == lk_futex) &&
2365 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2366 OMP_NEST_LOCK_T_SIZE)) {
2367 lck = (kmp_user_lock_p)user_lock;
2368 }
2369#endif
2370 else {
2371 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2372 }
2373
2374 INIT_NESTED_LOCK(lck);
2375 __kmp_set_user_lock_location(lck, loc);
2376
2377#if OMPT_SUPPORT && OMPT_OPTIONAL
2378 // This is the case, if called from omp_init_lock_with_hint:
2379 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2380 if (!codeptr)
2381 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2382 if (ompt_enabled.ompt_callback_lock_init) {
2383 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2384 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2385 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2386 }
2387#endif
2388
2389#if USE_ITT_BUILD
2390 __kmp_itt_lock_creating(lck);
2391#endif /* USE_ITT_BUILD */
2392
2393#endif // KMP_USE_DYNAMIC_LOCK
2394} // __kmpc_init_nest_lock
2395
2396void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2397#if KMP_USE_DYNAMIC_LOCK
2398
2399#if USE_ITT_BUILD
2400 kmp_user_lock_p lck;
2401 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2402 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2403 } else {
2404 lck = (kmp_user_lock_p)user_lock;
2405 }
2406 __kmp_itt_lock_destroyed(lck);
2407#endif
2408#if OMPT_SUPPORT && OMPT_OPTIONAL
2409 // This is the case, if called from omp_init_lock_with_hint:
2410 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2411 if (!codeptr)
2412 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2413 if (ompt_enabled.ompt_callback_lock_destroy) {
2414 kmp_user_lock_p lck;
2415 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2416 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2417 } else {
2418 lck = (kmp_user_lock_p)user_lock;
2419 }
2420 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2421 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2422 }
2423#endif
2424 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2425#else
2426 kmp_user_lock_p lck;
2427
2428 if ((__kmp_user_lock_kind == lk_tas) &&
2429 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2430 lck = (kmp_user_lock_p)user_lock;
2431 }
2432#if KMP_USE_FUTEX
2433 else if ((__kmp_user_lock_kind == lk_futex) &&
2434 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2435 lck = (kmp_user_lock_p)user_lock;
2436 }
2437#endif
2438 else {
2439 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2440 }
2441
2442#if OMPT_SUPPORT && OMPT_OPTIONAL
2443 // This is the case, if called from omp_init_lock_with_hint:
2444 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2445 if (!codeptr)
2446 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2447 if (ompt_enabled.ompt_callback_lock_destroy) {
2448 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2449 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2450 }
2451#endif
2452
2453#if USE_ITT_BUILD
2454 __kmp_itt_lock_destroyed(lck);
2455#endif /* USE_ITT_BUILD */
2456 DESTROY_LOCK(lck);
2457
2458 if ((__kmp_user_lock_kind == lk_tas) &&
2459 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2460 ;
2461 }
2462#if KMP_USE_FUTEX
2463 else if ((__kmp_user_lock_kind == lk_futex) &&
2464 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2465 ;
2466 }
2467#endif
2468 else {
2469 __kmp_user_lock_free(user_lock, gtid, lck);
2470 }
2471#endif // KMP_USE_DYNAMIC_LOCK
2472} // __kmpc_destroy_lock
2473
2474/* destroy the nestable lock */
2475void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2476#if KMP_USE_DYNAMIC_LOCK
2477
2478#if USE_ITT_BUILD
2479 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2480 __kmp_itt_lock_destroyed(ilk->lock);
2481#endif
2482#if OMPT_SUPPORT && OMPT_OPTIONAL
2483 // This is the case, if called from omp_init_lock_with_hint:
2484 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2485 if (!codeptr)
2486 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2487 if (ompt_enabled.ompt_callback_lock_destroy) {
2488 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2489 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2490 }
2491#endif
2492 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2493
2494#else // KMP_USE_DYNAMIC_LOCK
2495
2496 kmp_user_lock_p lck;
2497
2498 if ((__kmp_user_lock_kind == lk_tas) &&
2499 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2500 OMP_NEST_LOCK_T_SIZE)) {
2501 lck = (kmp_user_lock_p)user_lock;
2502 }
2503#if KMP_USE_FUTEX
2504 else if ((__kmp_user_lock_kind == lk_futex) &&
2505 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2506 OMP_NEST_LOCK_T_SIZE)) {
2507 lck = (kmp_user_lock_p)user_lock;
2508 }
2509#endif
2510 else {
2511 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2512 }
2513
2514#if OMPT_SUPPORT && OMPT_OPTIONAL
2515 // This is the case, if called from omp_init_lock_with_hint:
2516 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2517 if (!codeptr)
2518 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2519 if (ompt_enabled.ompt_callback_lock_destroy) {
2520 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2521 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2522 }
2523#endif
2524
2525#if USE_ITT_BUILD
2526 __kmp_itt_lock_destroyed(lck);
2527#endif /* USE_ITT_BUILD */
2528
2529 DESTROY_NESTED_LOCK(lck);
2530
2531 if ((__kmp_user_lock_kind == lk_tas) &&
2532 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2533 OMP_NEST_LOCK_T_SIZE)) {
2534 ;
2535 }
2536#if KMP_USE_FUTEX
2537 else if ((__kmp_user_lock_kind == lk_futex) &&
2538 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2539 OMP_NEST_LOCK_T_SIZE)) {
2540 ;
2541 }
2542#endif
2543 else {
2544 __kmp_user_lock_free(user_lock, gtid, lck);
2545 }
2546#endif // KMP_USE_DYNAMIC_LOCK
2547} // __kmpc_destroy_nest_lock
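/* [Editorial example] Nestable locks may be re-acquired by the thread that
 * already owns them; each omp_set_nest_lock() must be balanced by one
 * omp_unset_nest_lock(), and the lock becomes available to other threads only
 * when the nesting count drops to zero. A user-level sketch with a hypothetical
 * 'struct node' tree type: */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>

struct node {
  struct node *left, *right;
  long weight;
};

omp_nest_lock_t tree_lock; /* initialized elsewhere with omp_init_nest_lock() */
long total;

void accumulate(struct node *n) {
  if (!n)
    return;
  omp_set_nest_lock(&tree_lock); /* re-enters freely on the same thread */
  total += n->weight;
  accumulate(n->left);
  accumulate(n->right);
  omp_unset_nest_lock(&tree_lock);
}
#endif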
2548
2549void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2550 KMP_COUNT_BLOCK(OMP_set_lock);
2551#if KMP_USE_DYNAMIC_LOCK
2552 int tag = KMP_EXTRACT_D_TAG(user_lock);
2553#if USE_ITT_BUILD
2554 __kmp_itt_lock_acquiring(
2555 (kmp_user_lock_p)
2556 user_lock); // itt function will get to the right lock object.
2557#endif
2558#if OMPT_SUPPORT && OMPT_OPTIONAL
2559 // This is the case, if called from omp_init_lock_with_hint:
2560 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2561 if (!codeptr)
2562 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2563 if (ompt_enabled.ompt_callback_mutex_acquire) {
2564 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2565 ompt_mutex_lock, omp_lock_hint_none,
2566 __ompt_get_mutex_impl_type(user_lock),
2567 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2568 }
2569#endif
2570#if KMP_USE_INLINED_TAS
2571 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2572 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2573 } else
2574#elif KMP_USE_INLINED_FUTEX
2575 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2576 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2577 } else
2578#endif
2579 {
2580 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2581 }
2582#if USE_ITT_BUILD
2583 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2584#endif
2585#if OMPT_SUPPORT && OMPT_OPTIONAL
2586 if (ompt_enabled.ompt_callback_mutex_acquired) {
2587 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2588 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2589 }
2590#endif
2591
2592#else // KMP_USE_DYNAMIC_LOCK
2593
2594 kmp_user_lock_p lck;
2595
2596 if ((__kmp_user_lock_kind == lk_tas) &&
2597 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2598 lck = (kmp_user_lock_p)user_lock;
2599 }
2600#if KMP_USE_FUTEX
2601 else if ((__kmp_user_lock_kind == lk_futex) &&
2602 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2603 lck = (kmp_user_lock_p)user_lock;
2604 }
2605#endif
2606 else {
2607 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2608 }
2609
2610#if USE_ITT_BUILD
2611 __kmp_itt_lock_acquiring(lck);
2612#endif /* USE_ITT_BUILD */
2613#if OMPT_SUPPORT && OMPT_OPTIONAL
2614 // This is the case, if called from omp_init_lock_with_hint:
2615 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2616 if (!codeptr)
2617 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2618 if (ompt_enabled.ompt_callback_mutex_acquire) {
2619 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2620 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2621 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2622 }
2623#endif
2624
2625 ACQUIRE_LOCK(lck, gtid);
2626
2627#if USE_ITT_BUILD
2628 __kmp_itt_lock_acquired(lck);
2629#endif /* USE_ITT_BUILD */
2630
2631#if OMPT_SUPPORT && OMPT_OPTIONAL
2632 if (ompt_enabled.ompt_callback_mutex_acquired) {
2633 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2634 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2635 }
2636#endif
2637
2638#endif // KMP_USE_DYNAMIC_LOCK
2639}
2640
2641void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2642#if KMP_USE_DYNAMIC_LOCK
2643
2644#if USE_ITT_BUILD
2645 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2646#endif
2647#if OMPT_SUPPORT && OMPT_OPTIONAL
2648 // This is the case, if called from omp_init_lock_with_hint:
2649 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2650 if (!codeptr)
2651 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2652 if (ompt_enabled.enabled) {
2653 if (ompt_enabled.ompt_callback_mutex_acquire) {
2654 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2655 ompt_mutex_nest_lock, omp_lock_hint_none,
2656 __ompt_get_mutex_impl_type(user_lock),
2657 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2658 }
2659 }
2660#endif
2661 int acquire_status =
2662 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2663 (void) acquire_status;
2664#if USE_ITT_BUILD
2665 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2666#endif
2667
2668#if OMPT_SUPPORT && OMPT_OPTIONAL
2669 if (ompt_enabled.enabled) {
2670 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2671 if (ompt_enabled.ompt_callback_mutex_acquired) {
2672 // lock_first
2673 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2674 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2675 codeptr);
2676 }
2677 } else {
2678 if (ompt_enabled.ompt_callback_nest_lock) {
2679 // lock_next
2680 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2681 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2682 }
2683 }
2684 }
2685#endif
2686
2687#else // KMP_USE_DYNAMIC_LOCK
2688 int acquire_status;
2689 kmp_user_lock_p lck;
2690
2691 if ((__kmp_user_lock_kind == lk_tas) &&
2692 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2693 OMP_NEST_LOCK_T_SIZE)) {
2694 lck = (kmp_user_lock_p)user_lock;
2695 }
2696#if KMP_USE_FUTEX
2697 else if ((__kmp_user_lock_kind == lk_futex) &&
2698 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2699 OMP_NEST_LOCK_T_SIZE)) {
2700 lck = (kmp_user_lock_p)user_lock;
2701 }
2702#endif
2703 else {
2704 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2705 }
2706
2707#if USE_ITT_BUILD
2708 __kmp_itt_lock_acquiring(lck);
2709#endif /* USE_ITT_BUILD */
2710#if OMPT_SUPPORT && OMPT_OPTIONAL
2711 // This is the case, if called from omp_init_lock_with_hint:
2712 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2713 if (!codeptr)
2714 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2715 if (ompt_enabled.enabled) {
2716 if (ompt_enabled.ompt_callback_mutex_acquire) {
2717 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2718 ompt_mutex_nest_lock, omp_lock_hint_none,
2719 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2720 codeptr);
2721 }
2722 }
2723#endif
2724
2725 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2726
2727#if USE_ITT_BUILD
2728 __kmp_itt_lock_acquired(lck);
2729#endif /* USE_ITT_BUILD */
2730
2731#if OMPT_SUPPORT && OMPT_OPTIONAL
2732 if (ompt_enabled.enabled) {
2733 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2734 if (ompt_enabled.ompt_callback_mutex_acquired) {
2735 // lock_first
2736 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2737 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2738 }
2739 } else {
2740 if (ompt_enabled.ompt_callback_nest_lock) {
2741 // lock_next
2742 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2743 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2744 }
2745 }
2746 }
2747#endif
2748
2749#endif // KMP_USE_DYNAMIC_LOCK
2750}
2751
2752void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2753#if KMP_USE_DYNAMIC_LOCK
2754
2755 int tag = KMP_EXTRACT_D_TAG(user_lock);
2756#if USE_ITT_BUILD
2757 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2758#endif
2759#if KMP_USE_INLINED_TAS
2760 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2761 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2762 } else
2763#elif KMP_USE_INLINED_FUTEX
2764 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2765 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2766 } else
2767#endif
2768 {
2769 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2770 }
2771
2772#if OMPT_SUPPORT && OMPT_OPTIONAL
2773 // This is the case, if called from omp_init_lock_with_hint:
2774 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2775 if (!codeptr)
2776 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2777 if (ompt_enabled.ompt_callback_mutex_released) {
2778 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2779 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2780 }
2781#endif
2782
2783#else // KMP_USE_DYNAMIC_LOCK
2784
2785 kmp_user_lock_p lck;
2786
2787 /* Can't use serial interval since not block structured */
2788 /* release the lock */
2789
2790 if ((__kmp_user_lock_kind == lk_tas) &&
2791 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2792#if KMP_OS_LINUX && \
2793 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2794// "fast" path implemented to fix customer performance issue
2795#if USE_ITT_BUILD
2796 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2797#endif /* USE_ITT_BUILD */
2798 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2799 KMP_MB();
2800
2801#if OMPT_SUPPORT && OMPT_OPTIONAL
2802 // This is the case, if called from omp_init_lock_with_hint:
2803 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2804 if (!codeptr)
2805 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2806 if (ompt_enabled.ompt_callback_mutex_released) {
2807 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2808 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2809 }
2810#endif
2811
2812 return;
2813#else
2814 lck = (kmp_user_lock_p)user_lock;
2815#endif
2816 }
2817#if KMP_USE_FUTEX
2818 else if ((__kmp_user_lock_kind == lk_futex) &&
2819 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2820 lck = (kmp_user_lock_p)user_lock;
2821 }
2822#endif
2823 else {
2824 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2825 }
2826
2827#if USE_ITT_BUILD
2828 __kmp_itt_lock_releasing(lck);
2829#endif /* USE_ITT_BUILD */
2830
2831 RELEASE_LOCK(lck, gtid);
2832
2833#if OMPT_SUPPORT && OMPT_OPTIONAL
2834 // This is the case, if called from omp_init_lock_with_hint:
2835 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2836 if (!codeptr)
2837 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2838 if (ompt_enabled.ompt_callback_mutex_released) {
2839 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2840 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2841 }
2842#endif
2843
2844#endif // KMP_USE_DYNAMIC_LOCK
2845}
2846
2847/* release the nestable lock */
2848void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2849#if KMP_USE_DYNAMIC_LOCK
2850
2851#if USE_ITT_BUILD
2852 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2853#endif
2854 int release_status =
2855 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2856 (void) release_status;
2857
2858#if OMPT_SUPPORT && OMPT_OPTIONAL
2859 // This is the case, if called from omp_init_lock_with_hint:
2860 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2861 if (!codeptr)
2862 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2863 if (ompt_enabled.enabled) {
2864 if (release_status == KMP_LOCK_RELEASED) {
2865 if (ompt_enabled.ompt_callback_mutex_released) {
2866 // release_lock_last
2867 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2868 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2869 codeptr);
2870 }
2871 } else if (ompt_enabled.ompt_callback_nest_lock) {
2872 // release_lock_prev
2873 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2874 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2875 }
2876 }
2877#endif
2878
2879#else // KMP_USE_DYNAMIC_LOCK
2880
2881 kmp_user_lock_p lck;
2882
2883 /* Can't use serial interval since not block structured */
2884
2885 if ((__kmp_user_lock_kind == lk_tas) &&
2886 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2887 OMP_NEST_LOCK_T_SIZE)) {
2888#if KMP_OS_LINUX && \
2889 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2890 // "fast" path implemented to fix customer performance issue
2891 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2892#if USE_ITT_BUILD
2893 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2894#endif /* USE_ITT_BUILD */
2895
2896#if OMPT_SUPPORT && OMPT_OPTIONAL
2897 int release_status = KMP_LOCK_STILL_HELD;
2898#endif
2899
2900 if (--(tl->lk.depth_locked) == 0) {
2901 TCW_4(tl->lk.poll, 0);
2902#if OMPT_SUPPORT && OMPT_OPTIONAL
2903 release_status = KMP_LOCK_RELEASED;
2904#endif
2905 }
2906 KMP_MB();
2907
2908#if OMPT_SUPPORT && OMPT_OPTIONAL
2909 // This is the case, if called from omp_init_lock_with_hint:
2910 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2911 if (!codeptr)
2912 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2913 if (ompt_enabled.enabled) {
2914 if (release_status == KMP_LOCK_RELEASED) {
2915 if (ompt_enabled.ompt_callback_mutex_released) {
2916 // release_lock_last
2917 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2918 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2919 }
2920 } else if (ompt_enabled.ompt_callback_nest_lock) {
2921 // release_lock_previous
2922 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2923              ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2924 }
2925 }
2926#endif
2927
2928 return;
2929#else
2930 lck = (kmp_user_lock_p)user_lock;
2931#endif
2932 }
2933#if KMP_USE_FUTEX
2934 else if ((__kmp_user_lock_kind == lk_futex) &&
2935 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2936 OMP_NEST_LOCK_T_SIZE)) {
2937 lck = (kmp_user_lock_p)user_lock;
2938 }
2939#endif
2940 else {
2941 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2942 }
2943
2944#if USE_ITT_BUILD
2945 __kmp_itt_lock_releasing(lck);
2946#endif /* USE_ITT_BUILD */
2947
2948 int release_status;
2949 release_status = RELEASE_NESTED_LOCK(lck, gtid);
2950#if OMPT_SUPPORT && OMPT_OPTIONAL
2951 // This is the case, if called from omp_init_lock_with_hint:
2952 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2953 if (!codeptr)
2954 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2955 if (ompt_enabled.enabled) {
2956 if (release_status == KMP_LOCK_RELEASED) {
2957 if (ompt_enabled.ompt_callback_mutex_released) {
2958 // release_lock_last
2959 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2960 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2961 }
2962 } else if (ompt_enabled.ompt_callback_nest_lock) {
2963 // release_lock_previous
2964 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2965            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2966 }
2967 }
2968#endif
2969
2970#endif // KMP_USE_DYNAMIC_LOCK
2971}
2972
2973/* try to acquire the lock */
2974int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2975 KMP_COUNT_BLOCK(OMP_test_lock);
2976
2977#if KMP_USE_DYNAMIC_LOCK
2978 int rc;
2979 int tag = KMP_EXTRACT_D_TAG(user_lock);
2980#if USE_ITT_BUILD
2981 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2982#endif
2983#if OMPT_SUPPORT && OMPT_OPTIONAL
2984 // This is the case, if called from omp_init_lock_with_hint:
2985 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2986 if (!codeptr)
2987 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2988 if (ompt_enabled.ompt_callback_mutex_acquire) {
2989 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2990 ompt_mutex_lock, omp_lock_hint_none,
2991 __ompt_get_mutex_impl_type(user_lock),
2992 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2993 }
2994#endif
2995#if KMP_USE_INLINED_TAS
2996 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2997 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2998 } else
2999#elif KMP_USE_INLINED_FUTEX
3000 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3001 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3002 } else
3003#endif
3004 {
3005 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3006 }
3007 if (rc) {
3008#if USE_ITT_BUILD
3009 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3010#endif
3011#if OMPT_SUPPORT && OMPT_OPTIONAL
3012 if (ompt_enabled.ompt_callback_mutex_acquired) {
3013 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3014 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3015 }
3016#endif
3017 return FTN_TRUE;
3018 } else {
3019#if USE_ITT_BUILD
3020 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3021#endif
3022 return FTN_FALSE;
3023 }
3024
3025#else // KMP_USE_DYNAMIC_LOCK
3026
3027 kmp_user_lock_p lck;
3028 int rc;
3029
3030 if ((__kmp_user_lock_kind == lk_tas) &&
3031 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3032 lck = (kmp_user_lock_p)user_lock;
3033 }
3034#if KMP_USE_FUTEX
3035 else if ((__kmp_user_lock_kind == lk_futex) &&
3036 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3037 lck = (kmp_user_lock_p)user_lock;
3038 }
3039#endif
3040 else {
3041 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3042 }
3043
3044#if USE_ITT_BUILD
3045 __kmp_itt_lock_acquiring(lck);
3046#endif /* USE_ITT_BUILD */
3047#if OMPT_SUPPORT && OMPT_OPTIONAL
3048 // This is the case, if called from omp_init_lock_with_hint:
3049 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3050 if (!codeptr)
3051 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3052 if (ompt_enabled.ompt_callback_mutex_acquire) {
3053 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3054 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3055 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3056 }
3057#endif
3058
3059 rc = TEST_LOCK(lck, gtid);
3060#if USE_ITT_BUILD
3061 if (rc) {
3062 __kmp_itt_lock_acquired(lck);
3063 } else {
3064 __kmp_itt_lock_cancelled(lck);
3065 }
3066#endif /* USE_ITT_BUILD */
3067#if OMPT_SUPPORT && OMPT_OPTIONAL
3068 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3069 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3070 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3071 }
3072#endif
3073
3074 return (rc ? FTN_TRUE : FTN_FALSE);
3075
3076/* Can't use serial interval since not block structured */
3077
3078#endif // KMP_USE_DYNAMIC_LOCK
3079}
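/* [Editorial example] omp_test_lock() (backed by __kmpc_test_lock above) returns
 * nonzero only when the lock was acquired without blocking, so a thread that
 * fails the test can do other useful work instead of waiting. The queue and the
 * helper name below are illustrative placeholders. */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>

void drain_or_local_work(omp_lock_t *qlock) {
  if (omp_test_lock(qlock)) {
    /* ... drain the shared queue ... */
    omp_unset_lock(qlock);
  } else {
    /* ... fall back to thread-local work and retry later ... */
  }
}
#endif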
3080
3081/* try to acquire the nestable lock */
3082int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3083#if KMP_USE_DYNAMIC_LOCK
3084 int rc;
3085#if USE_ITT_BUILD
3086 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3087#endif
3088#if OMPT_SUPPORT && OMPT_OPTIONAL
3089 // This is the case, if called from omp_init_lock_with_hint:
3090 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3091 if (!codeptr)
3092 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3093 if (ompt_enabled.ompt_callback_mutex_acquire) {
3094 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3095 ompt_mutex_nest_lock, omp_lock_hint_none,
3096 __ompt_get_mutex_impl_type(user_lock),
3097 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3098 }
3099#endif
3100 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3101#if USE_ITT_BUILD
3102 if (rc) {
3103 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3104 } else {
3105 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3106 }
3107#endif
3108#if OMPT_SUPPORT && OMPT_OPTIONAL
3109 if (ompt_enabled.enabled && rc) {
3110 if (rc == 1) {
3111 if (ompt_enabled.ompt_callback_mutex_acquired) {
3112 // lock_first
3113 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3114 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3115 codeptr);
3116 }
3117 } else {
3118 if (ompt_enabled.ompt_callback_nest_lock) {
3119 // lock_next
3120 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3121 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3122 }
3123 }
3124 }
3125#endif
3126 return rc;
3127
3128#else // KMP_USE_DYNAMIC_LOCK
3129
3130 kmp_user_lock_p lck;
3131 int rc;
3132
3133 if ((__kmp_user_lock_kind == lk_tas) &&
3134 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3135 OMP_NEST_LOCK_T_SIZE)) {
3136 lck = (kmp_user_lock_p)user_lock;
3137 }
3138#if KMP_USE_FUTEX
3139 else if ((__kmp_user_lock_kind == lk_futex) &&
3140 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3141 OMP_NEST_LOCK_T_SIZE)) {
3142 lck = (kmp_user_lock_p)user_lock;
3143 }
3144#endif
3145 else {
3146 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3147 }
3148
3149#if USE_ITT_BUILD
3150 __kmp_itt_lock_acquiring(lck);
3151#endif /* USE_ITT_BUILD */
3152
3153#if OMPT_SUPPORT && OMPT_OPTIONAL
3154 // This is the case, if called from omp_init_lock_with_hint:
3155 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3156 if (!codeptr)
3157 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3158  if (ompt_enabled.enabled &&
3159      ompt_enabled.ompt_callback_mutex_acquire) {
3160 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3161 ompt_mutex_nest_lock, omp_lock_hint_none,
3162 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3163 codeptr);
3164 }
3165#endif
3166
3167 rc = TEST_NESTED_LOCK(lck, gtid);
3168#if USE_ITT_BUILD
3169 if (rc) {
3170 __kmp_itt_lock_acquired(lck);
3171 } else {
3172 __kmp_itt_lock_cancelled(lck);
3173 }
3174#endif /* USE_ITT_BUILD */
3175#if OMPT_SUPPORT && OMPT_OPTIONAL
3176 if (ompt_enabled.enabled && rc) {
3177 if (rc == 1) {
3178 if (ompt_enabled.ompt_callback_mutex_acquired) {
3179 // lock_first
3180 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3181 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3182 }
3183 } else {
3184 if (ompt_enabled.ompt_callback_nest_lock) {
3185 // lock_next
3186 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3187            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3188 }
3189 }
3190 }
3191#endif
3192 return rc;
3193
3194/* Can't use serial interval since not block structured */
3195
3196#endif // KMP_USE_DYNAMIC_LOCK
3197}
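/* [Editorial example] omp_test_nest_lock() returns the new nesting count on
 * success and 0 on failure, which is why the OMPT code above distinguishes
 * rc == 1 (first acquisition) from rc > 1 (re-acquisition by the owner). A
 * user-level sketch with an illustrative helper name: */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>

int try_enter(omp_nest_lock_t *lk) {
  int depth = omp_test_nest_lock(lk);
  if (depth == 0)
    return 0; /* owned by another thread; not acquired */
  /* ... critical work; the current nesting depth is 'depth' ... */
  omp_unset_nest_lock(lk);
  return 1;
}
#endif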
3198
3199// Interface to fast scalable reduce methods routines
3200
3201// keep the selected method in a thread-local structure for cross-function
3202// usage: it will be used in the __kmpc_end_reduce* functions;
3203// an alternative would be to re-determine the method in the
3204// __kmpc_end_reduce* functions (a new prototype would be required then)
3205// AT: which solution is better?
3206#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3207 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3208
3209#define __KMP_GET_REDUCTION_METHOD(gtid) \
3210 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3211
3212// description of the packed_reduction_method variable: look at the macros in
3213// kmp.h
3214
3215// used in a critical section reduce block
3216static __forceinline void
3217__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3218 kmp_critical_name *crit) {
3219
3220 // this lock was visible to a customer and to the threading profile tool as a
3221 // serial overhead span (although it's used for an internal purpose only)
3222  // why was it visible in the previous implementation?
3223  // should we keep it visible in the new reduce block?
3224 kmp_user_lock_p lck;
3225
3226#if KMP_USE_DYNAMIC_LOCK
3227
3228 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3229 // Check if it is initialized.
3230 if (*lk == 0) {
3231 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3232 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3233 KMP_GET_D_TAG(__kmp_user_lock_seq));
3234 } else {
3235 __kmp_init_indirect_csptr(crit, loc, global_tid,
3236 KMP_GET_I_TAG(__kmp_user_lock_seq));
3237 }
3238 }
3239 // Branch for accessing the actual lock object and set operation. This
3240 // branching is inevitable since this lock initialization does not follow the
3241 // normal dispatch path (lock table is not used).
3242 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3243 lck = (kmp_user_lock_p)lk;
3244 KMP_DEBUG_ASSERT(lck != NULL);
3245 if (__kmp_env_consistency_check) {
3246 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3247 }
3248 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3249 } else {
3250 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3251 lck = ilk->lock;
3252 KMP_DEBUG_ASSERT(lck != NULL);
3253 if (__kmp_env_consistency_check) {
3254 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3255 }
3256 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3257 }
3258
3259#else // KMP_USE_DYNAMIC_LOCK
3260
3261 // We know that the fast reduction code is only emitted by Intel compilers
3262 // with 32 byte critical sections. If there isn't enough space, then we
3263 // have to use a pointer.
3264 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3265 lck = (kmp_user_lock_p)crit;
3266 } else {
3267 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3268 }
3269 KMP_DEBUG_ASSERT(lck != NULL);
3270
3271 if (__kmp_env_consistency_check)
3272 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3273
3274 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3275
3276#endif // KMP_USE_DYNAMIC_LOCK
3277}
3278
3279// used in a critical section reduce block
3280static __forceinline void
3281__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3282 kmp_critical_name *crit) {
3283
3284 kmp_user_lock_p lck;
3285
3286#if KMP_USE_DYNAMIC_LOCK
3287
3288 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3289 lck = (kmp_user_lock_p)crit;
3290 if (__kmp_env_consistency_check)
3291 __kmp_pop_sync(global_tid, ct_critical, loc);
3292 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3293 } else {
3294 kmp_indirect_lock_t *ilk =
3295 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3296 if (__kmp_env_consistency_check)
3297 __kmp_pop_sync(global_tid, ct_critical, loc);
3298 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3299 }
3300
3301#else // KMP_USE_DYNAMIC_LOCK
3302
3303 // We know that the fast reduction code is only emitted by Intel compilers
3304 // with 32 byte critical sections. If there isn't enough space, then we have
3305 // to use a pointer.
3306 if (__kmp_base_user_lock_size > 32) {
3307 lck = *((kmp_user_lock_p *)crit);
3308 KMP_ASSERT(lck != NULL);
3309 } else {
3310 lck = (kmp_user_lock_p)crit;
3311 }
3312
3313 if (__kmp_env_consistency_check)
3314 __kmp_pop_sync(global_tid, ct_critical, loc);
3315
3316 __kmp_release_user_lock_with_checks(lck, global_tid);
3317
3318#endif // KMP_USE_DYNAMIC_LOCK
3319} // __kmp_end_critical_section_reduce_block
3320
3321static __forceinline int
3322__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3323 int *task_state) {
3324 kmp_team_t *team;
3325
3326  // Check if we are inside a teams construct.
3327 if (th->th.th_teams_microtask) {
3328 *team_p = team = th->th.th_team;
3329 if (team->t.t_level == th->th.th_teams_level) {
3330      // This is a reduction at the teams construct.
3331 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3332 // Let's swap teams temporarily for the reduction.
3333 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3334 th->th.th_team = team->t.t_parent;
3335 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3336 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3337 *task_state = th->th.th_task_state;
3338 th->th.th_task_state = 0;
3339
3340 return 1;
3341 }
3342 }
3343 return 0;
3344}
3345
3346static __forceinline void
3347__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3348 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3349 th->th.th_info.ds.ds_tid = 0;
3350 th->th.th_team = team;
3351 th->th.th_team_nproc = team->t.t_nproc;
3352 th->th.th_task_team = team->t.t_task_team[task_state];
3353 th->th.th_task_state = task_state;
3354}
3355
3356/* 2.a.i. Reduce Block without a terminating barrier */
3372kmp_int32
3373__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3374 size_t reduce_size, void *reduce_data,
3375 void (*reduce_func)(void *lhs_data, void *rhs_data),
3376 kmp_critical_name *lck) {
3377
3378 KMP_COUNT_BLOCK(REDUCE_nowait);
3379 int retval = 0;
3380 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3381 kmp_info_t *th;
3382 kmp_team_t *team;
3383 int teams_swapped = 0, task_state;
3384 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3385
3386 // why do we need this initialization here at all?
3387  // The reduction clause cannot be used as a stand-alone directive.
3388
3389 // do not call __kmp_serial_initialize(), it will be called by
3390 // __kmp_parallel_initialize() if needed
3391 // possible detection of false-positive race by the threadchecker ???
3392 if (!TCR_4(__kmp_init_parallel))
3393 __kmp_parallel_initialize();
3394
3395 __kmp_resume_if_soft_paused();
3396
3397// check correctness of reduce block nesting
3398#if KMP_USE_DYNAMIC_LOCK
3399 if (__kmp_env_consistency_check)
3400 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3401#else
3402 if (__kmp_env_consistency_check)
3403 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3404#endif
3405
3406 th = __kmp_thread_from_gtid(global_tid);
3407 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3408
3409  // the packed_reduction_method value will be reused by the __kmpc_end_reduce*
3410  // functions, so the value should be kept in a variable
3411  // the variable should be either a construct-specific or thread-specific
3412  // property, not a team-specific property
3413 // (a thread can reach the next reduce block on the next construct, reduce
3414 // method may differ on the next construct)
3415 // an ident_t "loc" parameter could be used as a construct-specific property
3416 // (what if loc == 0?)
3417 // (if both construct-specific and team-specific variables were shared,
3418  // then unnecessary extra syncs would be needed)
3419 // a thread-specific variable is better regarding two issues above (next
3420 // construct and extra syncs)
3421 // a thread-specific "th_local.reduction_method" variable is used currently
3422  // each thread executes the 'determine' and 'set' lines (there is no need to
3423  // restrict this to one thread; that would only add unnecessary extra syncs)
3424
3425 packed_reduction_method = __kmp_determine_reduction_method(
3426 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3427 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3428
3429 OMPT_REDUCTION_DECL(th, global_tid);
3430 if (packed_reduction_method == critical_reduce_block) {
3431
3432 OMPT_REDUCTION_BEGIN;
3433
3434 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3435 retval = 1;
3436
3437 } else if (packed_reduction_method == empty_reduce_block) {
3438
3439 OMPT_REDUCTION_BEGIN;
3440
3441    // usage: if team size == 1, no synchronization is required (Intel
3442    // platforms only)
3443 retval = 1;
3444
3445 } else if (packed_reduction_method == atomic_reduce_block) {
3446
3447 retval = 2;
3448
3449 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3450 // won't be called by the code gen)
3451    // (this is not ideal: the checking block has been closed by
3452    // this 'pop',
3453    // but the atomic operation has not executed yet; it will execute
3454    // slightly later, literally on the next instruction)
3455 if (__kmp_env_consistency_check)
3456 __kmp_pop_sync(global_tid, ct_reduce, loc);
3457
3458 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3459 tree_reduce_block)) {
3460
3461// AT: performance issue: a real barrier here
3462// AT: (if master goes slow, other threads are blocked here waiting for the
3463// master to come and release them)
3464// AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3465// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3466// be confusing to a customer)
3467// AT: another implementation of *barrier_gather*nowait() (or some other design)
3468// might go faster and be more in line with the sense of NOWAIT
3469// AT: TO DO: run the EPCC benchmark and compare times
3470
3471// this barrier should be invisible to a customer and to the threading profile
3472// tool (it's neither a terminating barrier nor customer's code, it's
3473// used for an internal purpose)
3474#if OMPT_SUPPORT
3475    // JP: can this barrier potentially lead to task scheduling?
3476    // JP: as long as there is a barrier in the implementation, OMPT should and
3477    // will provide the barrier events,
3478    // so we set up the necessary frame/return addresses.
3479 ompt_frame_t *ompt_frame;
3480 if (ompt_enabled.enabled) {
3481 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3482 if (ompt_frame->enter_frame.ptr == NULL)
3483 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3484 OMPT_STORE_RETURN_ADDRESS(global_tid);
3485 }
3486#endif
3487#if USE_ITT_NOTIFY
3488 __kmp_threads[global_tid]->th.th_ident = loc;
3489#endif
3490 retval =
3491 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3492 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3493 retval = (retval != 0) ? (0) : (1);
3494#if OMPT_SUPPORT && OMPT_OPTIONAL
3495 if (ompt_enabled.enabled) {
3496 ompt_frame->enter_frame = ompt_data_none;
3497 }
3498#endif
3499
3500    // all workers except the master should do this pop here
3501    // (none of the other workers will get to __kmpc_end_reduce_nowait())
3502 if (__kmp_env_consistency_check) {
3503 if (retval == 0) {
3504 __kmp_pop_sync(global_tid, ct_reduce, loc);
3505 }
3506 }
3507
3508 } else {
3509
3510 // should never reach this block
3511 KMP_ASSERT(0); // "unexpected method"
3512 }
3513 if (teams_swapped) {
3514 __kmp_restore_swapped_teams(th, team, task_state);
3515 }
3516 KA_TRACE(
3517 10,
3518 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3519 global_tid, packed_reduction_method, retval));
3520
3521 return retval;
3522}
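/* [Editorial example] The return-value contract visible in the code above:
 * 1 means the calling thread should combine the partial result into the original
 * variable itself and then call __kmpc_end_reduce_nowait(); 2 means every thread
 * should fold its contribution in atomically; 0 means nothing is left for this
 * thread to do. Below is user-level code that is lowered to such a call
 * sequence; the exact codegen is compiler-specific and only sketched in the
 * comment, with sum_priv/crit as hypothetical names. */
#if 0 /* illustrative only; excluded from the build */
#include <omp.h>

/* Roughly, the compiler emits something like:
 *   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(double), &sum_priv,
 *                                reduce_func, &crit)) {
 *   case 1: sum += sum_priv; __kmpc_end_reduce_nowait(loc, gtid, &crit); break;
 *   case 2: // atomically add sum_priv into sum
 *           break;
 *   default: break; // tree method: already combined in the barrier
 *   }
 */
double dot(const double *x, const double *y, int n) {
  double sum = 0.0;
#pragma omp parallel
  {
#pragma omp for reduction(+ : sum) nowait
    for (int i = 0; i < n; ++i)
      sum += x[i] * y[i];
  }
  return sum;
}
#endif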
3523
3532void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3533 kmp_critical_name *lck) {
3534
3535 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3536
3537 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3538
3539 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3540
3541 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3542
3543 if (packed_reduction_method == critical_reduce_block) {
3544
3545 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3546 OMPT_REDUCTION_END;
3547
3548 } else if (packed_reduction_method == empty_reduce_block) {
3549
3550    // usage: if team size == 1, no synchronization is required (on Intel
3551    // platforms only)
3552
3553 OMPT_REDUCTION_END;
3554
3555 } else if (packed_reduction_method == atomic_reduce_block) {
3556
3557 // neither master nor other workers should get here
3558 // (code gen does not generate this call in case 2: atomic reduce block)
3559    // actually it would be better to remove this else-if entirely;
3560    // after removal this value will be checked by the 'else' and will assert
3561
3562 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3563 tree_reduce_block)) {
3564
3565 // only master gets here
3566 // OMPT: tree reduction is annotated in the barrier code
3567
3568 } else {
3569
3570 // should never reach this block
3571 KMP_ASSERT(0); // "unexpected method"
3572 }
3573
3574 if (__kmp_env_consistency_check)
3575 __kmp_pop_sync(global_tid, ct_reduce, loc);
3576
3577 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3578 global_tid, packed_reduction_method));
3579
3580 return;
3581}
3582
3583/* 2.a.ii. Reduce Block with a terminating barrier */
3584
3600kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3601 size_t reduce_size, void *reduce_data,
3602 void (*reduce_func)(void *lhs_data, void *rhs_data),
3603 kmp_critical_name *lck) {
3604 KMP_COUNT_BLOCK(REDUCE_wait);
3605 int retval = 0;
3606 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3607 kmp_info_t *th;
3608 kmp_team_t *team;
3609 int teams_swapped = 0, task_state;
3610
3611 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3612
3613 // why do we need this initialization here at all?
3614  // The reduction clause cannot be a stand-alone directive.
3615
3616 // do not call __kmp_serial_initialize(), it will be called by
3617 // __kmp_parallel_initialize() if needed
3618 // possible detection of false-positive race by the threadchecker ???
3619 if (!TCR_4(__kmp_init_parallel))
3620 __kmp_parallel_initialize();
3621
3622 __kmp_resume_if_soft_paused();
3623
3624// check correctness of reduce block nesting
3625#if KMP_USE_DYNAMIC_LOCK
3626 if (__kmp_env_consistency_check)
3627 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3628#else
3629 if (__kmp_env_consistency_check)
3630 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3631#endif
3632
3633 th = __kmp_thread_from_gtid(global_tid);
3634 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3635
3636 packed_reduction_method = __kmp_determine_reduction_method(
3637 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3638 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3639
3640 OMPT_REDUCTION_DECL(th, global_tid);
3641
3642 if (packed_reduction_method == critical_reduce_block) {
3643
3644 OMPT_REDUCTION_BEGIN;
3645 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3646 retval = 1;
3647
3648 } else if (packed_reduction_method == empty_reduce_block) {
3649
3650 OMPT_REDUCTION_BEGIN;
3651    // usage: if team size == 1, no synchronization is required (Intel
3652    // platforms only)
3653 retval = 1;
3654
3655 } else if (packed_reduction_method == atomic_reduce_block) {
3656
3657 retval = 2;
3658
3659 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3660 tree_reduce_block)) {
3661
3662// case tree_reduce_block:
3663// this barrier should be visible to a customer and to the threading profile
3664// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3665#if OMPT_SUPPORT
3666 ompt_frame_t *ompt_frame;
3667 if (ompt_enabled.enabled) {
3668 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3669 if (ompt_frame->enter_frame.ptr == NULL)
3670 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3671 OMPT_STORE_RETURN_ADDRESS(global_tid);
3672 }
3673#endif
3674#if USE_ITT_NOTIFY
3675 __kmp_threads[global_tid]->th.th_ident =
3676 loc; // needed for correct notification of frames
3677#endif
3678 retval =
3679 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3680 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3681 retval = (retval != 0) ? (0) : (1);
3682#if OMPT_SUPPORT && OMPT_OPTIONAL
3683 if (ompt_enabled.enabled) {
3684 ompt_frame->enter_frame = ompt_data_none;
3685 }
3686#endif
3687
3688    // all workers except the master should do this pop here
3689    // (none of the workers except the master will enter __kmpc_end_reduce())
3690 if (__kmp_env_consistency_check) {
3691 if (retval == 0) { // 0: all other workers; 1: master
3692 __kmp_pop_sync(global_tid, ct_reduce, loc);
3693 }
3694 }
3695
3696 } else {
3697
3698 // should never reach this block
3699 KMP_ASSERT(0); // "unexpected method"
3700 }
3701 if (teams_swapped) {
3702 __kmp_restore_swapped_teams(th, team, task_state);
3703 }
3704
3705 KA_TRACE(10,
3706 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3707 global_tid, packed_reduction_method, retval));
3708 return retval;
3709}
3710
3721void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3722 kmp_critical_name *lck) {
3723
3724 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3725 kmp_info_t *th;
3726 kmp_team_t *team;
3727 int teams_swapped = 0, task_state;
3728
3729 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3730
3731 th = __kmp_thread_from_gtid(global_tid);
3732 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3733
3734 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3735
3736 // this barrier should be visible to a customer and to the threading profile
3737 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3738 OMPT_REDUCTION_DECL(th, global_tid);
3739
3740 if (packed_reduction_method == critical_reduce_block) {
3741 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3742
3743 OMPT_REDUCTION_END;
3744
3745// TODO: implicit barrier: should be exposed
3746#if OMPT_SUPPORT
3747 ompt_frame_t *ompt_frame;
3748 if (ompt_enabled.enabled) {
3749 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3750 if (ompt_frame->enter_frame.ptr == NULL)
3751 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3752 OMPT_STORE_RETURN_ADDRESS(global_tid);
3753 }
3754#endif
3755#if USE_ITT_NOTIFY
3756 __kmp_threads[global_tid]->th.th_ident = loc;
3757#endif
3758 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3759#if OMPT_SUPPORT && OMPT_OPTIONAL
3760 if (ompt_enabled.enabled) {
3761 ompt_frame->enter_frame = ompt_data_none;
3762 }
3763#endif
3764
3765 } else if (packed_reduction_method == empty_reduce_block) {
3766
3767 OMPT_REDUCTION_END;
3768
3769// usage: if team size==1, no synchronization is required (Intel platforms only)
3770
3771// TODO: implicit barrier: should be exposed
3772#if OMPT_SUPPORT
3773 ompt_frame_t *ompt_frame;
3774 if (ompt_enabled.enabled) {
3775 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3776 if (ompt_frame->enter_frame.ptr == NULL)
3777 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3778 OMPT_STORE_RETURN_ADDRESS(global_tid);
3779 }
3780#endif
3781#if USE_ITT_NOTIFY
3782 __kmp_threads[global_tid]->th.th_ident = loc;
3783#endif
3784 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3785#if OMPT_SUPPORT && OMPT_OPTIONAL
3786 if (ompt_enabled.enabled) {
3787 ompt_frame->enter_frame = ompt_data_none;
3788 }
3789#endif
3790
3791 } else if (packed_reduction_method == atomic_reduce_block) {
3792
3793#if OMPT_SUPPORT
3794 ompt_frame_t *ompt_frame;
3795 if (ompt_enabled.enabled) {
3796 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3797 if (ompt_frame->enter_frame.ptr == NULL)
3798 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3799 OMPT_STORE_RETURN_ADDRESS(global_tid);
3800 }
3801#endif
3802// TODO: implicit barrier: should be exposed
3803#if USE_ITT_NOTIFY
3804 __kmp_threads[global_tid]->th.th_ident = loc;
3805#endif
3806 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3807#if OMPT_SUPPORT && OMPT_OPTIONAL
3808 if (ompt_enabled.enabled) {
3809 ompt_frame->enter_frame = ompt_data_none;
3810 }
3811#endif
3812
3813 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3814 tree_reduce_block)) {
3815
3816 // only master executes here (master releases all other workers)
3817 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3818 global_tid);
3819
3820 } else {
3821
3822 // should never reach this block
3823 KMP_ASSERT(0); // "unexpected method"
3824 }
3825 if (teams_swapped) {
3826 __kmp_restore_swapped_teams(th, team, task_state);
3827 }
3828
3829 if (__kmp_env_consistency_check)
3830 __kmp_pop_sync(global_tid, ct_reduce, loc);
3831
3832 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3833 global_tid, packed_reduction_method));
3834
3835 return;
3836}
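
/* Illustrative only: with more than one reduction variable, reduce_data
 * typically points to a per-thread block holding all partial results, and
 * reduce_func combines the whole block in one call. The struct layout and
 * operators below are hypothetical. Guarded out so it is never compiled into
 * the library. */
#if 0
typedef struct example_partials {
  double sum;  // partial result for reduction(+ : sum)
  int max_val; // partial result for reduction(max : max_val)
} example_partials_t;

static void example_multi_reduce_func(void *lhs_data, void *rhs_data) {
  example_partials_t *lhs = (example_partials_t *)lhs_data;
  example_partials_t *rhs = (example_partials_t *)rhs_data;
  lhs->sum += rhs->sum;
  if (rhs->max_val > lhs->max_val)
    lhs->max_val = rhs->max_val;
}
// A caller would pass num_vars = 2, reduce_size = sizeof(example_partials_t),
// reduce_data = &<per-thread example_partials_t>, and
// reduce_func = example_multi_reduce_func to __kmpc_reduce{_nowait}().
#endif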
3837
3838#undef __KMP_GET_REDUCTION_METHOD
3839#undef __KMP_SET_REDUCTION_METHOD
3840
3841/* end of interface to fast scalable reduce routines */
3842
3843kmp_uint64 __kmpc_get_taskid() {
3844
3845 kmp_int32 gtid;
3846 kmp_info_t *thread;
3847
3848 gtid = __kmp_get_gtid();
3849 if (gtid < 0) {
3850 return 0;
3851 }
3852 thread = __kmp_thread_from_gtid(gtid);
3853 return thread->th.th_current_task->td_task_id;
3854
3855} // __kmpc_get_taskid
3856
3857kmp_uint64 __kmpc_get_parent_taskid() {
3858
3859 kmp_int32 gtid;
3860 kmp_info_t *thread;
3861 kmp_taskdata_t *parent_task;
3862
3863 gtid = __kmp_get_gtid();
3864 if (gtid < 0) {
3865 return 0;
3866 }
3867 thread = __kmp_thread_from_gtid(gtid);
3868 parent_task = thread->th.th_current_task->td_parent;
3869 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3870
3871} // __kmpc_get_parent_taskid
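
/* Illustrative only: a minimal tracing helper built on the two getters above;
 * a return value of 0 means there is no OpenMP task context (for example, the
 * calling thread is not registered with the runtime). The printf call assumes
 * <cstdio>. Guarded out so it is never compiled into the library. */
#if 0
static void example_trace_task_ids(void) {
  kmp_uint64 id = __kmpc_get_taskid();
  kmp_uint64 parent_id = __kmpc_get_parent_taskid();
  if (id != 0)
    printf("task %llu (parent %llu)\n", (unsigned long long)id,
           (unsigned long long)parent_id);
}
#endif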
3872
3884void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3885 const struct kmp_dim *dims) {
3886 int j, idx;
3887 kmp_int64 last, trace_count;
3888 kmp_info_t *th = __kmp_threads[gtid];
3889 kmp_team_t *team = th->th.th_team;
3890 kmp_uint32 *flags;
3891 kmp_disp_t *pr_buf = th->th.th_dispatch;
3892 dispatch_shared_info_t *sh_buf;
3893
3894 KA_TRACE(
3895 20,
3896 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3897 gtid, num_dims, !team->t.t_serialized));
3898 KMP_DEBUG_ASSERT(dims != NULL);
3899 KMP_DEBUG_ASSERT(num_dims > 0);
3900
3901 if (team->t.t_serialized) {
3902 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3903 return; // no dependencies if team is serialized
3904 }
3905 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3906 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3907 // the next loop
3908 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3909
3910 // Save bounds info into allocated private buffer
3911 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3912 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3913 th, sizeof(kmp_int64) * (4 * num_dims + 1));
3914 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3915 pr_buf->th_doacross_info[0] =
3916 (kmp_int64)num_dims; // first element is number of dimensions
3917 // Also save the address of num_done so it can be accessed later without
3918 // knowing the buffer index
3919 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3920 pr_buf->th_doacross_info[2] = dims[0].lo;
3921 pr_buf->th_doacross_info[3] = dims[0].up;
3922 pr_buf->th_doacross_info[4] = dims[0].st;
3923 last = 5;
3924 for (j = 1; j < num_dims; ++j) {
3925 kmp_int64
3926 range_length; // range length of dimension j; kept for every dim except dims[0]
3927 if (dims[j].st == 1) { // most common case
3928 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3929 range_length = dims[j].up - dims[j].lo + 1;
3930 } else {
3931 if (dims[j].st > 0) {
3932 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3933 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3934 } else { // negative increment
3935 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3936 range_length =
3937 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3938 }
3939 }
3940 pr_buf->th_doacross_info[last++] = range_length;
3941 pr_buf->th_doacross_info[last++] = dims[j].lo;
3942 pr_buf->th_doacross_info[last++] = dims[j].up;
3943 pr_buf->th_doacross_info[last++] = dims[j].st;
3944 }
3945
3946 // Compute total trip count.
3947 // Start with range of dims[0] which we don't need to keep in the buffer.
3948 if (dims[0].st == 1) { // most common case
3949 trace_count = dims[0].up - dims[0].lo + 1;
3950 } else if (dims[0].st > 0) {
3951 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3952 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3953 } else { // negative increment
3954 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3955 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3956 }
3957 for (j = 1; j < num_dims; ++j) {
3958 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3959 }
3960 KMP_DEBUG_ASSERT(trace_count > 0);
3961
3962 // Check that the shared buffer is not still occupied by a previous loop
3963 // (the one that used buffer index idx - __kmp_dispatch_num_buffers).
3964 if (idx != sh_buf->doacross_buf_idx) {
3965 // Shared buffer is occupied, wait for it to be free
3966 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3967 __kmp_eq_4, NULL);
3968 }
3969#if KMP_32_BIT_ARCH
3970 // Check if we are the first thread. After the CAS the first thread gets 0,
3971 // others get 1 if initialization is in progress, allocated pointer otherwise.
3972 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3973 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3974 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3975#else
3976 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3977 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3978#endif
3979 if (flags == NULL) {
3980 // we are the first thread, allocate the array of flags
3981 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3982 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3983 KMP_MB();
3984 sh_buf->doacross_flags = flags;
3985 } else if (flags == (kmp_uint32 *)1) {
3986#if KMP_32_BIT_ARCH
3987 // initialization is still in progress, need to wait
3988 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3989#else
3990 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3991#endif
3992 KMP_YIELD(TRUE);
3993 KMP_MB();
3994 } else {
3995 KMP_MB();
3996 }
3997 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3998 pr_buf->th_doacross_flags =
3999 sh_buf->doacross_flags; // save private copy in order to not
4000 // touch shared buffer on each iteration
4001 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4002}
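
/* Illustrative only: a rough sketch of how a compiler might set up a
 * two-dimensional doacross nest such as
 *     #pragma omp for ordered(2)
 *     for (i = 0; i < N; i++)
 *       for (j = 0; j < M; j++) { ... }
 * The bounds are placeholders. Guarded out so it is never compiled into the
 * library. */
#if 0
static void example_doacross_setup(ident_t *loc, int gtid, kmp_int64 N,
                                   kmp_int64 M) {
  struct kmp_dim dims[2];
  dims[0].lo = 0;     // lower bound of the outer loop
  dims[0].up = N - 1; // upper bound (inclusive)
  dims[0].st = 1;     // stride
  dims[1].lo = 0;
  dims[1].up = M - 1;
  dims[1].st = 1;
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/2, dims);
  // ... worksharing loop with __kmpc_doacross_wait()/__kmpc_doacross_post()
  // calls in its body ...
  __kmpc_doacross_fini(loc, gtid);
}
#endif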
4003
4004void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4005 kmp_int32 shft, num_dims, i;
4006 kmp_uint32 flag;
4007 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4008 kmp_info_t *th = __kmp_threads[gtid];
4009 kmp_team_t *team = th->th.th_team;
4010 kmp_disp_t *pr_buf;
4011 kmp_int64 lo, up, st;
4012
4013 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4014 if (team->t.t_serialized) {
4015 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4016 return; // no dependencies if team is serialized
4017 }
4018
4019 // calculate sequential iteration number and check out-of-bounds condition
4020 pr_buf = th->th.th_dispatch;
4021 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4022 num_dims = pr_buf->th_doacross_info[0];
4023 lo = pr_buf->th_doacross_info[2];
4024 up = pr_buf->th_doacross_info[3];
4025 st = pr_buf->th_doacross_info[4];
4026#if OMPT_SUPPORT && OMPT_OPTIONAL
4027 ompt_dependence_t deps[num_dims];
4028#endif
4029 if (st == 1) { // most common case
4030 if (vec[0] < lo || vec[0] > up) {
4031 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4032 "bounds [%lld,%lld]\n",
4033 gtid, vec[0], lo, up));
4034 return;
4035 }
4036 iter_number = vec[0] - lo;
4037 } else if (st > 0) {
4038 if (vec[0] < lo || vec[0] > up) {
4039 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4040 "bounds [%lld,%lld]\n",
4041 gtid, vec[0], lo, up));
4042 return;
4043 }
4044 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4045 } else { // negative increment
4046 if (vec[0] > lo || vec[0] < up) {
4047 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4048 "bounds [%lld,%lld]\n",
4049 gtid, vec[0], lo, up));
4050 return;
4051 }
4052 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4053 }
4054#if OMPT_SUPPORT && OMPT_OPTIONAL
4055 deps[0].variable.value = iter_number;
4056 deps[0].dependence_type = ompt_dependence_type_sink;
4057#endif
4058 for (i = 1; i < num_dims; ++i) {
4059 kmp_int64 iter, ln;
4060 kmp_int32 j = i * 4;
4061 ln = pr_buf->th_doacross_info[j + 1];
4062 lo = pr_buf->th_doacross_info[j + 2];
4063 up = pr_buf->th_doacross_info[j + 3];
4064 st = pr_buf->th_doacross_info[j + 4];
4065 if (st == 1) {
4066 if (vec[i] < lo || vec[i] > up) {
4067 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4068 "bounds [%lld,%lld]\n",
4069 gtid, vec[i], lo, up));
4070 return;
4071 }
4072 iter = vec[i] - lo;
4073 } else if (st > 0) {
4074 if (vec[i] < lo || vec[i] > up) {
4075 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4076 "bounds [%lld,%lld]\n",
4077 gtid, vec[i], lo, up));
4078 return;
4079 }
4080 iter = (kmp_uint64)(vec[i] - lo) / st;
4081 } else { // st < 0
4082 if (vec[i] > lo || vec[i] < up) {
4083 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4084 "bounds [%lld,%lld]\n",
4085 gtid, vec[i], lo, up));
4086 return;
4087 }
4088 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4089 }
4090 iter_number = iter + ln * iter_number;
4091#if OMPT_SUPPORT && OMPT_OPTIONAL
4092 deps[i].variable.value = iter;
4093 deps[i].dependence_type = ompt_dependence_type_sink;
4094#endif
4095 }
4096 shft = iter_number % 32; // use 32-bit granularity
4097 iter_number >>= 5; // divided by 32
4098 flag = 1 << shft;
4099 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4100 KMP_YIELD(TRUE);
4101 }
4102 KMP_MB();
4103#if OMPT_SUPPORT && OMPT_OPTIONAL
4104 if (ompt_enabled.ompt_callback_dependences) {
4105 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4106 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
4107 }
4108#endif
4109 KA_TRACE(20,
4110 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4111 gtid, (iter_number << 5) + shft));
4112}
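
/* Worked example of the linearization above (numbers are illustrative): for a
 * 2-D nest with dims[0] = {lo = 0, up = 99, st = 1} and
 * dims[1] = {lo = 0, up = 9, st = 1}, the stored range length of dimension 1
 * is ln = 10. The sink vector (i = 3, j = 7) linearizes to
 *     iter_number = 3, then 7 + 10 * 3 = 37,
 * so the waiter spins on bit 37 % 32 = 5 of flags word 37 >> 5 = 1 until the
 * matching __kmpc_doacross_post() sets it. */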
4113
4114void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4115 kmp_int32 shft, num_dims, i;
4116 kmp_uint32 flag;
4117 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4118 kmp_info_t *th = __kmp_threads[gtid];
4119 kmp_team_t *team = th->th.th_team;
4120 kmp_disp_t *pr_buf;
4121 kmp_int64 lo, st;
4122
4123 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4124 if (team->t.t_serialized) {
4125 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4126 return; // no dependencies if team is serialized
4127 }
4128
4129 // calculate sequential iteration number (same as in "wait" but no
4130 // out-of-bounds checks)
4131 pr_buf = th->th.th_dispatch;
4132 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4133 num_dims = pr_buf->th_doacross_info[0];
4134 lo = pr_buf->th_doacross_info[2];
4135 st = pr_buf->th_doacross_info[4];
4136#if OMPT_SUPPORT && OMPT_OPTIONAL
4137 ompt_dependence_t deps[num_dims];
4138#endif
4139 if (st == 1) { // most common case
4140 iter_number = vec[0] - lo;
4141 } else if (st > 0) {
4142 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4143 } else { // negative increment
4144 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4145 }
4146#if OMPT_SUPPORT && OMPT_OPTIONAL
4147 deps[0].variable.value = iter_number;
4148 deps[0].dependence_type = ompt_dependence_type_source;
4149#endif
4150 for (i = 1; i < num_dims; ++i) {
4151 kmp_int64 iter, ln;
4152 kmp_int32 j = i * 4;
4153 ln = pr_buf->th_doacross_info[j + 1];
4154 lo = pr_buf->th_doacross_info[j + 2];
4155 st = pr_buf->th_doacross_info[j + 4];
4156 if (st == 1) {
4157 iter = vec[i] - lo;
4158 } else if (st > 0) {
4159 iter = (kmp_uint64)(vec[i] - lo) / st;
4160 } else { // st < 0
4161 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4162 }
4163 iter_number = iter + ln * iter_number;
4164#if OMPT_SUPPORT && OMPT_OPTIONAL
4165 deps[i].variable.value = iter;
4166 deps[i].dependence_type = ompt_dependence_type_source;
4167#endif
4168 }
4169#if OMPT_SUPPORT && OMPT_OPTIONAL
4170 if (ompt_enabled.ompt_callback_dependences) {
4171 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4172 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
4173 }
4174#endif
4175 shft = iter_number % 32; // use 32-bit granularity
4176 iter_number >>= 5; // divided by 32
4177 flag = 1 << shft;
4178 KMP_MB();
4179 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4180 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4181 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4182 (iter_number << 5) + shft));
4183}
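
/* Illustrative only: a rough sketch of the wait/post pattern a compiler might
 * emit in the body of the ordered(2) nest from the init example above for
 *     #pragma omp ordered depend(sink : i - 1, j)
 *     ...
 *     #pragma omp ordered depend(source)
 * The iteration vectors are placeholders. Guarded out so it is never compiled
 * into the library. */
#if 0
static void example_doacross_body(ident_t *loc, int gtid, kmp_int64 i,
                                  kmp_int64 j) {
  kmp_int64 sink_vec[2] = {i - 1, j}; // wait until iteration (i-1, j) posted
  __kmpc_doacross_wait(loc, gtid, sink_vec);

  /* ... work that depends on iteration (i-1, j) ... */

  kmp_int64 source_vec[2] = {i, j}; // publish completion of this iteration
  __kmpc_doacross_post(loc, gtid, source_vec);
}
#endif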
4184
4185void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4186 kmp_int32 num_done;
4187 kmp_info_t *th = __kmp_threads[gtid];
4188 kmp_team_t *team = th->th.th_team;
4189 kmp_disp_t *pr_buf = th->th.th_dispatch;
4190
4191 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4192 if (team->t.t_serialized) {
4193 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4194 return; // nothing to do
4195 }
4196 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4197 if (num_done == th->th.th_team_nproc) {
4198 // we are the last thread, need to free shared resources
4199 int idx = pr_buf->th_doacross_buf_idx - 1;
4200 dispatch_shared_info_t *sh_buf =
4201 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4202 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4203 (kmp_int64)&sh_buf->doacross_num_done);
4204 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4205 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4206 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4207 sh_buf->doacross_flags = NULL;
4208 sh_buf->doacross_num_done = 0;
4209 sh_buf->doacross_buf_idx +=
4210 __kmp_dispatch_num_buffers; // free buffer for future re-use
4211 }
4212 // free private resources (need to keep buffer index forever)
4213 pr_buf->th_doacross_flags = NULL;
4214 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4215 pr_buf->th_doacross_info = NULL;
4216 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4217}
4218
4219/* omp_alloc/omp_free only defined for C/C++, not for Fortran */
4220void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4221 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4222}
4223
4224void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4225 __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4226}
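
/* Illustrative only: user-level usage of the two entry points above with one
 * of the predefined OpenMP allocators (omp_default_mem_alloc from <omp.h>);
 * error handling is minimal. Guarded out so it is never compiled into the
 * library. */
#if 0
static void example_omp_alloc(size_t n) {
  double *buf = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
  if (buf != NULL) {
    /* ... use buf ... */
    omp_free(buf, omp_default_mem_alloc);
  }
}
#endif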
4227
4228int __kmpc_get_target_offload(void) {
4229 if (!__kmp_init_serial) {
4230 __kmp_serial_initialize();
4231 }
4232 return __kmp_target_offload;
4233}
4234
4235int __kmpc_pause_resource(kmp_pause_status_t level) {
4236 if (!__kmp_init_serial) {
4237 return 1; // Can't pause if runtime is not initialized
4238 }
4239 return __kmp_pause_resource(level);
4240}
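
/* Illustrative only: the user-facing counterpart of the entry point above is
 * the OpenMP 5.0 pause API from <omp.h>; an application might release runtime
 * resources between phases like this (0 indicates success). Guarded out so it
 * is never compiled into the library. */
#if 0
static void example_pause_between_phases(void) {
  /* ... first parallel phase ... */
  if (omp_pause_resource_all(omp_pause_soft) != 0) {
    /* pause unsupported or failed; the runtime simply keeps its resources */
  }
  /* ... long serial phase ... */
  /* the next parallel region re-creates whatever was released */
}
#endif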