aboutsummaryrefslogtreecommitdiff
path: root/Source/Utils/KernelTimer.H
blob: 11f4dd6011cd4369f5643a6dd388824d48a0e875 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/* Copyright 2019-2020 Michael Rowan, Axel Huebl, Kevin Gott
 *
 * This file is part of WarpX.
 *
 * License: BSD-3-Clause-LBNL
 */
#ifndef KERNELTIMER_H_
#define KERNELTIMER_H_

#ifdef AMREX_USE_GPU
#include <limits.h>

/**
 * \brief Defines a timer object to be used on GPU; measures summed thread cycles.
 *
 * The timed region is the lifetime of the object: the constructor records a
 * starting counter value and the destructor atomically adds the elapsed count
 * to the accumulator supplied by the caller. Create it as a local variable at
 * the top of the device code to be timed.
 *
 * The timer does nothing if \c do_timing is false or \c cost is a null pointer.
 *
 * \note The implicitly generated copy operations are left available for
 * backward compatibility, but every copy of an active timer adds its own
 * elapsed count on destruction, so copying leads to double counting.
 */
class KernelTimer
{
public:
    /** Constructor.
     * \param[in] do_timing Controls whether timer is active.
     * \param[in,out] cost Pointer to cost which holds summed thread cycles
     * (for performance, it is recommended to allocate pinned host memory).
     * May be null, in which case the timer is inactive.
     *
     * On CUDA and HIP builds the start counter is sampled with clock64().
     * On DPC++ builds an active timer is not supported yet and triggers an
     * assertion failure.
     */
    AMREX_GPU_DEVICE
    KernelTimer (const bool do_timing, amrex::Real* cost)
        : m_do_timing(do_timing), m_cost(cost) {
        // Same guard as in the destructor, so start and stop always pair up.
        if (m_do_timing && m_cost) {
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
            // Start the timer
            m_wt = clock64();

#elif defined(AMREX_USE_DPCPP)
            // To be updated
            // This branch is only reached with do_timing == true, so the
            // assertion below always fires: it reports the missing support.
            AMREX_ALWAYS_ASSERT_WITH_MESSAGE( do_timing == false,
                                              "KernelTimer not yet supported for this hardware." );
#endif
        }
    }

    /** Destructor.
     * Stops the timer and, if it is active, atomically adds the elapsed
     * counter difference of this thread to the accumulated cost.
     */
    AMREX_GPU_DEVICE
    ~KernelTimer () {
        if (m_do_timing && m_cost) {
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
            // Elapsed count for this thread since construction.
            m_wt = clock64() - m_wt;
            // Many threads accumulate into the same location, hence the atomic add.
            amrex::Gpu::Atomic::Add( m_cost, static_cast<amrex::Real>(m_wt));
#elif defined(AMREX_USE_DPCPP)
            // To be updated
#endif
        }
    }

private:
    //! Stores whether kernel timer is active.
    bool m_do_timing;

    //! Location in which to accumulate costs from all threads.
    amrex::Real* m_cost;

    //! Store the time difference (cost) from a single thread.
    //! Zero-initialized so that an inactive timer never holds an indeterminate value.
    long long int m_wt = 0;
};

#endif
#endif