1 //          Copyright Ferdinand Majerech 2014.
2 // Distributed under the Boost Software License, Version 1.0.
3 //    (See accompanying file LICENSE_1_0.txt or copy at
4 //          http://www.boost.org/LICENSE_1_0.txt)
5 
6 
7 /// Despiker backend.
8 module despiker.backend;
9 
10 
11 import std.algorithm;
12 import std.array;
13 import std.stdio;
14 
15 import tharsis.prof;
16 
17 import despiker.profdatasource: ProfileDataChunk;
18 
19 
/** Maximum number of threads supported by Despiker at the moment.
 *
 * Thread indices of chunks passed to Backend.addChunk() must be lower than this value.
 *
 * Increase when 1024 is too few, in 2025 or so.
 */
enum maxThreads = 1024;
25 
26 
/** Compact record describing one 'frame zone'.
 *
 * Every frame of every thread gets one of these in a random-access array, so the
 * struct is kept as small as possible to avoid wasting memory.
 */
struct FrameInfo
{
    /// Extents used to slice all events of this frame out of a ChunkyEventList.
    ChunkyEventList.SliceExtents extents;
    /// Time at which the frame started, in hnsecs.
    ulong startTime;
    /// Length of the frame, in hnsecs.
    ulong duration;

    /// Time at which the frame ended, in hnsecs (start time plus duration).
    ulong endTime() @safe pure nothrow const @nogc
    {
        const ulong end = startTime + duration;
        return end;
    }
}
47 
/** Despiker backend.
 *
 * Handles storage of, processing of and access to profile data.
 *
 * Profile data arrives in chunks through addChunk(). update() then generates zones
 * from newly added chunks and records the zones accepted by the frame filter as
 * frames, which can be read through frames() and events().
 */
final class Backend
{
public:
    /// Function that determines if a zone represents an entire frame.
    alias FrameFilter = bool delegate(ZoneData zone) @safe nothrow @nogc;

private:
    /// Default number of chunk structs (not the actual chunk data) to preallocate per thread.
    enum defaultChunkBufferSize = 60 * 3600;

    /// Profiling state kept for each profiled thread.
    struct ThreadState
    {
        /// 'chunk buffer' used by eventList to store chunk structs (not the actual chunk data)
        ChunkyEventList.Chunk[] chunkBuffer;
        /// Stores profiling data for this thread and provides API to read profiling events.
        ChunkyEventList eventList;
        /// Generates zones from events in eventList on-the-fly as new chunks are added.
        ChunkyZoneGenerator zoneGenerator;
        /** Stores information about frame zones (as determined by Backend.frameFilter_).
         *
         * Used to regenerate all events in a frame.
         */
        FrameInfo[] frames;
    }

    /// Thread state for all profiled threads, indexed by thread index.
    ThreadState[] threads_;

    /// Function that determines if a zone represents an entire frame.
    FrameFilter frameFilter_;

public:
    /** Construct Backend.
     *
     * Params:
     *
     * frameFilter = Function to decide if a zone represents an entire frame.
     */
    this(FrameFilter frameFilter) @safe pure nothrow @nogc
    {
        frameFilter_ = frameFilter;
    }

    /** Add a chunk of profiling data.
     *
     * Thread index of the chunk must be lower than maxThreads.
     *
     * Whichever thread the chunk belongs to, its first event must have time at least
     * equal to the last event in the last chunk from that thread. (This can be achieved
     * by prefixing the chunk with a checkpoint event that stores absolute time - which
     * of course must be at least as late as any event in the previous chunk).
     *
     * Params:
     *
     * chunk = Chunk to add; its threadId selects the thread and its data is appended
     *         to the event list of that thread.
     */
    void addChunk(ProfileDataChunk chunk) @system nothrow
    {
        const tid = chunk.threadId;
        // Valid thread indices are 0 .. maxThreads - 1, so maxThreads itself is rejected.
        assert(tid < maxThreads, "No more than 1024 threads are supported");

        // If we were unaware of this thread till now, add thread state for it
        // (and for any missing threads with lower indices).
        while(tid >= threads_.length)
        {
            threads_.assumeSafeAppend();
            threads_ ~= ThreadState.init;
            // NOTE(review): zoneGenerator is built from eventList.generator while the
            // ThreadState lives inside threads_. If that generator refers back to
            // eventList by address, a later reallocation of threads_ could leave it
            // pointing at a stale copy - confirm against tharsis.prof.
            with(threads_.back)
            {
                chunkBuffer   = new ChunkyEventList.Chunk[defaultChunkBufferSize];
                eventList     = ChunkyEventList(chunkBuffer);
                zoneGenerator = ChunkyZoneGenerator(eventList.generator);
            }
        }

        // Add the chunk.
        with(threads_[tid])
        {
            if(eventList.addChunk(chunk.data)) { return; }

            // If we failed to add chunk, it's time to reallocate: give the event list
            // a chunk buffer twice as large and retry once.
            chunkBuffer = new ChunkyEventList.Chunk[chunkBuffer.length * 2];
            eventList.provideStorage(chunkBuffer);
            // TODO If needed, delete old chunkBuffer here  2014-10-02
            if(!eventList.addChunk(chunk.data))
            {
                // Storage was just enlarged, so the chunk was refused for another reason.
                // Literals are joined with '~'; implicit concatenation of adjacent
                // string literals is rejected by current compilers.
                assert(false, "Can't add chunk; probably start time lower than end time " ~
                              "of last chunk");
            }
        }
    }

    /// Get the number of profiled threads (so far).
    size_t threadCount() @safe pure nothrow const @nogc
    {
        return threads_.length;
    }

    /** Get access to frame info for all frames in specified profiled thread.
     *
     * Params:
     *
     * threadIdx = Index of the thread. Must be less than threadCount().
     *
     * Returns: Read-only slice of the frames recorded so far for the thread.
     */
    const(FrameInfo)[] frames(size_t threadIdx) @safe pure nothrow const @nogc
    {
        return threads_[threadIdx].frames;
    }

    /** Get read-only access to ChunkyEventList for specified profiled thread.
     *
     * Used e.g. to get event slices based on slice extents of a frame (read through
     * frames()).
     *
     * Params:
     *
     * threadIdx = Index of the thread. Must be less than threadCount().
     */
    ref const(ChunkyEventList) events(size_t threadIdx) @safe pure nothrow const @nogc
    {
        return threads_[threadIdx].eventList;
    }

    /** Update the Backend between Despiker frames.
     *
     * Must be called on each event loop update.
     *
     * For every thread, generates zones for as long as its zone generator produces
     * them and records each zone accepted by the frame filter as a FrameInfo.
     */
    void update() @system nothrow
    {
        foreach(ref thread; threads_)
        {
            // Generate zones for any chunks that have been added since the last update().
            ChunkyZoneGenerator.GeneratedZoneData zone;
            while(thread.zoneGenerator.generate(zone))
            {
                // Only zones that represent an entire frame are recorded.
                if(!frameFilter_(zone)) { continue; }

                thread.frames.assumeSafeAppend();
                thread.frames ~= FrameInfo(zone.extents, zone.startTime, zone.duration);
            }
        }
    }
}
// Feeds the same profile data to a Backend as two threads, chunk by chunk, then checks
// that every frame recorded by the Backend matches a "frame" zone of the profiler.
unittest
{
    writeln("Backend unittest");
    scope(failure) { writeln("Backend unittest FAILURE"); }
    scope(success) { writeln("Backend unittest SUCCESS"); }

    const frameCount = 16;

    // Only zones with info "frame" count as frames.
    bool isFrameZone(ZoneData zone) @safe nothrow @nogc
    {
        return zone.info == "frame";
    }

    import std.typecons;
    // std.typecons.scoped! stores the Profiler on the stack.
    auto profiler = scoped!Profiler(new ubyte[Profiler.maxEventBytes + 2048]);
    auto backend = new Backend(&isFrameZone);

    // Number of profile data bytes already handed over to the backend.
    size_t sentBytes = 0;
    profiler.checkpointEvent();
    // Simulate frameCount 'frames'; profile data recorded so far is sent once per frame.
    foreach(frameIdx; 0 .. frameCount)
    {
        // Stays open until the end of this iteration, i.e. past the checkpoint below.
        Zone frameZone = Zone(profiler, "frame");

        // Simulate frame overhead. Replace this with your frame code.
        {
            Zone startZone = Zone(profiler, "frameStart");
            foreach(i; 0 .. 1000) { continue; }
        }
        {
            Zone coreZone = Zone(profiler, "frameCore");
            foreach(i; 0 .. 10000) { continue; }
        }

        auto pending = profiler.profileData[sentBytes .. $].idup;
        // The same data is added as if it came from two different threads.
        backend.addChunk(ProfileDataChunk(0, pending));
        backend.addChunk(ProfileDataChunk(1, pending));
        backend.update();

        sentBytes = profiler.profileData.length;
        profiler.checkpointEvent();
    }

    // Send whatever has been recorded since the last chunk.
    auto remaining = profiler.profileData[sentBytes .. $].idup;
    backend.addChunk(ProfileDataChunk(0, remaining));
    backend.addChunk(ProfileDataChunk(1, remaining));
    backend.update();

    // Frames stored in each ThreadState must line up, in order, with the zones that
    // isFrameZone() picks out of the complete profile data.
    assert(backend.threads_.length == 2);
    foreach(ref threadState; backend.threads_)
    {
        auto expectedFrames = profiler.profileData.zoneRange.filter!isFrameZone;
        foreach(frameInfo; threadState.frames)
        {
            const sliceExtents = frameInfo.extents;
            const expected = expectedFrames.front;

            auto sliceZones =
                ZoneRange!ChunkyEventSlice(threadState.eventList.slice(sliceExtents));
            // ZoneRange is sorted by zone end time, so the 'frame' zone should be
            // the last zone produced from the slice.
            ZoneData actual;
            foreach(zone; sliceZones) { actual = zone; }

            // ID, nesting and parent ID are not compared: the slice does not include
            // all the parent zones, so those members will differ.
            const sameZone = expected.startTime == actual.startTime &&
                             expected.endTime == actual.endTime &&
                             expected.info == actual.info;
            assert(sameZone, "Frame slices generated by Backend don't match frame zones");

            expectedFrames.popFront;
        }
    }
}