# Minimizing Buffer Transfers
[Back to Index ../](../index.md)
  3 
  4 
## The naive approach
The default execution model is that at each kernel
dispatch the backend just copies all arg buffers to
the device, and after the dispatch it copies all arg
buffers back.
 10 
## Using kernel arg buffer access patterns
If we knew how each kernel accesses its args (via static analysis of the code model, or
by marking the args RO, RW or WO with annotations) we could avoid some copies by only
copying in if the kernel reads the arg buffer and only copying out if the
kernel writes to the arg buffer.
 16 
Let's use the game of life as an example.
 18 
We assume that the UI only needs updating at some 'rate' (say 5 fps), but the kernels can
compute generations faster than 5 generations per second.
 21 
So not every generation needs to be copied back from the device.
 23 
We'll ignore the details of the `life` kernel itself; mostly we care about the `compute` code
that dispatches it. We will assume the kernel args are appropriately annotated as RO, RW or WO.

```java
 28  @Reflect
 29 public static void life(@RO KernelContext kc, @RO Control control, @RW CellGrid cellGrid) {
 30   if (kc.x < kc.maxX) {
 31     Compute.lifePerIdx(kc.x, control, cellGrid);
 32   }
 33 }
 34 
 35 @Reflect
 36 static public void compute(final @RO ComputeContext cc,
 37                            Viewer viewer, @RO Control control, @RW CellGrid cellGrid) {
 38   var timeOfLastUIUpdate = System.currentTimeMillis();
 39   var msPerFrame = 1000/5; // we want 5 fps
 40   while (viewer.state.generation < viewer.state.maxGenerations) {
 41     long now = System.currentTimeMillis();
 42     var msSinceLastUpdate = (now - timeOfLastUIUpdate);
 43     var updateNeeded =  (msSinceLastUpdate > msPerFrame);
 44 gc
 45     cc.dispatchKernel(cellGrid.width() * cellGrid.height(),
 46             kc -> Compute.life(kc, control, cellGrid)
 47     );
 48 gc
 49     // Here we are swapping from<->to on the control buffer
 50     int to = control.from();
 51     control.from(control.to());
 52     control.to(to);
 53 gc
 54     if (updateNeeded) {
 55       viewer.update(now, to, cellGrid);
 56       timeOfLastUIUpdate = now;
 57     }
 58   }
 59 }
```

First, let's assume there were no automatic transfers and we had to control them explicitly by inserting the copy code ourselves.

What would our code look like?

```java
 67  @Reflect
 68 public static void life(@RO KernelContext kc, @RO Control control, @RW CellGrid cellGrid) {
 69   if (kc.x < kc.maxX) {
 70     Compute.lifePerIdx(kc.x, control, cellGrid);
 71   }
 72 }
 73 
 74 @Reflect
 75 static public void compute(final @RO ComputeContext cc,
 76                            Viewer viewer, @RO Control control, @RW CellGrid cellGrid) {
 77   var timeOfLastUIUpdate = System.currentTimeMillis();
 78   var msPerFrame = 1000/5; // we want 5 fps
 79   var cellGridIsJavaDirty = true;
 80   var controlIsJavaDirty = true;
 81   var cellGridIsDeviceDirty = true;
 82   var controlIsDeviceDirty = true;
 83   while (true) {
 84     long now = System.currentTimeMillis();
 85     var msSinceLastUpdate = (now - timeOfLastUIUpdate);
 86     var updateNeeded =  (msSinceLastUpdate > msPerFrame);
 87 gc
 88     if (cellGridIsJavaDirty){
 89         cc.copyToDevice(cellGrid);
 90     }
 91     if (controlIsJavaDirty){
 92         cc.copyToDevice(control);
 93     }
 94     cc.dispatchKernel(cellGrid.width() * cellGrid.height(),
 95             kc -> Compute.life(kc, control, cellGrid)
 96     );
 97     controlIsDeviceDirty = false; // Compute.life marked control as @RO
 98     cellGridIsDeviceDirty = true; // Compute.life marjed cellGrid as @RW
 99 gc
100     // Here we are swapping from<->to on the control buffer
101     if (controlIsDeviceDirty){
102       cc.copyFromDevice(control);
103     }
104     int to = control.from();
105     control.from(control.to());
106     control.to(to);
107     controlIsJavaDirty = true;
108 gc
109     if (updateNeeded) {
110       if (cellGridIsDeviceDirty){
111         cc.copyFromDevice(cellGrid);
112       }
113       viewer.update(now, to, cellGrid);
114       timeOfLastUIUpdate = now;
115     }
116   }
117 }
```

Alternatively, what if the buffers themselves could hold the `JavaDirty` and `DeviceDirty` flags?

```java
124  @Reflect
125 public static void life(@RO KernelContext kc, @RO Control control, @RW CellGrid cellGrid) {
126   if (kc.x < kc.maxX) {
127     Compute.lifePerIdx(kc.x, control, cellGrid);
128   }
129 }
130 
131 @Reflect
132 static public void compute(final @RO ComputeContext cc,
133                            Viewer viewer, @RO Control control, @RW CellGrid cellGrid) {
134   control.flags =JavaDirty; // not ideal but necessary
135   cellGrid.flags = JavaDirty; // not ideal but necessary
136 gc
137   var timeOfLastUIUpdate = System.currentTimeMillis();
138   var msPerFrame = 1000/5; // we want 5 fps
139 
140   while (true) {
141     long now = System.currentTimeMillis();
142     var msSinceLastUpdate = (now - timeOfLastUIUpdate);
143     var updateNeeded =  (msSinceLastUpdate > msPerFrame);
144 gc
145     if ((cellGrid.flags & JavaDirty) == JavaDirty){
146         cc.copyToDevice(cellGrid);
147     }
148     if ((control.flags & JavaDirty) == JavaDirty){
149         cc.copyToDevice(control);
150     }
151     cc.dispatchKernel(cellGrid.width() * cellGrid.height(),
152             kc -> Compute.life(kc, control, cellGrid)
153     );
154     control.flags = JavaDirty; // Compute.life marked control as @RO
155     cellGrid.flags = DeviceDirty; // Compute.life marjed cellGrid as @RW
156 gc
157     // Here we are swapping from<->to on the control buffer
158     if ((control.flags & DeviceDirty)==DeviceDirty){
159       cc.copyFromDevice(control);
160     }
161     int to = control.from();
162     control.from(control.to());
163     control.to(to);
164     control.flags = JavaDirty;
165 gc
166     if (updateNeeded) {
167       if ((cellGrid.flags & DeviceDirty)==DeviceDirty){
168         cc.copyFromDevice(cellGrid);
169       }
170       viewer.update(now, to, cellGrid);
171       // update does not mutate cellGrid so cellGrid.flags = DeviceDirty
172       timeOfLastUIUpdate = now;
173     }
174   }
175 }
```

Essentially, we defer to the kernel dispatch to determine whether buffers need to be
copied to the device, and to mark buffers accordingly if the dispatch mutated them.

Pseudo-code for dispatch is essentially:
```java
184 void dispatchKernel(Kernel kernel, KernelContext kc, Arg ... args) {
185     for (int argn = 0; argn<args.length; argn++){
186       Arg arg = args[argn];
187       if (((arg.flags &JavaDirty)==JavaDirty) && kernel.readsFrom(arg)) {
188          enqueueCopyToDevice(arg);
189       }
190     }
191     enqueueKernel(kernel);
192     for (int argn = 0; argn<args.length; argn++){
193        Arg arg = args[argn];
194        if (kernel.writesTo(arg)) {
195           arg.flags = DeviceDirty;
196        }
197     }
198 }
```

We rely on Babylon to mark each buffer passed to the compute method as JavaDirty on entry.

```java
204 @Reflect
205 static public void compute(final @RO ComputeContext cc,
206                            Viewer viewer, @RO Control control, @RW CellGrid cellGrid) {
207     control.flags = JavaDirty;
208     cellGrid.flags = JavaDirty;
209     // yada yada
210 }
```

We also rely on Babylon to inject calls before each buffer access from Java in the compute code.

So the injected code would look like this.

```java
219 @Reflect
220 static public void compute(final @RO ComputeContext cc,
221                            Viewer viewer, @RO Control control, @RW CellGrid cellGrid) {
222   control.flags =JavaDirty; // injected by bablyon
223   cellGrid.flags = JavaDirty; // injected by babylon
224 gc
225   var timeOfLastUIUpdate = System.currentTimeMillis();
226   var msPerFrame = 1000/5; // we want 5 fps
227   while (true) {
228     long now = System.currentTimeMillis();
229     var msSinceLastUpdate = (now - timeOfLastUIUpdate);
230     var updateNeeded =  (msSinceLastUpdate > msPerFrame);
231 gc
232     // See the psuedo code above to see how dispatchKernel
233     // Only copies buffers that need copying, and marks
234     // buffers it has mutate as dirty
235     cc.dispatchKernel(cellGrid.width() * cellGrid.height(),
236             kc -> Compute.life(kc, control, cellGrid)
237     );
238 gc
239     // injected by babylon
240     if ((control.flags & DeviceDirty)==DeviceDirty){
241       cc.copyFromDevice(control);
242     }
243     // Here we are swapping from<->to on the control buffer
244     int to = control.from();
245 gc
246     control.from(control.to());
247     control.flags = JavaDirty; // injectedgc
248     control.to(to);
249     control.flags = JavaDirty; // injected, but can be avoided
250 gc
251     if (updateNeeded) {
252         // Injected by babylon because cellGrid escapes cpmputegc
253         // and because viewer.update marks cellGrid as @RO
254         if ((cellGrid.flags & DeviceDirty)==DeviceDirty){
255           cc.copyFromDevice(cellGrid);
256         }
257         viewer.update(now, to, cellGrid);
258         // We don't copy cellgrid back after escape becausegc
259         // viewer.update annotates cellGrdi access as RO
260          timeOfLastUIUpdate = now;
261     }
262   }
263 }
```