1 /*
2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package violajones.attic;
26
27
28 import hat.Accelerator;
29 import hat.NDRange;
30 import hat.backend.java.WorkStealer;
31 import hat.buffer.F32Array2D;
32 import org.xml.sax.SAXException;
33 import violajones.Viewer;
34 import violajones.XMLHaarCascadeModel;
35 import hat.buffer.S08x3RGBImage;
36 import violajones.ifaces.Cascade;
37 import violajones.ifaces.ResultTable;
38 import violajones.ifaces.ScaleTable;
39
40 import javax.imageio.ImageIO;
41 import javax.xml.parsers.ParserConfigurationException;
42 import java.awt.image.BufferedImage;
43 import java.io.IOException;
44 import java.lang.invoke.MethodHandles;
45 import java.util.Objects;
46
47 public class ViolaJones {
48
49 public static void main(String[] _args) throws IOException, ParserConfigurationException, SAXException {
50 // Accelerator accelerator = new Accelerator(MethodHandles.lookup(), Backend::isJava);
51 Accelerator accelerator = new Accelerator(MethodHandles.lookup());
52
53
54 BufferedImage nasa = ImageIO.read(Objects.requireNonNull(ViolaJones.class.getResourceAsStream("/images/Nasa1996.jpg")));
55 XMLHaarCascadeModel xmlCascade = XMLHaarCascadeModel.load(ViolaJonesRaw.class.getResourceAsStream("/cascades/haarcascade_frontalface_default.xml"));
56 // Cascade cascade = Cascade.create(accelerator, xmlHaarCascade);
57 var cascade = Cascade.createFrom(accelerator,xmlCascade);
58 var width = nasa.getWidth();
59 var height = nasa.getHeight();
60 S08x3RGBImage rgbImage = S08x3RGBImage.create(accelerator,width,height);
61
62
63 // harViz.showIntegrals();
64
65 var scaleTable = ScaleTable.createFrom(accelerator,new ScaleTable.Constraints(cascade,width,height));
66
67
68 var greyImageF32 = F32Array2D.create(accelerator, width, height);
69 var integralImageF32 = F32Array2D.create(accelerator, width, height);
70 var integralSqImageF32 = F32Array2D.create(accelerator, width, height);
71 var resultTable = ResultTable.create(accelerator, 1000);
72 CoreJavaViolaJones.rgbToGreyScale(rgbImage, greyImageF32);
73 CoreJavaViolaJones.createIntegralImage(greyImageF32, integralImageF32, integralSqImageF32);
74
75 Viewer harViz = new Viewer(accelerator, nasa, rgbImage, cascade, integralImageF32, integralSqImageF32);
76
77 harViz.showIntegrals();
78
79
80 // long floatToShortKernel = accelerator.bridge.getKernel(progHandle, "floatToShortKernel");
81 // long integralColKernel = accelerator.bridge.getKernel(progHandle, "integralColKernel");
82 // long integralRowKernel = accelerator.bridge.getKernel(progHandle, "integralRowKernel");
83 // long singlePassCascadeKernel = accelerator.bridge.getKernel(progHandle, "singlePassCascadeKernel");
84
85
86 // openCLBridge.dump(cascadeMemorySegment, ((OpenCLStructLayout.Tools)cascadeInterface).getLayout());
87 // openCLBridge.dump(scaleTable,scaleTableLayout.layout);
88
89
90 // openCLBridge.dump(treeTable, treeTableLayout.layout);
91 // openCLBridge.dump(stageTable, stageTableLayout.layout);
92 // FloatBuffer integralImage = FloatBuffer.create(accelerator, rgbImageLayout.getElementCount());
93 // FloatBuffer integralSqImage = FloatBuffer.create(accelerator, rgbImageLayout.getElementCount());
94
95 // MemorySegment integralImageMemorySegment = arena.allocateArray(JAVA_FLOAT, rgbImageLayout.getElementCount());
96 // MemorySegment integralSqImageMemorySegment = arena.allocateArray(JAVA_FLOAT, rgbImageLayout.getElementCount());
97
98 // openCLBridge.dump(cascadeMemorySegment, cascadeLayout.layout);
99
100
101 int groupSize = 256;
102 int rangeModGroupSize = scaleTable.rangeModGroupSize(groupSize);
103 //(scaleTable.multiScaleAccumulativeRange() / groupSize) + ((scaleTable.multiScaleAccumulativeRange() % groupSize) == 0 ? 0 : 1)) * groupSize;
104
105 /*
106 OpenCLCodeBuilder c99 = (OpenCLCodeBuilder) accelerator.getCodeBuilder();
107 c99
108 .typedef(FeatureTable.Feature.RectTable.Rect.class)
109 .typedef(FeatureTable.Feature.LinkOrValue.class)
110 .typedef(FeatureTable.Feature.class)
111 .typedef(ScaleTable.Scale.class)
112 .typedef(StageTable.Stage.class)
113 .typedef(TreeTable.Tree.class)
114 .typedef(ResultTable.Result.class)
115 .typedef(Cascade.class)
116 .append("""
117
118 #define SCOPE_START ndrange_t ndrange;ndrange.id.x=get_global_id(0);ndrange.id.maxX=get_global_size(0);
119 #ifdef NDRANGE_CUDA
120 #define atomicInc(p) atomicAdd(p, 1)
121 #else
122 #define atomicInc(p) atom_add(p, 1)
123 #endif
124
125 inline int b2i(i4 v){
126 return v < 0 ? 256 + v : v;
127 }
128 inline int rgbToGrey(i4 r, i4 g, i4 b){
129 return (29 * b2i(r) + 60 * b2i(g) + 11 * b2i(b)) / 100;
130 }
131 inline void integralColById(i4 id, __global cascade_t *cascadeContext, __global b1 *rgb, __global f4 *integral, __global f4 *integralSq){
132 integralSq[id] = integral[id] = 0.0f;
133 for (s32_t y = 1; y < cascadeContext->imageHeight; y++) {
134 s32_t monoOffset = (y * cascadeContext->imageWidth) + id;
135 f32_t lastSq = integralSq[monoOffset - cascadeContext->imageWidth];
136 f32_t last = integral[monoOffset - cascadeContext->imageWidth];
137 char r = rgb[monoOffset * 3 + 0];
138 char g = rgb[monoOffset * 3 + 1];
139 char b = rgb[monoOffset * 3 + 2];
140 f32_t greyValue = rgbToGrey(r, g, b);
141 f32_t greyValueSq = greyValue * greyValue;
142 integralSq[monoOffset] = greyValueSq + lastSq;
143 integral[monoOffset] = greyValue + last;
144 }
145 }
146 __kernel void integralColKernel(__global cascade_t *cascadeContext, __global b1 *rgb, __global f4 *integral, __global f4 *integralSq){
147 SCOPE_START
148 integralColById(ndrange.id.x, cascadeContext, rgb, integral, integralSq);
149 }
150 inline void integralRowById(i4 id, __global cascade_t *cascadeContext, __global f4 *integral, __global f4 *integralSq){
151 for (s32_t x = 1; x < cascadeContext->imageWidth; x++) {
152 s32_t monoOffset = (id * cascadeContext->imageWidth) + x;
153 integral[monoOffset] = integral[monoOffset] + integral[monoOffset - 1];
154 }
155 for (s32_t x = 1; x < cascadeContext->imageWidth; x++) {
156 s32_t monoOffset = (id * cascadeContext->imageWidth) + x;
157 integralSq[monoOffset] = integralSq[monoOffset] + integralSq[monoOffset - 1];
158 }
159 }
160 __kernel void integralRowKernel(__global cascade_t *cascadeContext, __global f4 *integral, __global f4 *integralSq){
161 SCOPE_START
162 integralRowById(ndrange.id.x, cascadeContext, integral, integralSq);
163 }
164 __kernel void floatToShortKernel(__global cascade_t *cascadeContext, __global f4 *fromIntegral, __global s2 *toIntegral, __global f4 *fromIntegralSq, __global s2 *toIntegralSq){
165 SCOPE_START
166 toIntegral[ndrange.id.x] = (s16_t)(fromIntegral[ndrange.id.x]*(65536/fromIntegral[ndrange.id.maxX-1]));
167 toIntegralSq[ndrange.id.x] = (s16_t)(fromIntegralSq[ndrange.id.x]*(65536/fromIntegralSq[ndrange.id.maxX-1]));
168 }
169
170
171 /
172 A +-------+ B
173 | | D-B-C+A
174 C +-------+ D
175 /
176 inline float gradient(__global f4 *image, i4 imageWidth, i4 x, i4 y, i4 width, i4 height){
177 f32_t A = image[(y * imageWidth) + x];
178 f32_t D = image[((y + height) * imageWidth) + x + width];
179 f32_t C = image[((y + height) * imageWidth) + x];
180 f32_t B = image[(y * imageWidth) + x + width];
181 return D-B-C+A;
182 }
183 inline boolean isAFaceStage(__global cascade_t *cascadeContext, __global scale_t *scale, i4 x, i4 y, f4 vnorm, __global f4 *integral, __global stage_t *stagePtr, __global tree_t *treeTable, __global feature_t *featureTable){
184 f32_t sumOfThisStage = 0;
185 for (s32_t treeId = stagePtr->firstTreeId; treeId < (stagePtr->firstTreeId+stagePtr->treeCount); treeId++) {
186 // featureId from 0 to how many roots there are.... we use -1 for none! hence s32_t
187 const __global tree_t *treePtr = &treeTable[treeId];
188 s32_t featureId = treePtr->firstFeatureId;
189 while (featureId >= 0) {
190 const __global feature_t *featurePtr = &featureTable[featureId];
191 f32_t featureGradientSum = .0f;
192 for (s32_t i = 0; i < 3; i++) {
193 const __global rect_t *rect = &featurePtr->rects[i];
194 featureGradientSum += featurePtr->rects[i].weight *
195 gradient(integral, cascadeContext->imageWidth,
196 x + (int) (rect->x * scale->scaleValue),
197 y + (int) (rect->y * scale->scaleValue),
198 (int) (rect->width * scale->scaleValue),
199 (int) (rect->height * scale->scaleValue)
200 ) ;
201 }
202 if ((featureGradientSum * scale->invArea) < (featurePtr->threshold * vnorm)) {//left
203 if (featurePtr->left.hasValue) {
204 sumOfThisStage += featurePtr->left.anon.value;
205 featureId = -1;
206 } else {
207 featureId = treePtr->firstFeatureId+featurePtr->left.anon.featureId;
208 }
209 }else{ // right
210 if (featurePtr->right.hasValue) {
211 sumOfThisStage += featurePtr->right.anon.value;
212 featureId = -1;
213 } else {
214 featureId = treePtr->firstFeatureId+featurePtr->right.anon.featureId;
215 }
216 }
217 }
218 }
219 return sumOfThisStage > stagePtr->threshold;
220 }
221 __kernel void singlePassCascadeKernel(__global cascade_t *cascadeContext, __global f4 *integral, __global f4 *integralSq, __global scale_t *scaleTable, __global result_t *resultTable, __global stage_t *stageTable, __global tree_t *treeTable, __global feature_t *featureTable){
222 SCOPE_START
223
224 size_t gid = ndrange.id.x;
225 if (gid < cascadeContext->multiScaleAccumulativeRange){
226 s32_t i;
227 // This is where we select the scale to use.
228 for (i=0; gid >=scaleTable[i].accumGridSizeMax; i++)
229 ;
230
231 __global scale_t *scale = &scaleTable[i];
232
233 s16_t x = (s16_t)(((gid-scale->accumGridSizeMin) % scale->gridWidth) * scale->scaledXInc);
234 s16_t y = (s16_t)(((gid-scale->accumGridSizeMin) / scale->gridWidth) * scale->scaledYInc);
235
236 f32_t integralGradient = gradient(integral, cascadeContext->imageWidth, x, y, scale->scaledFeatureWidth, scale->scaledFeatureHeight) * scale->invArea;
237 f32_t integralSqGradient = gradient(integralSq, cascadeContext->imageWidth, x, y, scale->scaledFeatureWidth, scale->scaledFeatureHeight) * scale->invArea;
238
239 f32_t vnorm = integralSqGradient - integralGradient * integralGradient;
240 vnorm = (vnorm > 1) ? sqrt(vnorm) : 1;
241
242 bool stillLooksLikeAFace = true;
243
244 for (s32_t stageId = 0; stillLooksLikeAFace && (stageId < cascadeContext->stageCount); stageId++) {
245 __global stage_t *stagePtr = &stageTable[stageId];
246 stillLooksLikeAFace =isAFaceStage(cascadeContext, scale, x, y, vnorm, integral, stagePtr, treeTable, featureTable);
247 }
248 if (stillLooksLikeAFace) {
249 s32_t index = atomicInc(&cascadeContext->atomicResultTableCount);
250 if (index<cascadeContext->maxResults){
251 resultTable[index].x = x;
252 resultTable[index].y = y;
253 resultTable[index].width = scale->scaledFeatureWidth;
254 resultTable[index].height = scale->scaledFeatureHeight;
255 }
256 }
257 }
258 }
259 """
260 );
261
262
263
264 long progHandle = accelerator.bridge.compileProgram(c99.toString());
265 if (accelerator.bridge.programOK(progHandle)) {
266 long floatToShortKernel = accelerator.bridge.getKernel(progHandle, "floatToShortKernel");
267 long integralColKernel = accelerator.bridge.getKernel(progHandle, "integralColKernel");
268 long integralRowKernel = accelerator.bridge.getKernel(progHandle, "integralRowKernel");
269 long singlePassCascadeKernel = accelerator.bridge.getKernel(progHandle, "singlePassCascadeKernel");
270
271
272 // openCLBridge.dump(cascadeMemorySegment, ((OpenCLStructLayout.Tools)cascadeInterface).getLayout());
273 // openCLBridge.dump(scaleTable,scaleTableLayout.layout);
274
275
276 // openCLBridge.dump(treeTable, treeTableLayout.layout);
277 // openCLBridge.dump(stageTable, stageTableLayout.layout);
278 MemorySegment integralImageMemorySegment = arena.allocateArray(JAVA_FLOAT, rgbImageLayout.getElementCount());
279 MemorySegment integralSqImageMemorySegment = arena.allocateArray(JAVA_FLOAT, rgbImageLayout.getElementCount());
280
281 // openCLBridge.dump(cascadeMemorySegment, cascadeLayout.layout);
282
283 int groupSize = 256;
284 int range = ((multiScaleTable.multiScaleAccumulativeRange / groupSize) + ((multiScaleTable.multiScaleAccumulativeRange % groupSize) == 0 ? 0 : 1)) * groupSize;
285
286 ImageLayout integralImage = new ImageLayout(new BufferedImage(rgbImageLayout.getWidth(), rgbImageLayout.getHeight(), BufferedImage.TYPE_USHORT_GRAY));
287 ImageLayout integralSqImage = new ImageLayout(new BufferedImage(rgbImageLayout.getWidth(), rgbImageLayout.getHeight(), BufferedImage.TYPE_USHORT_GRAY));
288 ImageLayout.Instance integralImageInstance = integralImage.instance(arena);
289 ImageLayout.Instance integralSqImageInstance = integralSqImage.instance(arena);
290
291 ImageLayout.Instance rgbImageLayoutInstance = rgbImageLayout.instance(arena);
292
293 HaarVisualizer harViz = new HaarVisualizer(rgbImageLayoutInstance, haarCascade, integralImageInstance, integralSqImageInstance);
294
295 accelerator.bridge.ndrange(integralColKernel, rgbImageLayout.getWidth(),
296 DeviceArgs.of()
297 .s08_1dRO(cascade.segment())
298 .s08_1dRO(rgbImageLayoutInstance.memorySegment)
299 .f32_1dWO(integralImageMemorySegment)
300 .f32_1dWO(integralSqImageMemorySegment)
301 );
302
303 accelerator.bridge.ndrange(integralRowKernel, rgbImageLayout.getHeight(),
304 DeviceArgs.of()
305 .s08_1dRO(cascade.segment())
306 .f32_1dRW(integralImageMemorySegment)
307 .f32_1dRW(integralSqImageMemorySegment)
308 );
309
310 // This allows us to visualize the integral or integralSq image.
311 // We map the integral + integralSq floats to a grey image
312 accelerator.bridge.ndrange(floatToShortKernel,
313 rgbImageLayout.getElementCount(),
314 DeviceArgs.of()
315 .s08_1dRO(cascade.segment())
316 .f32_1dRO(integralImageMemorySegment)
317 .u16_1dWO(integralImageInstance.memorySegment)
318 .f32_1dRO(integralSqImageMemorySegment)
319 .u16_1dWO(integralSqImageInstance.memorySegment)
320 );
321 harViz.showIntegrals();
322
323
324 String mode = System.getProperty("mode", "bridge");
325 System.out.println("Mode =" + mode);
326
327 long start = System.currentTimeMillis();
328 */
329
330 if (true) {
331 long start = System.currentTimeMillis();
332 WorkStealer.usingAllProcessors()
333 .forEachInRange(accelerator.range(NDRange.of(NDRange.Global1D.of(scaleTable.multiScaleAccumulativeRange()))), kc -> {
334 ReferenceJavaViolaJones.findFeatures(
335 kc.gix,
336 xmlCascade,//cascade,//haarCascade, //or cascade
337 integralImageF32,
338 integralSqImageF32,
339 scaleTable,
340 resultTable);
341 });
342 long ms = (System.currentTimeMillis() - start);
343 System.out.println("done " + ms + "ms");
344 harViz.showResults(resultTable, null, null, ms);
345 }
346 // } else if (mode.equals("javaSegments")) {
347
348 /* WorkStealer.of(1)
349 .forEachInRange(multiScaleTable.multiScaleAccumulativeRange, gid -> {
350 ReferenceJavaViolaJones.findFeatures(
351 gid,
352 cascade,
353 harViz,
354 null,
355 integralImageInstance.memorySegment,
356 integralSqImageInstance.memorySegment,
357 scaleTable,
358 resultTable,
359 stageTable,
360 treeTable,
361 featureTable);
362 }); */
363
364
365 /*
366 } else {
367 accelerator.bridge.ndrange(singlePassCascadeKernel, range,
368 DeviceArgs.of()
369 .s08_1dRW(cascade.segment()) // RW only for atomicResult counter
370 .f32_1dRO(integralImageMemorySegment)
371 .f32_1dRO(integralSqImageMemorySegment)
372 .s08_1dRO(scaleTable.segment())
373 .s08_1dRW(resultTable.segment())
374 .s08_1dRO(stageTable.segment())
375 .s08_1dRO(treeTable.segment())
376 .s08_1dRO(featureTable.segment())
377
378 );
379 }
380 System.out.println("ms = " + (System.currentTimeMillis() - start));
381 // openCLBridge.dump(cascadeMemorySegment, cascadeLayout.layout);
382 // harViz.showResults(cascadeInstance.getAtomicResultTableCount(),cascadeInstance.getMaxResults(), resultTable, resultTableLayout);
383 harViz.showResults(cascade.getAtomicResultTableCount(), cascade.getMaxResults(), resultTable);
384
385 accelerator.bridge.releaseKernel(integralColKernel);
386 accelerator.bridge.releaseKernel(integralRowKernel);
387 accelerator.bridge.releaseKernel(floatToShortKernel);
388 accelerator.bridge.releaseKernel(singlePassCascadeKernel);
389
390 accelerator.bridge.releaseProgram(progHandle);
391 }
392 accelerator.bridge.release();
393 */
394
395 }
396 }