Visual Servoing Platform version 3.6.0
Loading...
Searching...
No Matches
vpDetectorDNNOpenCV.cpp
1/****************************************************************************
2 *
3 * ViSP, open source Visual Servoing Platform software.
4 * Copyright (C) 2005 - 2023 by Inria. All rights reserved.
5 *
6 * This software is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 * See the file LICENSE.txt at the root directory of this source
11 * distribution for additional information about the GNU GPL.
12 *
13 * For using ViSP with software that can not be combined with the GNU
14 * GPL, please contact Inria about acquiring a ViSP Professional
15 * Edition License.
16 *
17 * See https://visp.inria.fr for more information.
18 *
19 * This software was developed at:
20 * Inria Rennes - Bretagne Atlantique
21 * Campus Universitaire de Beaulieu
22 * 35042 Rennes Cedex
23 * France
24 *
25 * If you have questions regarding the use of this file, please contact
26 * Inria at visp@inria.fr
27 *
28 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
29 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
30 *
31 * Description:
32 * DNN object detection using OpenCV DNN module.
33 *
34*****************************************************************************/
35#include <visp3/core/vpConfig.h>
36
37#if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_17)
38#include <visp3/core/vpImageConvert.h>
39#include <visp3/detection/vpDetectorDNNOpenCV.h>
40#include <visp3/core/vpIoTools.h>
41
42#include<algorithm>
49{
50 std::string list = "[";
51 for (unsigned int i = 0; i < vpDetectorDNNOpenCV::COUNT - 1; i++) {
53 }
55 return list;
56}
57
68{
69 std::string name;
70 switch (type) {
71 case YOLO_V3:
72 name = "yolov3";
73 break;
74 case YOLO_V4:
75 name = "yolov4";
76 break;
77 case YOLO_V5:
78 name = "yolov5";
79 break;
80 case YOLO_V7:
81 name = "yolov7";
82 break;
83 case YOLO_V8:
84 name = "yolov8";
85 break;
86 case FASTER_RCNN:
87 name = "faster-rcnn";
88 break;
89 case SSD_MOBILENET:
90 name = "ssd-mobilenet";
91 break;
92 case RESNET_10:
93 name = "resnet-10";
94 break;
95 case USER_SPECIFIED:
96 name = "user-specified";
97 break;
98 case COUNT:
99 name = "unknown";
100 break;
101 }
102 return name;
103}
104
114{
116 bool hasFoundMatch = false;
117 std::string name_lowercase = vpIoTools::toLowerCase(name);
118 for (int id = 0; id < COUNT && !hasFoundMatch; id++) {
120 if (dnnResultsParsingTypeToString(temp) == name_lowercase) {
121 res = temp;
122 hasFoundMatch = true;
123 }
124 }
125 return res;
126}
127
138std::vector<std::string> vpDetectorDNNOpenCV::parseClassNamesFile(const std::string &filename)
139{
140 return NetConfig::parseClassNamesFile(filename);
141}
142
144 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
145 m_net(), m_netConfig(), m_outNames(), m_dnnRes(),
146 m_parsingMethod(vpDetectorDNNOpenCV::postProcess_unimplemented)
147{
148 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
149}
150
/*!
 * Construct a new vpDetectorDNNOpenCV object from a network configuration and
 * a parsing-method type, and load the network if a model file is configured.
 *
 * \param config The network configuration (model files, thresholds, input size, ...).
 * \param typeParsingMethod The type of parsing method to apply to the raw DNN results.
 * \param parsingMethod User-defined parsing function; only used when
 * \b typeParsingMethod is USER_SPECIFIED (the header defaults it to
 * postProcess_unimplemented, which throws).
 */
vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(const NetConfig &config, const DNNResultsParsingType &typeParsingMethod, void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &, const NetConfig &))
  : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
  m_net(), m_netConfig(config), m_outNames(), m_dnnRes()
{
  // Propagate the size-filter ratio so m_applySizeFilterAfterNMS is set consistently
  setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
  setParsingMethod(typeParsingMethod, parsingMethod);
  // Read the network only if a model file has actually been provided
  if (!m_netConfig.m_modelFilename.empty()) {
    readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
  }
}
168
169#ifdef VISP_HAVE_NLOHMANN_JSON
/*!
 * Construct a new vpDetectorDNNOpenCV object from a JSON configuration file.
 * Available only when ViSP is built with nlohmann-json support.
 *
 * \param jsonPath Path towards the JSON configuration file.
 * \param parsingMethod User-defined parsing function; only used when the parsing
 * type stored in the JSON configuration is USER_SPECIFIED.
 * \throws vpException::ioError if the JSON file cannot be opened or parsed (see initFromJSON).
 */
vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(const std::string &jsonPath, void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &, const NetConfig &))
  : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
  m_net(), m_netConfig(), m_outNames(), m_dnnRes()
{
  // initFromJSON fills m_netConfig and loads the network
  initFromJSON(jsonPath);
  setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
  setParsingMethod(m_netConfig.m_parsingMethodType, parsingMethod);
}
184
/*!
 * Initialize the detector from a JSON configuration file: deserialize the
 * network configuration into *this and then load the network.
 *
 * \param jsonPath Path towards the JSON configuration file.
 * \throws vpException::ioError if the file cannot be opened or is not valid JSON.
 */
void vpDetectorDNNOpenCV::initFromJSON(const std::string &jsonPath)
{
  std::ifstream file(jsonPath);
  if (!file.good()) {
    std::stringstream ss;
    ss << "Problem opening file " << jsonPath << ". Make sure it exists and is readable" << std::endl;
    throw vpException(vpException::ioError, ss.str());
  }
  json j;
  try {
    j = json::parse(file);
  }
  catch (json::parse_error &e) {
    // Report the parse failure together with the byte offset nlohmann-json provides
    std::stringstream msg;
    msg << "Could not parse JSON file : \n";

    msg << e.what() << std::endl;
    msg << "Byte position of error: " << e.byte;
    throw vpException(vpException::ioError, msg.str());
  }
  *this = j; // Invokes the from_json() conversion to fill this detector from the JSON content
  file.close();
  // m_netConfig is now populated: load the network it describes
  readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
}
214
220void vpDetectorDNNOpenCV::saveConfigurationInJSON(const std::string &jsonPath) const
221{
222 std::ofstream file(jsonPath);
223 const json j = *this;
224 file << j.dump(4);
225 file.close();
226}
227#endif
228
233
243bool vpDetectorDNNOpenCV::detect(const vpImage<unsigned char> &I, std::vector<DetectedFeatures2D> &output)
244{
246
247 return detect(m_I_color, output);
248}
249
259bool vpDetectorDNNOpenCV::detect(const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
260{
262
263 return detect(m_I_color, output);
264}
265
275bool vpDetectorDNNOpenCV::detect(const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
276{
278
279 return detect(m_I_color, output);
280}
281
291bool vpDetectorDNNOpenCV::detect(const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output)
292{
294
295 return detect(m_img, output);
296}
297
307bool vpDetectorDNNOpenCV::detect(const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
308{
310
311 return detect(m_img, output);
312}
313
321bool vpDetectorDNNOpenCV::detect(const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
322{
324
325 return detect(m_img, output);
326}
327
335bool vpDetectorDNNOpenCV::detect(const cv::Mat &I, std::vector<DetectedFeatures2D> &output)
336{
337 m_img = I;
338 output.clear();
339
340 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
341 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
342 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB, false);
343
344 m_net.setInput(m_blob);
345 try {
346 m_net.forward(m_dnnRes, m_outNames);
347 }
348 catch (const cv::Exception &e) {
349 std::cerr << "Caught an exception trying to run inference:" << std::endl << "\t"
350 << e.what()
351 << "\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
352 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
353 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
354 m_net.forward(m_dnnRes, m_outNames);
355 }
356
357 DetectionCandidates proposals;
358 postProcess(proposals);
359 size_t nbClassNames = m_netConfig.m_classNames.size();
360 for (size_t i = 0; i < m_indices.size(); ++i) {
361 int idx = m_indices[i];
362 cv::Rect box = proposals.m_boxes[idx];
363 std::optional<std::string> classname_opt;
364 if (nbClassNames > 0) {
365 classname_opt = m_netConfig.m_classNames[proposals.m_classIds[idx]];
366 }
367 output.emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
368 , proposals.m_classIds[idx], proposals.m_confidences[idx]
369 , classname_opt
370 );
371 }
372
374 // removing false detections, based on the bbox sizes
375 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
376 }
377
378 return !output.empty();
379}
380
388bool vpDetectorDNNOpenCV::detect(const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
389{
390 m_img = I;
391 output.clear();
392
393 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
394 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
395 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB, false);
396
397 m_net.setInput(m_blob);
398 try {
399 m_net.forward(m_dnnRes, m_outNames);
400 }
401 catch (const cv::Exception &e) {
402 std::cerr << "Caught an exception trying to run inference:" << std::endl << "\t"
403 << e.what()
404 << "\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
405 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
406 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
407 m_net.forward(m_dnnRes, m_outNames);
408 }
409
410 DetectionCandidates proposals;
411 postProcess(proposals);
412 size_t nbClassNames = m_netConfig.m_classNames.size();
413 for (size_t i = 0; i < m_indices.size(); ++i) {
414 int idx = m_indices[i];
415 cv::Rect box = proposals.m_boxes[idx];
416 std::string classname;
417 if (nbClassNames > 0) {
418 classname = m_netConfig.m_classNames[proposals.m_classIds[idx]];
419 }
420 else {
421 classname = std::to_string(proposals.m_classIds[idx]);
422 }
423 std::optional<std::string> classname_opt = std::optional<std::string>(classname);
424 output[classname].emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
425 , proposals.m_classIds[idx], proposals.m_confidences[idx]
426 , classname_opt
427 );
428 }
429
431 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
432 }
433
434 return !output.empty();
435}
436
444bool vpDetectorDNNOpenCV::detect(const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
445{
446 std::map< std::string, std::vector<DetectedFeatures2D>> map_output;
447 bool returnStatus = detect(I, map_output);
448 for (auto key_val : map_output) {
449 output.push_back(key_val);
450 }
451 return returnStatus;
452}
453
454#if (VISP_HAVE_OPENCV_VERSION == 0x030403)
461{
462 static std::vector<cv::String> names;
463 if (names.empty()) {
464 std::vector<int> outLayers = m_net.getUnconnectedOutLayers();
465 std::vector<cv::String> layersNames = m_net.getLayerNames();
466 names.resize(outLayers.size());
467 for (size_t i = 0; i < outLayers.size(); ++i)
468 names[i] = layersNames[outLayers[i] - 1];
469 }
470 return names;
471}
472#endif
473
483{
484 switch (m_netConfig.m_parsingMethodType) {
485 case YOLO_V3:
486 case YOLO_V4:
488 break;
489 case YOLO_V5:
490 case YOLO_V7:
492 break;
493 case YOLO_V8:
495 break;
496 case FASTER_RCNN:
498 break;
499 case SSD_MOBILENET:
500#if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
501 void postProcess_SSD_MobileNet(DetectionCandidates & proposals, std::vector<cv::Mat> &dnnRes, const NetConfig & netConfig);
502#else
503 // NB: the two SSD-MobileNet DNNs that have been tested worked only
504 // using the ResNet-10 parsing method
506#endif
507 break;
508 case RESNET_10:
510 break;
511 case USER_SPECIFIED:
513 break;
514 default:
515 throw(vpException(vpException::badValue, "Type of DNN post-processing method not handled."));
516 }
517
518 m_indices.clear();
519 cv::dnn::NMSBoxes(proposals.m_boxes, proposals.m_confidences, m_netConfig.m_confThreshold, m_netConfig.m_nmsThreshold, m_indices);
520}
521
533std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
534vpDetectorDNNOpenCV::filterDetectionSingleClassInput(const std::vector<DetectedFeatures2D> &detected_features, const double minRatioOfAreaOk)
535{
536 double meanArea(0.);
537 double originalNumberOfObj = static_cast<double>(detected_features.size());
538 double meanFactor = 1. / originalNumberOfObj;
539
540 // Computing the average area of the class
541 for (DetectedFeatures2D feature : detected_features) {
542 meanArea += feature.m_bbox.getArea();
543 }
544 meanArea *= meanFactor;
545
546 // Keeping only the detections that respect the area criterion
547 std::vector<DetectedFeatures2D> filtered_features;
548 for (DetectedFeatures2D feature : detected_features) {
549 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
550 filtered_features.push_back(feature);
551 }
552 }
553
554 return filtered_features;
555}
556
567std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
568vpDetectorDNNOpenCV::filterDetectionMultiClassInput(const std::vector<DetectedFeatures2D> &detected_features, const double minRatioOfAreaOk)
569{
570#ifndef DOXYGEN_SHOULD_SKIP_THIS
575 class MeanAreaComputer
576 {
577 private:
578 std::map<int, std::pair<int, double>> m_map_id_pairOccurrencesAreas;
581 std::map<int, double> m_mapMeans;
588 double computeMeanArea(const int &class_id)
589 {
590 return m_map_id_pairOccurrencesAreas[class_id].second / (double)m_map_id_pairOccurrencesAreas[class_id].first;
591 }
592
593 public:
597 void computeMeans()
598 {
599 for (const auto &classID_pair : m_map_id_pairOccurrencesAreas) {
600 m_mapMeans[classID_pair.first] = computeMeanArea(classID_pair.first);
601 }
602 }
603
604 double getMean(const int &class_id)
605 {
606 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
607 throw(vpException(vpException::badValue, "[MeanAreaComputer::getMean] Asking for class_id \"" + std::to_string(class_id) + "\" that is not present in m_mapMeans. Did you call computeMeans ?"));
608 }
609 return m_mapMeans[class_id];
610 }
611
617 void operator()(const DetectedFeatures2D &feature)
618 {
619 int class_id = feature.getClassId();
620 double area = feature.getBoundingBox().getArea();
621 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
622 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(1, area);
623 }
624 else {
625 std::pair<int, double> prev_state = m_map_id_pairOccurrencesAreas[class_id];
626 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(prev_state.first + 1, prev_state.second + area);
627 }
628 }
629 };
630#endif // DOXYGEN_SHOULD_SKIP_THIS
631
632 // Computing the average area of each class
633 MeanAreaComputer meanComputer;
634 std::for_each(detected_features.begin(), detected_features.end(), meanComputer);
635 meanComputer.computeMeans();
636
637 // Keeping only the detections that respect the area criterion
638 std::vector<DetectedFeatures2D> filtered_features;
639 for (DetectedFeatures2D feature : detected_features) {
640 double meanArea = meanComputer.getMean(feature.getClassId());
641 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea
642 && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
643 filtered_features.push_back(feature);
644 }
645 }
646
647 return filtered_features;
648}
649
659std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
660vpDetectorDNNOpenCV::filterDetectionMultiClassInput(const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features, const double minRatioOfAreaOk)
661{
662 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> output;
663 for (auto keyval : detected_features) {
664 output[keyval.first] = filterDetectionSingleClassInput(detected_features.at(keyval.first), minRatioOfAreaOk); // removing false detections
665 }
666 return output;
667}
668
/*!
 * Post-process method for YoloV3 / YoloV4 raw results: converts the output
 * blobs into detection candidates (bbox + class id + combined confidence),
 * appended to \b proposals. NMS is applied afterwards in postProcess().
 *
 * \param proposals Input/output: the detection candidates kept so far.
 * \param dnnRes Raw results of the detection, one cv::Mat per output blob.
 * \param netConfig Network configuration (confidence threshold used here).
 */
void vpDetectorDNNOpenCV::postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
{
  size_t nbBatches = dnnRes.size();

  for (size_t i = 0; i < nbBatches; i++) {
    // Slightly modify from here: https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221
    // Counts the number of proposed detections and the number of data corresponding to 1 detection
    int num_proposal = dnnRes[i].size[0]; // Number of detections
    int nout = dnnRes[i].size[1]; // Number of data for each detection
    if (dnnRes[i].dims > 2) {
      // 3D blob (leading batch dimension): flatten to a 2D [num_proposal x nout] matrix
      num_proposal = dnnRes[i].size[1];
      nout = dnnRes[i].size[2];
      dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
    }

    int n = 0, row_ind = 0;
    float *pdata = (float *)dnnRes[i].data; // walks the blob row by row (nout floats per row)

    // Iterate on the detections to keep only the meaningful ones
    for (n = 0; n < num_proposal; n++) {
      float box_score = pdata[4]; // objectness score of the candidate box
      if (box_score > netConfig.m_confThreshold) {
        // Columns [5; nout) hold the per-class scores of this candidate
        cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
        cv::Point classIdPoint;
        double max_class_score;
        // Get the value and location of the maximum score
        cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);

        // Combine objectness and best class score into a single confidence
        max_class_score *= box_score;

        // The detection is kept only if the confidence is greater than the threshold
        if (max_class_score > netConfig.m_confThreshold) {
          const int class_idx = classIdPoint.x;
          // pdata[0..3] = center x, center y, width, height; scaled here by the
          // image dimensions (box coordinates assumed relative to the image — TODO confirm per model)
          float cx = pdata[0] * m_img.cols;
          float cy = pdata[1] * m_img.rows;
          float w = pdata[2] * m_img.cols;
          float h = pdata[3] * m_img.rows;

          // Convert center-based box to top-left corner based cv::Rect
          int left = int(cx - 0.5 * w);
          int top = int(cy - 0.5 * h);

          proposals.m_confidences.push_back((float)max_class_score);
          proposals.m_boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
          proposals.m_classIds.push_back(class_idx);
        }
      }
      row_ind++;
      pdata += nout;
    }
  }
}
733
/*!
 * Post-process method for YoloV5 / YoloV7 raw results: converts the output
 * blobs into detection candidates appended to \b proposals. Unlike the
 * YoloV3/V4 parser, box coordinates are expressed in network-input pixels and
 * are rescaled to image pixels with a width/height ratio.
 *
 * \param proposals Input/output: the detection candidates kept so far.
 * \param dnnRes Raw results of the detection, one cv::Mat per output blob.
 * \param netConfig Network configuration (input size and confidence threshold used here).
 */
void vpDetectorDNNOpenCV::postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
{
  // Compute the ratio between the original size of the image and the network size to translate network coordinates into
  // image coordinates
  float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (float)m_img.cols / netConfig.m_inputSize.width;
  size_t nbBatches = dnnRes.size();

  for (size_t i = 0; i < nbBatches; i++) {
    // Counts the number of proposed detections and the number of data corresponding to 1 detection
    int num_proposal = dnnRes[i].size[0]; // Number of detections
    int nout = dnnRes[i].size[1]; // Number of data for each detection
    if (dnnRes[i].dims > 2) {
      // 3D blob (leading batch dimension): flatten to a 2D [num_proposal x nout] matrix
      num_proposal = dnnRes[i].size[1];
      nout = dnnRes[i].size[2];
      dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
    }

    int n = 0, row_ind = 0;
    float *pdata = (float *)dnnRes[i].data; // walks the blob row by row (nout floats per row)

    // Iterate on the detections to keep only the meaningful ones
    for (n = 0; n < num_proposal; n++) {
      float box_score = pdata[4]; // objectness score of the candidate box

      if (box_score > netConfig.m_confThreshold) {
        // Columns [5; nout) hold the per-class scores of this candidate
        cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
        cv::Point classIdPoint;
        double max_class_score;
        // Get the value and location of the maximum score
        cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
        // Combine objectness and best class score into a single confidence
        max_class_score *= box_score;

        // The detection is kept only if the confidence is greater than the threshold
        if (max_class_score > netConfig.m_confThreshold) {
          const int class_idx = classIdPoint.x;
          // pdata[0..3] = center x, center y, width, height in network-input pixels;
          // rescale to image pixels
          float cx = pdata[0] * ratiow;
          float cy = pdata[1] * ratioh;
          float w = pdata[2] * ratiow;
          float h = pdata[3] * ratioh;

          // Convert center-based box to top-left corner based cv::Rect
          int left = int(cx - 0.5 * w);
          int top = int(cy - 0.5 * h);

          proposals.m_confidences.push_back((float)max_class_score);
          proposals.m_boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
          proposals.m_classIds.push_back(class_idx);
        }
      }
      row_ind++;
      pdata += nout;
    }
  }
}
798
/*!
 * Post-process method for YoloV8 raw results: converts the output blobs into
 * detection candidates appended to \b proposals. YoloV8 outputs have no
 * objectness column (class scores start at column 4) and are stored
 * transposed compared to YoloV5, hence the cv::transpose below.
 *
 * \param proposals Input/output: the detection candidates kept so far.
 * \param dnnRes Raw results of the detection, one cv::Mat per output blob.
 * \param netConfig Network configuration (input size and confidence threshold used here).
 */
void vpDetectorDNNOpenCV::postProcess_YoloV8(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
{
  // Code adapted from here: https://github.com/JustasBart/yolov8_CPP_Inference_OpenCV_ONNX/blob/minimalistic/inference.cpp
  // Compute the ratio between the original size of the image and the network size to translate network coordinates into
  // image coordinates
  float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (float)m_img.cols / netConfig.m_inputSize.width;
  size_t nbBatches = dnnRes.size();

  for (size_t i = 0; i < nbBatches; i++) {
    // Counts the number of proposed detections and the number of data corresponding to 1 detection
    // NB: contrary to YoloV5/V7, the axes are swapped (data-per-detection first)
    int num_proposal = dnnRes[i].size[1]; // Number of detections
    int nout = dnnRes[i].size[0]; // Number of data for each detection
    if (dnnRes[i].dims > 2) {
      // 3D blob (leading batch dimension): flatten to a 2D [nout x num_proposal] matrix
      num_proposal = dnnRes[i].size[2];
      nout = dnnRes[i].size[1];
      dnnRes[i] = dnnRes[i].reshape(0, nout);
    }
    cv::transpose(dnnRes[i], dnnRes[i]); // Organise data as YoloV5 i.e. [batchsize][1:num_proposals][1:4+nb_classes]

    int n = 0, row_ind = 0;
    float *pdata = (float *)dnnRes[i].data; // walks the blob row by row (nout floats per row)

    // Iterate on the detections to keep only the meaningful ones
    for (n = 0; n < num_proposal; n++) {
      // Columns [4; nout) hold the per-class scores (no objectness column in YoloV8)
      cv::Mat scores = dnnRes[i].row(row_ind).colRange(4, nout);
      cv::Point classIdPoint;
      double max_class_score;
      // Get the value and location of the maximum score
      cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);

      // The detection is kept only if the confidence is greater than the threshold
      if (max_class_score > netConfig.m_confThreshold) {
        const int class_idx = classIdPoint.x;
        // pdata[0..3] = center x, center y, width, height in network-input pixels;
        // rescale to image pixels
        float cx = pdata[0] * ratiow;
        float cy = pdata[1] * ratioh;
        float w = pdata[2] * ratiow;
        float h = pdata[3] * ratioh;

        // Convert center-based box to top-left corner based cv::Rect
        int left = int(cx - 0.5 * w);
        int top = int(cy - 0.5 * h);

        proposals.m_confidences.push_back((float)max_class_score);
        proposals.m_boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
        proposals.m_classIds.push_back(class_idx);
      }

      row_ind++;
      pdata += nout;
    }
  }
}
861
/*!
 * Post-process method for Faster-RCNN raw results: converts the output blobs
 * into detection candidates appended to \b proposals.
 *
 * \param proposals Input/output: the detection candidates kept so far.
 * \param dnnRes Raw results of the detection, one cv::Mat per output blob.
 * \param netConfig Network configuration (confidence threshold used here).
 */
void vpDetectorDNNOpenCV::postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
{
  // Direct copy from object_detection.cpp OpenCV sample
  // Faster-RCNN

  // Network produces output blob with a shape 1x1xNx7 where N is a number of
  // detections and an every detection is a vector of values
  // [batchId, classId, confidence, left, top, right, bottom]
  size_t nbBatches = dnnRes.size();
  for (size_t j = 0; j < nbBatches; j++) {
    float *data = (float *)dnnRes[j].data;
    // Each detection occupies 7 consecutive floats (see layout above)
    for (size_t i = 0; i < dnnRes[j].total(); i += 7) {
      float confidence = data[i + 2];
      if (confidence > netConfig.m_confThreshold) {
        // Coordinates are scaled here by the image size (relative coordinates assumed — TODO confirm per model)
        int left = (int)(data[i + 3] * m_img.cols);
        int top = (int)(data[i + 4] * m_img.rows);
        int right = (int)(data[i + 5] * m_img.cols);
        int bottom = (int)(data[i + 6] * m_img.rows);
        int classId = (int)(data[i + 1]);

        proposals.m_confidences.push_back((float)confidence);
        proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
        proposals.m_classIds.push_back(classId);
      }
    }
  }

}
901
902#if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
/*!
 * Post-process method for SSD-MobileNet raw results: converts the two output
 * blobs (class scores and boxes) into detection candidates appended to
 * \b proposals. Deprecated path: see the note in setParsingMethod() — the
 * tested SSD-MobileNet models actually worked with postProcess_ResNet_10.
 *
 * \param proposals Input/output: the detection candidates kept so far.
 * \param dnnRes Raw results of the detection, one cv::Mat per output blob.
 * \param netConfig Network configuration (confidence threshold used here).
 */
void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
{
  // Network produces 2 outputs blob:
  // - `scores` with dimensions 1xNxC
  // - 'boxes' with dimensions 1xNx4
  // where `N` is a number of detections and `C` is the number of classes (with `BACKGROUND` as classId = 0).

  // NB: assumes exactly two output layers named "scores" and "boxes" — TODO confirm for other models
  int scores_index = m_outNames[0] == "scores" ? 0 : 1; // scores output index.
  int boxes_index = m_outNames[0] == "boxes" ? 0 : 1; // boxes output index.

  int N = dnnRes[scores_index].size[1], C = dnnRes[scores_index].size[2];

  float *confidence = (float *)dnnRes[scores_index].data;
  float *bbox = (float *)dnnRes[boxes_index].data;

  // Loop over all guesses on the output of the network.
  for (int i = 0; i < N; i++) {
    uint32_t maxClass = 0;
    float maxScore = -1000.0f;

    for (int j = 1; j < C; j++) // ignore background (classId = 0).
    {
      const float score = confidence[i * C + j];

      if (score < netConfig.m_confThreshold)
        continue;

      if (score > maxScore) {
        maxScore = score;
        maxClass = j;
      }
    }

    if (maxScore > netConfig.m_confThreshold) {
      // Box coordinates are scaled here by the image size (relative coordinates assumed — TODO confirm)
      int left = (int)(bbox[4 * i] * m_img.cols);
      int top = (int)(bbox[4 * i + 1] * m_img.rows);
      int right = (int)(bbox[4 * i + 2] * m_img.cols);
      int bottom = (int)(bbox[4 * i + 3] * m_img.rows);
      int width = right - left + 1;
      int height = bottom - top + 1;

      int classId = maxClass;
      proposals.m_confidences.push_back(maxScore);
      proposals.m_boxes.push_back(cv::Rect(left, top, width, height));
      proposals.m_classIds.push_back(classId);
    }
  }
}
963#endif
964
/*!
 * Post-process method for ResNet-10 raw results: converts the single output
 * blob into detection candidates appended to \b proposals.
 *
 * \param proposals Input/output: the detection candidates kept so far.
 * \param dnnRes Raw results of the detection; exactly one cv::Mat is expected.
 * \param netConfig Network configuration (confidence threshold used here).
 */
void vpDetectorDNNOpenCV::postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
{
  // Direct copy from object_detection.cpp OpenCV sample

  // Network produces output blob with a shape 1x1xNx7 where N is a number of
  // detections and an every detection is a vector of values
  // [batchId, classId, confidence, left, top, right, bottom]
  CV_Assert(dnnRes.size() == 1);
  float *data = (float *)dnnRes[0].data;
  // Each detection occupies 7 consecutive floats (see layout above)
  for (size_t i = 0; i < dnnRes[0].total(); i += 7) {
    float confidence = data[i + 2];
    if (confidence > netConfig.m_confThreshold) {
      // Coordinates are scaled here by the image size (relative coordinates assumed — TODO confirm per model)
      int left = (int)(data[i + 3] * m_img.cols);
      int top = (int)(data[i + 4] * m_img.rows);
      int right = (int)(data[i + 5] * m_img.cols);
      int bottom = (int)(data[i + 6] * m_img.rows);
      // Shift the network class id by one (id 0 presumably the background class — TODO confirm)
      int classId = (int)(data[i + 1]) - 1;

      proposals.m_confidences.push_back((float)confidence);
      proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
      proposals.m_classIds.push_back(classId);
    }
  }
}
1000
1009void vpDetectorDNNOpenCV::postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes, const NetConfig &netConfig)
1010{
1011 (void)proposals;
1012 (void)dnnRes;
1013 (void)netConfig;
1014 throw(vpException(vpException::functionNotImplementedError, "vpDetectorDNNOpenCV::postProcess was called with a USER_SPECIFIED DNN but not post processing method was given."));
1015}
1016
1036void vpDetectorDNNOpenCV::readNet(const std::string &model, const std::string &config, const std::string &framework)
1037{
1038 m_netConfig.m_modelFilename = model;
1039 m_netConfig.m_modelConfigFilename = config;
1040 m_netConfig.m_framework = framework;
1041 m_net = cv::dnn::readNet(model, config, framework);
1042#if (VISP_HAVE_OPENCV_VERSION == 0x030403)
1044#else
1045 m_outNames = m_net.getUnconnectedOutLayersNames();
1046#endif
1047}
1048
1056{
1057 m_netConfig = config;
1058 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
1059 setParsingMethod(m_netConfig.m_parsingMethodType);
1060 if (!m_netConfig.m_modelFilename.empty()) {
1061 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
1062 }
1063}
1064
1070void vpDetectorDNNOpenCV::setConfidenceThreshold(const float &confThreshold) { m_netConfig.m_confThreshold = confThreshold; }
1071
1078void vpDetectorDNNOpenCV::setNMSThreshold(const float &nmsThreshold) { m_netConfig.m_nmsThreshold = nmsThreshold; }
1079
1088{
1089 m_netConfig.m_filterSizeRatio = sizeRatio;
1090 if (m_netConfig.m_filterSizeRatio > std::numeric_limits<double>::epsilon()) {
1092 }
1093 else {
1095 }
1096}
1097
1104void vpDetectorDNNOpenCV::setInputSize(const int &width, const int &height)
1105{
1106 m_netConfig.m_inputSize.width = width;
1107 m_netConfig.m_inputSize.height = height;
1108}
1109
1117void vpDetectorDNNOpenCV::setMean(const double &meanR, const double &meanG, const double &meanB) { m_netConfig.m_mean = cv::Scalar(meanR, meanG, meanB); }
1118
1125void vpDetectorDNNOpenCV::setPreferableBackend(const int &backendId) { m_net.setPreferableBackend(backendId); }
1126
1133void vpDetectorDNNOpenCV::setPreferableTarget(const int &targetId) { m_net.setPreferableTarget(targetId); }
1134
1138void vpDetectorDNNOpenCV::setScaleFactor(const double &scaleFactor)
1139{
1140 m_netConfig.m_scaleFactor = scaleFactor;
1141 if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
1142 std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] WARNING: scale factor should be 1/255. to normalize pixels value." << std::endl;
1143 }
1144}
1145
1151void vpDetectorDNNOpenCV::setSwapRB(const bool &swapRB) { m_netConfig.m_swapRB = swapRB; }
1152
/*!
 * Set the type of parsing method that must be used to interpret the raw DNN
 * results, and optionally a user-defined parsing function.
 *
 * \param typeParsingMethod The parsing type matching the DNN architecture.
 * \param parsingMethod Custom parsing function, only used when
 * \b typeParsingMethod is USER_SPECIFIED (the header defaults it to postProcess_unimplemented).
 */
void vpDetectorDNNOpenCV::setParsingMethod(const DNNResultsParsingType &typeParsingMethod, void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &, const NetConfig &))
{
  m_netConfig.m_parsingMethodType = typeParsingMethod;
  m_parsingMethod = parsingMethod;
  // YoloV7 / YoloV8 expect pixel values normalized in [0; 1]: force the scale factor to 1/255.
  if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
    m_netConfig.m_scaleFactor = 1 / 255.;
    std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] NB: scale factor changed to 1/255. to normalize pixels value." << std::endl;
  }

#if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
  // Warn the user: the tested SSD-MobileNet models actually required the ResNet-10 parser
  if (m_netConfig.m_parsingMethodType == SSD_MOBILENET) {
    std::cout << "[vpDetectorDNNOpenCV::setParsingMethod] WARNING: The chosen type of network is " << dnnResultsParsingTypeToString(m_netConfig.m_parsingMethodType) << " VISP_BUILD_DEPRECATED_FUNCTIONS is set to true." << std::endl;
    std::cout << "\tThe parsing method that worked with the networks quoted in the ViSP documentation was postProcess_ResNet_10 instead of postProcess_SSD_MobileNet." << std::endl;
    std::cout << "\tIf the SSD-MobileNet network does not seem to work, please try to recompile ViSP setting VISP_BUILD_DEPRECATED_FUNCTIONS as false." << std::endl << std::flush;
  }
#endif
}
1177
1178#elif !defined(VISP_BUILD_SHARED_LIBS)
1179// Work around to avoid warning: libvisp_core.a(vpDetectorDNNOpenCV.cpp.o) has no
1180// symbols
1181void dummy_vpDetectorDNN() { };
1182#endif
Structure containing the bounding box, expressed in pixels, confidence and class information about an...
Structure containing some information required for the configuration of a vpDetectorDNNOpenCV object.
json namespace shortcut
cv::Mat m_blob
Buffer for the blob in input net.
void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector< cv::Mat > &dnnRes, const NetConfig &netConfig)
void setScaleFactor(const double &scaleFactor)
void initFromJSON(const std::string &jsonPath)
void readNet(const std::string &model, const std::string &config="", const std::string &framework="")
static std::string getAvailableDnnResultsParsingTypes()
Get the list of the parsing methods / types of DNNs supported by the vpDetectorDNNOpenCV class.
static void postProcess_unimplemented(DetectionCandidates &proposals, std::vector< cv::Mat > &dnnRes, const NetConfig &netConfig)
void setDetectionFilterSizeRatio(const double &sizeRatio)
DNNResultsParsingType
Enumeration listing the types of DNN for which the vpDetectorDNNOpenCV furnishes the methods permitti...
static DNNResultsParsingType dnnResultsParsingTypeFromString(const std::string &name)
void postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector< cv::Mat > &dnnRes, const NetConfig &netConfig)
std::vector< cv::String > m_outNames
Names of layers with unconnected outputs.
void setMean(const double &meanR, const double &meanG, const double &meanB)
void setSwapRB(const bool &swapRB)
cv::Mat m_img
Buffer for the input image.
static std::vector< std::string > parseClassNamesFile(const std::string &filename)
Parse the designated file that contains the list of the classes the network can detect....
std::vector< int > m_indices
Indices for NMS.
void setParsingMethod(const DNNResultsParsingType &typeParsingMethod, void(*parsingMethod)(DetectionCandidates &, std::vector< cv::Mat > &, const NetConfig &)=postProcess_unimplemented)
NetConfig m_netConfig
Configuration of the DNN.
std::vector< cv::Mat > m_dnnRes
Contains all output blobs for each layer specified in m_outNames.
cv::dnn::Net m_net
DNN network.
bool m_applySizeFilterAfterNMS
If true, filter the detections removing the ones for which the bbox does not respect area(bbox) ∈ [mean_area * ratio; mean_area / ratio].
std::vector< cv::String > getOutputsNames()
Get the names of the output layers of the DNN.
void setNetConfig(const NetConfig &config)
void postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector< cv::Mat > &dnnRes, const NetConfig &netConfig)
virtual bool detect(const vpImage< unsigned char > &I, std::vector< DetectedFeatures2D > &output)
Object detection using OpenCV DNN module.
void postProcess_ResNet_10(DetectionCandidates &proposals, std::vector< cv::Mat > &dnnRes, const NetConfig &netConfig)
void setPreferableBackend(const int &backendId)
void setNMSThreshold(const float &nmsThreshold)
virtual ~vpDetectorDNNOpenCV()
Destroy the vpDetectorDNNOpenCV object.
void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector< cv::Mat > &dnnRes, const NetConfig &netConfig)
std::vector< DetectedFeatures2D > filterDetectionMultiClassInput(const std::vector< DetectedFeatures2D > &detected_features, const double minRatioOfAreaOk)
Return a new vector, ordered by vpDetectorDNNOpenCV::DetectedFeatures2D::m_cls , where the area of ea...
void setPreferableTarget(const int &targetId)
void setInputSize(const int &width, const int &height)
static std::string dnnResultsParsingTypeToString(const DNNResultsParsingType &type)
void postProcess_YoloV8(DetectionCandidates &proposals, std::vector< cv::Mat > &dnnRes, const NetConfig &netConfig)
void postProcess(DetectionCandidates &proposals)
void(* m_parsingMethod)(DetectionCandidates &, std::vector< cv::Mat > &, const NetConfig &)
Pointer towards the parsing method, used if m_parsingMethodType is equal to m_parsingMethodType::USER...
std::vector< DetectedFeatures2D > filterDetectionSingleClassInput(const std::vector< DetectedFeatures2D > &detected_features, const double minRatioOfAreaOk)
Return a new vector of detected features whose area is greater or equal to the average area x minRati...
void saveConfigurationInJSON(const std::string &jsonPath) const
Save the network configuration in a JSON file.
void setConfidenceThreshold(const float &confThreshold)
vpImage< vpRGBa > m_I_color
Buffer for gray to RGBa image conversion.
error that can be emitted by ViSP classes.
Definition vpException.h:59
@ ioError
I/O error.
Definition vpException.h:79
@ badValue
Used to indicate that a value is not in the allowed range.
Definition vpException.h:85
@ functionNotImplementedError
Function not implemented.
Definition vpException.h:78
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
Definition of the vpImage class member functions.
Definition vpImage.h:135
static std::string toLowerCase(const std::string &input)
Return a lower-case version of the string input . Numbers and special characters stay the same.
double getArea() const
Definition vpRect.h:88