commit e8ce9f226cb041db70c5b92500db50f737a1c13b
Author: Dustin Thomas
Date:   Fri Oct 18 18:18:36 2024 -0500

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bbe947b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+### CMake template
+CMakeLists.txt.user
+CMakeCache.txt
+CMakeFiles
+CMakeScripts
+Testing
+Makefile
+cmake_install.cmake
+install_manifest.txt
+compile_commands.json
+CTestTestfile.cmake
+_deps
+CMakeUserPresets.json
+
+.idea
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..c389abf
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.29)
+project(pumpkin_tracker)
+
+set(CMAKE_CXX_STANDARD 20)
+
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+FIND_LIBRARY(DarkHelp darkhelp)
+FIND_LIBRARY(Darknet darknet)
+
+ADD_EXECUTABLE(pumpkin_tracker main.cpp)
+TARGET_LINK_LIBRARIES(pumpkin_tracker ${OpenCV_LIBS})
+TARGET_LINK_LIBRARIES(pumpkin_tracker /usr/local/lib/libdarkhelp.so)
+TARGET_LINK_LIBRARIES(pumpkin_tracker /usr/local/lib/libdarknet.so)
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..40a10ee
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+# pumpkin-tracker
+This uses a camera and object detection to estimate the velocity of a flying pumpkin.
+
+## Compiling
+Make sure to compile [Darknet](https://github.com/hank-ai/darknet) (the actual neural network),
+[DarkHelp](https://github.com/stephanecharette/DarkHelp) (a C++ API wrapper for Darknet), and install
+[OpenCV](https://opencv.com) before compiling this project.
\ No newline at end of file
diff --git a/cfg/coco.names b/cfg/coco.names
new file mode 100644
index 0000000..ca76c80
--- /dev/null
+++ b/cfg/coco.names
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
diff --git a/cfg/yolov7-tiny.cfg b/cfg/yolov7-tiny.cfg
new file mode 100644
index 0000000..4628b43
--- /dev/null
+++ b/cfg/yolov7-tiny.cfg
@@ -0,0 +1,706 @@
+[net]
+# Testing
+#batch=1
+#subdivisions=1
+# Training
+batch=1
+subdivisions=1
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.00261
+burn_in=1000
+
+max_batches = 2000200
+policy=steps
+steps=1600000,1800000
+scales=.1,.1
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=2
+pad=1
+activation=leaky
+
+# 1
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 8 +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 16 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 24 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 32 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +################################## + +### SPPCSP ### +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -10,-1 + +# 44 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky +### End SPPCSP ### + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 24 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-3 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 56 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 16 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 
+stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-3 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 68 +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=128 +activation=leaky + +[route] +layers = -1,56 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 77 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = -1,44 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 86 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +############################# + +# ============ End of Neck ============ # + +# ============ Head ============ # + + +# P3 +[route] +layers = 68 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +#activation=linear +activation=logistic + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# P4 +[route] +layers = 77 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +#activation=linear +activation=logistic + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# P5 +[route] +layers = 86 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +#activation=linear +activation=logistic + +[yolo] +mask = 6,7,8 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 
116,90, 156,198, 373,326
+classes=80
+num=9
+jitter=.1
+scale_x_y = 2.0
+objectness_smooth=1
+ignore_thresh = .7
+truth_thresh = 1
+#random=1
+resize=1.5
+iou_thresh=0.2
+iou_normalizer=0.05
+cls_normalizer=0.5
+obj_normalizer=1.0
+iou_loss=ciou
+nms_kind=diounms
+beta_nms=0.6
+new_coords=1
+max_delta=2
diff --git a/cfg/yolov7-tiny.weights b/cfg/yolov7-tiny.weights
new file mode 100644
index 0000000..c29f934
Binary files /dev/null and b/cfg/yolov7-tiny.weights differ
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..b2cd927
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,62 @@
+/* Computer vision demo. Performs object tracking on pumpkins to determine velocity, launch angle, and predicted range.
+ *
+ */
+#include <DarkHelp.hpp>
+#include <opencv2/opencv.hpp>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <unistd.h>
+
+// Default configuration values. Using YOLOv7 tiny model. Make sure to download the weights.
+#define DARKNET_CFG "cfg/yolov7-tiny.cfg"
+#define DARKNET_WEIGHTS "cfg/yolov7-tiny.weights"
+#define DARKNET_DATA "cfg/coco.names"
+#define CAMERA_ID 0
+#define SHOW_GUI true
+
+using namespace std;
+using namespace cv;
+using namespace DarkHelp;
+
+int main() {
+    // Configure the neural network
+    NN nn(DARKNET_CFG, DARKNET_WEIGHTS, DARKNET_DATA);
+
+    // Setup the capture stream from the webcam
+    VideoCapture cap(CAMERA_ID);
+    if (not cap.isOpened())
+    {
+        throw std::runtime_error("failed to open the webcam");
+    }
+    cap.set(CAP_PROP_FRAME_WIDTH, 640.0);
+    cap.set(CAP_PROP_FRAME_HEIGHT, 480.0);
+    cap.set(CAP_PROP_FPS, 30.0);
+
+    while (cap.isOpened()) {
+        Mat frame;
+
+        cap >> frame;
+        if (frame.empty()) break;
+
+        const PredictionResults results = nn.predict(frame);
+
+        // print results to console
+        cout << "RESULTS" << endl;
+        for (int i = 0; i < results.size(); i++) {
+            const PredictionResult result = results[i];
+            cout << result.name << " | (" << result.original_point.x << ", " << result.original_point.y << ")" << endl;
+        }
+        cout << endl;
+
+        // Sleep between frames; if the UI is enabled, show the latest (annotated) frame
+        if constexpr (SHOW_GUI) {
+            frame = nn.annotate();
+            imshow("pumpkin-tracker", frame);
+            if (const auto key = waitKey(15); key == 27) break;
+        }
+        else {
+            usleep(15000);
+        }
+    }
+}
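
Note: the header comment in main.cpp promises velocity, launch angle, and predicted range, but this initial commit only prints detection coordinates. Below is a minimal sketch of how successive detection centres could be turned into a speed and launch-angle estimate. The PumpkinTrack struct, the fixed 30 FPS frame interval, and the pixels_per_meter scale are illustrative assumptions and are not part of the committed code.

// Sketch only: derive speed and launch angle from two consecutive detection centres.
// Assumes a fixed frame interval (the camera above is configured for 30 FPS) and a
// hypothetical pixels_per_meter calibration that would have to be measured on site.
#include <cmath>
#include <optional>
#include <utility>
#include <opencv2/core.hpp>

struct PumpkinTrack {
    std::optional<cv::Point2f> last_center;   // detection centre from the previous frame, in pixels
    double frame_interval_s = 1.0 / 30.0;     // assumed spacing between frames
    double pixels_per_meter = 100.0;          // assumed calibration scale

    // Returns {speed in m/s, launch angle in degrees above horizontal} once two
    // observations exist; std::nullopt on the first call.
    std::optional<std::pair<double, double>> update(const cv::Point2f &center) {
        std::optional<std::pair<double, double>> estimate;
        if (last_center) {
            const double dx = (center.x - last_center->x) / pixels_per_meter;
            const double dy = (last_center->y - center.y) / pixels_per_meter;  // image y axis points down
            const double speed = std::hypot(dx, dy) / frame_interval_s;
            constexpr double kPi = 3.14159265358979323846;
            const double angle_deg = std::atan2(dy, dx) * 180.0 / kPi;
            estimate = std::make_pair(speed, angle_deg);
        }
        last_center = center;
        return estimate;
    }
};

Inside the while loop in main.cpp, the centre of each detection whose name matches the target class could be fed (in pixel coordinates) into PumpkinTrack::update and the returned estimate printed next to the coordinates; with a speed v and angle theta in hand, the ideal projectile range v^2 * sin(2*theta) / g follows directly.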