Quantcast
Channel: OpenCV Q&A Forum - Latest question feed
Viewing all articles
Browse latest Browse all 19555

Setting up MLP for image recognition

$
0
0
Hi! I am new to the OpenCV world and to neural networks, but I have some coding experience in C++/Java. ---------- I created my first ANN MLP and taught it XOR: #include #include #include #include #include #include #include using namespace cv; using namespace ml; using namespace std; void print(Mat& mat, int prec) { for (int i = 0; i(i, j); if (j != mat.size().width - 1) cout << ", "; else cout << "]" << endl; } } } int main() { const int hiddenLayerSize = 4; float inputTrainingDataArray[4][2] = { { 0.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 1.0, 1.0 } }; Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray); float outputTrainingDataArray[4][1] = { { 0.0 }, { 1.0 }, { 1.0 }, { 0.0 } }; Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray); Ptr mlp = ANN_MLP::create(); Mat layersSize = Mat(3, 1, CV_16U); layersSize.row(0) = Scalar(inputTrainingData.cols); layersSize.row(1) = Scalar(hiddenLayerSize); layersSize.row(2) = Scalar(outputTrainingData.cols); mlp->setLayerSizes(layersSize); mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM); TermCriteria termCrit = TermCriteria( TermCriteria::Type::COUNT + TermCriteria::Type::EPS, 100000000, 0.000000000000000001 ); mlp->setTermCriteria(termCrit); mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP); Ptr trainingData = TrainData::create( inputTrainingData, SampleTypes::ROW_SAMPLE, outputTrainingData ); mlp->train(trainingData /*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS + ANN_MLP::TrainFlags::NO_INPUT_SCALE + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/ ); for (int i = 0; i < inputTrainingData.rows; i++) { Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]); Mat result; mlp->predict(sample, result); cout << sample << " -> ";// << result << endl; print(result, 0); cout << endl; } return 0; } It works very well for this simple problem; I also taught this network the 1-10 to binary conversion. 
---------- But I need to use an MLP for simple image classification - road signs. I wrote the code for loading the training images and preparing the matrices for learning, but I'm not able to train the network - it "learns" in one second even with 1 000 000 iterations! And it produces garbage results, the same for all inputs! #include #include #include #include #include #include #include #include #include #include #include using namespace cv; using namespace ml; using namespace std; using namespace chrono; const int WIDTH_SIZE = 50; const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2; const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE; void print(Mat& mat, int prec) { for (int i = 0; i(i, j); if (j != mat.size().width - 1) cout << ", "; else cout << " ]" << endl; } } } bool loadImage(string imagePath, Mat& outputImage) { // load image in grayscale Mat image = imread(imagePath, IMREAD_GRAYSCALE); Mat temp; // check for invalid input if (image.empty()) { cout << "Could not open or find the image" << std::endl; return false; } // resize the image Size size(WIDTH_SIZE, HEIGHT_SIZE); resize(image, temp, size, 0, 0, CV_INTER_AREA); // convert to float 1-channel temp.convertTo(outputImage, CV_32FC1, 1.0/255.0); return true; } vector getFilesNamesInFolder(string folder) { vector names; char search_path[200]; sprintf(search_path, "%s/*.*", folder.c_str()); WIN32_FIND_DATA fd; HANDLE hFind = ::FindFirstFile(search_path, &fd); if (hFind != INVALID_HANDLE_VALUE) { do { // read all (real) files in current folder // , delete '!' read other 2 default folder . and .. 
if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { names.push_back(fd.cFileName); } } while (::FindNextFile(hFind, &fd)); ::FindClose(hFind); } return names; } class Sign { public: enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' }; Mat image; Category category; int number; Sign(Mat& image, string name) :image(image) { category = static_cast(name.at(0)); number = stoi(name.substr(2, name.length())); }; }; vector loadSignsFromFolder(String folderName) { vector roadSigns; for (string fileName : getFilesNamesInFolder(folderName)) { Mat image; loadImage(folderName + fileName, image); roadSigns.emplace_back(image, fileName.substr(0, (fileName.length() - 4))); //cut .png } return roadSigns; } void showSignsInWindows(vector roadSigns) { for (Sign sign : roadSigns) { String windowName = "Sign " + to_string(sign.number); namedWindow(windowName, WINDOW_AUTOSIZE); imshow(windowName, sign.image); } waitKey(0); } Mat getInputDataFromSignsVector(vector roadSigns) { Mat roadSignsImageData; for (Sign sign : roadSigns) { Mat signImageDataInOneRow = sign.image.reshape(0, 1); roadSignsImageData.push_back(signImageDataInOneRow); } return roadSignsImageData; } Mat getOutputDataFromSignsVector(vector roadSigns) { int signsCount = (int) roadSigns.size(); int signsVectorSize = signsCount + 1; Mat roadSignsData(0, signsVectorSize, CV_32FC1); int i = 1; for (Sign sign : roadSigns) { vector outputTraningVector(signsVectorSize); fill(outputTraningVector.begin(), outputTraningVector.end(), -1.0); outputTraningVector[i++] = 1.0; Mat tempMatrix(outputTraningVector, false); roadSignsData.push_back(tempMatrix.reshape(0, 1)); } return roadSignsData; } int main(int argc, char* argv[]) { if (argc != 2) { cout << " Usage: display_image ImageToLoadAndDisplay" << endl; return -1; } const int hiddenLayerSize = 500; vector roadSigns = loadSignsFromFolder("../../../Znaki/A/"); Mat inputTrainingData = getInputDataFromSignsVector(roadSigns); Mat outputTrainingData = 
getOutputDataFromSignsVector(roadSigns); Ptr mlp = ANN_MLP::create(); Mat layersSize = Mat(3, 1, CV_16U); layersSize.row(0) = Scalar(inputTrainingData.cols); layersSize.row(1) = Scalar(hiddenLayerSize); layersSize.row(2) = Scalar(outputTrainingData.cols); mlp->setLayerSizes(layersSize); mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0); mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05); //mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP); TermCriteria termCrit = TermCriteria( TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS, ,100 //(int) INT_MAX ,0.000001 ); mlp->setTermCriteria(termCrit); Ptr trainingData = TrainData::create( inputTrainingData, SampleTypes::ROW_SAMPLE, outputTrainingData ); auto start = system_clock::now(); mlp->train(trainingData //, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS , ANN_MLP::TrainFlags::NO_INPUT_SCALE + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE ); auto duration = duration_cast (system_clock::now() - start); cout << "Training time: " << duration.count() << "ms" << endl; for (int i = 0; i < inputTrainingData.rows; i++) { Mat result; //mlp->predict(inputTrainingData.row(i), result); mlp->predict(roadSigns[i].image.reshape(0, 1), result); //cout << result << endl; print(result, 2); } //showSignsInWindows(roadSigns); return 0; } What is wrong with this code, such that XOR works but images do not? I checked the input and output matrices and they're correct... Could somebody also explain when I should use ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE, and what parameter values I should use for setActivationFunction and setTrainMethod? ---------- Thanks!

Viewing all articles
Browse latest Browse all 19555

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>