-2

If you run this program, you'll see that it works fine, but the only problem I have is that my Manhattan Distance is occasionally wrong. My equation seems to be fine, and I don't have a debugger in VS Code, so I am having extreme difficulty diagnosing this problem. My Manhattan Distance calculation seems to be silently breaking whenever it wants to, and it's confusing me.

Source code:

#include <iostream> 
#include <fstream>
#include <cstdlib> //For std::exit()
#include <vector>
#include <cmath>
#include <iomanip>

struct dataPoint {
    int x1; 
    int y1;
    std::string name;
}; 

struct queryPoint {
    int x2; 
    int y2; 
    std::string name;
};

class File
{
public: 
    File(std::string dataFile, std::string queryFile)
        : m_dataFile{ dataFile }, m_queryFile{ queryFile }
    {
        m_dataPoints.reserve(50); 
        m_queryPoints.reserve(50);
    }
    //Constructor to initalize class object. 

    void errorDataCheck() {
        std::ifstream infile; 
        infile.open(m_dataFile); 
        if(!infile.is_open()) {
            std::cout << "Error: Unable to open the data file\n"; 
            infile.close(); //Close file before exiting to prevent corruption.
            std::exit(3);
        } //std::exit is okay to use here, since we don't risk corruption.
        infile.close(); 
    }
    void errorQueryCheck() {
        std::ifstream infile; 
        infile.open(m_queryFile); 
        if(!infile.is_open()) {
            std::cout << "Error: Unable to open the query file\n";
            infile.close(); 
            std::exit(3);
        }
        infile.close();
    }
    void errorCheck() {
        errorDataCheck();
        errorQueryCheck();
    }
    void const readFile() {
        std::ifstream dataInfile; 
        dataInfile.open(m_dataFile);

        bool data1{ false }; 
        bool data2{ false }; 
        bool query1{ false }; 
        bool query2{ false };
        while( !dataInfile.eof() ) {
            dataPoint dataPoint;
            dataInfile >> dataPoint.x1; 
            dataInfile >> dataPoint.y1; 
            dataInfile >> dataPoint.name;
            m_dataPoints.push_back(dataPoint);
        }
        dataInfile.close();

        std::ifstream queryInfile; 
        queryInfile.open(m_queryFile);
        while( !queryInfile.eof() ) {
            queryPoint point;
            queryInfile >> point.x2; 
            queryInfile >> point.y2; 
            queryInfile >> point.name;

            if(data1 || data2 || query1 || query2) {
                if(data1) std::cout << "Invalid point in data file\n"; 
                if(data2) std::cout << "Invalid point in data file\n"; 
                if(query1) std::cout << "Invalid point in query file\n"; 
                if(query2) std::cout << "Invalid point in query file\n";
                std::exit(3);
            }
            m_queryPoints.push_back(point);
        }
        queryInfile.close();
    }
    void sendFile() {
        std::ofstream outfile;
        for(auto& dataElement : m_dataPoints) {
            if(dataElement.name == m_dataPoints[m_dataPoints.size()].name) //Discard final element in vector because it's garbage. Tbh I don't even know how it got there. 
                break; 

            std::string fileName{"Distances_" + dataElement.name};
            outfile.open(fileName);

            outfile << "x1 y1 label\n"; 
            outfile << dataElement.x1 << ' ' << dataElement.y1 << ' ' << dataElement.name << '\n';
            outfile << "x2 y2 manDist eucDist label\n";

            for(auto& queryElement : m_queryPoints) {
                if(queryElement.name == m_queryPoints[m_queryPoints.size()].name)
                    break;
                outfile << queryElement.x2 << ' ' << queryElement.y2 << ' ';
                outfile << std::setprecision(4) << abs( (dataElement.x1 - queryElement.x2) + (dataElement.y1 - queryElement.y2) ) << ' ';
                outfile << std::setprecision(4) << sqrt( pow(dataElement.x1 - queryElement.x2, 2) + pow(dataElement.y1 - queryElement.y2, 2) ) << ' ';
                outfile << queryElement.name << '\n';
            }
            outfile.close();
        }
    }
    void print() {
        for(auto& element : m_dataPoints)
            std::cout << element.x1 << ' ' << element.y1 << ' ' << element.name << '\n';

        for(auto& element : m_queryPoints)
            std::cout << element.x2 << ' ' << element.y2 << ' ' << element.name << '\n'; 
    }

private: 
    std::string m_dataFile; 
    std::string m_queryFile;
    std::vector<dataPoint> m_dataPoints; 
    std::vector<queryPoint> m_queryPoints;
};

void errorArgcCheck(int argc)
{
    if(argc == 1) {
        std::cout << "Usage: ./a.out dataFile queryFile\n"; 
        std::exit(1);
    }
    else if(argc != 3) {
        std::cout << "Error: Incorrect amount of command line arguments\n"; 
        std::exit(1);
    }
} //Has to be done outside class before we can initialize object.

int main(int argc, char* argv[])
{
    errorArgcCheck(argc); //Check argv[1] and argv[2] for existence before initialize File object.

    File files{argv[1], argv[2]}; //Initialize class object with file names and argc
    files.errorCheck(); //Computationally expensive to open and close files, but doesn't matter for a 100-200 line program.

    files.readFile(); 
    files.sendFile();
    files.print();

    std::cout << '\n';
    return 0;
}

Data file:

-1200 300 Plane_001
-1100 500 Plane_002
-800 200 Plane_003
-400 600 Plane_004
-1100 -100 Plane_005
-800 -400 Plane_006
-500 -600 Plane_007
200 100 Plane_008
300 800 Plane_009
700 100 Plane_010
900 400 Plane_011
400 -600 Plane_012
800 -800 Plane_013
1000 -300 Plane_014

Query File:

-1300 100 SFO
-1200 700 SEA
-900 500 JAC
-600 100 SLC
-200 700 MSP
-200 300 JAC
-1200 -300 LAX
-700 -200 LAS
-300 -100 DEN
-400 -500 PHX
0 -300 DFW
100 500 ORD
300 500 DTW
700 700 EWR
900 700 JFK
900 100 CLT
300 -100 STL
500 -400 ATL
900 -600 MCO
1000 -800 MIA

Desired output for distance_plane_001 file:

x1 y1 label
-1200 300 Plane_001
x2 y2 manDist eucDist label
-1300 100 300 223.6 SFO
-1200 700 400 400 SEA
-900 500 500 360.6 JAC
-600 100 800 632.5 SLC
-200 700 1400 1077 MSP
-200 300 1000 1000 JAC
-1200 -300 600 600 LAX
-700 -200 1000 707.1 LAS
-300 -100 1300 984.9 DEN
-400 -500 1600 1131 PHX
0 -300 1800 1342 DFW
100 500 1500 1315 ORD
300 500 1700 1513 DTW
700 700 2300 1942 EWR
900 700 2500 2138 JFK
900 100 2300 2110 CLT
300 -100 1900 1552 STL
500 -400 2400 1838 ATL
900 -600 3000 2285 MCO
1000 -800 3300 2460 MIA

The program output for plane_001:

x1 y1 label
-1200 300 Plane_001
x2 y2 manDist eucDist label
-1300 100 300 223.6 SFO
-1200 700 400 400 SEA
-900 500 500 360.6 JAC
-600 100 400 632.5 SLC
-200 700 1400 1077 MSP
-200 300 1000 1000 JAC
-1200 -300 600 600 LAX
-700 -200 0 707.1 LAS
-300 -100 500 984.9 DEN
-400 -500 0 1131 PHX
0 -300 600 1342 DFW
100 500 1500 1315 ORD
300 500 1700 1513 DTW
700 700 2300 1942 EWR
900 700 2500 2138 JFK
900 100 1900 2110 CLT
300 -100 1100 1552 STL
500 -400 1000 1838 ATL
900 -600 1200 2285 MCO
1000 -800 1100 2460 MIA
Adrian Mole
  • 49,934
  • 160
  • 51
  • 83
Leo C
  • 9
  • 1
  • At first grance, your usage of `while( !dataInfile.eof() )` and `while( !queryInfile.eof() )` looks [wrong](https://stackoverflow.com/questions/5605125/why-is-iostreameof-inside-a-loop-condition-i-e-while-stream-eof-cons). You should check if readings are successful before using what are "read". – MikeCAT Oct 28 '20 at 23:50
  • 3
    That's a lot of code. Are you expecting *us* to use a debugger and find the problem for you? Try to narrow down the problem to a small piece of code, and then make a [mre] – cigien Oct 28 '20 at 23:50
  • I was asking if you could take a look at my manhattan distance equations and see if there's anything wrong with it. I don't think I can reproduce this problem, because there's no consistency to it. It seems to break sometimes, but work fine in others. So I was wondering if there is something else in the problem that is causing it to do this. I'm really sorry guys, I am on VScode and I don't know how to debug. I promise it will be the first thing I learn after today. – Leo C Oct 28 '20 at 23:53
  • @MikeCAT What do you recommend using in place of `while( !dataInfile.eof() )`? I saw another stack over flow post advising against this, but I wasn't sure what I should be doing instead. Thank you :) – Leo C Oct 28 '20 at 23:54
  • @LeoC [Follow the link](https://stackoverflow.com/questions/5605125/why-is-iostreameof-inside-a-loop-condition-i-e-while-stream-eof-cons) he provided in his comment. Unrelated, `dataPoint dataPoint;` - Um.. just because you can doesn't mean you should. – WhozCraig Oct 28 '20 at 23:55
  • But your manhattan distance equation is only a small part of the code. Have you tried testing that independently? – cigien Oct 28 '20 at 23:55
  • I have made sure the the program reads in the values correctly. I used the print function that I have in `main()` to verify that my program is inputting. And I am almost 99% certain that my mandist equation is correct, because I've verified it multiple times. I actually have no idea why it's breaking, and why it doesn't sometimes, so I don't think this is something I could reproduce. I promise I'm not being lazy, I've been debugging as much as I can myself with print statements. @cigien – Leo C Oct 29 '20 at 00:03
  • Thank you @WhozCraig I changed struct names to capital. My bad haha, forgot to do that. And I looked at the link but I can't figure out how I could translate what they say to do there into my program, just because I'm inputting multiple times from a single file. – Leo C Oct 29 '20 at 00:05
  • 4
    You've posted far too much code. Please spend a few minutes reading [mre], and then come back and reduce your code to the absolute minimum needed to demonstrate the issue. And to your commentary about the debugger, there is no such thing as *too early* to learn to use one, even if it's your first app. There's no better tool to have in a programmer's toolbox than a debugger, and no better skill to learn in order to be able to solve problems yourself. *I promise to learn one later* doesn't work. Do yourself a favor and start learning right now; your future self will be eternally grateful. – Ken White Oct 29 '20 at 00:51
  • I have edited your post to remove the 'second' question and made some effort to reduce the length of the code (without making any *significant* changes). Some further simplification could be done (e.g. `dataPoint` and `queryPoint` could be 'unified'), which may help to prevent closure of the question. – Adrian Mole Oct 29 '20 at 01:28

1 Answers1

3

...my manhattan distance calculation seems to be silently breaking whenever it wants to, and it's confusing me.

The confusion is in the formula you use for the Manhattan Distance. You are using an expression of the form abs(a + b) where you should be using abs(a) + abs(b). (The two will give very different answers if one term is negative and the other is positive.)

So, your code that prints out the Manhattan Distance, which you have as:

outfile << std::setprecision(4) << abs((dataElement.x1 - queryElement.x2) + (dataElement.y1 - queryElement.y2)) << ' ';

should, instead, be this:

outfile << std::setprecision(4) << (abs(dataElement.x1 - queryElement.x2) + abs(dataElement.y1 - queryElement.y2)) << ' ';

From this Wikipedia page:

...in the plane, the taxicab distance between (p1,p2) and (q1,q2) is |p1-q1| + |p2-q2|.

Adrian Mole
  • 49,934
  • 160
  • 51
  • 83