I'm making an interpreter for my own programming language, as a hobby. My problem is that non-ASCII characters are displayed incorrectly in the Windows CMD. The source file I'm reading is saved as UTF-8. I presume it's UTF-8 without BOM. When my source file says, for example:
print "á"
On my Mac I get the expected output. The letter á
but on my PC I get á
. I thought it was a code-page problem, but the code page I'm using has the letter á
. Then I tried a different font. Lucida Grande works. But in the Python interpreter the letter á
is displayed in the default font.
I asked people on StackOverflow and someone said my program was itself probably compiled with the wrong encoding. So my question is, how can I specify / change the encoding that is used when C++ compiles my files. I'm using TDM-GCC for my compiler, I've also used MinGW and had the same problem.
Thanks for your help
---EDIT---
Below is my entire source file. You can compile it like this:
c++ myfile.cc -o myprogram -std=c++11
Whenever I run "myprogram.exe somefile.mylang", where somefile.mylang says:
print "Hello á"
I get this output on the windows CMD:
"Hello á"
I don't know how Python, Lua, Ruby etc ... can use the default console font and output the correct character.
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
/* Global Variables */
/* Reserved words of the language. Not all of these are actual "keywords" that
can be written in programs: some are reserved by the grammar, others are
reserved by the interpreter for internal use as token names.
The lexer refers to entries by index (0 = "print", 1 = "string", 2 = "sc",
7 = "num", 8 = "expr"), so the order of this list must not change. */
string keywords[9] = { "print", "string", "sc", "variable", "eq", "undefined", "nl", "num", "expr" };
/* The token stream. lex() appends to it and exec_program() hands it to parse().
A vector is used because the number of tokens is only known at runtime. */
vector<string> tokens;
/* Our "symbol table". It is a vector for the same reason: its size can only
be determined at runtime. Nothing in the code below reads or writes it. */
vector<string> variables;
/* Function Declarations */
/* Prototypes for the functions defined further down, gathered here so the
whole interface can be read at a glance. Each prototype repeats the signature
of its definition exactly; a prototype with a different parameter list would
declare a separate overload instead of forward-declaring the real function. */
void exec_program(string filename);
string load_program(string filename);
void lex(string prog);
string evalExpression(string expr);
void parse(vector<string> tokens);
void doPRINT(string toPrint);
/* Declared only: no definition of these two appears in the source below. */
void doASSIGN();
void goGETVAR();
/* Definitions */
/* Prefixes that identify the category of an error message. They are fixed at
the top of the program so that every diagnostic of one category starts with
the same text. Each prefix ends with a space so a message can be streamed
directly after it. */
#define IO_ERROR "[IO ERROR] "
#define SYNTAX_ERROR "[SYNTAX ERROR] "
#define ASSIGN_ERROR "[ASSIGN ERROR] "
/* We load the program into the interpreter by reading the file */
/* Reads the program file into memory.

   filename: path of the source file to interpret.

   Returns the contents of the file with every line terminated by "\n"; the
   last line gets a terminator even when the file does not end with one. A
   leading UTF-8 byte order mark is removed, because it is not program text
   and would otherwise be glued to the first token.

   If the file cannot be opened, an IO_ERROR message is written to stderr and
   the process exits with EXIT_FAILURE. */
string load_program(string filename) {
    ifstream rdfile(filename);
    /* A failed open does not mean the file is missing: permissions can
       prevent us from opening it as well. */
    if (!rdfile) {
        cerr << IO_ERROR << "Unable to open the file \"" << filename << "\"." << endl;
        exit(EXIT_FAILURE);
    }

    /* Collect the file line by line, normalising every line ending to "\n". */
    string filedata;
    for (string line; getline(rdfile, line); ) {
        filedata += line;
        filedata += '\n';
    }

    /* Drop the three-byte UTF-8 byte order mark (EF BB BF) if present. */
    if (filedata.compare(0, 3, "\xEF\xBB\xBF") == 0) {
        filedata.erase(0, 3);
    }

    /* The stream closes itself when it goes out of scope. */
    return filedata;
}
void lex(string prog) {
int i = 0;
string toks = "";
string n = "";
string expr = "";
bool state = 0;
bool exprStarted = 0;
bool isexpr = 0;
string s = "";
for(i = 0; i < prog.size(); ++i) {
toks += prog[i];
if (toks == " " and state == 0) {
toks = "";
if (n != "") {
//isexpr = 1;
//tokens.push_back(keywords[7] + ":" + n);
}
n = "";
} else if (toks == ";" and state == 0) {
toks = "";
if (expr != "" and isexpr == 1) {
tokens.push_back(keywords[8] + ":[" + expr + "]");
} else if (n != "" and isexpr == 0) {
tokens.push_back(keywords[7] + ":" + expr);
}
if (tokens.back() != "sc") {
tokens.push_back(keywords[2]);
}
n = "";
expr = "";
isexpr = 0;
} else if (toks == "\n" and state == 0) {
toks = "";
if (expr != "" and isexpr == 1) {
tokens.push_back(keywords[8] + ":[" + expr + "]");
} else if (n != "" and isexpr == 0) {
tokens.push_back(keywords[7] + ":" + expr);
}
if (tokens.back() != "sc") {
tokens.push_back(keywords[2]);
}
n = "";
expr = "";
isexpr = 0;
} else if (toks == "0" or toks == "1" or toks == "2" or toks == "3" or toks == "4" or toks == "5"
or toks == "6" or toks == "7" or toks == "8" or toks == "9") {
if (state == 0) {
n += toks;
expr += toks;
} else {
s += toks;
}
toks = "";
} else if (toks == "+" or toks == "-" or toks == "*" or toks == "/") {
expr += toks;
isexpr = 1;
toks = "";
n = "";
} else if (toks == keywords[0]) {
tokens.push_back(keywords[0]);
toks = "";
} else if (toks == "\"") {
if (state == 0) {
state = 1;
} else if (state == 1) {
state = 0;
tokens.push_back(keywords[1] + ":" + s + "\"");
s = "";
toks = "";
}
} else if (state == 1) {
s += toks;
toks = "";
}
}
int ii = 0;
while (ii < tokens.size()) {
//cout << tokens[ii] << endl;
ii++;
}
}
/* Evaluates an integer arithmetic expression and returns the result as text.

   expr: digits and the operators + - * /, optionally ended by ']' (the form
         the lexer stores inside an "expr:[...]" token). Evaluation stops at
         the first ']'; any other character is ignored.

   Returns the decimal representation of the result. An expression without
   any operand evaluates to "0"; a single operand evaluates to itself.

   Operators are applied strictly from left to right: there is no operator
   precedence, so "2+3*4" is 20. When several operators stand between two
   operands, the last one is used. An operator in front of the first operand
   is ignored (NOTE(review): this means a leading minus does not negate).

   Throws std::domain_error on division by zero. std::stoi may throw
   std::out_of_range for an operand that does not fit in an int. */
string evalExpression(string expr) {
    int total = 0;             /* value of the expression so far */
    bool have_total = false;   /* false until the first operand is folded in */
    char pending_op = '\0';    /* operator to apply to the next operand */
    string digits;             /* digits of the operand being read */

    /* Folds the operand collected in `digits` into `total`. */
    auto fold_operand = [&]() {
        if (digits.empty()) {
            return;
        }
        const int operand = stoi(digits);
        digits.clear();
        if (!have_total) {
            total = operand;
            have_total = true;
            return;
        }
        switch (pending_op) {
            case '+':
                total += operand;
                break;
            case '-':
                total -= operand;
                break;
            case '*':
                total *= operand;
                break;
            case '/':
                if (operand == 0) {
                    throw domain_error("division by zero in expression");
                }
                total /= operand;
                break;
            default:
                break;
        }
    };

    for (const char c : expr) {
        if (c >= '0' and c <= '9') {
            digits += c;
        } else if (c == '+' or c == '-' or c == '*' or c == '/') {
            fold_operand();
            pending_op = c;
        } else if (c == ']') {
            break;
        }
    }
    /* The last operand has no operator after it to trigger the fold. */
    fold_operand();

    return to_string(total);
}
/* Prints the value carried by one token, followed by a newline.

   toPrint: a token as produced by the lexer.
     "string:..." prints the text after the colon with its first and last
                  character (the quotes) removed;
     "num:..."    prints the text after the colon;
     "expr:[..."  prints the result of evalExpression on the text after "[";
     anything else is printed unchanged. */
void doPRINT(string toPrint) {
    string text = toPrint;
    if (toPrint.compare(0, 6, "string") == 0) {
        const string quoted = toPrint.substr(7);
        text = quoted.substr(1, quoted.size() - 2);
    } else if (toPrint.compare(0, 3, "num") == 0) {
        text = toPrint.substr(4);
    } else if (toPrint.compare(0, 4, "expr") == 0) {
        text = evalExpression(toPrint.substr(6));
    }
    cout << text << endl;
}
/* Executes the token stream.

   tokens: the tokens to execute, in program order.

   The only statement understood is
       print <string | num | expr> sc
   which is handed to doPRINT. A terminator on its own is an empty statement
   and is skipped. Anything else, including a statement cut short by the end
   of the token list, is reported as a SYNTAX_ERROR on stderr and ends the
   process with EXIT_FAILURE. */
void parse(vector<string> tokens) {
    vector<string>::size_type i = 0;
    while (i < tokens.size()) {
        if (tokens[i] == "sc") {
            ++i;
            continue;
        }

        /* A print statement occupies three tokens; make sure they all exist
           before looking at them. */
        const bool complete = tokens.size() - i >= 3;
        const bool is_print = complete and tokens[i] == "print" and tokens[i + 2] == "sc";
        const bool printable = is_print and
            (tokens[i + 1].compare(0, 6, "string") == 0 or
             tokens[i + 1].compare(0, 3, "num") == 0 or
             tokens[i + 1].compare(0, 4, "expr") == 0);

        if (!printable) {
            cerr << SYNTAX_ERROR << "Unexpected token \"" << tokens[i] << "\"." << endl;
            exit(EXIT_FAILURE);
        }

        doPRINT(tokens[i + 1]);
        i += 3;
    }
}
/* Main program exec function */
/* Runs one program: loads the file, tokenises its text into the global
   token list, then parses that list.

   filename: path of the program to run. */
void exec_program(string filename) {
    const string source = load_program(filename);
    lex(source);
    parse(tokens);
}
/* The main function, we have to start somewhere. */
/* Entry point.

   argv[1] is the path of the program to interpret. When it is missing, a
   usage line is written to stderr and the process returns EXIT_FAILURE;
   otherwise the program is executed and 0 is returned. */
int main(int argc, char* argv[]) {
    /* Test argc rather than argv[1]: with argc == 0 there is no argv[1]. */
    if (argc < 2) {
        cerr << "Usage: reedoo <filename> [args]" << endl;
        return EXIT_FAILURE;
    }
    exec_program(argv[1]);
    return 0;
}