7

Basically, I need to write a hex dump utility using C++. It should look something like this:

Part of a Word document's hex dump using Visual Studio

(Part of a Word document's hex dump using Visual Studio)

I want to prompt the user for a file name, and then display the hexadecimal values as well as the translated ASCII characters. I'm still new at working with binary files, so if you could keep it simple, that would be much appreciated.

Wolf
  • 9,679
  • 7
  • 62
  • 108
user2430692
  • 83
  • 1
  • 1
  • 3
  • 1
    So do you want to know how to read in a file or how to display byte-values as hex-char-string? Or do you want someone to write you that program? – Stefan Falk May 29 '13 at 01:52
  • The second thing. I know how to read in a file, but I don't know how to use the binary file once I have it. – user2430692 May 29 '13 at 02:00

2 Answers

24

Because each row shows two different "views" of the same chunk of data, a practical way to achieve this is by reading a whole row into a buffer. With those bytes in your buffer, you can then format your line of output however you want.

For each row of output, you loop twice over your buffer: once to output the data as hex codes, and once to output the data as characters.

Here's a no-frills demonstration that just reads from standard input and outputs in roughly the same format as you showed. Try it out here:

#include <cctype>
#include <iostream>
#include <iomanip>

// Hex dump of standard input.
//
// Each output row shows the offset of its first byte, up to ROW_SIZE bytes as
// two-digit hex codes (split into groups of GROUP_SIZE), and the same bytes as
// characters, with non-printable bytes replaced by '.'.
int main()
{
    const int ROW_SIZE = 16;        // Number of bytes per row
    const int GROUP_SIZE = 8;       // Number of bytes for each hex group
    unsigned long address = 0;      // Offset of the first byte in the current row

    // Hex base and zero fill stay in effect for every number printed below.
    std::cout << std::hex << std::setfill('0');
    while (std::cin.good())
    {
        // Read up to ROW_SIZE bytes. A short read means the input ended (or
        // failed); the partial row is still printed and the loop then stops
        // because the stream is no longer good().
        char buf[ROW_SIZE];
        std::cin.read(buf, ROW_SIZE);
        const int nread = static_cast<int>(std::cin.gcount());
        if (nread == 0) break;

        // Show the address
        std::cout << std::setw(8) << address;

        // Show the hex codes
        for (int i = 0; i < ROW_SIZE; i++)
        {
            if (i % GROUP_SIZE == 0) std::cout << ' ';
            if (i < nread)
            {
                // Go through unsigned char first so bytes >= 0x80 are not
                // sign-extended when char is signed.
                std::cout << ' ' << std::setw(2)
                          << static_cast<unsigned int>(static_cast<unsigned char>(buf[i]));
            }
            else
            {
                // Pad missing bytes so the character column stays aligned.
                std::cout << "   ";
            }
        }

        // Show printable characters
        std::cout << "  ";
        for (int i = 0; i < nread; i++)
        {
            // std::isprint requires a value representable as unsigned char
            // (or EOF); passing a negative char is undefined behaviour.
            const unsigned char ch = static_cast<unsigned char>(buf[i]);
            std::cout << (std::isprint(ch) ? buf[i] : '.');
        }

        std::cout << "\n";
        address += ROW_SIZE;
    }
}

Input

Hello there, this is a test binary file.
What do you think?

.

Output

00000000  48 65 6c 6c 6f 20 74 68  65 72 65 2c 20 74 68 69  Hello there, thi
00000010  73 20 69 73 20 61 20 74  65 73 74 20 62 69 6e 61  s is a test bina
00000020  72 79 20 66 69 6c 65 2e  0a 57 68 61 74 20 64 6f  ry file..What do
00000030  20 79 6f 75 20 74 68 69  6e 6b 3f 0a 0a 2e         you think?...
paddy
  • 60,864
  • 6
  • 61
  • 103
0
#include <iostream>
#include <vector>
#include <iomanip>
#include <numeric>

// Splits a flat buffer into consecutive rows of at most `w` elements each.
//
// buffer: pointer to the first of `size` elements to arrange.
// size:   number of elements to read from `buffer`.
// w:      maximum number of elements per row (16 by default).
//
// Returns a container of rows in buffer order. The result always starts with
// one row, so an empty buffer yields a single empty row.
template<typename byte_type = std::uint8_t, typename container_type = std::vector<std::vector<byte_type>>>
container_type arrange_bytes(const byte_type* buffer, const std::size_t size, const std::size_t w = 16) {
  // A plain loop fills the result in place; folding with std::accumulate and a
  // by-value return copied the whole container once per element.
  container_type rows{{}};
  for (std::size_t i = 0; i < size; ++i) {
    // Open a new row once the current one has reached the row width.
    if (rows.back().size() >= w) {
      rows.push_back({});
    }
    rows.back().push_back(buffer[i]);
  }
  return rows;
}

// Produces the leading text of an output row: `offset` written in
// hexadecimal, zero-padded on the left to a width of eight characters.
std::string init_text_row(const int offset) {
  std::ostringstream field;
  field.setf(std::ios_base::hex, std::ios_base::basefield);
  field.fill('0');
  field.width(8);
  field << offset;
  return field.str();
}

// Formats one row of the dump: the row offset (as produced by init_text_row)
// followed by each element as a space-separated, two-digit hex code.
//
// bytes:  the elements that make up this row; each is treated as one 8-bit byte.
// offset: position of the row's first element, shown at the start of the line.
//
// Returns the formatted line without a trailing newline.
template<typename byte_type = std::uint8_t>
std::string format_row(const std::vector<byte_type>& bytes, const int offset) {
  // One stream for the whole row instead of a fresh stream and a string
  // concatenation per byte.
  std::ostringstream ost{};
  ost << init_text_row(offset) << std::hex << std::setfill('0');
  for (const auto& byte : bytes) {
    // setw applies to the next output only, so it is repeated per byte; without
    // it the fill character is never used and 0x0a would print as "a".
    // Converting via unsigned char keeps negative char values from being
    // sign-extended to ffffffxx.
    ost << ' ' << std::setw(2)
        << static_cast<unsigned>(static_cast<unsigned char>(byte));
  }
  return ost.str();
}

// Formats every row of `bytes` with format_row and joins them into one string,
// ending each row with '\n'.
//
// bytes: container of rows, as produced by arrange_bytes.
//
// Returns the complete dump text. The offset passed to format_row for each row
// is the total number of elements in the rows before it.
template<typename byte_type = std::uint8_t, typename container_type = std::vector<std::vector<byte_type>>>
std::string format_bytes(const container_type& bytes) {
  // Append in place: folding with std::accumulate and returning the
  // accumulator by value copied the whole output string once per row.
  std::string text;
  int offset = 0;
  for (const auto& row : bytes) {
    text += format_row(row, offset);
    text += '\n';
    // format_row takes an int offset, so narrow the row size explicitly.
    offset += static_cast<int>(row.size());
  }
  return text;
}

// Builds the hex dump text for `size` elements starting at `buffer`:
// the buffer is first arranged into rows, then the rows are formatted.
template<typename byte_type = std::uint8_t>
std::string hexdump(const byte_type* buffer, std::size_t size) {
  const auto rows = arrange_bytes(buffer, size);
  return format_bytes(rows);
}

#include <cstring>

int main() {
  const auto* message = "Hello, world! I am Simon the Sourcerer and I am a mighty pirate!";
  const auto len = std::strlen(message);
  std::cout << hexdump(message, len) << std::endl;
  return 0;
}
aerkenemesis
  • 672
  • 4
  • 16