0

I have a weight Matrix for a Levenshtein Distance Algorithm which looks like this

    int[,] weights = new int[6, 6]
    {
        { 0, 1, 2, 1, 1, 2 },
        { 1, 0, 1, 2, 1, 2 },
        { 2, 1, 0, 3, 2, 3 },
        { 1, 2, 3, 0, 1, 2 },
        { 1, 1, 2, 1, 0, 1 },
        { 2, 2, 3, 2, 1, 0 }
    };

Now I want to rewrite the Matrix to work with a qwertz Keyboard. I guess it has to be 4 rows deep an 11 columns wide. I don't get the pattern

I was looking for an Algorithm to determine how similar two words are. I thought a weighted Levenshtein Distance Algorithm with Typewriter distance would make sense, but a 6x6 Matrix obviously throws a Index out of Range Exception. The Index is determined by the horizontal and vertical position on the keyboard.

This is the weight-part of my Algorithm:

    public static int GetWeightedDistance(char a, char b)
    {
    int[,] weights = new int[6, 6]
    {
        { 0, 1, 2, 1, 1, 2 },
        { 1, 0, 1, 2, 1, 2 },
        { 2, 1, 0, 3, 2, 3 },
        { 1, 2, 3, 0, 1, 2 },
        { 1, 1, 2, 1, 0, 1 },
        { 2, 2, 3, 2, 1, 0 }
    };


        int rowA = GetRowIndex(a);
        int colA = GetColIndex(a);
        int rowB = GetRowIndex(b);
        int colB = GetColIndex(b);

        return weights[rowA, colA] + weights[rowB, colB];
    }

    public static int GetRowIndex(char c)
    {
        string row1 = "1234567890ß";
        string row2 = "qwertzuiopü";
        string row3 = "asdfghjklöä";
        string row4 = "yxcvbnm";

        if (row1.Contains(c))
        {
            return 0;
        }
        else if (row2.Contains(c))
        {
            return 1;
        }
        else if (row3.Contains(c))
        {
            return 2;
        }
        else if (row4.Contains(c))
        {
            return 3;
        }
        else
        {
            return -1;
        }
    }

    public static int GetColIndex(char c)
    {
        string row1 = "1234567890ß";
        string row2 = "qwertzuiopü";
        string row3 = "asdfghjklöä";
        string row4 = "yxcvbnm";

        if (row1.Contains(c))
        {
            return row1.IndexOf(c);
        }
        else if (row2.Contains(c))
        {
            return row2.IndexOf(c);
        }
        else if (row3.Contains(c))
        {
            return row3.IndexOf(c);
        }
        else if (row4.Contains(c))
        {
            return row4.IndexOf(c);
        }
        else
        {
            return -1;
        }


    }

1 Answers1

0

You should define what "Typewriter distance" is. If it' a L1 distance (absolute difference of rows plus absolute difference of columns) then you can put it as follow:

private static readonly IReadOnlyList<string> s_Rows = new [] {
  "1234567890ß",
  "qwertzuiopü",
  "asdfghjklöä",
  "yxcvbnm",
};

public static (int row, int column) Find(char letter) {
  letter = char.ToLower(letter);

  for (int r = 0; r < s_Rows.Count; ++r) {
    int c = s_Rows[r].IndexOf(letter);

    if (c >= 0)
      return (r, c);
  }

  return (-1, -1);
}

public static int GetWeightedDistance(char left, char right) {
  var a = Find(left);
  var b = Find(right);

  return Math.Abs(a.row - b.row) + Math.Abs(a.column - b.column);
}

Having the distance you can put it into Levenstein (which is long, so let me skip it here, but provide in the fiddle below)

Fiddle

Dmitry Bychenko
  • 180,369
  • 20
  • 160
  • 215