3

I'm working on visual C# to calculate the word error rate I have one textbox for the refrence which is the correct sentance and one for the hypothesis which is wrong one.

in order to calculate WER I need to calculate : substitution : the word that has been changed which was my first question Insert : the words that had been inserted in the sentence Deleted: the words that had been deleted from the original sentence

For EX:

refrence: This is a NPL program. hypothesis: it is an NPL cool.

it: substitution is: correct an :substitution NPL:correct program: deleted cool: inserted

I tried the algorithm that dasblinkenlight proposed ( thank you so much by the way ) I worked but there is a runtime error I couldn't figure it out, in line

int x=  Compute(buffer[j], buffer_ref[i]);

Index was outside the bounds of the array.

and here is my code :

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace WindowsFormsApplication1
{
    public partial class Form1 : Form
    {
        string [] hyp = new string[20];
        string [] refrence = new string[20];
        string [] Anser= new string[20];
        string[] buffer = new string[20];
        string[] buffer_ref = new string[20];
        int count = 0; // number of words 
        string ref2=" " ;
        string hyp2 = " ";
        string Anser2 = " ";
        string buffer2 = " ";

        int corecct_c=0;
        int corecct_d = 0;
        int corecct_i = 0;

        //====================================================================

        public Form1()
        {
            InitializeComponent();
            for (int i = 0; i <= 19; ++i)
            {
                hyp[i] = null;
                buffer[i] = null;
            }
        }

        private void textBox2_TextChanged(object sender, EventArgs e)
        {
            refrence = this.textBox2.Text.Split(' ');
            buffer_ref = this.textBox2.Text.Split(' ');


        }

        private void textBox1_TextChanged(object sender, EventArgs e)
        {
            hyp = this.textBox1.Text.Split(' ');
            buffer = this.textBox1.Text.Split(' ');
            //hyp = this.textBox1.Text;
            // fname1.Add(this.textBox1.Text);


        }

        public void correct(string[] R)
        {

            for (int i = 0; (i <= 19) && (R[i] != "."); ++i)
            {

                if (buffer[i] == refrence[i])
                { buffer[i] = "0";
                buffer_ref[i] = "0";
                    corecct_c = corecct_c + 1;
                    Anser[i] = "C";
                }
            }

        }

        // function that compute 2 strings
        public static int Compute(string s, string t)
        {
            int n = s.Length;
            int m = t.Length;
            int[,] d = new int[n + 1, m + 1];

            // Step 1
            if (n == 0)
            {
                return m;
            }

            if (m == 0)
            {
                return n;
            }

            // Step 2
            for (int i = 0; i <= n; d[i, 0] = i++)
            {
            }

            for (int j = 0; j <= m; d[0, j] = j++)
            {
            }

            // Step 3
            for (int i = 1; i <= n; i++)
            {
                //Step 4
                for (int j = 1; j <= m; j++)
                {
                    // Step 5
                    int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;

                    // Step 6
                    d[i, j] = Math.Min(
                        Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
                        d[i - 1, j - 1] + cost);
                }
            }
            // Step 7
            return d[n, m];
        }


        public void sub(){

            for (int j = 0;j<=19;j++) 
         {
             if (buffer[j].IndexOf("0") != -1)
             {


                 for (int i = 0; i <= 19; i++)
                 {

                     if (buffer_ref[j].IndexOf("0") != -1)
                     {

                       int x=  Compute(buffer[j], buffer_ref[i]);
                       if (x > 3)
                       {
                           buffer[j] = "0";
                           Anser[j] = "S";

                       }


                     }//end if

                 } 

             }//end if 


        }//end for 

        }// end fun

        private void button1_Click(object sender, EventArgs e)
        {


            correct(refrence);
            sub();
            for (int i = 0; (i <= 19) && (refrence[i] != "."); ++i)
            {
                //loop intialize 
                ref2 = ref2 + " " + refrence[i];
                hyp2 = hyp2 + " " + hyp[i];
                Anser2 = Anser2 + " " + Anser[i];
                buffer2 = buffer2 + " " + buffer[i];
                count++;
                            }

            listBox1.Items.Add(" Refrence :" + ref2);
            listBox1.Items.Add(" HYp :" + hyp2);
            listBox1.Items.Add(" Anser:" + Anser2);
            listBox1.Items.Add(" buffer:" + buffer2);
            listBox1.Items.Add(count);

        } 




        private void Form1_Load(object sender, EventArgs e)
        {

        }

        private void label1_Click(object sender, EventArgs e)
        {

        }



        private void button2_Click(object sender, EventArgs e)
        {

        }

        private void label2_Click(object sender, EventArgs e)
        {

        }

        private void listBox1_SelectedIndexChanged(object sender, EventArgs e)
        {

        }

    }
}

can you help me please ?

Glory
  • 45
  • 7
  • Welcome, just a heads up, we already have a few similar questions, http://stackoverflow.com/questions/5344514/testing-for-similar-string-content, http://stackoverflow.com/questions/747169/c-sharp-comparing-similar-strings and http://stackoverflow.com/questions/1918838/match-similar-names-in-c-sharp – Nathan Koop Dec 07 '12 at 16:25
  • 1
    On what degree is "similar"? How many letters has to be the same for it to decide that the two strings are "similar"? – Mikk Dec 07 '12 at 16:25
  • Thank you @NathanKoop just about to bring that up ;) – Benjamin Trent Dec 07 '12 at 16:25
  • I've used the Levenshtein Distance algorithm but I've had some trouble ^^" there is a runtime error I couldn't handle :( – Glory Dec 07 '12 at 20:24

2 Answers2

5

There is a built-in way to test if two lines are identical, but there is no built-in way to tell if two lines are similar. You need to implement an algorithm that measures string similarity, such as the Levenshtein Distance - a very common Edit Distance algorithm. Lines with small edit distance can be declared similar depending on some threshold specific to your requirements.

Sergey Kalinichenko
  • 714,442
  • 84
  • 1,110
  • 1,523
  • 2
    +1 you were faster :) A C# implementation of the Levenstein Distance algorithm: http://www.dotnetperls.com/levenshtein – Cristian Lupascu Dec 07 '12 at 16:26
  • can you please check my code , I believe I done something wrong but I don't now what – Glory Dec 07 '12 at 18:20
  • this is a method to calculate the place of substitution – Glory Dec 07 '12 at 20:26
  • I've wrote all of the code can you please tell me what you think – Glory Dec 07 '12 at 20:27
  • @Glory I tried out your implementation of Levenshtein, it looks good ([link to ideone](http://ideone.com/yUM9H9)). As far as the rest of your code goes, I cannot tell you much, because I don't know what's going on. – Sergey Kalinichenko Dec 07 '12 at 20:39
  • Thanks for trying, you have helped me alot thank you ^^ and I will try to find out what went wrong – Glory Dec 07 '12 at 21:42
  • actually I'm still looking for some help , but if I found an answer I will gladly do it ^^ – Glory Dec 07 '12 at 22:43
  • @Glory Consider posting a separate question: as far as matching characters in strings goes, this question has an answer, and is, therefore, highly unlikely to get much additional attention from the community. Anyway, a well-formulated new question has much higher chances to be of use to you. Good night! – Sergey Kalinichenko Dec 07 '12 at 23:54
  • found what I was looking for .. thanx for UR help ^^ – Glory Dec 18 '12 at 04:36
2

You'll need to use an algorithm that compares the "distance" between two strings:

The closeness of a match is measured in terms of the number of primitive operations necessary to convert the string into an exact match. This number is called the edit distance between the string and the pattern. The usual primitive operations are:

insertion: cot → coat
deletion: coat → cot
substitution: coat → cost
Carra
  • 17,808
  • 7
  • 62
  • 75
  • can you please check my code , I believe I done something wrong but I don't now what – Glory Dec 07 '12 at 18:40
  • What is buffer doing in your code? To figure the "distance" between `a1` and `a2` you need to do: `var distance = LevenshteinDistance.Compute(a1, a2)` – Nathan Koop Dec 07 '12 at 18:51
  • the buffer is a temp where i can delete the correct words so the code can skip them – Glory Dec 07 '12 at 20:29