3

We have created a custom CAPTCHA generator using C#/.NET. We now have a requirement to upgrade our CAPTCHA to include an audio feature ("listen to the CAPTCHA"). Our major constraint is that we cannot use any third-party components (DLLs).

Please guide me to implement such functionality.

Thanks in advance.

Adrian Godong
  • 8,802
  • 8
  • 40
  • 62
suryakiran
  • 1,976
  • 25
  • 41
  • By the way, is the question about how to generate the sound of the characters on the server, or about how to play them in the browser? For playing them, here is one answer that fits: http://stackoverflow.com/questions/10648471/how-to-play-audio-and-video-files-in-web-browser/10648724#10648724 – Aristos May 28 '12 at 10:44
  • The characters are built on the server; based on those characters, the sound would be generated and then played in the browser. – suryakiran May 28 '12 at 11:10
  • What details do you wish for? For which part: on the server or on the client? – Aristos Jun 01 '12 at 09:17

4 Answers4

5

I made something like this in the past. There was a need for background noise (instrumental music) merged with the concatenated letters/characters. In addition, the recordings in use were generated from different voices. All of this increased the difficulty for any "smart code" trying to extract the captcha text from the audio. I also had to convert the final WAVE to MP3 (using lame_enc.dll) for obvious reasons. It's not so easy to explain how to do it, so I include the very first draft version needed for the job, leaving out the MP3 code as it uses a third-party DLL.

using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Imaging;
using System.Drawing.Text;
using System.IO;
using System.Reflection;

namespace suryakiran
{
    /// <summary>
    /// Captcha rendering helpers: <c>AudioCaptcha</c> builds a WAVE clip and
    /// <c>VisualCaptcha</c> builds a PNG image for a given captcha text.
    /// </summary>
    internal static partial class Extensions
    {
        /// <summary>Characters for which a recorded voice sample is looked up.</summary>
        private const String AudioAlphabet = "abcdefghijkoprstvx0123456789";

        /// <summary>Number of leading bytes treated as the WAVE header of every asset file.</summary>
        private const Int32 WaveHeaderSize = 44;

        /// <summary>Base unit, in bytes, used to size the background music and the pauses.</summary>
        private const Int32 AudioUnit = 37500;

        /// <summary>Preferred font family for the visual captcha.</summary>
        private const String CaptchaFontName = "Arial Rounded MT Bold";

        /// <summary>
        /// Builds a WAVE clip in which the recorded samples of the characters of <paramref name="text"/>
        /// are laid over a randomly chosen excerpt of background music.
        /// </summary>
        /// <remarks>
        /// Assets are read from <c>bin\wav</c> under the application's physical path:
        /// music files <c>z1.wav</c> .. <c>z12.wav</c> and voice samples <c>{0..2}\{character}.wav</c>.
        /// The output header reuses the channel count, sample rate and bit depth of the music file.
        /// NOTE(review): the voice samples are presumably recorded in that same format, and the
        /// byte-wise mix below presumably expects 8-bit PCM - confirm against the actual assets.
        /// </remarks>
        /// <param name="text">Captcha text; every character must be part of the supported alphabet.</param>
        /// <returns>The complete WAVE file (44-byte header followed by the mixed samples).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A character has no voice sample.</exception>
        internal static byte[] AudioCaptcha(this string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            // Reject unsupported characters up front, before any file is opened.
            foreach (char c in text)
            {
                if (AudioAlphabet.IndexOf(c) < 0)
                {
                    throw new ArgumentOutOfRangeException("text", c, "No voice sample exists for this character.");
                }
            }

            String root = System.Web.HttpContext.Current.Request.ServerVariables["APPL_PHYSICAL_PATH"];
            String wavDirectory = Path.Combine(root, @"bin\wav");
            Random random = new Random();

            Byte[] voice = BuildVoiceTrack(text, wavDirectory, random);

            // At least nine units of music, and never shorter than the voice track,
            // so that long captchas are not cut off.
            Byte[] music = new Byte[Math.Max(9 * AudioUnit, voice.Length)];
            Byte[] musicHeader = new Byte[WaveHeaderSize];

            // Random start position inside the music file, always past its header.
            Int32 offset = random.Next(1, 4000000);
            offset += (offset % 150) + WaveHeaderSize;

            // Only the file name is formatted; the directory is never used as a format string.
            String musicFile = Path.Combine(wavDirectory, String.Format("z{0}.wav", random.Next(1, 13)));
            using (FileStream fs = new FileStream(musicFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                ReadBlock(fs, musicHeader);
                fs.Position = offset;
                // Bytes that cannot be read (file too short) stay zero.
                ReadBlock(fs, music);
            }

            Int16 channels = BitConverter.ToInt16(musicHeader, 22);
            Int32 sampleRate = BitConverter.ToInt32(musicHeader, 24);
            Int16 bitsPerSample = BitConverter.ToInt16(musicHeader, 34);

            Byte[] header = BuildWaveHeader(channels, sampleRate, bitsPerSample, music.Length);
            Byte[] result = new Byte[header.Length + music.Length];
            Buffer.BlockCopy(header, 0, result, 0, header.Length);

            // Mix: voice minus music, byte by byte; past the end of the voice track the
            // voice counts as zero, the same value used for the pauses between characters.
            for (Int32 i = 0; i < music.Length; i++)
            {
                Int32 voiceByte = i < voice.Length ? voice[i] : 0;
                result[header.Length + i] = unchecked((Byte)(voiceByte - music[i]));
            }
            return result;
        }

        /// <summary>Concatenates a lead-in pause and, per character, a pause plus one randomly chosen voice sample.</summary>
        private static Byte[] BuildVoiceTrack(String text, String wavDirectory, Random random)
        {
            Int32 pause = AudioUnit / 2;
            using (MemoryStream voice = new MemoryStream())
            {
                voice.Write(new Byte[pause], 0, pause);
                foreach (char c in text)
                {
                    // One of the three voice sets (sub-directories 0, 1 and 2) is picked per character.
                    String voiceSet = Path.Combine(wavDirectory, random.Next(3).ToString());
                    Byte[] wav = File.ReadAllBytes(Path.Combine(voiceSet, VoiceFileName(c) + ".wav"));

                    voice.Write(new Byte[pause], 0, pause);
                    // Skip the sample's own header and append only its data.
                    voice.Write(wav, WaveHeaderSize, wav.Length - WaveHeaderSize);
                }
                return voice.ToArray();
            }
        }

        /// <summary>Maps a character to the base name of its sample file.</summary>
        private static String VoiceFileName(char c)
        {
            // '?' is not allowed in Windows file names, so its sample is named "qm";
            // this only matters if '?' is ever added to the alphabet.
            return c == '?' ? "qm" : c.ToString();
        }

        /// <summary>Builds a 44-byte PCM WAVE header for <paramref name="dataLength"/> bytes of sample data.</summary>
        private static Byte[] BuildWaveHeader(Int16 channels, Int32 sampleRate, Int16 bitsPerSample, Int32 dataLength)
        {
            Int32 blockAlign = (bitsPerSample * channels) / 8;
            using (MemoryStream stream = new MemoryStream(WaveHeaderSize))
            using (BinaryWriter bw = new BinaryWriter(stream))
            {
                bw.Write(new char[4] { 'R', 'I', 'F', 'F' });
                // RIFF chunk size = everything after this field: 36 header bytes + sample data.
                bw.Write(36 + dataLength);
                bw.Write(new char[8] { 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ' });
                bw.Write((Int32)16);            // size of the fmt chunk
                bw.Write((Int16)1);             // format tag 1 = PCM
                bw.Write(channels);
                bw.Write(sampleRate);
                bw.Write((Int32)(sampleRate * blockAlign));
                bw.Write(unchecked((Int16)blockAlign));
                bw.Write(bitsPerSample);
                bw.Write(new char[4] { 'd', 'a', 't', 'a' });
                bw.Write(dataLength);
                bw.Flush();
                return stream.ToArray();
            }
        }

        /// <summary>Reads until <paramref name="buffer"/> is full or the stream ends; returns the bytes read.</summary>
        private static Int32 ReadBlock(Stream stream, Byte[] buffer)
        {
            Int32 total = 0;
            while (total < buffer.Length)
            {
                Int32 read = stream.Read(buffer, total, buffer.Length - total);
                if (read <= 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }

        /// <summary>
        /// Renders <paramref name="source"/> as a distorted captcha image: two wavy lines, then the
        /// text warped into one of four randomly chosen perspectives, scaled down to 128x40.
        /// </summary>
        /// <param name="source">Text to render.</param>
        /// <returns>The PNG-encoded image, or null when rendering fails for any reason.</returns>
        internal static Byte[] VisualCaptcha(this String source)
        {
            const Int32 w = 250, h = 75;
            try
            {
                Random r = new Random();
                using (var bmp = new Bitmap(w, h, PixelFormat.Format32bppArgb))
                {
                    using (var g = Graphics.FromImage(bmp))
                    using (var brush = new LinearGradientBrush(new Rectangle(0, 0, w, h), Color.Black, Color.Black, 45, false))
                    {
                        g.TextRenderingHint = TextRenderingHint.AntiAlias;
                        g.SmoothingMode = SmoothingMode.HighQuality;
                        g.Clear(Color.White);

                        SizeF size;
                        using (var measureFont = new Font(CaptchaFontName, h * 1.2f, FontStyle.Bold))
                        {
                            size = g.MeasureString(source, measureFont, new SizeF(w * 1F, h * 1F));
                        }

                        // Six random colours spread evenly over the gradient.
                        ColorBlend blend = new ColorBlend(6);
                        for (var i = 0; i < 6; i++)
                        {
                            blend.Positions[i] = i / 5F;
                            blend.Colors[i] = r.RandomColor(255, 64, 128);
                        }
                        brush.InterpolationColors = blend;

                        // The pen is created only after the gradient colours are set, and is shared
                        // by every stroke instead of allocating a new one per segment.
                        using (var pen = new Pen(brush, 2F))
                        {
                            for (int wave = 0; wave < 2; wave++)
                            {
                                DrawCaptchaWave(g, pen, r, wave);
                            }

                            using (var path = new GraphicsPath())
                            using (var fontFamily = CreateCaptchaFontFamily())
                            {
                                PointF origin;
                                PointF[] corners;
                                Int32 m = r.Next() % 9 + 1; // 1..9 selects the perspective
                                if (m <= 3) // star trek inverse
                                {
                                    origin = new PointF((w - size.Width) / 2F, (h * 0.9F - size.Height) / 2F);
                                    corners = new PointF[] { new PointF(0, 0), new PointF(w, 0), new PointF(w * 0.2F, h), new PointF(w * 0.8F, h) };
                                }
                                else if (m <= 5) // star trek
                                {
                                    origin = new PointF((w - size.Width) / 2F, (h * 1.2F - size.Height) / 2F + 2F);
                                    corners = new PointF[] { new PointF(w * 0.2F, 0), new PointF(w * 0.8F, 0), new PointF(0, h), new PointF(w, h) };
                                }
                                else if (m <= 7) // grow from left
                                {
                                    origin = new PointF((w * 1.15F - size.Width) / 2F, (h - size.Height) / 2F);
                                    corners = new PointF[] { new PointF(0, h * 0.25F), new PointF(w, 0), new PointF(0, h * 0.75F), new PointF(w, h) };
                                }
                                else // grow from right
                                {
                                    origin = new PointF((w * 0.85F - size.Width) / 2F, (h - size.Height) / 2F);
                                    corners = new PointF[] { new PointF(w * 0.1F, 0), new PointF(w * 0.9F, h * 0.25F), new PointF(w * 0.1F, h), new PointF(w * 0.9F, h * 0.75F) };
                                }

                                path.AddString(source, fontFamily, (int)FontStyle.Bold, h * 0.75F, origin, StringFormat.GenericTypographic);
                                path.Warp(corners, new RectangleF(0, 0, w, h));
                                g.FillPath(Brushes.White, path);
                                g.DrawPath(pen, path);
                            }
                        }
                    }

                    using (var thumb = new Bitmap(128, 40, PixelFormat.Format32bppArgb))
                    {
                        using (var g = Graphics.FromImage(thumb))
                        {
                            g.CompositingQuality = CompositingQuality.HighQuality;
                            g.SmoothingMode = SmoothingMode.HighQuality;
                            g.InterpolationMode = InterpolationMode.HighQualityBicubic;
                            g.DrawImage(bmp, new Rectangle(0, 0, thumb.Width, thumb.Height));
                            // One-pixel white frame around the thumbnail.
                            g.DrawRectangle(Pens.White, new Rectangle(0, 0, 127, 39));
                        }
                        using (var ms = new MemoryStream())
                        {
                            thumb.Save(ms, ImageFormat.Png);
                            return ms.ToArray();
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort by contract: callers receive null on failure, but the cause is traced.
                System.Diagnostics.Trace.TraceError("VisualCaptcha failed: {0}", ex);
                return null;
            }
        }

        /// <summary>
        /// Returns the preferred captcha font family, or the generic sans-serif family when the
        /// preferred one is not installed.
        /// </summary>
        private static FontFamily CreateCaptchaFontFamily()
        {
            try
            {
                return new FontFamily(CaptchaFontName);
            }
            catch (ArgumentException)
            {
                // The FontFamily(string) constructor throws when the family is missing.
                return new FontFamily(GenericFontFamilies.SansSerif);
            }
        }

        /// <summary>
        /// Draws one wavy line across the image; <paramref name="wave"/> (0 or 1) shifts it 20 pixels down.
        /// </summary>
        private static void DrawCaptchaWave(Graphics g, Pen pen, Random r, Int32 wave)
        {
            Int32 min = 15 + wave * 20;
            PointF start = new PointF(16f, (float)r.Next(min, min + 10));
            PointF end = new PointF(240f, (float)r.Next(min + 10, min + 20));
            float deltaX = end.X - start.X, deltaY = end.Y - start.Y;
            float distance = (float)Math.Sqrt(Math.Pow(deltaX, 2) + Math.Pow(deltaY, 2));

            // Sample the straight line from start to end at one-pixel steps.
            List<PointF> anchors = new List<PointF>();
            float travelled = 0;
            while (travelled < distance)
            {
                travelled++;
                float offsetX = (float)((double)travelled / (double)distance * (double)deltaX);
                float offsetY = (float)((double)travelled / (double)distance * (double)deltaY);
                anchors.Add(new PointF(start.X + offsetX, start.Y + offsetY));
            }

            // Every 24 samples, draw one ripple rotated to follow the line.
            for (int i = 0; i < anchors.Count - 24; i += 24)
            {
                PointF from = anchors[i], to = anchors[i + 24];
                float angle = (float)(Math.Atan2(to.Y - from.Y, to.X - from.X) * 180 / Math.PI);
                Int32 pm = r.Next() % 2 + 1; // ripple amplitude factor: 1 or 2
                Point[] ripple = new Point[] { new Point(0, 0), new Point(3, -3 * pm), new Point(6, -4 * pm), new Point(9, -3 * pm), new Point(12, 0), new Point(15, 3 * pm), new Point(18, 4 * pm), new Point(21, 3 * pm), new Point(24, 0) };

                g.TranslateTransform(from.X, from.Y);
                g.RotateTransform(angle);
                g.DrawLines(pen, ripple);
                // Back to the identity transform, without accumulating rounding errors.
                g.ResetTransform();
            }
        }

        /// <summary>
        /// Returns a colour with the given alpha whose red, green and blue components are each drawn
        /// from <paramref name="rnd"/> in the range [<paramref name="min"/>, <paramref name="max"/>).
        /// </summary>
        private static Color RandomColor(this Random rnd, Int32 alpha, Int32 min, Int32 max)
        {
            return Color.FromArgb(alpha, rnd.Next(min, max), rnd.Next(min, max), rnd.Next(min, max));
        }
    }

}

For both visual and audio generation the code is independent from the captcha generation (source text). Each letter audio file in WAVE format was about 30KB but each music file was at least 8MB, giving me a wide range for random starting positions (avoiding - frequently - repeated patterns). You can compile the code as a dll or place it (modified) in a code behind file.

  • Thanks for the Reply. That was really useful, could you please let me know is there any player which plays the content from memory stream or Please let me know how to play the merged content from the stream on client side. – suryakiran Jun 04 '12 at 06:58
  • First of all, create a sayit.ashx handler that gets the captcha text and passes it to a call to AudioCaptcha. Then add onclick="SayIt()" to the element that fires the audio request, and an empty container with id "voice" (e.g. <div id="voice"></div>)
    in your HTML. Finally, add this JavaScript code: function SayIt(){var o=document.getElementById('voice');if(o.innerHTML=='')o.innerHTML="<embed id='play' src='sayit.ashx' ...><\/embed>";else with(document.getElementById('play')){SelectionStart = 0;Play()}} Then, if you reload the captcha image, set voice.innerHTML back to "".
    –  Jun 04 '12 at 07:48
1

The idea is very simple.

For every character you have, you record it as audio, and then you simply play the recorded audio in the same order as you show the characters.

So for the character A, you say it and record it to a WAV file that, when played, says "alpha" loud and clear.

Read this answer for how to play them: How to play audio and video files in web browser?

sound edit on code behind

If you would like to combine the audio into one file, you can pick an audio library that already has functions for this kind of mixing.

I know that one of the best is the bass audio library at http://www.un4seen.com/

Of course, because you only need to join two audio clips, maybe you can do that directly with the Windows multimedia library; check its SDK: http://msdn.microsoft.com/en-us/library/windows/desktop/dd757738(v=vs.85).aspx

Windows Media provides ready-to-use functions, built into Windows, that do many things with media such as audio.

Community
  • 1
  • 1
Aristos
  • 66,005
  • 16
  • 114
  • 150
  • There might be a security tweak for this. I need something where on the server side required wav files are merged and sent back to browser as a output stream. Then our flash player should play from the output stream. – suryakiran Jun 01 '12 at 09:19
  • @suryakiran I have updated the answer with a guide on how to do that. I know that I have not given you source code for it, but the examples in that SDK and its help contain enough information for you to build it yourself. There are also too many things to cover, and the answer on how to play it contains many details. The only part that is left is how you combine them on the server. – Aristos Jun 01 '12 at 09:35
  • +1 - even though he specifically requested that 3rd party libraries not be used, that sounds a little unrealistic without a ton, ton, ton of work. – Peter Jun 01 '12 at 17:37
1

I'll skip a lot of captcha security and generation details in favor of demonstrating my proposal.

I've used a voice synthesizer on the server side.

On your web application project, add a reference to System.Speech.dll

I've created two pages:

Start.aspx -> the page that the user will see. With this two elements:

<span id="lblCaptchaText" runat="server"></span>
<embed height="50px" width="100px" src="AudioPlayer.aspx" /> 

The code behind should look like this (simplified)

/// <summary>
/// Demo page shown to the user: it picks the captcha text, stores it in the session for the
/// audio page, and displays it.
/// </summary>
public partial class Start : System.Web.UI.Page
{
    /// <summary>Publishes the captcha text to the session and to the page.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        // Placeholder: a real application generates the captcha text here.
        const string captcha = "E F G 6 7";

        // The wave generator page looks the text up under this session key.
        Session.Add("captcha", captcha);

        // Shown as plain text for the demo only; in production this should be
        // a blurry image that is hard to read by machine.
        lblCaptchaText.InnerText = captcha;
    }
}

AudioPlayer.aspx -> the page that generates the wave output to the user. Nothing on the HTML side.

/// <summary>
/// Page without markup that streams the spoken version of the captcha stored in the session
/// under the key "captcha" as WAVE audio.
/// </summary>
public partial class AudioPlayer : System.Web.UI.Page
{
    /// <summary>
    /// Writes the synthesized captcha audio to the response, or answers 404 when the session
    /// holds no captcha text.
    /// </summary>
    protected override void OnPreRender(EventArgs e)
    {
        string captcha = Session["captcha"] as string;
        if (string.IsNullOrEmpty(captcha))
        {
            // Nothing to speak (expired session or direct request): fail the request
            // instead of passing null to the synthesizer.
            Response.StatusCode = 404;
            Response.End();
            return;
        }

        Response.ContentType = "audio/wav";
        using (MemoryStream mstream = GetAudio(captcha))
        {
            mstream.Position = 0;
            mstream.CopyTo(Response.OutputStream);
        }
        Response.End();
    }

    /// <summary>
    /// Synthesizes <paramref name="input"/> at a slow speaking rate into an in-memory WAVE stream.
    /// </summary>
    /// <param name="input">Text to speak.</param>
    /// <returns>
    /// The stream holding the audio; its position is left wherever the synthesizer stopped
    /// writing, so callers must rewind it before reading.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The synthesizer failed; see the inner exception.</exception>
    public static MemoryStream GetAudio(string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        MemoryStream mem = new MemoryStream();
        Exception failure = null;

        // Synthesis runs on a dedicated thread. Anything it throws is captured here:
        // an exception escaping a manually created thread is unhandled and can
        // terminate the whole process.
        Thread t = new Thread(new ThreadStart(() =>
        {
            try
            {
                using (SpeechSynthesizer synth = new SpeechSynthesizer())
                {
                    synth.Rate = -5;
                    synth.SetOutputToWaveStream(mem);
                    synth.Speak(input);
                    // Detach the stream from the synthesizer before it is disposed.
                    synth.SetOutputToNull();
                }
            }
            catch (Exception ex)
            {
                failure = ex;
            }
        }));

        t.Start();
        t.Join();

        if (failure != null)
        {
            mem.Dispose();
            throw new InvalidOperationException("Speech synthesis failed.", failure);
        }

        return mem;
    }
}

Compile your web site and run Start.aspx, you'll hear your captcha.

There are a lot of things to consider when using Microsoft Speech SDK, performance, resource management, scalability, etc. The good news is that they have a server "flavor" of their Speech Recognition engine and Speech Synthesizer called Microsoft Speech Server.

Adrian Salazar
  • 5,279
  • 34
  • 51
-1

reCAPTCHA has this by default, so why would you want to reinvent the wheel?

http://blog.recaptcha.net/2008/12/new-audio-recaptcha.html

IrishChieftain
  • 15,108
  • 7
  • 50
  • 91
  • In our project we are not allowed to use third-party components for which the source code is not available. – suryakiran Jun 04 '12 at 04:00