3

I have a project in my Bioinformatics course at the university, and one of the things in my project is gene prediction.

My problem today is how to get all indexes of more than one specific word in a string. For example, in my case I want to find all occurrences of start codons ("AUG") and stop codons ("UAA", "UAG", "UGA") and use them to predict genes, i.e. to find Open Reading Frames (ORFs).

Here is my initial code:

/**
 * Handles a click on jButton3: reads the DNA strand from {@code jTextField1},
 * converts it to RNA and writes every open reading frame found in reading
 * frame 0 to {@code jTextArea1}.
 *
 * <p>An open reading frame here runs from the first "AUG" codon up to and
 * including the first following stop codon ("UGA", "UAG" or "UAA"). After a
 * stop codon the search for the next "AUG" resumes, so several genes may be
 * reported.
 *
 * @param evt the button event (unused)
 */
private void jButton3ActionPerformed(java.awt.event.ActionEvent evt) {
    String str = jTextField1.getText();
    if ("".equals(str)) {
        jTextArea1.setText("No DNA strand entered.. ");
        return;
    }
    if (checksum(str) != 100) {
        jTextArea1.setText("Text entered is not a DNA strand..");
        return;
    }

    SymbolList dna;
    try {
        dna = DNATools.toRNA(DNATools.createDNA(str));
    } catch (IllegalSymbolException ex) {
        // Stop here: continuing would dereference a null sequence below.
        Logger.getLogger(m.class.getName()).log(Level.SEVERE, null, ex);
        jTextArea1.setText("Text entered is not a DNA strand..");
        return;
    } catch (IllegalAlphabetException ex) {
        Logger.getLogger(m.class.getName()).log(Level.SEVERE, null, ex);
        jTextArea1.setText("Text entered is not a DNA strand..");
        return;
    }

    // Pad with gap characters so the sequence splits into whole codons.
    String dnax = dna.seqString().toUpperCase();
    while (dnax.length() % 3 != 0) {
        dnax += "-";
    }

    StringBuilder text = new StringBuilder();
    int startCodonIndex = -1; // -1 means "not currently inside a gene"
    for (int g = 0; g < dnax.length(); g += 3) {
        String codon = dnax.substring(g, g + 3);
        if (startCodonIndex == -1) {
            // Only the first AUG opens a gene; later in-gene AUGs are ordinary codons.
            if ("AUG".equals(codon)) {
                startCodonIndex = g;
            }
        } else if ("UGA".equals(codon) || "UAG".equals(codon) || "UAA".equals(codon)) {
            // The first stop codon closes the gene; the end position is exclusive.
            int stopCodonIndex = g + 3;
            text.append("\nGene start position:  ").append(startCodonIndex)
                    .append("\nGene end position:  ").append(stopCodonIndex)
                    .append("\n Gene: ").append(dnax, startCodonIndex, stopCodonIndex);
            startCodonIndex = -1;
        }
    }

    if (text.length() > 0) {
        jTextArea1.setText(text.toString());
    } else {
        jTextArea1.setText("No genes found in Seq: " + dnax);
    }
}

Here is the checksum() method:

/**
 * Returns the percentage of characters in {@code x} that are valid DNA
 * symbols: A, T, G, C (case-insensitive) or the gap character '-'.
 *
 * @param x the text to check; must not be null
 * @return a value from 0 to 100, rounded down; 100 only when every character
 *         is valid, and 0 for an empty string
 */
private static int checksum(String x) {
    if (x.isEmpty()) {
        // Nothing to measure; also avoids dividing by zero below.
        return 0;
    }
    int count = 0;
    for (int i = 0; i < x.length(); i++) {
        char c = Character.toUpperCase(x.charAt(i));
        if (c == 'A' || c == 'T' || c == 'G' || c == 'C' || c == '-') {
            count++;
        }
    }
    // Multiply before dividing: integer division first would collapse the
    // result to 0 or 100. The long arithmetic avoids overflow on long inputs.
    return (int) (count * 100L / x.length());
}

I've tried other solutions, but nothing is working for me. Any help/suggestion is welcome.

Bruce_Wayne
  • 1,564
  • 3
  • 18
  • 41
Wrix789
  • 35
  • 6

2 Answers

2

I think you are asking how to find the indexes of those specific codons? And dnax is the String you are checking?

You could use indexOf(String str, int fromIndex). It returns -1 if no substring was found.

So maybe something like this might help,

// Collect the position of every "AUG" in dnax, each position exactly once.
List<Integer> startCodonIndices = new ArrayList<Integer>();
int index = dnax.indexOf("AUG");
while (index != -1) {
    startCodonIndices.add(index);
    // Resume one character past the last hit so the same match is not found again.
    index = dnax.indexOf("AUG", index + 1);
}
pepers
  • 315
  • 3
  • 15
  • This works perfect for start codons; although, stop codons are still causing me trouble – Wrix789 May 17 '15 at 07:18
  • 1
    this is what I've added to my code and it results in a correct gene but with an exception ( IndexOutOfBounds ) - look below – Wrix789 May 17 '15 at 08:17
  • Sorry I couldn't help more, I went to bed after first comment haha, but it's better that you figured it out on your own. Good job! – pepers May 17 '15 at 13:09
0

This is what I changed and added to my code based on your suggestions:

 // Intermediate attempt, kept as posted: it is the code that produced the
 // output and the StringIndexOutOfBoundsException shown below.
 // NOTE(review): indexOf is called without a receiver; presumably
 // dnax.indexOf was intended - confirm against the enclosing class.
 // A value is added on every iteration with no check, so the list can hold
 // repeated positions (and -1, if indexOf behaves like String.indexOf).
 for (int i = 0; i + 3 < dnax.length(); i++) {
                    index = indexOf("AUG", i);
                    startCodonIndices.add(index);
                }
                // Raw list receiving the results for all three stop codons,
                // again without filtering.
                List stopCodonIndices = new ArrayList();
                int i2, i3, i4;
                for (int j = 0; j + 3 < dnax.length(); j++) {
                    i2 = indexOf("UGA", j);
                    i3 = indexOf("UAA", j);
                    i4 = indexOf("UAG", j);
                    stopCodonIndices.add(i2);
                    stopCodonIndices.add(i3);
                    stopCodonIndices.add(i4);
                }
                // n is never used inside the loop body.
                for (int n = 0; n < dnax.length(); n++) {
                    for (int k = 0; k < startCodonIndices.size() - 1; k++) {
                        for (int h = 0; h < stopCodonIndices.size() - 1; h++) {
                            // k and h are the loop counters, not the codon positions
                            // stored in the lists, so this slices dnax from k to h.
                            // substring throws StringIndexOutOfBoundsException once h
                            // exceeds dnax.length() (or when k > h).
                            gene = dnax.substring(k, h);
                            jTextArea1.append("\n" + gene);
                        }
                    }
                }

Result of Seq=ATGACCTGA:

A
AU
AUG
AUGA
AUGAC
AUGACC
AUGACCU
AUGACCUG
AUGACCUGA

Error:

Exception in thread "AWT-EventQueue-0" java.lang.StringIndexOutOfBoundsException: String index out of range: 10
at java.lang.String.substring(String.java:1951)
    at bio.m.jButton3ActionPerformed(m.java:365)
    at bio.m.access$200(m.java:36)
    at bio.m$3.actionPerformed(m.java:142)
    at javax.swing.AbstractButton.fireActionPerformed(AbstractButton.java:2022)
    at javax.swing.AbstractButton$Handler.actionPerformed(AbstractButton.java:2346)
    at javax.swing.DefaultButtonModel.fireActionPerformed(DefaultButtonModel.java:402)
    at javax.swing.DefaultButtonModel.setPressed(DefaultButtonModel.java:259)
    at javax.swing.plaf.basic.BasicButtonListener.mouseReleased(BasicButtonListener.java:252)
    at com.jtattoo.plaf.BaseButtonListener.mouseReleased(BaseButtonListener.java:60)
    at java.awt.Component.processMouseEvent(Component.java:6525)
    at javax.swing.JComponent.processMouseEvent(JComponent.java:3324)
    at java.awt.Component.processEvent(Component.java:6290)
    at java.awt.Container.processEvent(Container.java:2234)
    at java.awt.Component.dispatchEventImpl(Component.java:4881)
    at java.awt.Container.dispatchEventImpl(Container.java:2292)
    at java.awt.Component.dispatchEvent(Component.java:4703)
    at java.awt.LightweightDispatcher.retargetMouseEvent(Container.java:4898)
    at java.awt.LightweightDispatcher.processMouseEvent(Container.java:4533)
    at java.awt.LightweightDispatcher.dispatchEvent(Container.java:4462)
    at java.awt.Container.dispatchEventImpl(Container.java:2278)
    at java.awt.Window.dispatchEventImpl(Window.java:2750)
    at java.awt.Component.dispatchEvent(Component.java:4703)
    at java.awt.EventQueue.dispatchEventImpl(EventQueue.java:751)
    at java.awt.EventQueue.access$500(EventQueue.java:97)
    at java.awt.EventQueue$3.run(EventQueue.java:702)
    at java.awt.EventQueue$3.run(EventQueue.java:696)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.security.ProtectionDomain$1.doIntersectionPrivilege(ProtectionDomain.java:75)
    at java.security.ProtectionDomain$1.doIntersectionPrivilege(ProtectionDomain.java:86)
    at java.awt.EventQueue$4.run(EventQueue.java:724)
    at java.awt.EventQueue$4.run(EventQueue.java:722)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.security.ProtectionDomain$1.doIntersectionPrivilege(ProtectionDomain.java:75)
    at java.awt.EventQueue.dispatchEvent(EventQueue.java:721)
    at java.awt.EventDispatchThread.pumpOneEventForFilters(EventDispatchThread.java:201)
    at java.awt.EventDispatchThread.pumpEventsForFilter(EventDispatchThread.java:116)
    at java.awt.EventDispatchThread.pumpEventsForHierarchy(EventDispatchThread.java:105)
    at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:101)
    at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:93)
    at java.awt.EventDispatchThread.run(EventDispatchThread.java:82)

It returns as you can see a gene, but there is something wrong with the edited code

Edit: The code is finally doing what it should: getting the start and stop codons from my RNA sequence. Here is my final edit:

        // Position of every start codon, each recorded once, in ascending order.
        List<Integer> startCodonIndices = new ArrayList<Integer>();
        for (int index = dnax.indexOf("AUG"); index != -1; index = dnax.indexOf("AUG", index + 1)) {
            startCodonIndices.add(index);
        }

        // Position of every stop codon (UGA, UAA, UAG), each recorded once, in
        // ascending order. "<=" lets a codon at the very end of dnax be seen.
        List<Integer> stopCodonIndices = new ArrayList<Integer>();
        for (int j = 0; j + 3 <= dnax.length(); j++) {
            String codon = dnax.substring(j, j + 3);
            if ("UGA".equals(codon) || "UAA".equals(codon) || "UAG".equals(codon)) {
                stopCodonIndices.add(j);
            }
        }

        // Pair each start codon with the first stop codon that lies after it in
        // the same reading frame (a whole number of codons away). Stops before
        // the start or out of frame are skipped, so substring is always in range.
        StringBuilder genes = new StringBuilder();
        for (int start : startCodonIndices) {
            for (int stop : stopCodonIndices) {
                if (stop > start && (stop - start) % 3 == 0) {
                    gene = dnax.substring(start, stop + 3);
                    genes.append(gene).append("\n");
                    break;
                }
            }
        }
        // Set the text once so every gene is shown, not only the last one.
        jTextArea1.setText(genes.toString());

Thank you pepers for your help!

Wrix789
  • 35
  • 6