1

I am trying to highlight string in string if the first one contains the second. The problem is that when the parent string contains accented characters, indexOf returns me the index of the first match with an error. filterText is the String to highlight. For example.

@Override
    public View getView(final int position, View convertView, ViewGroup parent) {
        View rowView = convertView;
        ViewHolder viewHolder;

        if (rowView == null) {
            LayoutInflater inflater = getLayoutInflater();
            rowView = inflater.inflate(R.layout.list_row_search, parent, false);
            // configure view holder
            viewHolder = new ViewHolder();
            viewHolder.tvName = (TextView) rowView.findViewById(R.id.tv_name);
            viewHolder.tvDescription = (TextView) rowView.findViewById(R.id.tv_description);
            rowView.setTag(viewHolder);

        } else {
            viewHolder = (ViewHolder) convertView.getTag();
        }


        viewHolder.tvName.setTypeface(tfBold);
        viewHolder.tvDescription.setTypeface(tfRegular);

        viewHolder.tvName.setText(getSpannedFromHtml(parkingList.get(position).getPostTitle()));
        viewHolder.tvDescription.setText(getSpannedFromHtml(parkingList.get(position).getPostSubTitle()));

        if(getUTF8Length(filterText) > 2){
            if(parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).contains(filterText)) {

                Log.d("AAA", "length 1: " + parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).length());
                Log.d("AAA", "length 2: " + getUTF8Length(parkingList.get(position).getPostTitle().toLowerCase(Locale.getDefault()).toString()));


                // Find all occurrences for the fist letter of filterText in Name
                for (int cont = -1; (cont = parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), cont + 1)) != -1; ) {
                    String str1 = "";
                    String str2 = "";
                    String str3 = "";

                    Log.d("AAA", "parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)): " + parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)));

                    Log.d("AAA", "parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US): " + parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US)));

                    if (cont > 0)
                        str1 = viewHolder.tvName.getText().subSequence(0, cont).toString();
                    str2 = viewHolder.tvName.getText().subSequence(cont, cont + getUTF8Length(filterText)).toString();
                    if ((cont + getUTF8Length(filterText)) < getUTF8Length(viewHolder.tvName.getText().toString()))
                        str3 = viewHolder.tvName.getText().subSequence(cont + getUTF8Length(filterText), getUTF8Length(viewHolder.tvName.getText().toString())).toString();

                    String highlilightedString = str1 + "<font color = #ef6c00>" + str2 + "</font>" + str3;
                    viewHolder.tvName.setText(getSpannedFromHtml(highlilightedString));
                }
            }

            if(parkingList.get(position).getPostSubTitle().toLowerCase(Locale.getDefault()).contains(filterText))
                // Find all occurrences for the fist letter of filterText in Description
                for(int cont = -1; (cont = parkingList.get(position).getPostSubTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), cont + 1)) != -1;){
                    String str1="";
                    String str2="";
                    String str3="";

                    if(cont > 0)
                        str1 = viewHolder.tvDescription.getText().subSequence(0, cont).toString();
                    str2 = viewHolder.tvDescription.getText().subSequence(cont, cont + filterText.length()).toString();
                    if((cont + filterText.length()) < viewHolder.tvDescription.getText().length() )
                        str3 = viewHolder.tvDescription.getText().subSequence(cont + filterText.length(), viewHolder.tvDescription.getText().length()).toString();

                    String highlilightedString = str1 + "<font color = #ef6c00>" + str2 + "</font>" + str3;
                    viewHolder.tvDescription.setText(getSpannedFromHtml(highlilightedString));
                }
            }

        return rowView;
    }

    /**
     *
     */
    private int getUTF8Length (String str) {
        int count = 0;

        for(int cont = 0, len = str.length(); cont < len; cont++){
            char ch = str.charAt(cont);
            if(ch <= 0x7F){
                count++;
            } else if(ch <= 0x7FF){
                cont+=2;
            } else if(Character.isHighSurrogate(ch)){
                count+=4;
                cont++;
            } else {
                count+=3;
            }
        }

        return count;
    }

The problem is that if parkingList (position) contains for example the string "Pokémon Shop" and filterText contains "mon", indexOf method returns 9 instead of 4. The following logs are of the example if the chain evaluated is "Pokémon Shop" and the string to search inside is "mon":

D/AAA: length 1: 17
D/AAA: length 2: 17
D/AAA: parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US), 0): 9
D/AAA: parkingList.get(position).getPostTitle().toLowerCase(Locale.US).indexOf(filterText.toLowerCase(Locale.US): 9
flagg327
  • 967
  • 1
  • 10
  • 21

1 Answers1

0

It is because of your getUTF8Length(). It gives you the wrong length value. Try the code below. (You may have to adjust the regex to meet your string requirements.)

private int getUTF8Length (String str) {
    int count = 0;

    List<String> characters=new ArrayList<String>();
    Pattern pat = Pattern.compile("[\\s*\\p{L}*]\\p{M}*");
    Matcher matcher = pat.matcher(str);
    while (matcher.find()) {
        characters.add(matcher.group());
    }

    count = characters.size();
    String s = characters.toString();
    Log.d("LogDebug", s);

    return count;
}

Sample output:

D/LogDebug: [P, o, k, é, m, o, n,  , S, h, o, p]
D/LogDebug: getUTF8Length() = 12

PS. This code is based on Java Unicode String length with regex adjusted to string input "Pokémon Shop".

Community
  • 1
  • 1
user1506104
  • 6,554
  • 4
  • 71
  • 89