-1

I was writing, in C, a program that gives recommendations from google with the help of another python script. An interesting thing happened when I finally compiled the program and tested it out and as result the output of the python script came out with extra spaces. I searched it on the net but couldn't find anything so here I am.

C code:

#include <stdio.h>
#include <stdlib.h>


void search(){
    system("clear");

    char search[200];
    char a;
    char lsearch[800];
    char esearch[2000];

    printf("\n*Suggestions for your Search*\n\n\n\n");
    printf("> ");
    system ("/bin/stty raw");
    int i = 0;
    while(i > -1) {
        a = getchar();
        if(a == ' ') {
            system("clear");
            search[i] = '!';
            fflush(stdout);
            snprintf(lsearch, 800, "cd python-bash-things && python3 rec.py %s",search);
            fflush(stdout);
            system(lsearch);

            search[i] = ' ';
            printf("\n\n\n> %s",search);
            i++;
            continue;
        }
        else if(a == 3) 
            return;
        else if(a == 13) {
            snprintf(esearch, 2000, "firefox -search '%s'",search);     
            break;
        }
        search[i] = a;
        i++;
    }
    system("clear");
    system ("/bin/stty cooked");
    system(esearch);
}

int main(){
    search();
    return 0;
}

Python code:

#!/usr/bin/env python3

import json
import re
import urllib.parse
import urllib.request
import sys
import os
import datetime
import gzip
import subprocess as sp
import html

SEARCH_ENGINE = 'google'
BROWSER = 'firefox'
TERMINAL = ['gnome-terminal', '--']
CONFIG = {
    'BROWSER_PATH' : {
        'chrome' : ['google-chrome-stable'],
        'firefox' : ['firefox'],
        'chromium' : ['chromium-browser'],
        'brave' : ['brave-browser'],
        'lynx' : TERMINAL + ['lynx']
    },
    'USER_AGENT' : {
        'chrome' : 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
        'firefox' : 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0',
        'chromium' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/76.0.3809.100 Chrome/76.0.3809.100 Safari/537.36',
        'brave' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
        'lynx' : 'Lynx/2.8.9rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.1.1d'
    },
    'SEARCH_ENGINE_NAME' : {
        'google' : 'Google',
        'duckduckgo' : 'DuckDuckGo'
    },
    'SEARCH_URL' : {
        'google' : 'https://www.google.com/search?q=',
        'duckduckgo' : 'https://duckduckgo.com/?q='
    },
    'SUGGESTION_URL' : {
        'google' : 'https://www.google.com/complete/search?',
        'duckduckgo' : 'https://duckduckgo.com/ac/?'
    }
}

def cleanhtml(txt):
    return re.sub(r'<.*?>', '', txt)

def fetch_suggestions(search_string):
    if SEARCH_ENGINE == 'google':
        r = {
            'q' : search_string,
            'cp' : '11',
            'client' : 'psy-ab',
            'xssi' : 't',
            'gs_ri' : 'gws-wiz',
            'hl' : 'en-IT',
            'authuser' : '0'
        }
        url = CONFIG['SUGGESTION_URL'][SEARCH_ENGINE] + urllib.parse.urlencode(r)
        headers = {
            'sec-fetch-mode' : 'cors',
            'dnt' : '1',
            'accept-encoding' : 'gzip',
            'accept-language' : 'en-US;q=0.9,en;q=0.8',
            'pragma' : 'no-cache',
            'user-agent' : CONFIG['USER_AGENT'][BROWSER],
            'accept' : '*/*',
            'cache-control' : 'no-cache',
            'authority' : 'www.google.com',
            'referer' : 'https://www.google.com/',
            'sec-fetch-site' : 'same-origin'
        }
        req = urllib.request.Request(url, headers=headers, method='GET')

        reply_data = gzip.decompress(urllib.request.urlopen(req).read()).split(b'\n')[1]
        reply_data = json.loads(reply_data)
        return [ cleanhtml(res[0]).strip() for res in reply_data[0] ]
    else:  
        if search_string.startswith('!'):
            bang_search = True
            search_string = search_string.lstrip('!')
        else:
            bang_search = False
        r = {
            'q' : search_string,
            'callback' : 'autocompleteCallback',
            'kl' : 'wt-wt',
            '_' : str(int((datetime.datetime.now().timestamp())*1000))
        }
        url = CONFIG['SUGGESTION_URL'][SEARCH_ENGINE] + urllib.parse.urlencode(r)
        if bang_search:
            url = url.replace('?q=', '?q=!')
        headers = {
            'pragma' : 'no-cache',
            'dnt' : '1',
            'accept-encoding' : 'gzip',
            'accept-language' : 'en-US;q=0.9,en;q=0.8',
            'user-agent' : CONFIG['USER_AGENT'][BROWSER],
            'sec-fetch-mode' : 'no-cors',
            'accept' : '*/*',
            'cache-control' : 'no-cache',
            'authority' : 'duckduckgo.com',
            'referer' : 'https://duckduckgo.com/',
            'sec-fetch-site' : 'same-origin',
        }
        req = urllib.request.Request(url, headers=headers, method='GET')
        reply_data = gzip.decompress(urllib.request.urlopen(req).read()).decode('utf8')
        reply_data = json.loads(re.match(r'autocompleteCallback\((.*)\);', reply_data).group(1))
        return [ cleanhtml(res['phrase']).strip() for res in reply_data ]

def main():
    search_string = html.unescape((' '.join(sys.argv[1:])).strip())

    if search_string.endswith('!'):
        
        search_string = search_string.rstrip('!').strip()
        results = fetch_suggestions(search_string)
        for r in results:
            print(html.unescape(r))
    else:
        url = CONFIG['SEARCH_URL'][SEARCH_ENGINE] + urllib.parse.quote_plus(search_string)
        sp.Popen(CONFIG['BROWSER_PATH'][BROWSER] + [url], stdout=sp.DEVNULL, stderr=sp.DEVNULL, shell=False)

def validate_config(c):
    if type(c) != dict:
        print('Configuration file must be a JSON object', file=sys.stderr)
        sys.exit(1)
    for k in ('SEARCH_ENGINE', 'BROWSER', 'TERMINAL'):
        if k not in c:
            print('Configuration file is missing %s' % k, file=sys.stderr)
            sys.exit(1)
    for k in ('SEARCH_ENGINE', 'BROWSER'):
        if type(c[k]) != str:
            print('Configuration Error: The value of %s must be a string' % k, file=sys.stderr)
    if type(c['TERMINAL']) != list:
        print('Configuration Error: The value of TERMINAL must be a list of strings', file=sys.stderr)
        sys.exit(1)
    for x in c['TERMINAL']:
        if type(x) != str:
            print('Configuration Error: The value of TERMINAL must be a list of strings', file=sys.stderr)
            sys.exit(1)

if __name__ == "__main__":
    try:
        fname = os.path.expanduser('~/.config/rofi-web-search/config.json')
        if os.path.exists(fname):
            try:
                config = json.loads(open(fname, 'r').read())
            except json.JSONDecodeError:
                print('Configuration file %s is not a valid JSON' % fname, file=sys.stderr)
                sys.exit(1)
            validate_config(config)
            SEARCH_ENGINE = config['SEARCH_ENGINE']
            BROWSER = config['BROWSER']
            TERMINAL = config['TERMINAL']
        else:
            config = {
                    'SEARCH_ENGINE' : SEARCH_ENGINE,
                    'BROWSER' : BROWSER,
                    'TERMINAL' : TERMINAL
                }
            os.makedirs(os.path.dirname(fname))
            f = open(fname, 'w')
            f.write(json.dumps(config, indent=4))
            f.write('\n')
            f.close()
        main()
    except:
        sys.exit(1)

output after typing mario:

marion cotillard
                mario
                     mario balotelli
                                    mario badescu
                                                 mario götze
                                                            mario gomez
                                                                       mario oyna
                                                                                 mario mandzukic
          mariobet
                  mario lemina



                              > mario 

All answers are appreciated

Edit: An interesting thing is that the terminal demonstrates the same behavior after running the program.

LtWorf
  • 7,286
  • 6
  • 31
  • 45
  • 2
    Can you either provide a [mre] or an explanation of where the spaces are, why you don't want them, where you think they're coming from, etc? – Random Davis Nov 02 '20 at 18:31
  • You are passing `char search[200];` to string handling functions, but the character sequence is *unterminated* by any `'\0'`. I only noticed that after formatting the code I copied. – Weather Vane Nov 02 '20 at 18:33
  • @WeatherVane yep this explains a lot – Omer Erbilgin Nov 02 '20 at 18:59

1 Answers1

0

The problem code is:

char search[200];
...
search[i] = '!';
... and other assignments to search
snprintf(esearch, 2000, "firefox -search '%s'",search);//search has never been `\0` terminated

By calling a string function with a string that is not null terminated is undefined behavior.

And, from another post:

if you have a string that is not null-terminated, you cannot use the C string manipulation routines on it. You can't use strlen, strcpy, [snprintf] or strcat . Basically, any function that takes a char* but no separate length is not usable. (emphasis mine)

To address this one issue, simply initialize the buffer at the time of its declaration:

char search[200] = {0};

Then limit the content written to search to 199 characters, (search[198]) so as not to overwrite the null terminator which should be preserved at position search[199].

ryyker
  • 22,849
  • 3
  • 43
  • 87
  • I understand the undefined behavior but the solution you offer isn't solving the problem. I tried to give '\0' before using snprintf() but it didn't work either. – Omer Erbilgin Nov 02 '20 at 19:07
  • @OmerErbilgin - Although null termination is a real issue, there are _other_ issues in your code. One of the suggestions in the comments under your question was to create and post a [mcve]. I suggest that you take some time, follow the suggestion how how to do that in the link, and post another question with that new code. That will help those who are trying to help understand the issues – ryyker Nov 02 '20 at 19:16
  • I did! Here is my new question: https://stackoverflow.com/questions/64652498/unwanted-spaces-coming-from-c – Omer Erbilgin Nov 02 '20 at 20:00