You can use the locale module for this: set the locale to a suitable region,
then convert the numbers with locale.atof:
In [6]: from locale import atof
In [7]: import locale
In [8]: locale.setlocale(locale.LC_ALL, 'de_DE')
Out[8]: 'de_DE'
In [9]: mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250']
In [10]: sorted(mylist,key=lambda x: atof(x.split()[0]))
Out[10]: ['3,250', '12 text', '23 text', '1.482 text', '23.130']
If some elements can be plain text, wrap the conversion in a try/except. What you want to happen to those strings determines what the except branch does; for now I just return float("inf") so that the strings are pushed to the end:
from locale import atof
import locale
# Switch the process-wide locale to German so that atof() reads "," as the
# decimal mark and "." as the thousands separator.
locale.setlocale(locale.LC_ALL, 'de_DE')
# Sample data: numeric prefixes in German notation plus one text-only entry.
mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250', "foo"]
def atof_try(x):
    """Return a sort key for *x* based on its leading number.

    The first whitespace-separated token of *x* is converted with
    ``locale.atof`` (so the currently active locale decides how "." and ","
    are interpreted).

    Returns ``float("inf")`` when there is no token at all (empty or
    whitespace-only string) or the token is not a number, so that such
    strings sort after every numeric entry.
    """
    try:
        return atof(x.split()[0])
    except (ValueError, IndexError):
        # ValueError: the token is not numeric.
        # IndexError: x.split() was empty, i.e. x is "" or only whitespace.
        return float("inf")
So if we add "foo" to mylist:
In [35]: mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250', "foo"]
In [36]: sorted(mylist, key=atof_try)
Out[36]: ['3,250', '12 text', '23 text', '1.482 text', '23.130', 'foo']
OK, apart from the position of the empty string, this matches your expected output: a regular sort puts the empty string ahead of the other non-numeric strings rather than at the end. We can change that if it really matters:
from locale import atof
import locale
# Switch the process-wide locale to German so that atof() reads "," as the
# decimal mark and "." as the thousands separator.
locale.setlocale(locale.LC_ALL, 'de_DE')
import re
# Unique sentinel returned by the converters below when a value cannot be
# turned into a number; it is always compared by identity ("is").
wrong_type = object()
def atof_try(x):
    """Convert the first whitespace-separated token of *x* with ``atof``.

    Returns the converted float, or the module-level ``wrong_type`` sentinel
    when the token is not a number in the active locale or when *x* has no
    token at all (empty or whitespace-only string).
    """
    try:
        return atof(x.split()[0])
    except (ValueError, IndexError):
        # ValueError: the token is not numeric.
        # IndexError: x.split() was empty, i.e. x is "" or only whitespace.
        return wrong_type
def atof_pre(x, patt=re.compile(r"^\d+")):
    """Return a numeric sort key for *x*, or ``wrong_type`` if it has none.

    ``atof_try`` is attempted first. If that yields the ``wrong_type``
    sentinel, the run of digits at the very start of *x* (if any) is
    converted with ``int``. When neither approach produces a number the
    ``wrong_type`` sentinel is returned.

    *patt* is the pre-compiled pattern used for the digit fallback; it is
    compiled once, when the function is defined.
    """
    try:
        _atof = atof_try(x)
        if _atof is not wrong_type:
            return _atof
        leading_digits = patt.search(x)
        # Check the "no digits" case explicitly instead of relying on
        # None.group() raising AttributeError.
        if leading_digits is None:
            return wrong_type
        return int(leading_digits.group())
    except (ValueError, IndexError, AttributeError):
        return wrong_type
def merge_types(l, out):
    """Lazily split *l* into numerically sortable and other elements.

    Elements for which ``atof_pre`` returns something other than the
    ``wrong_type`` sentinel are yielded; every other element is appended to
    *out*. Because this is a generator, *out* is only filled as the
    generator is consumed.
    """
    for item in l:
        if atof_pre(item) is wrong_type:
            out.append(item)
            continue
        yield item
The output:
In [3]: temp = []
In [4]: mylist[:] = sorted(merge_types(mylist, temp), key=atof_pre) + sorted(temp)
In [5]: print(mylist)
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', '', 'apples4', 'doo', 'foo']
Here the logic is put in a class, the odd list is sorted in place, and the lists are joined with extend instead of concatenation. You can pass in lambdas to specify what to sort on, and rev
determines whether the sort is reversed or not:
from locale import atof
import re
class WeirdSort:
    """Sort a list in place, numerically where possible.

    Elements whose key (as extracted by *which*) can be read as a number are
    sorted by that number. All remaining elements are sorted separately with
    *other* as the key and placed after the numeric ones (before them when
    *rev* is true).

    The work happens in ``__init__``; *in_list* itself is modified.

    Parameters:
        in_list: the list to sort in place.
        rev: if true, both groups are sorted in reverse and the non-numeric
            group comes first.
        which: callable that extracts, from an element, the string that
            should be parsed as a number. Defaults to the element itself.
        other: key function used to sort the non-numeric elements
            (``None`` means their natural ordering).
    """

    def __init__(self, in_list, rev=False, which=None, other=None):
        # holds all elements that don't match the pattern we want.
        self.temp = []
        self.in_list = in_list
        # unique sentinel meaning "could not be converted"; compared with "is".
        self.wrong_type = object()
        # key extractor; fall back to the identity so that omitting `which`
        # does not fail with "'NoneType' object is not callable".
        self.which = which if which is not None else (lambda x: x)
        # split data and sort the numeric part of in_list.
        self.in_list[:] = sorted(self.separate_types(), key=self.atof_pre, reverse=rev)
        # sort odd elements.
        self.temp.sort(key=other, reverse=rev)
        # merge both lists, keeping the caller's list object.
        if rev:
            self.temp.extend(self.in_list)
            self.in_list[:] = self.temp
        else:
            self.in_list.extend(self.temp)
        del self.temp

    def atof_try(self, x):
        """Try to cast the key of *x* using the active locale.

        Returns ``self.wrong_type`` when the key is not a number or when
        extracting it fails with ``IndexError`` (e.g. splitting an empty
        string and taking the first token).
        """
        try:
            return atof(self.which(x))
        except (ValueError, IndexError):
            return self.wrong_type

    def atof_pre(self, x, patt=re.compile(r"^\d+")):
        """Return the numeric sort key of *x*, or ``self.wrong_type``.

        Try to cast using atof initially; on failure, try to pull digits
        from the front of the extracted key and cast them to int.
        On failure, returns the wrong_type object, which means "x" will be
        sorted with the non-numeric elements.
        """
        try:
            _atof = self.atof_try(x)
            if _atof is not self.wrong_type:
                return _atof
            # Search the extracted key rather than x itself, so that
            # elements which are not strings (e.g. sub-lists) work too.
            leading_digits = patt.search(self.which(x))
            if leading_digits is None:
                return self.wrong_type
            return int(leading_digits.group())
        except (ValueError, IndexError, AttributeError, TypeError):
            # TypeError: the key is not a string, so it cannot be searched.
            return self.wrong_type

    def separate_types(self):
        """Separate elements that can be cast to a number
        using the atof/int/re logic from those that cannot.

        Castable elements are yielded; anything else is appended to
        ``self.temp`` and sorted separately.
        """
        for ele in self.in_list:
            if self.atof_pre(ele) is not self.wrong_type:
                yield ele
            else:
                self.temp.append(ele)
The empty string is also now at the end.
So for the input:
import locale
# German number formatting: "," is the decimal mark, "." groups thousands.
locale.setlocale(locale.LC_ALL, 'de_DE')

# Flat list of strings: the key is the first whitespace-separated token, and
# non-numeric strings are ordered with the empty string last.
mylist = ['2 another test', '4,32', '801', '4apples', 'foo', '4,32 hi', 'apples4', '', '13.300 a test', '2apples', 'doo', '12 today']
flat_lambda1, flat_lambda2 = lambda x: x.split()[0], lambda x: (x == "", x)
WeirdSort(mylist, rev=True, which=flat_lambda1, other=flat_lambda2)
print(mylist)
# Reuse the same key functions for the ascending sort.
WeirdSort(mylist, rev=False, which=flat_lambda1, other=flat_lambda2)
print(mylist)

# List of [text, number] pairs: same idea, keyed on the first item of each pair.
sublst_lambda1, sublist_lambda2 = lambda x: x[0].split()[0], lambda x: (x[0] == "", x[0])
mylist = [['3,25', 1], ['12 text', 2], ["", 5], ['23 text', 3]]
WeirdSort(mylist, rev=True, which=sublst_lambda1, other=sublist_lambda2)
print(mylist)
WeirdSort(mylist, rev=False, which=sublst_lambda1, other=sublist_lambda2)
print(mylist)
You get:
['', 'foo', 'doo', 'apples4', '13.300 a test', '801', '12 today', '4,32', '4,32 hi', '4apples', '2 another test', '2apples']
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', 'apples4', 'doo', 'foo', '']
[['', 5], ['23 text', 3], ['12 text', 2], ['3,25', 1]]
[['3,25', 1], ['12 text', 2], ['23 text', 3], ['', 5]]