2

I am creating a polar plot with the following code.

import plotly as pt
import plotly.express as px
import pandas as pd

df = pd.read_csv('newdata.csv')

range_num = list(range(100,181, 5))
options = ['NOT_VISIBLE', 'NOT_POSSIBLE', 'ABOVE_LIMIT']
all_options = incompletes + ([str(num) for num in range_num])

dfnew = df.drop(['IDENT', 'LOCATION', 'MONTH', 'AGE'], axis = 1)

df['PT'] = dfnew.ffill(axis='columns').iloc[:, -1]
data = df.groupby('AGE')['PT'].value_counts().reset_index(name='COUNT')

fig = px.line_polar(data, r="COUNT", theta="PT", color="AGE", line_close=True,
                    color_discrete_sequence=px.colors.sequential.Plasma_r)
fig.show()

This shows a polar plot with two sets of data being plotted for AGE=YES and AGE=NO (YES and NO being the two values in the AGE column). My issue here is that the data around the polar plot is disorganised. I would like the top to start with NOT_VISIBLE then going clockwise, follow with NOT_POSSIBLE ABOVE_LIMIT and the numbers in ascending order (100, 105, 110 etc).

I am sure there is a straightforward way to do this but I am new to python and can't seem to figure it out.

I tried to do:

SC = sorted(['PT'])

But this has not worked, not sure what else to try, any help is greatly appreciated.

Python 2.7 and PANDAS 0.24.2

EDIT: Data below (Actual data contains a lot more rows).

pd.DataFrame({'IDENT': {0L: 'ID76', 1L: 'ID34', 2L: 'ID51', 3L: 'ID97', 4L: 'ID73', 5L: 'ID14', 6L: 'ID64', 7L: 'ID10', 8L: 'ID32', 9L: 'ID89', 10L: 'ID27', 11L: 'ID19', 12L: 'ID80', 13L: 'ID71', 14L: 'ID08', 15L: 'ID47', 16L: 'ID66', 17L: 'ID23'}, 'AGE': {0L: 'YES', 1L: 'NO', 2L: 'YES', 3L: 'NO', 4L: 'NO', 5L: 'NO', 6L: 'YES', 7L: 'YES', 8L: 'YES', 9L: 'NO', 10L: 'YES', 11L: 'YES', 12L: 'NO', 13L: 'YES', 14L: 'NO', 15L: 'NO', 16L: 'YES', 17L: 'YES'}, 'MONTH': {0L: 1990L, 1L: 2000L, 2L: 2010L, 3L: 2020L, 4L: 2020L, 5L: 2010L, 6L: 2000L, 7L: 1990L, 8L: 2020L, 9L: 2010L, 10L: 1990L, 11L: 2020L, 12L: 2000L, 13L: 2010L, 14L: 1990L, 15L: 2020L, 16L: 2000L, 17L: 1990L}, 'LOCATION': {0L: 'E1', 1L: 'E4', 2L: 'E2', 3L: 'E1', 4L: 'E3', 5L: 'E4', 6L: 'E3', 7L: 'E1', 8L: 'E2', 9L: 'E1', 10L: 'E2', 11L: 'E3', 12L: 'E2', 13L: 'E1', 14L: 'E4', 15L: 'E3', 16L: 'E4', 17L: 'E2'}, 'PT5': {0L: nan, 1L: 'ABOVE_LIMIT', 2L: nan, 3L: nan, 4L: nan, 5L: nan, 6L: nan, 7L: nan, 8L: '100', 9L: nan, 10L: nan, 11L: nan, 12L: 'NOT_POSSIBLE', 13L: nan, 14L: nan, 15L: nan, 16L: '165', 17L: 'NOT_POSSIBLE'}, 'PT4': {0L: '110', 1L: nan, 2L: '145', 3L: nan, 4L: 'NOT_VISIBLE', 5L: nan, 6L: '105', 7L: nan, 8L: nan, 9L: nan, 10L: 'ABOVE_LIMIT', 11L: nan, 12L: nan, 13L: nan, 14L: 'NOT_VISIBLE', 15L: nan, 16L: nan, 17L: '130'}, 'PT3': {0L: nan, 1L: 'NOT_POSSIBLE', 2L: nan, 3L: nan, 4L: nan, 5L: nan, 6L: nan, 7L: 'NOT_POSSIBLE', 8L: nan, 9L: nan, 10L: nan, 11L: 'NOT_VISIBLE', 12L: nan, 13L: nan, 14L: nan, 15L: nan, 16L: nan, 17L: nan}, 'PT2': {0L: nan, 1L: nan, 2L: nan, 3L: '160', 4L: nan, 5L: nan, 6L: nan, 7L: '180', 8L: 'NOT_VISIBLE', 9L: nan, 10L: '160', 11L: nan, 12L: nan, 13L: nan, 14L: nan, 15L: nan, 16L: nan, 17L: 'ABOVE_LIMIT'}, 'PT1': {0L: nan, 1L: nan, 2L: nan, 3L: nan, 4L: 'ABOVE_LIMIT', 5L: nan, 6L: nan, 7L: nan, 8L: nan, 9L: nan, 10L: nan, 11L: nan, 12L: '115', 13L: nan, 14L: nan, 15L: nan, 16L: nan, 17L: nan}})


+-------+----------+-----+-------+------+------+-------+------+--------------+
| IDENT | LOCATION | AGE | MONTH | PT1  | PT2  | PT3   | PT4  | PT5          |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID76  | E1       | YES | 1990  |      |      |       | 110  |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID34  | E4       | NO  | 2000  |      |      | NOT_POSSIBLE | ABOVE_LIMIT  |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID51  | E2       | YES | 2010  |      |      |       | 145  |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID97  | E1       | NO  | 2020  |      | 160  |       |      |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID73  | E3       | NO  | 2020  | ABOVE_LIMIT |       | NOT_VISIBLE         |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID14  | E4       | NO  | 2010  |      |      |       |      |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID64  | E3       | YES | 2000  |      |      |       | 105  |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID10  | E1       | YES | 1990  |      | 180  | NOT_POSSIBLE |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID32  | E2       | YES | 2020  |      | NOT_VISIBLE  |      | 100          |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID89  | E1       | NO  | 2010  |      |      |       |      |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID27  | E2       | YES | 1990  |      | 160  |       | ABOVE_LIMIT         |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID19  | E3       | YES | 2020  |      |      | NOT_VISIBLE  |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID80  | E2       | NO  | 2000  | 115  |      |       |      | NOT_POSSIBLE |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID71  | E1       | YES | 2010  |      |      |       |      |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID08  | E4       | NO  | 1990  |      |      |       | NOT_VISIBLE         |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID47  | E3       | NO  | 2020  |      |      |       |      |              |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID66  | E4       | YES | 2000  |      |      |       |      | 165          |
+-------+----------+-----+-------+------+------+-------+------+--------------+
| ID23  | E2       | YES | 1990  |      | ABOVE_LIMIT  | 130  | NOT_POSSIBLE |
+-------+----------+-----+-------+------+--------------+------+--------------+
Gone4good
  • 51
  • 3
  • Please share a sample of your data as explained [here](https://stackoverflow.com/questions/63163251/pandas-how-to-easily-share-a-sample-dataframe-using-df-to-dict/63163254#63163254) in order to make your code reproducible. – vestland Sep 05 '21 at 07:04
  • 1
    Thank you, initial question has been edited. – Gone4good Sep 05 '21 at 09:10

1 Answers1

0
  • not sure if this will work with python 2. I really recommend your upgrade to python 3. Python 2 has been end of life since 1-Jan-2020
  • you need to sort your data in order for axis to work way you want
    1. have used python sorted() to define ordered list of PT
    2. have used pandas categoricals to enable sorting using custom order defined in 1.
    3. have done an optional step of making data a product of valid combinations of AGE and PT. without this order with be set by first trace then missing values appended (rather than ordered)
import plotly as pt
import plotly.express as px
import pandas as pd
import numpy as np

nan = np.nan
df = pd.DataFrame({'IDENT': {0: 'ID76', 1: 'ID34', 2: 'ID51', 3: 'ID97', 4: 'ID73', 5: 'ID14', 6: 'ID64', 7: 'ID10', 8: 'ID32', 9: 'ID89', 10: 'ID27', 11: 'ID19', 12: 'ID80', 13: 'ID71', 14: 'ID08', 15: 'ID47', 16: 'ID66', 17: 'ID23'}, 'AGE': {0: 'YES', 1: 'NO', 2: 'YES', 3: 'NO', 4: 'NO', 5: 'NO', 6: 'YES', 7: 'YES', 8: 'YES', 9: 'NO', 10: 'YES', 11: 'YES', 12: 'NO', 13: 'YES', 14: 'NO', 15: 'NO', 16: 'YES', 17: 'YES'}, 'MONTH': {0: 1990, 1: 2000, 2: 2010, 3: 2020, 4: 2020, 5: 2010, 6: 2000, 7: 1990, 8: 2020, 9: 2010, 10: 1990, 11: 2020, 12: 2000, 13: 2010, 14: 1990, 15: 2020, 16: 2000, 17: 1990}, 'LOCATION': {0: 'E1', 1: 'E4', 2: 'E2', 3: 'E1', 4: 'E3', 5: 'E4', 6: 'E3', 7: 'E1', 8: 'E2', 9: 'E1', 10: 'E2', 11: 'E3', 12: 'E2', 13: 'E1', 14: 'E4', 15: 'E3', 16: 'E4', 17: 'E2'}, 'PT5': {0: nan, 1: 'ABOVE_LIMIT', 2: nan, 3: nan, 4: nan, 5: nan, 6: nan, 7: nan, 8: '100', 9: nan, 10: nan, 11: nan, 12: 'NOT_POSSIBLE', 13: nan, 14: nan, 15: nan, 16: '165', 17: 'NOT_POSSIBLE'}, 'PT4': {0: '110', 1: nan, 2: '145', 3: nan, 4: 'NOT_VISIBLE', 5: nan, 6: '105', 7: nan, 8: nan, 9: nan, 10: 'ABOVE_LIMIT', 11: nan, 12: nan, 13: nan, 14: 'NOT_VISIBLE', 15: nan, 16: nan, 17: '130'}, 'PT3': {0: nan, 1: 'NOT_POSSIBLE', 2: nan, 3: nan, 4: nan, 5: nan, 6: nan, 7: 'NOT_POSSIBLE', 8: nan, 9: nan, 10: nan, 11: 'NOT_VISIBLE', 12: nan, 13: nan, 14: nan, 15: nan, 16: nan, 17: nan}, 'PT2': {0: nan, 1: nan, 2: nan, 3: '160', 4: nan, 5: nan, 6: nan, 7: '180', 8: 'NOT_VISIBLE', 9: nan, 10: '160', 11: nan, 12: nan, 13: nan, 14: nan, 15: nan, 16: nan, 17: 'ABOVE_LIMIT'}, 'PT1': {0: nan, 1: nan, 2: nan, 3: nan, 4: 'ABOVE_LIMIT', 5: nan, 6: nan, 7: nan, 8: nan, 9: nan, 10: nan, 11: nan, 12: '115', 13: nan, 14: nan, 15: nan, 16: nan, 17: nan}})

# env setup
incompletes = []

range_num = list(range(100,181, 5))
options = ['NOT_VISIBLE', 'NOT_POSSIBLE', 'ABOVE_LIMIT']
all_options = incompletes + ([str(num) for num in range_num])

dfnew = df.drop(['IDENT', 'LOCATION', 'MONTH', 'AGE'], axis = 1)

df['PT'] = dfnew.ffill(axis='columns').iloc[:, -1]
data = df.groupby('AGE')['PT'].value_counts().reset_index(name='COUNT')

# define custom ordered list of unique PT values
orderpt = sorted(
    data["PT"].unique().tolist(),
    key=lambda x: options.index(x) if x in options else int(x)
)

# optional - make sure all combinations of AGE & PT are in data set
data = data.merge(
    pd.MultiIndex.from_product(
        [["YES", "NO"], pd.Categorical(orderpt, ordered=True)], names=["AGE", "PT"]
    )
    .to_frame()
    .reset_index(drop=True),
    on=["AGE", "PT"],
    how="right"
).fillna(0)

# redefine PT as a categorical that uses custom order
data["PT"] = pd.Categorical(data["PT"], categories=orderpt, ordered=True)

fig = px.line_polar(data.sort_values(["AGE","PT"]), r="COUNT", theta="PT", color="AGE", line_close=True,
                    color_discrete_sequence=px.colors.sequential.Plasma_r)
fig.show()

enter image description here

Rob Raymond
  • 29,118
  • 3
  • 14
  • 30