0

In below code, for example, if liver diseases' code exists in following columns it should be 1. Even though sometimes it is 1, sometimes it is 0 but it should be 1. For example: CASITE other columns has_livercancer C220 ......... 0

has_livercancer should be 1. Additionaly, this is not the only case. It happens all columns that I create in below.

I cannot provide any data due to privacy issues.

test_ipd <- test_ipd %>%
  group_by(ID_NO) %>%
  mutate(
    ICD9_CODE = first(ICD9_CODE),
    ICD10_CODE = first(ICD10_CODE),
    MDIAG_CODE = first(MDIAG_CODE),
    CONFIRM_MDIAG = first(CONFIRM_MDIAG),
    SDIAG_CODE = first(SDIAG_CODE),
    CASITE = first(CASITE),
    has_diabetes_byicd = ifelse(
      coalesce(str_detect(SDIAG_CODE,"^250|^E1[0-4]"),
               str_detect(ICD9_CODE, "^250|^E1[0-4]"),
               str_detect(ICD10_CODE, "^250|^E1[0-4]"),
               str_detect(MDIAG_CODE,"^250|^E1[0-4]"),
               str_detect(CONFIRM_MDIAG, "^250|^E1[0-4]")), 1, 0),
has_livercancer_byicd = ifelse(
  coalesce(str_detect(SDIAG_CODE,"^155|C220|C22.0|C22.1|C221|C222|C22.2|C22.3|C223|C224|C22.4|C22.7|C22.9|155.0"),
           str_detect(ICD9_CODE, "^155|C220|C22.0|C22.1|C221|C222|C22.2|C22.3|C223|C224|C22.4|C22.7|C22.9|155.0"),
           str_detect(ICD10_CODE,"^155|C220|C22.0|C22.1|C221|C222|C22.2|C22.3|C223|C224|C22.4|C22.7|C22.9|155.0"),
           str_detect(MDIAG_CODE,"^155|C220|C22.0|C22.1|C221|C222|C22.2|C22.3|C223|C224|C22.4|C22.7|C22.9|155.0"),
           str_detect(CONFIRM_MDIAG, "^155|C220|C22.0|C22.1|C221|C222|C22.2|C22.3|C223|C224|C22.4|C22.7|C22.9|155.0"),
           str_detect(CASITE,"155|C220|C22.0|C22.1|C221|C222|C22.2|C22.3|C223|C224|C22.4|C22.7|C22.9|155.0")), 
  1, 
  0
),
    has_chronic_hepatitis_byicd = ifelse(
      coalesce(str_detect(SDIAG_CODE,"^070\\.[23]|^B18\\.[01-2]"),
               str_detect(ICD9_CODE, "^070\\.[23]|^B18\\.[01-2]"),
               str_detect(ICD10_CODE, "^070\\.[23]|^B18\\.[01-2]"),
               str_detect(MDIAG_CODE, "^070\\.[23]|^B18\\.[01-2]"),
               str_detect(CONFIRM_MDIAG, "^070\\.[23]|^B18\\.[01-2]")), 
  1,
  0
),
    has_alcohol_liver_disease_byicd = ifelse(
      coalesce(str_detect(SDIAG_CODE, "^070\\.[23]|^B18\\.[01-2]"),
               str_detect(ICD9_CODE, "^070\\.[23]|^B18\\.[01-2]"),
               str_detect(ICD10_CODE, "^070\\.[23]|^B18\\.[01-2]"),
               str_detect(MDIAG_CODE, "^070\\.[23]|^B18\\.[01-2]"),
               str_detect(CONFIRM_MDIAG, "^571\\.0|^K70")),
      1,
      0
    ),
    has_nafld_byicd = ifelse(
      coalesce(str_detect(SDIAG_CODE,"^571\\.8|^K76\\.0"),
               str_detect(ICD9_CODE, "^571\\.8|^K76\\.0"),
               str_detect(ICD10_CODE, "^571\\.8|^K76\\.0"),
               str_detect(MDIAG_CODE, "^571\\.8|^K76\\.0"),
               str_detect(CONFIRM_MDIAG, "^571\\.8|^K76\\.0")),
      1,
      0
    ),
    has_obesity_byicd = ifelse(
      coalesce(str_detect(SDIAG_CODE, "^278\\.0|^E66"),
               str_detect(ICD9_CODE, "^278\\.0|^E66"),
               str_detect(ICD10_CODE, "^278\\.0|^E66"),
               str_detect(MDIAG_CODE, "^278\\.0|^E66"),
               str_detect(CONFIRM_MDIAG, "^278\\.0|^E66")),
      1,
      0
    ),
    has_cirrhosis_byicd = ifelse(
      coalesce(str_detect(SDIAG_CODE,"^571\\.5|K74|K74.0|K74.1|K74.2|K74.3|K74.4|K74.5|K74.6"),
               str_detect(ICD9_CODE,"^571\\.5|K74|K74.0|K74.1|K74.2|K74.3|K74.4|K74.5|K74.6"),
               str_detect(ICD10_CODE,"^571\\.5|K74|K74.0|K74.1|K74.2|K74.3|K74.4|K74.5|K74.6"),
               str_detect(MDIAG_CODE,"^571\\.5|K74|K74.0|K74.1|K74.2|K74.3|K74.4|K74.5|K74.6"),
               str_detect(CONFIRM_MDIAG, "^571\\.5|K74|K74.0|K74.1|K74.2|K74.3|K74.4|K74.5|K74.6")),
      1,
      0
    )
    )

I want consistency. If the patient's code contains that particular code, it should have that disease.

r2evans
  • 141,215
  • 6
  • 77
  • 149
  • 6
    You should provide a fake dataset for us to help. Try using case_when() for a vectorized version of if_else if you need multiple conditions. Are the results of coalesce end up as a one True or False answer? It's a long code and hard to think of a solution without code to experiment with – RYann May 14 '23 at 08:32
  • The results of coalesce end up as a one True answer. I am going to upload a fake dataset. – Deniz Taş May 14 '23 at 09:31
  • If you're wondering how best to include sample data, please use `dput`, `data.frame`, or `read.table` for sharing the data (for several reasons). See https://stackoverflow.com/q/5963269 , [mcve], and https://stackoverflow.com/tags/r/info for some discussion on how to use them. Realize that if you have concerns over proprietary or private data, we don't need "actual" data, just something that is representative enough that you can take our answer and adapt to the real data. For instance, we may not need your real patterns, just make sure we can find _something_. Cheers! – r2evans May 14 '23 at 15:06

0 Answers0