0 to 1 normalisation in R whilst preserving column 1 and headers

Question

I'm trying to normalise my dataset to the range 0 to 1. Each column should be normalised independently. I want to output a new data frame that preserves column one (not normalised) and all the original column headers.

This is a subset of my data:

SEC <- structure(list(ml = c(0, 0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 
0.21, 0.24, 0.27), A1_280 = c(0.542, 0.322, 0.286, 0.261, 0.19, 
-0.258, -0.272, -0.046, -0.005, 0.138), A1_420 = c(-0.06, -0.303, 
-0.192, -0.381, 0.15, -0.268, -0.576, -0.016, -0.541, -0.41), 
    A2_280 = c(9.877, 27.637, 3.513, -0.882, -1.92, -1.251, -2.284, 
    -2.129, -3.131, -2.913), A2_420 = c(-0.445, 13.337, 1.075, 
    -1.402, -2.156, -2.263, -1.988, -2.105, -2.082, -2.61), A3_280 = c(8.782, 
    59.775, 56.769, 22.842, 9.086, 3.466, 2.256, 1.341, 0.946, 
    0.754), A3_420 = c(0.54, 30.736, 29.073, 12.277, 4.413, 1.77, 
    1.123, 0.488, 0.634, -0.011), B1_280 = c(14.95, 61.441, 37.189, 
    10.928, 4.316, 2.292, 0.757, 0.995, 0.997, -0.07), B1_420 = c(2.455, 
    30.966, 18.61, 4.779, 1.511, 0.74, 0.267, 0.533, 0.149, -0.551
    ), B2_280 = c(-0.288, -0.304, -0.006, -0.158, -0.284, -0.131, 
    -0.443, -0.081, -0.387, -0.04), B2_420 = c(-0.074, -0.256, 
    0.022, 0.104, -0.287, -0.139, -0.015, 0.1, -0.021, -0.146
    ), B3_280 = c(0.084, 0.043, 0.061, 0.032, 0.038, 0.072, 0.03, 
    0.128, 0.077, 0.098), B3_420 = c(-0.056, 0.095, 0.05, -0.015, 
    -0.106, 0.106, -0.017, -0.001, 0.036, 0.139), AB1_280 = c(1.599, 
    1.908, 0.735, 0.49, 0.708, 0.109, 0.702, -0.487, -0.009, 
    -0.196), AB1_420 = c(0.199, 1.218, 0.469, 0.564, 0.498, -0.2, 
    -0.322, 0.294, 0.367, -0.281), AB2_280 = c(-1.46, -1.2, -1.977, 
    -2.736, -2.087, -2.144, -2.246, -2.84, -2.304, -3.106), AB2_420 = c(-1, 
    -0.468, -0.459, -0.345, -1.145, -0.924, -1.622, -0.869, -1.028, 
    -1.183), AB3_280 = c(0.306, 1.392, -2.248, -3.247, -3.715, 
    -2.699, -3.896, -2.744, -3.653, -3.387), AB3_420 = c(-0.899, 
    0.817, -1.41, -1.162, -1.258, -1.409, -1.7, -1.309, -1.946, 
    -1.658), AB4_280 = c(6.847, 55.721, 51.163, 21.166, 8.441, 
    3.105, 2.631, 1.265, -0.184, 0.529), AB4_420 = c(-0.861, 
    27.465, 25.185, 10.767, 4.136, 1.414, 0.545, -0.098, 0.242, 
    -0.509)), row.names = c(NA, -10L), spec = structure(list(
    cols = list(ml = structure(list(), class = c("collector_double", 
    "collector")), A1_280 = structure(list(), class = c("collector_double", 
    "collector")), A1_420 = structure(list(), class = c("collector_double", 
    "collector")), A2_280 = structure(list(), class = c("collector_double", 
    "collector")), A2_420 = structure(list(), class = c("collector_double", 
    "collector")), A3_280 = structure(list(), class = c("collector_double", 
    "collector")), A3_420 = structure(list(), class = c("collector_double", 
    "collector")), B1_280 = structure(list(), class = c("collector_double", 
    "collector")), B1_420 = structure(list(), class = c("collector_double", 
    "collector")), B2_280 = structure(list(), class = c("collector_double", 
    "collector")), B2_420 = structure(list(), class = c("collector_double", 
    "collector")), B3_280 = structure(list(), class = c("collector_double", 
    "collector")), B3_420 = structure(list(), class = c("collector_double", 
    "collector")), AB1_280 = structure(list(), class = c("collector_double", 
    "collector")), AB1_420 = structure(list(), class = c("collector_double", 
    "collector")), AB2_280 = structure(list(), class = c("collector_double", 
    "collector")), AB2_420 = structure(list(), class = c("collector_double", 
    "collector")), AB3_280 = structure(list(), class = c("collector_double", 
    "collector")), AB3_420 = structure(list(), class = c("collector_double", 
    "collector")), AB4_280 = structure(list(), class = c("collector_double", 
    "collector")), AB4_420 = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x5606ec29c390>, class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"))

Here is my code so far:

# Builds a new data frame holding 'ml' unchanged and every other column of SEC
# min-max rescaled to [0, 1], one column at a time.
# NOTE(review): the last expression evaluated in the body is the `colnames<-`
# assignment, so a call does not return normalized_SEC itself.
normalize_0_to_1_columnwise <- function(SEC) {
  normalized_SEC <- data.frame(ml = SEC$ml)  # Copy the first column as it is
  
  # Apply normalization for each column (excluding the first column 'ml')
  for (col in names(SEC)[-1]) {
    # (x - min) / (max - min); min()/max() are called without na.rm, so an NA
    # anywhere in a column makes that whole column NA.
    normalized_SEC[[col]] <- (SEC[[col]] - min(SEC[[col]])) / (max(SEC[[col]]) - min(SEC[[col]]))
  }
  
  # Preserve the original column headers
  # (the columns were already created under these names by [[col]] <- above)
  colnames(normalized_SEC)[-1] <- colnames(SEC)[-1]

}

# Output normalized dataframe
# NOTE(review): normalized_SEC is only assigned inside the function body above,
# and nothing in this snippet calls the function or creates a top-level object
# with that name.
normalized_SEC

This works for preserving the first column 'ml' and all the column headers, but all the 'values' in the dataframe are NAs. Where have I gone wrong?

I know there are other similar questions answered but I can't get them to work for my data and required output.

score 2 · Accepted Answer · answered Jul 25 '23 at 15:58

In tidyverse you could do:

library(tidyverse)

# Rescale every column except `ml`; across() overwrites the selected columns
# in place, so the original column names are kept.
sec_scaled <- SEC |>
  mutate(across(-ml, scales::rescale))

sec_scaled

# A tibble: 10 × 21
      ml A1_280 A1_420  A2_280 A2_420  A3_280 A3_420 B1_280 B1_420 B2_280 B2_420 B3_280
   <dbl>  <dbl>  <dbl>   <dbl>  <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1  0    1      0.711  0.423   0.136  0.136   0.0179 0.244  0.0954  0.355 0.545  0.551 
 2  0.03 0.730  0.376  1       1      1       1      1      1       0.318 0.0793 0.133 
 3  0.06 0.686  0.529  0.216   0.231  0.949   0.946  0.606  0.608   1     0.790  0.316 
 4  0.09 0.655  0.269  0.0731  0.0758 0.374   0.400  0.179  0.169   0.652 1      0.0204
 5  0.12 0.568  1      0.0394  0.0285 0.141   0.144  0.0713 0.0654  0.364 0      0.0816
 6  0.15 0.0172 0.424  0.0611  0.0218 0.0459  0.0579 0.0384 0.0410  0.714 0.379  0.429 
 7  0.18 0      0      0.0275  0.0390 0.0254  0.0369 0.0134 0.0260  0     0.696  0     
 8  0.21 0.278  0.771  0.0326  0.0317 0.00995 0.0162 0.0173 0.0344  0.828 0.990  1     
 9  0.24 0.328  0.0482 0       0.0331 0.00325 0.0210 0.0173 0.0222  0.128 0.680  0.480 
10  0.27 0.504  0.229  0.00709 0      0       0      0      0       0.922 0.361  0.694 
# ℹ 9 more variables: B3_420 <dbl>, AB1_280 <dbl>, AB1_420 <dbl>, AB2_280 <dbl>,
#   AB2_420 <dbl>, AB3_280 <dbl>, AB3_420 <dbl>, AB4_280 <dbl>, AB4_420 <dbl>

Include `.names = "{.col}_rescaled"` in `across()` to keep the original columns. — Seth, Jul 25 '23 at 16:02
@Seth OP wants to keep the original column names with scaled values, you cannot use `.names` parameter — Onyambu, Jul 25 '23 at 16:04
I misinterpreted as wanting a new rescaled column for each while retaining the originals, thanks for clarifying! — Seth, Jul 25 '23 at 16:05

score 1 · Answer 2 · answered Jul 25 '23 at 15:42

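A function returns the value of the last expression it evaluates. The last line inside your function is the assignment `colnames(normalized_SEC)[-1] <- colnames(SEC)[-1]`, so the function returns (invisibly) the column names rather than the data frame. You have normalized_SEC after the function's closing }, but it needs to be inside the function definition as the last line to return properly. If I make that fix, your function works on your sample data just fine.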

But if it still doesn't work on your full data and you are getting NA outputs, you probably have NA values in the input and you need to add na.rm = TRUE arguments to your min() and max() calls.

That said, I can see from your tbl_df class that you're using some tidyverse functions. A nice dplyr approach would be this:

library(dplyr)

# Min-max rescale every column except `ml`, ignoring NAs when finding the
# range; across() keeps the original column names.
result <- SEC |>
  mutate(across(-ml, \(x) {
    rng <- range(x, na.rm = TRUE)
    (x - rng[1]) / (rng[2] - rng[1])
  }))

# # A tibble: 10 × 21
#       ml A1_280 A1_420  A2_280 A2_420  A3_280 A3_420 B1_280 B1_420 B2_280 B2_420 B3_280 B3_420 AB1_280 AB1_420
#    <dbl>  <dbl>  <dbl>   <dbl>  <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
#  1  0    1      0.711  0.423   0.136  0.136   0.0179 0.244  0.0954  0.355 0.545  0.551   0.204   0.871  0.338 
#  2  0.03 0.730  0.376  1       1      1       1      1      1       0.318 0.0793 0.133   0.820   1      1     
#  3  0.06 0.686  0.529  0.216   0.231  0.949   0.946  0.606  0.608   1     0.790  0.316   0.637   0.510  0.514 
#  4  0.09 0.655  0.269  0.0731  0.0758 0.374   0.400  0.179  0.169   0.652 1      0.0204  0.371   0.408  0.575 
#  5  0.12 0.568  1      0.0394  0.0285 0.141   0.144  0.0713 0.0654  0.364 0      0.0816  0       0.499  0.532 
#  6  0.15 0.0172 0.424  0.0611  0.0218 0.0459  0.0579 0.0384 0.0410  0.714 0.379  0.429   0.865   0.249  0.0792
#  7  0.18 0      0      0.0275  0.0390 0.0254  0.0369 0.0134 0.0260  0     0.696  0       0.363   0.496  0     
#  8  0.21 0.278  0.771  0.0326  0.0317 0.00995 0.0162 0.0173 0.0344  0.828 0.990  1       0.429   0      0.4   
#  9  0.24 0.328  0.0482 0       0.0331 0.00325 0.0210 0.0173 0.0222  0.128 0.680  0.480   0.580   0.200  0.447 
# 10  0.27 0.504  0.229  0.00709 0      0       0      0      0       0.922 0.361  0.694   1       0.122  0.0266
# # ℹ 6 more variables: AB2_280 <dbl>, AB2_420 <dbl>, AB3_280 <dbl>, AB3_420 <dbl>, AB4_280 <dbl>, AB4_420 <dbl>

Using this sample data (dput() from question with pointer and spec_* stuff removed):

SEC = structure(list(ml = c(0, 0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 
0.21, 0.24, 0.27), A1_280 = c(0.542, 0.322, 0.286, 0.261, 0.19, 
-0.258, -0.272, -0.046, -0.005, 0.138), A1_420 = c(-0.06, -0.303, 
-0.192, -0.381, 0.15, -0.268, -0.576, -0.016, -0.541, -0.41), 
    A2_280 = c(9.877, 27.637, 3.513, -0.882, -1.92, -1.251, -2.284, 
    -2.129, -3.131, -2.913), A2_420 = c(-0.445, 13.337, 1.075, 
    -1.402, -2.156, -2.263, -1.988, -2.105, -2.082, -2.61), A3_280 = c(8.782, 
    59.775, 56.769, 22.842, 9.086, 3.466, 2.256, 1.341, 0.946, 
    0.754), A3_420 = c(0.54, 30.736, 29.073, 12.277, 4.413, 1.77, 
    1.123, 0.488, 0.634, -0.011), B1_280 = c(14.95, 61.441, 37.189, 
    10.928, 4.316, 2.292, 0.757, 0.995, 0.997, -0.07), B1_420 = c(2.455, 
    30.966, 18.61, 4.779, 1.511, 0.74, 0.267, 0.533, 0.149, -0.551
    ), B2_280 = c(-0.288, -0.304, -0.006, -0.158, -0.284, -0.131, 
    -0.443, -0.081, -0.387, -0.04), B2_420 = c(-0.074, -0.256, 
    0.022, 0.104, -0.287, -0.139, -0.015, 0.1, -0.021, -0.146
    ), B3_280 = c(0.084, 0.043, 0.061, 0.032, 0.038, 0.072, 0.03, 
    0.128, 0.077, 0.098), B3_420 = c(-0.056, 0.095, 0.05, -0.015, 
    -0.106, 0.106, -0.017, -0.001, 0.036, 0.139), AB1_280 = c(1.599, 
    1.908, 0.735, 0.49, 0.708, 0.109, 0.702, -0.487, -0.009, 
    -0.196), AB1_420 = c(0.199, 1.218, 0.469, 0.564, 0.498, -0.2, 
    -0.322, 0.294, 0.367, -0.281), AB2_280 = c(-1.46, -1.2, -1.977, 
    -2.736, -2.087, -2.144, -2.246, -2.84, -2.304, -3.106), AB2_420 = c(-1, 
    -0.468, -0.459, -0.345, -1.145, -0.924, -1.622, -0.869, -1.028, 
    -1.183), AB3_280 = c(0.306, 1.392, -2.248, -3.247, -3.715, 
    -2.699, -3.896, -2.744, -3.653, -3.387), AB3_420 = c(-0.899, 
    0.817, -1.41, -1.162, -1.258, -1.409, -1.7, -1.309, -1.946, 
    -1.658), AB4_280 = c(6.847, 55.721, 51.163, 21.166, 8.441, 
    3.105, 2.631, 1.265, -0.184, 0.529), AB4_420 = c(-0.861, 
    27.465, 25.185, 10.767, 4.136, 1.414, 0.545, -0.098, 0.242, 
    -0.509)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", 
"data.frame"))

score 1 · Answer 3 · answered Jul 25 '23 at 15:49

Forgive me for converting your data from a tibble into a data.frame as I was having problems recreating the structure. The following is base R, so it should apply to tibbles as well but will return a data.frame.

First, the data:

SECDF <- data.frame(ml = c(0, 0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 0.21, 0.24, 0.27),
                    A1_280 = c(0.542, 0.322, 0.286, 0.261, 0.19, -0.258, -0.272, -0.046, -0.005, 0.138),
                    A1_420 = c(-0.06, -0.303, -0.192, -0.381, 0.15, -0.268, -0.576, -0.016, -0.541, -0.41),
                    A2_280 = c(9.877, 27.637, 3.513, -0.882, -1.92, -1.251, -2.284, -2.129, -3.131, -2.913),
                    A2_420 = c(-0.445, 13.337, 1.075,  -1.402, -2.156, -2.263, -1.988, -2.105, -2.082, -2.61),
                    A3_280 = c(8.782,  59.775, 56.769, 22.842, 9.086, 3.466, 2.256, 1.341, 0.946, 0.754),
                    A3_420 = c(0.54, 30.736, 29.073, 12.277, 4.413, 1.77, 1.123, 0.488, 0.634, -0.011),
                    B1_280 = c(14.95, 61.441, 37.189, 10.928, 4.316, 2.292, 0.757, 0.995, 0.997, -0.07),
                    B1_420 = c(2.455, 30.966, 18.61, 4.779, 1.511, 0.74, 0.267, 0.533, 0.149, -0.551),
                    B2_280 = c(-0.288, -0.304, -0.006, -0.158, -0.284, -0.131, -0.443, -0.081, -0.387, -0.04),
                    B2_420 = c(-0.074, -0.256, 0.022, 0.104, -0.287, -0.139, -0.015, 0.1, -0.021, -0.146),
                    B3_280 = c(0.084, 0.043, 0.061, 0.032, 0.038, 0.072, 0.03, 0.128, 0.077, 0.098),
                    B3_420 = c(-0.056, 0.095, 0.05, -0.015, -0.106, 0.106, -0.017, -0.001, 0.036, 0.139),
                    AB1_280 = c(1.599, 1.908, 0.735, 0.49, 0.708, 0.109, 0.702, -0.487, -0.009, -0.196),
                    AB1_420 = c(0.199, 1.218, 0.469, 0.564, 0.498, -0.2,  -0.322, 0.294, 0.367, -0.281),
                    AB2_280 = c(-1.46, -1.2, -1.977, -2.736, -2.087, -2.144, -2.246, -2.84, -2.304, -3.106),
                    AB2_420 = c(-1, -0.468, -0.459, -0.345, -1.145, -0.924, -1.622, -0.869, -1.028, -1.183),
                    AB3_280 = c(0.306, 1.392, -2.248, -3.247, -3.715, -2.699, -3.896, -2.744, -3.653, -3.387),
                    AB3_420 = c(-0.899, 0.817, -1.41, -1.162, -1.258, -1.409, -1.7, -1.309, -1.946, -1.658),
                    AB4_280 = c(6.847, 55.721, 51.163, 21.166, 8.441, 3.105, 2.631, 1.265, -0.184, 0.529),
                    AB4_420 = c(-0.861, 27.465, 25.185, 10.767, 4.136, 1.414, 0.545, -0.098, 0.242, -0.509))

SECDF
     ml A1_280 A1_420 A2_280 A2_420 A3_280 A3_420 B1_280 B1_420 B2_280 B2_420 B3_280 B3_420 AB1_280 AB1_420 AB2_280 AB2_420 AB3_280 AB3_420 AB4_280 AB4_420
1  0.00  0.542 -0.060  9.877 -0.445  8.782  0.540 14.950  2.455 -0.288 -0.074  0.084 -0.056   1.599   0.199  -1.460  -1.000   0.306  -0.899   6.847  -0.861
2  0.03  0.322 -0.303 27.637 13.337 59.775 30.736 61.441 30.966 -0.304 -0.256  0.043  0.095   1.908   1.218  -1.200  -0.468   1.392   0.817  55.721  27.465
3  0.06  0.286 -0.192  3.513  1.075 56.769 29.073 37.189 18.610 -0.006  0.022  0.061  0.050   0.735   0.469  -1.977  -0.459  -2.248  -1.410  51.163  25.185
4  0.09  0.261 -0.381 -0.882 -1.402 22.842 12.277 10.928  4.779 -0.158  0.104  0.032 -0.015   0.490   0.564  -2.736  -0.345  -3.247  -1.162  21.166  10.767
5  0.12  0.190  0.150 -1.920 -2.156  9.086  4.413  4.316  1.511 -0.284 -0.287  0.038 -0.106   0.708   0.498  -2.087  -1.145  -3.715  -1.258   8.441   4.136
6  0.15 -0.258 -0.268 -1.251 -2.263  3.466  1.770  2.292  0.740 -0.131 -0.139  0.072  0.106   0.109  -0.200  -2.144  -0.924  -2.699  -1.409   3.105   1.414
7  0.18 -0.272 -0.576 -2.284 -1.988  2.256  1.123  0.757  0.267 -0.443 -0.015  0.030 -0.017   0.702  -0.322  -2.246  -1.622  -3.896  -1.700   2.631   0.545
8  0.21 -0.046 -0.016 -2.129 -2.105  1.341  0.488  0.995  0.533 -0.081  0.100  0.128 -0.001  -0.487   0.294  -2.840  -0.869  -2.744  -1.309   1.265  -0.098
9  0.24 -0.005 -0.541 -3.131 -2.082  0.946  0.634  0.997  0.149 -0.387 -0.021  0.077  0.036  -0.009   0.367  -2.304  -1.028  -3.653  -1.946  -0.184   0.242
10 0.27  0.138 -0.410 -2.913 -2.610  0.754 -0.011 -0.070 -0.551 -0.040 -0.146  0.098  0.139  -0.196  -0.281  -3.106  -1.183  -3.387  -1.658   0.529  -0.509

Now, create a (vectorized) function for the scaling and apply it to the data frame columns. The base version of scale could also be used if passed the proper center and scale values, but it's simple enough to roll our own:

# Min-max rescale a numeric vector to [0, 1].
#
# x:     numeric vector.
# na.rm: if TRUE, NAs are ignored when finding the range and stay NA in the
#        result. The default FALSE keeps the original behaviour, where a
#        single NA in x makes the whole result NA.
# Returns a numeric vector the same length as x. A constant x has zero range,
# so the result is NaN (0 / 0), exactly as before.
scale01 <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)  # c(min, max), computed once
  (x - rng[1L]) / (rng[2L] - rng[1L])
}

Now simply:

# Keep column 1 as is and rescale every other column with scale01.
# cbind() is given a bare vector (SECDF[, 1L]) and the matrix returned by
# apply(), so normSECDF is a numeric matrix rather than a data.frame, and its
# first column has no name (note the blank header and [i,] row labels in the
# printout that follows).
normSECDF <- cbind(SECDF[, 1L], apply(SECDF[, -1L], 2L, scale01))
normSECDF
               A1_280     A1_420      A2_280     A2_420      A3_280     A3_420     B1_280     B1_420    B2_280     B2_420     B3_280    B3_420   AB1_280    AB1_420
 [1,] 0.00 1.00000000 0.71074380 0.422776911 0.13576221 0.136019383 0.01792045 0.24418397 0.09537710 0.3546911 0.54475703 0.55102041 0.2040816 0.8709812 0.33831169
 [2,] 0.03 0.72972973 0.37603306 1.000000000 1.00000000 1.000000000 1.00000000 1.00000000 1.00000000 0.3180778 0.07928389 0.13265306 0.8204082 1.0000000 1.00000000
 [3,] 0.06 0.68550369 0.52892562 0.215938638 0.23107795 0.949068975 0.94591342 0.60572906 0.60795761 1.0000000 0.79028133 0.31632653 0.6367347 0.5102296 0.51363636
 [4,] 0.09 0.65479115 0.26859504 0.073095424 0.07575092 0.374239677 0.39964875 0.17879729 0.16911508 0.6521739 1.00000000 0.02040816 0.3714286 0.4079332 0.57532468
 [5,] 0.12 0.56756757 1.00000000 0.039359074 0.02846930 0.141170092 0.14388396 0.07130432 0.06542501 0.3638444 0.00000000 0.08163265 0.0000000 0.4989562 0.53246753
 [6,] 0.15 0.01719902 0.42424242 0.061102444 0.02175958 0.045949747 0.05792435 0.03839964 0.04096202 0.7139588 0.37851662 0.42857143 0.8653061 0.2488518 0.07922078
 [7,] 0.18 0.00000000 0.00000000 0.027528601 0.03900420 0.025448569 0.03688165 0.01344475 0.02595425 0.0000000 0.69565217 0.00000000 0.3632653 0.4964509 0.00000000
 [8,] 0.21 0.27764128 0.77134986 0.032566303 0.03166740 0.009945613 0.01622923 0.01731398 0.03439414 0.8283753 0.98976982 1.00000000 0.4285714 0.0000000 0.40000000
 [9,] 0.24 0.32800983 0.04820937 0.000000000 0.03310968 0.003253079 0.02097766 0.01734649 0.02221024 0.1281465 0.68030691 0.47959184 0.5795918 0.1995825 0.44740260
[10,] 0.27 0.50368550 0.22865014 0.007085283 0.00000000 0.000000000 0.00000000 0.00000000 0.00000000 0.9221968 0.36061381 0.69387755 1.0000000 0.1215031 0.02662338
        AB2_280   AB2_420    AB3_280    AB3_420    AB4_280    AB4_420
 [1,] 0.8635887 0.4870791 0.79462935 0.37893594 0.12576693 0.00000000
 [2,] 1.0000000 0.9036805 1.00000000 1.00000000 1.00000000 1.00000000
 [3,] 0.5923400 0.9107283 0.31164902 0.19399204 0.91846883 0.91950858
 [4,] 0.1941238 1.0000000 0.12273071 0.28374955 0.38189786 0.41050625
 [5,] 0.5346275 0.3735317 0.03422844 0.24900471 0.15427958 0.17641037
 [6,] 0.5047219 0.5465936 0.22636157 0.19435396 0.05883195 0.08031491
 [7,] 0.4512067 0.0000000 0.00000000 0.08903366 0.05035328 0.04963638
 [8,] 0.1395593 0.5896633 0.21785174 0.23054651 0.02591897 0.02693638
 [9,] 0.4207765 0.4651527 0.04595310 0.00000000 0.00000000 0.03893949
[10,] 0.0000000 0.3437745 0.09625567 0.10423453 0.01275378 0.01242675

> apply(normSECDF, 2L, min)
         A1_280  A1_420  A2_280  A2_420  A3_280  A3_420  B1_280  B1_420  B2_280  B2_420  B3_280  B3_420 AB1_280 AB1_420 AB2_280 AB2_420 AB3_280 AB3_420 AB4_280 AB4_420 
      0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0 
> apply(normSECDF, 2L, max)
         A1_280  A1_420  A2_280  A2_420  A3_280  A3_420  B1_280  B1_420  B2_280  B2_420  B3_280  B3_420 AB1_280 AB1_420 AB2_280 AB2_420 AB3_280 AB3_420 AB4_280 AB4_420 
   0.27    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00    1.00

I'd recommend using `lapply` not `apply` on data frames. It's simpler (you don't have to specify the margin), and it's often more efficient and less risky because `apply` converts data frames to matrices which is often an unnecessary step and can sometimes have unintended consequences if columns have different classes. And it's a simple switch to `normSECDF <- cbind(SECDF[, 1L], lapply(SECDF[, -1L], scale01))`. Though in either case (your answer or my comment) because the first argument of `cbind` is a vector the result is a `matrix` not a `data.frame`. Using `cbind.data.frame` would be safer. — Gregor Thomas, Jul 26 '23 at 14:28
Good points. Personally, I try to use `vapply` instead of s/lapply when I can due to type-safety and speed. — Avraham, Jul 28 '23 at 06:28
Yes, `vapply` would also be an improvement here. `apply` is a poor choice. — Gregor Thomas, Jul 28 '23 at 14:13

0 to 1 normalisation in R whilst preserving column 1 and headers

3 Answers