1

I used the code below to draw a smooth line through my point data. It works very well when the points are located very close to each other. However, the line does not fit the points well when the distances between points become larger, as in the small example I provide below.

Is there any way I can make the line fit the points better? Over-fitting will not be a problem here.

enter image description here

# Scatter plot of `res` against `per`, points coloured by stimulus, with a
# single black smoothed trend line drawn over all points (the smoothing
# method is left at ggplot2's default).
ggplot(data = A, mapping = aes(x = per, y = res)) +
  # Points first so that the trend line is drawn on top of them.
  geom_point(aes(colour = stimulus, group = stimulus), size = 2) +
  geom_smooth(colour = "black", size = 1, se = FALSE) +
  # Legend entries: "CS+" followed by S1-S3 and S5-S7.
  scale_colour_hue(
    name = "Stimulus",
    labels = c("CS+", paste0("S", c(1:3, 5:7))),
    c = 90,
    l = 60
  ) +
  # Axis ranges are fixed; breaks every 2 units on x and every 25 on y.
  scale_x_continuous(breaks = seq(4, 16, by = 2), limits = c(4, 16.5)) +
  scale_y_continuous(breaks = c(25, 50, 75, 100), limits = c(0, 110)) +
  theme_bw()



# Example data: 105 observations with columns `per`, `res` and `stimulus`.
# The stimulus labels repeat in a fixed 7-element cycle, so the numeric
# values are laid out seven per row: each row is one pass through
# S1, S2, S3, CS+, S5, S6, S7 (in that column order).
A <- data.frame(
  per = c(
    9.787, 7.391, 11.257, 10.58, 10.072, 9.642, 13.454,
    9.704, 9.62, 12.353, 10.149, 9.681, 11.103, 9.974,
    9.931, 9.328, 8.135, 10.99, 8.449, 11.371, 10.841,
    8.187, 5.987, 9.789, 9.861, 7.878, 12.533, 12.794,
    8.264, 9.282, 7.415, 10.988, 13.301, 12.696, 11.482,
    6.785, 8.63, 8.672, 9.289, 8.517, 11.315, 10.893,
    7.304, 8.89, 8.072, 9.161, 12.279, 13.266, 12.646,
    9.109, 9.859, 10.569, 9.981, 12.473, 10.366, 16.287,
    9.894, 10.867, 8.898, 12.732, 10.344, 11.928, 11.725,
    8.171, 7.667, 9.351, 10.864, 10.357, 11.506, 13.395,
    10.978, 7.244, 8.221, 11.825, 12.547, 13.402, 10.305,
    9.824, 6.412, 9.524, 10.514, 12.126, 13.783, 13.062,
    10.625, 7.173, 5.383, 9.987, 10.777, 12.744, 14.79,
    7.298, 7.681, 8.602, 8.275, 12.511, 12.432, 12.139,
    8.788, 9.857, 7.744, 9.407, 13.293, 11.807, 12.287
  ),
  res = c(
    49.525, 1.9, 10.585, 38.938, 87.768, 37.864, 0,
    49.024, 40.369, 0, 97.016, 38.772, 10.217, 68.665,
    60.624, 21.941, 1.639, 16.456, 8.373, 9.802, 24.535,
    0, 1.049, 53.35, 56.914, 2.688, 0, 2.852,
    2.401, 11.839, 0, 21.825, 2.68, 0, 4.3,
    0.833, 4.776, 0, 23.914, 3.923, 14.089, 23.14,
    0, 9.079, 3.609, 11.635, 8.983, 0, 1.285,
    13.458, 59.474, 44.223, 68.614, 4.666, 60.251, 1.67,
    58.239, 25.435, 9.943, 4.988, 67.225, 4.214, 2.323,
    5.527, 0, 15.271, 24.359, 65.788, 6.947, 0,
    18.026, 2.845, 7.309, 3.212, 0, 2.144, 72.576,
    57.335, 0.673, 29.013, 44.91, 2.906, 0.555, 0,
    33.105, 1.551, 0, 74.016, 20.846, 0, 0.478,
    2.25, 5.906, 3.127, 0, 0, 8.642, 0,
    7.704, 60.859, 0, 24.876, 0.091, 3.735, 9.104
  ),
  stimulus = rep(c("S1", "S2", "S3", "CS+", "S5", "S6", "S7"), times = 15),
  # Keep `stimulus` as a character column on every R version.
  stringsAsFactors = FALSE
)
Kenny
  • 361
  • 1
  • 8

1 Answers1

0

If overfitting is not a problem, you can try a function that describes your distribution well (using MLE to find the optimal parameter values for the distribution, if required), e.g.,

# Same scatter plot as in the question, but the black trend line is a loess
# smooth fitted with the formula y ~ exp(-x) instead of the default smoother.
ggplot(data = A, mapping = aes(x = per, y = res)) +
  # Points first so that the trend line is drawn on top of them.
  geom_point(aes(colour = stimulus, group = stimulus), size = 2) +
  geom_smooth(
    method = "loess",
    formula = y ~ exp(-x),
    colour = "black",
    size = 1,
    se = FALSE
  ) +
  # Legend entries: "CS+" followed by S1-S3 and S5-S7.
  scale_colour_hue(
    name = "Stimulus",
    labels = c("CS+", paste0("S", c(1:3, 5:7))),
    c = 90,
    l = 60
  ) +
  # Axis ranges are fixed; breaks every 2 units on x and every 25 on y.
  scale_x_continuous(breaks = seq(4, 16, by = 2), limits = c(4, 16.5)) +
  scale_y_continuous(breaks = c(25, 50, 75, 100), limits = c(0, 110)) +
  theme_bw()

gives the following fit:

enter image description here

Sandipan Dey
  • 21,482
  • 2
  • 51
  • 63