PageRenderTime 30ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/R/residuals.lrm.s

http://github.com/harrelfe/rms
Assembly | 377 lines | 364 code | 13 blank | 0 comment | 56 complexity | ddff5237c60fbe79c72e6a1567f6c505 MD5 | raw file
  1. residuals.lrm <-
  2. function(object,
  3. type=c("li.shepherd", "ordinary","score","score.binary","pearson",
  4. "deviance","pseudo.dep","partial",
  5. "dfbeta","dfbetas","dffit","dffits","hat","gof","lp1"),
  6. pl=FALSE, xlim, ylim, kint=1, label.curves=TRUE,
  7. which, ...)
  8. {
  9. gotsupsmu <- FALSE
  10. type <- match.arg(type)
  11. dopl <- (is.logical(pl) && pl) || is.character(pl)
  12. ylabpr <- NULL # y-axis label for partial residuals
  13. k <- object$non.slopes
  14. L <- object$linear.predictors
  15. isorm <- inherits(object, 'orm')
  16. trans <- object$trans
  17. cumprob <- if(length(trans)) trans$cumprob else plogis
  18. if(length(L)==0) stop('you did not use linear.predictors=TRUE for the fit')
  19. if(kint < 1 | kint > k) stop(paste('kint must be from 1-', k, sep=''))
  20. cof <- object$coefficients
  21. ordone <- type %in% c('li.shepherd','partial','gof','score','score.binary')
  22. ## residuals explicitly handled for ordinal model
  23. if(ordone && !missing(kint))
  24. stop('may not specify kint for li.shepherd, partial, score, score.binary, or gof')
  25. if(isorm) L <- L - cof[attr(L, 'intercepts')] + cof[1]
  26. if(k > 1 && kint !=1 && !ordone) L <- L - cof[1] + cof[kint]
  27. P <- cumprob(L)
  28. if(length(Y <- object$y) == 0) stop("you did not specify y=TRUE in the fit")
  29. rnam <- names(Y)
  30. cnam <- names(cof)
  31. if(!is.factor(Y)) Y <- factor(Y)
  32. lev <- levels(Y)
  33. lev2 <- names(cof)[1:k]
  34. Y <- unclass(Y) - 1
  35. if(!ordone && k > 1) Y <- Y >= kint
  36. if(k > 1 && missing(kint) && !ordone)
  37. warning(paste('using first intercept and ',
  38. lev2[kint],
  39. ' to compute residuals or test GOF',
  40. sep=''))
  41. if(type=="gof") {
  42. if(length(X <- object$x)==0)
  43. stop("you did not use x=TRUE in the fit")
  44. stats <- matrix(NA, nrow=k, ncol=5,
  45. dimnames=list(if(k > 1) lev2,
  46. c("Sum of squared errors", "Expected value|H0",
  47. "SD", "Z", "P")))
  48. X <- cbind(1,X)
  49. for(j in 1:k) {
  50. y <- Y >= j
  51. p <- cumprob(L - cof[1] + cof[j])
  52. sse <- sum((y - p)^2)
  53. wt <- p * (1 - p)
  54. d <- 1 - 2 * p
  55. z <- lm.wfit(X, d, wt, method='qr')
  56. ## res <- summary(lm.wfit(X, d, wt, method="qr"))$residuals 11Apr02
  57. res <- z$residuals * sqrt(z$weights)
  58. sd <- sqrt(sum(res^2))
  59. ev <- sum(wt)
  60. z <- (sse-ev)/sd
  61. P <- 2 * pnorm(- abs(z))
  62. stats[j,] <- c(sse, ev, sd, z, P)
  63. }
  64. return(drop(stats))
  65. }
  66. naa <- object$na.action
  67. if(type=="ordinary") return(naresid(naa, Y - cumprob(L)))
  68. if(type %in% c('score','score.binary','partial')) {
  69. nc <- length(cof)
  70. if(missing(which)) which <- if(type == 'score') 1:nc else 1:(nc - k) else
  71. if(type=='score') which <- k + which
  72. }
  73. if(type=='score' || type=='score.binary')
  74. plotit <- function(w, ylim, xlab, ylab, lev=names(w)) {
  75. statsum <- function(x) {
  76. n <- length(x)
  77. xbar <- sum(x) / n
  78. if(n < 2) {low <- hi <- NA} else {
  79. se <- 1.959964 * sqrt(sum((x - xbar)^2) / (n - 1) / n)
  80. low <- xbar - se; hi <- xbar + se
  81. }
  82. c(mean=xbar, lower=low, upper=hi)
  83. }
  84. k <- length(w)
  85. w <- lapply(w, statsum)
  86. plot(c(1,k), c(0,0), xlab=xlab, ylab=ylab,
  87. ylim=if(length(ylim)==0) range(unlist(w)) else ylim,
  88. type='n', axes=FALSE)
  89. mgp.axis(2)
  90. mgp.axis(1, at=1:k, labels=lev)
  91. abline(h=0, lty=2, lwd=1)
  92. ii <- 0
  93. for(ww in w) {
  94. ii <- ii+1
  95. points(ii, ww[1])
  96. errbar(ii, ww[1], ww[3], ww[2], add=TRUE)
  97. }
  98. }
  99. if(type=='score.binary') {
  100. if(k==1) stop('score.binary only applies to ordinal models')
  101. if(!dopl) stop('score.binary only applies if you are plotting')
  102. if(!length(X <- unclass(object$x)))
  103. stop('you did not specify x=TRUE for the fit')
  104. xname <- dimnames(X)[[2]]
  105. yname <- as.character(formula(object))[2]
  106. for(i in which) {
  107. xi <- X[,i]
  108. r <- vector('list',k)
  109. names(r) <- lev[-1]
  110. for(j in 1:k)
  111. r[[j]] <- xi * ((Y >= j) - cumprob(L - cof[1] + cof[j]))
  112. if(pl!='boxplot') plotit(r, ylim=if(missing(ylim)) NULL else ylim,
  113. xlab=yname, ylab=xname[i])
  114. else
  115. boxplot(r, varwidth=TRUE, notch=TRUE, err=-1,
  116. ylim=if(missing(ylim)) quantile(unlist(r), c(.1, .9))
  117. else ylim, ...)
  118. title(xlab=yname, ylab=xname[i])
  119. }
  120. invisible()
  121. }
  122. if(type=="score") {
  123. if(!length(X <- unclass(object$x)))
  124. stop("you did not specify x=TRUE for the fit")
  125. if(isorm && object$family != 'logistic')
  126. stop('score residuals not yet implemented for orm with non-logistic family')
  127. if(k == 1) return(naresid(naa, cbind(1, X) * (Y - P))) # only one intercept
  128. z <- function(i, k, L, coef) cumprob(coef[pmin(pmax(i, 1), k)] + L)
  129. ## Mainly need the pmax - 0 subscript will drop element from vector
  130. ## z$k <- k; z$L <- L-cof[1]; z$coef <- cof
  131. formals(z) <- list(i=NA, k=k, L=L-cof[1], coef=cof)
  132. ## set defaults in fctn def'n
  133. u <- matrix(NA, nrow=length(L), ncol=length(which),
  134. dimnames=list(names(L), names(cof)[which]))
  135. pq <- function(x) x * (1 - x)
  136. ## Compute probabilities of being in observed cells
  137. pc <- ifelse(Y==0, 1 - z(1), ifelse(Y == k, z(k), z(Y) - z(Y + 1)) )
  138. xname <- dimnames(X)[[2]]
  139. yname <- as.character(formula(object))[2]
  140. ii <- 0
  141. for(i in which) {
  142. ii <- ii + 1
  143. di <- if(i <= k) ifelse(Y==0, if(i==1) 1 else 0, Y==i) else X[,i - k]
  144. di1 <- if(i <= k) ifelse(Y==0 | Y==k, 0, (Y + 1) == i) else X[,i - k]
  145. ui <- ifelse(Y == 0, -z(1) * di,
  146. ifelse(Y == k, (1 - z(k)) * di,
  147. (pq(z(Y)) * di - pq(z(Y + 1)) * di1) / pc ) )
  148. u[,ii] <- ui
  149. if(dopl && i > k) {
  150. if(pl=='boxplot') {
  151. boxplot(split(ui, Y), varwidth=TRUE, notch=TRUE,
  152. names=lev, err=-1,
  153. ylim=if(missing(ylim)) quantile(ui, c(.1, .9))
  154. else ylim, ...)
  155. title(xlab=yname, ylab=paste('Score Residual for', xname[i - k]))
  156. }
  157. else plotit(split(ui,Y), ylim=if(missing(ylim)) NULL else ylim,
  158. lev=lev,
  159. xlab=yname,
  160. ylab=paste('Score Residual for', xname[i-k]))
  161. }
  162. }
  163. return(if(dopl) invisible(naresid(naa, u)) else naresid(naa, u))
  164. }
  165. if(type == "li.shepherd") {
  166. if(length(X <- object$x)==0)
  167. stop("you did not use x=TRUE in the fit")
  168. N <- length(Y)
  169. px <- 1 - cumprob(outer(cof[1:k],
  170. as.vector(X %*% cof[- (1:k)]), "+"))
  171. low.x = rbind(0, px)[cbind(Y + 1L, 1:N)]
  172. hi.x = 1 - rbind(px, 1)[cbind(Y + 1L, 1:N)]
  173. return(low.x - hi.x)
  174. }
  175. if(type=="pearson") return(naresid(naa, (Y - P) / sqrt(P * (1 - P))))
  176. if(type=="deviance") {
  177. r <- ifelse(Y==0,-sqrt(2 * abs(log(1 - P))), sqrt(2 * abs(logb(P))))
  178. return(naresid(naa, r))
  179. }
  180. if(type=="pseudo.dep") {
  181. r <- L + (Y - P) / P / (1 - P)
  182. return(naresid(naa, r))
  183. }
  184. if(type=="partial") {
  185. if(!length(X <- unclass(object$x)))
  186. stop("you did not specify x=TRUE in the fit")
  187. cof.int <- cof[1 : k]
  188. cof <- cof[- (1 : k)]
  189. if(!missing(which)) {
  190. X <- X[, which, drop=FALSE]
  191. cof <- cof[which]
  192. }
  193. atx <- attributes(X)
  194. dx <- atx$dim
  195. if(k==1) r <- X * matrix(cof, nrow=dx[1], ncol=dx[2], byrow=TRUE) +
  196. (Y - P) / P / (1 - P)
  197. else {
  198. r <- X * matrix(cof, nrow=dx[1], ncol=dx[2], byrow=TRUE)
  199. R <- array(NA, dim=c(dx, k), dimnames=c(atx$dimnames, list(lev2)))
  200. for(j in 1:k) {
  201. y <- Y >= j
  202. p <- cumprob(L - cof.int[1] + cof.int[j])
  203. R[,,j] <- r + (y - p) / p / (1 - p)
  204. }
  205. }
  206. if(dopl) {
  207. xname <- atx$dimnames[[2]]; X <- unclass(X)
  208. for(i in 1:dx[2]) {
  209. if(pl == "loess") {
  210. if(k > 1)
  211. stop('pl="loess" not implemented for ordinal response')
  212. xi <- X[,i]
  213. ri <- r[,i]
  214. ddf <- data.frame(xi,ri)
  215. plot(xi, ri, xlim=if(missing(xlim)) range(xi) else xlim,
  216. ylim=if(missing(ylim)) range(ri) else ylim,
  217. xlab=xname[i], ylab=ylabpr)
  218. lines(lowess(xi,ri))
  219. }
  220. else if(k==1) {
  221. xi <- X[,i]; ri <- r[,i]
  222. plot(xi, ri, xlab=xname[i], ylab="Partial Residual",
  223. xlim=if(missing(xlim))range(xi) else xlim,
  224. ylim=if(missing(ylim))range(ri) else ylim)
  225. if(pl=="lowess") lines(lowess(xi, ri, iter=0, ...))
  226. else lines(supsmu(xi, ri, ...))
  227. }
  228. else {
  229. xi <- X[,i]
  230. ri <- R[,i,,drop=TRUE]
  231. smoothed <- vector('list',k)
  232. ymin <- 1e30; ymax <- -1e30
  233. for(j in 1:k) {
  234. w <- if(pl!='supsmu')
  235. lowess(xi, ri[,j], iter=0, ...)
  236. else
  237. supsmu(xi, ri[,j], ...)
  238. ymin <- min(ymin,w$y)
  239. ymax <- max(ymax,w$y)
  240. smoothed[[j]] <- w
  241. }
  242. plot(0, 0, xlab=xname[i], ylab=ylabpr,
  243. xlim=if(missing(xlim))range(xi) else xlim,
  244. ylim=if(missing(ylim))range(pretty(c(ymin,ymax)))
  245. else ylim, type='n')
  246. us <- par('usr')[1:2]
  247. for(j in 1:k) {
  248. w <- smoothed[[j]]
  249. lines(w, lty=j)
  250. if(is.character(label.curves)) {
  251. xcoord <- us[1]+(us[2]-us[1])*j/(k+1)
  252. text(xcoord, approx(w, xout=xcoord, rule=2, ties=mean)$y,
  253. lev2[j])
  254. }
  255. }
  256. if(is.list(label.curves) ||
  257. (is.logical(label.curves) && label.curves))
  258. labcurve(smoothed, lev2, opts=label.curves)
  259. }
  260. }
  261. return(invisible(if(k==1)naresid(naa,r) else R))
  262. }
  263. return(if(k==1) naresid(naa,r) else R)
  264. }
  265. ##if(type=='convexity') {
  266. ## if(missing(p.convexity)) {
  267. ## pq <- quantile(P, c(.01, .99))
  268. ## if(pq[1]==pq[2]) pq <- range(P)
  269. ## p.convexity <- seq(pq[1], pq[2], length=100)
  270. ## }
  271. ## lcp <- length(p.convexity)
  272. ## cp <- single(lcp)
  273. ## for(i in 1:lcp) {
  274. ## p <- p.convexity[i]
  275. ## cp[i] <- mean(((p/P)^Y)*(((1-p)/(1-P))^(1-Y)))
  276. ## }
  277. ## if(pl) plot(p.convexity, cp, xlab='p', ylab='C(p)', type='l')
  278. ## return(invisible(cp))
  279. ##}
  280. if(type %in% c("dfbeta", "dfbetas", "dffit", "dffits", "hat", "lp1")) {
  281. if(length(X <- unclass(object$x)) == 0)
  282. stop("you did not specify x=TRUE for the fit")
  283. v <- P * (1 - P)
  284. g <- lm(L + (Y - P) / v ~ X, weights=v)
  285. infl <- lm.influence(g)
  286. dfb <- coef(infl) ## R already computed differences
  287. dimnames(dfb) <- list(rnam, c(cnam[kint], cnam[-(1:k)]))
  288. if(type=="dfbeta") return(naresid(naa, dfb))
  289. if(type=="dfbetas") {
  290. ## i <- c(kint, (k+1):length(cof))
  291. vv <- vcov(object, intercepts=1)
  292. return(naresid(naa, sweep(dfb, 2, diag(vv)^.5,"/")))
  293. ## was diag(object$var[i, i])
  294. }
  295. if(type=="hat") return(naresid(naa, infl$hat))
  296. if(type=="dffit") return(naresid(naa,
  297. (infl$hat * g$residuals)/(1 - infl$hat)))
  298. if(type=="dffits") return(naresid(naa,
  299. (infl$hat^.5)*g$residuals/(infl$sigma * (1 - infl$hat)) ))
  300. if(type=="lp1") return(naresid(naa,
  301. L - (infl$hat * g$residuals) / (1 - infl$hat)))
  302. }
  303. }
  304. residuals.orm <-
  305. function(object,
  306. type=c("li.shepherd", "ordinary","score","score.binary","pearson",
  307. "deviance","pseudo.dep","partial",
  308. "dfbeta","dfbetas","dffit","dffits","hat","gof","lp1"),
  309. pl=FALSE, xlim, ylim, kint, label.curves=TRUE,
  310. which, ...)
  311. {
  312. type <- match.arg(type)
  313. args <- list(object=object, type=type, pl=pl,
  314. label.curves=label.curves, ...)
  315. if(!missing(kint)) args$kint <- kint
  316. if(!missing(xlim)) args$xlim <- xlim
  317. if(!missing(ylim)) args$ylim <- ylim
  318. if(!missing(which)) args$which <- which
  319. do.call('residuals.lrm', args)
  320. }
  321. plot.lrm.partial <- function(..., labels, center=FALSE, ylim)
  322. {
  323. dotlist <- list(...)
  324. nfit <- length(dotlist)
  325. if(missing(labels)) labels <- (as.character(sys.call())[-1])[1:nfit]
  326. vname <- dimnames(dotlist[[1]]$x)[[2]]
  327. nv <- length(vname)
  328. if(nv==0) stop('you did not specify x=TRUE on the fit')
  329. r <- vector('list', nv)
  330. for(i in 1:nfit) r[[i]] <- resid(dotlist[[i]], 'partial')
  331. for(i in 1:nv) {
  332. curves <- vector('list',nfit)
  333. ymin <- 1e10; ymax <- -1e10
  334. for(j in 1:nfit) {
  335. xx <- dotlist[[j]]$x[,vname[i]]
  336. yy <- r[[j]][,vname[i]]
  337. if(center)yy <- yy - mean(yy)
  338. curves[[j]] <- lowess(xx, yy, iter=0)
  339. ymin <- min(ymin, curves[[j]]$y)
  340. ymax <- max(ymax, curves[[j]]$y)
  341. }
  342. for(j in 1:nfit) {
  343. if(j==1) plot(curves[[1]], xlab=vname[i], ylab=NULL,
  344. ylim=if(missing(ylim)) c(ymin, ymax) else ylim, type='l')
  345. else lines(curves[[j]], lty=j)
  346. }
  347. if(nfit>1) labcurve(curves, labels)
  348. }
  349. invisible()
  350. }