Posted By

hbeale on 04/19/12


Tagged

R Convenience


Versions (?)

frequently used convenience functions


 / Published in: R
 

as you'd guess, these are R functions that I use frequently, posted for the benefit of people I share code with can

  1. ## these are functions that I sometimes use in my code
  2. ## they are intended to be able to be run on any computer
  3. ## with R installed.
  4.  
  5.  
  6. #################################
  7. ###
  8. ### Settings
  9. ###
  10. #################################
  11. options(stringsAsFactors=FALSE)
  12.  
  13.  
  14. #################################
  15. ###
  16. ### Aliases for frequently used functions
  17. ###
  18. #################################
  19. s <- base::summary;
  20. h <- utils::head;
  21. n <- base::names;
  22. as.dataframe<-base::data.frame;
  23.  
  24.  
  25. #################################
  26. ###
  27. ### FUNCTION: sourceDir
  28. ###
  29. #################################
  30. # sources any *.R files in a given directory
  31. #
  32. # Args: path
  33. # trace is a boolean to print file names as they are sourced
  34. #
  35. # Returns: nothing
  36. #
  37. # this function is copied from the "source" help text
  38. #
  39. sourceDir <- function(path, trace = TRUE, ...) {
  40. for (nm in list.files(path, pattern = "\\.[Rr]$")) {
  41. if(trace) cat(nm,":")
  42. source(file.path(path, nm), ...)
  43. if(trace) cat("\n")
  44. }
  45. }
  46.  
  47.  
  48. #################################
  49. ###
  50. ### FUNCTION: write.txt
  51. ###
  52. #################################
  53. # write.table with my most frequently used settings
  54. #
  55. # Args:
  56. # ... (vector of items to paste)
  57. #
  58. # Returns:
  59. #
  60. write.txt<-function (x, file="", quote=FALSE, sep="\t", row.names=FALSE, ...){
  61.  
  62. write.table(x, file, quote=quote, sep=sep, row.names=row.names, ...)
  63.  
  64. }
  65.  
  66.  
  67. #################################
  68. ###
  69. ### FUNCTION: read.txt
  70. ###
  71. #################################
  72. # read.table with my most frequently used settings
  73. #
  74. # Args:
  75. # ... (vector of items to paste)
  76. #
  77. # Returns:
  78. # data frame
  79. #
  80. read.txt<-function (file="", sep="\t", header=TRUE,row.names=NULL, ...){
  81.  
  82. read.table(file, sep= sep, header= header, row.names= row.names, ...)
  83.  
  84. }
  85.  
  86.  
  87. #################################
  88. ###
  89. ### FUNCTION: pasteNS
  90. ###
  91. #################################
  92. # Paste arguments with no spaces between them
  93. #
  94. # Args:
  95. # ... (vector of items to paste)
  96. #
  97. # Returns:
  98. # character vector result of paste
  99. #
  100. pasteNS<-function (...){
  101.  
  102. paste(..., sep="")
  103.  
  104. }
  105.  
  106.  
  107. #################################
  108. ###
  109. ### FUNCTION: convertToComplement
  110. ###
  111. #################################
  112. # Converts string into its DNA complement
  113. #
  114. # Args:
  115. # string
  116. # fGenotype (boolean; if TRUE, orders the string alphabetically)
  117. #
  118. # Returns:
  119. # string
  120. #
  121. convertToComplement<-function(x,fGenotype=TRUE){
  122. allComps<-NULL
  123. for (nucString in x){
  124.  
  125. bases=c("A","C","G","T")
  126. xx<-unlist(strsplit(toupper(nucString),NULL))
  127. thisComp=unlist(lapply(xx,function(bbb){
  128. if(bbb=="A") compString<-"T"
  129. if(bbb=="C") compString<-"G"
  130. if(bbb=="G") compString<-"C"
  131. if(bbb=="T") compString<-"A"
  132. if(!bbb %in% bases) compString<-"N"
  133.  
  134. return(compString)
  135. }))
  136. if(fGenotype) thisComp = thisComp [order(thisComp)]
  137. thisComp =paste(thisComp,collapse="")
  138. allComps=c(allComps, thisComp)
  139. }
  140. return(allComps)
  141. }
  142.  
  143. #################################
  144. ###
  145. ### FUNCTION: convertColsToNumeric
  146. ###
  147. #################################
  148. # Converts specified columns to numeric
  149. #
  150. # Args:
  151. # data frame, column indices
  152. #
  153. # Returns:
  154. # original DF with specified columns changed
  155. #
  156. convertColsToNumeric<-function(dfConv,colIndices){
  157. for (i in colIndices){
  158. dfConv[,i]=as.numeric(dfConv[,i])
  159. }
  160. return(dfConv)
  161. }
  162.  
  163. #################################
  164. ###
  165. ### FUNCTION: printNumberedVector
  166. ###
  167. #################################
  168. #
  169. # prints a vector to screen, one item per line with each line numbered
  170. #
  171. # Input: a vector
  172. #
  173. # Returns:
  174. # nothing
  175. #
  176. ##
  177. printNumberedVector<-function(someVector){
  178. cat(paste(1:length(someVector ), someVector,"\n"))
  179. }
  180.  
  181.  
  182.  
  183. #################################
  184. ###
  185. ### FUNCTION: cnvSummary
  186. ###
  187. #################################
  188. #
  189. # creates a summary of cnvs identified by the cnv-seq package
  190. # writes the summary to a file
  191. # returns the files
  192. #
  193. # Input: cnvFileName, outputFileName
  194. #
  195. # Returns:
  196. # nothing
  197. #
  198. ##
  199. summarizeCNV<-function(cnvFileName,outputFileName){
  200.  
  201. ## for testing
  202. ## cnvFileName ="*CNV.hits.log2-0.6.pvalue-0.001.minw-4.cnv"
  203. if (! file.exists(outputFileName)) {
  204. library(cnv)
  205. cnvFileData <- read.delim(cnvFileName)
  206. if (sum(cnvFileData $cnv!=0)){
  207. cnvSummary<-NULL
  208. for (i in seq(max(min(cnvFileData$cnv), 1), max(cnvFileData$cnv))) {
  209. sub <- subset(cnvFileData, cnv == i)
  210. start <- ceiling(mean(c(min(sub$start), min(sub$position))))
  211. end <- floor(mean(c(max(sub$end), max(sub$position))))
  212. cnvSummary <- rbind(cnvSummary ,c(paste("CNVR_", i, sep = ""), unique(sub$chromosome), start, end, end - start + 1, unique(sub$cnv.log2), unique(sub$cnv.p.value)))#, sep = "\t", file = file,
  213. }
  214. colnames(cnvSummary)= c("cnv", "chromosome", "start", "end", "size", "log2", "p.value")
  215. cnvSummary=data.frame(cnvSummary)
  216. for (i in 3:7){
  217. cnvSummary[,i]=as.numeric(cnvSummary[,i])
  218. }
  219. } else {
  220. return(NULL)
  221. }
  222. write.table(cnvSummary, outputFileName, row.names=FALSE, quote=FALSE, sep="\t")
  223. } else {
  224. cnvSummary =read.table(outputFileName, header=TRUE, sep="\t")
  225. }
  226. return(cnvSummary)
  227. }
  228.  
  229.  
  230.  
  231. #################################
  232. ###
  233. ### FUNCTION: fixRowNamesColumn
  234. ###
  235. #################################
  236. #
  237. # moves data in "Row.names" column to rownames
  238. #
  239. # Args: df
  240. #
  241. # Returns:
  242. # dataframe
  243. #
  244. ##
  245.  
  246.  
  247.  
  248. fixRowNamesColumn<-function(mergeResultsDF){
  249.  
  250. #mergeResultsDF= popByAgeWide
  251. rownames(mergeResultsDF)= mergeResultsDF$Row.names
  252. mergeResultsDF= mergeResultsDF[,2:ncol(mergeResultsDF)]
  253. }
  254.  
  255.  
  256. #################################
  257. ###
  258. ### FUNCTION: getLabelsAndMidPointsOfGroups
  259. ###
  260. #################################
  261. #
  262. # useful for labeling axes and creating
  263. #
  264. # Args: df, valname, byname, passFUN, fnDesc
  265. # Example Vals
  266. # df= creatDF
  267. # valname="creat"
  268. # byname="breed"
  269. # passFUN=length
  270. # fnDesc="count"
  271. # fReplaceValName=FALSE ## the default is to append
  272. #
  273. # Returns:
  274. # dataframe
  275. #
  276. ##
  277.  
  278.  
  279. getLabelsAndMidPointsOfGroups<- function(charVec,alternatingColors=c("grey", "black")){
  280.  
  281. plotAnnotation<-data.frame(values= charVec)
  282. previousVal= c("", charVec[1:(length(charVec )-1)])
  283. switchPoint= charVec != previousVal
  284. groupBreaks=data.frame(start=which(switchPoint))
  285. groupBreaks$length= c(groupBreaks $start[2:nrow(groupBreaks)],length(charVec)+1)-groupBreaks$start
  286. groupBreaks$end= groupBreaks$start+ (groupBreaks$length-1)
  287. groupBreaks$midpoint=round(rowMeans(groupBreaks[,c("start","end")]),0)
  288. groupBreaks$label= plotAnnotation$values[groupBreaks$midpoint]
  289.  
  290.  
  291. groupBreaks$color= alternatingColors[1]
  292. groupBreaks$color[ c(T,F) ]=alternatingColors[2]
  293.  
  294.  
  295. plotAnnotation$color=as.character(unlist(apply(groupBreaks,1,function(x){
  296. rep(x["color"],x["length"])
  297. })))
  298.  
  299. plotAnnotation$labelnames= ""
  300.  
  301. plotAnnotation$labelnames[groupBreaks$midpoint]=plotAnnotation$value[groupBreaks$midpoint]
  302. list(labelsDF=plotAnnotation, breaksDF=groupBreaks)
  303. }
  304.  
  305.  
  306.  
  307. #################################
  308. ###
  309. ### FUNCTION: prettyAggregate
  310. ###
  311. #################################
  312. # wrapper for aggregate
  313. # renames columns based on input
  314. #
  315. # Args: df, valname, byname, passFUN, fnDesc
  316. # Example Vals
  317. # df= creatDF
  318. # valname="creat"
  319. # byname="breed"
  320. # passFUN=length
  321. # fnDesc="count"
  322. # fReplaceValName=FALSE ## the default is to append
  323. #
  324. # Returns:
  325. # dataframe
  326. #
  327. ##
  328.  
  329. prettyAggregate<-function(df, valname, byname, passFUN, fnDesc, fReplaceValName=FALSE){
  330. if (fReplaceValName) {
  331. agColName=fnDesc
  332. } else {
  333. agColName=pasteNS(valname,"_", fnDesc)
  334. }
  335. result=aggregate(df[, valname],by=list(df[, byname]), FUN= passFUN)
  336. colnames(result)=c(byname, agColName)
  337. return(result)
  338. }
  339.  
  340.  
  341.  
  342. #################################
  343. ###
  344. ### FUNCTION: cbindList/rbindList
  345. ###
  346. #################################
  347. # tries to return a dataframe from a list of bind-able vectors or dataframes
  348. # based on first example in Reduce help
  349. #
  350. # Args:
  351. # List
  352. #
  353. # Returns:
  354. # dataframe
  355. #
  356. ##
  357.  
  358. cbindList <- function(x) Reduce("cbind", x)
  359. rbindList <-function(x) Reduce("rbind",x)
  360.  
  361.  
  362. #################################
  363. ###
  364. ### FUNCTION: bindRepeat
  365. ###
  366. #################################
  367. # returns a dataframe composed of n
  368. # copies of the vector
  369.  
  370. # Args:
  371. # vector2Rep - vector to be repeated
  372. # n - times to repeat
  373. ## abandoned:
  374. # fBindAsColumns - optional, boolean
  375. #
  376. # Returns:
  377. # dataframe
  378. #
  379. ##
  380. bindRepeat <- function(vector2Rep,n){
  381. a <- list()
  382. for(i in 1:n) a[[i]] <- vector2Rep
  383. rbindList(a)
  384. }
  385.  
  386.  
  387.  
  388. #################################
  389. ###
  390. ### FUNCTION: inMB
  391. ###
  392. #################################
  393. # Convert a number of bases to megabases
  394. #
  395. # Args:
  396. # number(s)
  397. #
  398. # Returns:
  399. # vector
  400. #
  401. ##
  402. inMB<-function(bases){
  403. bases/1000000
  404. }
  405.  
  406.  
  407.  
  408.  
  409. #################################
  410. ###
  411. ### FUNCTION: textListToVec
  412. ###
  413. #################################
  414. # Convert a chunk of text (one item per line) to a vector
  415. # Just type textListToVec(" and then paste in the lines and type ")
  416. #
  417. # Args:
  418. # newline-separated list
  419. #
  420. # Returns:
  421. # vector
  422. #
  423. ##
  424.  
  425.  
  426. textListToVec <-function(a) strsplit(a,"\n")[[1]]
  427.  
  428.  
  429.  
  430. #################################
  431. ###
  432. ### FUNCTION: tableDF
  433. ###
  434. #################################
  435. # performs the table function
  436. # but returns a well-formatted data.frame
  437. #
  438. # Args:
  439. # valVector
  440. #
  441. # Returns:
  442. # dataframe
  443. #
  444. ##
  445.  
  446. tableDF<-function(valVector=c("a", "a", "b")){
  447. t=data.frame(as.matrix(table(valVector)))
  448. t2=data.frame(value=row.names(t), frequency=t[,1])
  449. row.names(t2)=row.names(t)# colnames(t)=c("frequency")
  450. return(t2)
  451. }
  452.  
  453.  
  454.  
  455.  
  456. #################################
  457. ###
  458. ### FUNCTION: interleave
  459. ###
  460. #################################
  461. # Interleave two vectorsAdd timestamp and data to plot
  462. #
  463. # Args:
  464. # v1
  465. # v2
  466. #
  467. # Returns:
  468. # vector
  469. #
  470. ##
  471.  
  472. interleave <- function(v1,v2)
  473. {
  474. ord1 <- 2*(1:length(v1))-1
  475. ord2 <- 2*(1:length(v2))
  476. c(v1,v2)[order(c(ord1,ord2))]
  477. }
  478.  
  479.  
  480.  
  481. #################################
  482. ###
  483. ### FUNCTION: stampPlot
  484. ###
  485. #################################
  486. # Add timestamp and data to plot
  487. #
  488. # Args:
  489. # desc.txt=""
  490. # fOuter=FALSE
  491. #
  492. # Returns:
  493. # nothinbg
  494. #
  495. ##
  496.  
  497. stampPlot=function(desc.txt="",fOuter=FALSE ){
  498. if(fOuter) {
  499. pAdj=-2
  500. } else {
  501. pAdj=-0.6
  502. }
  503. mtext(paste(desc.txt, hbTimeStamp()),side=4,outer= fOuter,padj= pAdj,cex=.8)
  504. }
  505.  
  506.  
  507.  
  508. #################################
  509. ###
  510. ### FUNCTION: snap
  511. ###
  512. #################################
  513. # like head, but also limits the number of columns. default is 6 columns 6 rows.
  514.  
  515. snap <-function (df,rowLim=6,colLim=6) {
  516. # troubleshooting
  517. #age=18:29
  518. #height=runif(12,62,74)
  519. #df =data.frame(age=age,height=height)
  520.  
  521.  
  522. if (is(df)[1]=="data.frame" | is(df)[1]=="matrix"){
  523. nrowDF =nrow(df)
  524. ncolDF =ncol(df)
  525. if (rowLim<nrowDF) nrowDF=rowLim
  526. if (colLim<ncolDF) ncolDF=colLim
  527. print(df[1:nrowDF,1:ncolDF])
  528. } else {
  529. print("cannot parse input")
  530. }
  531. }
  532.  
  533.  
  534. #################################
  535. ###
  536. ### FUNCTION: getAllListSubItemsByIndex
  537. ###
  538. #################################
  539. # get the nth sub item from each item
  540. #
  541. # Args:
  542. # theList, list to pull from
  543. # theIndex, the index of desired items
  544. #
  545. # Returns:
  546. # vector or results
  547. #
  548. getAllListSubItemsByIndex <-function (theList, theIndex){
  549.  
  550. paste(lapply(theList,function(x) x=x[[theIndex]]))
  551.  
  552.  
  553. }
  554.  
  555.  
  556.  
  557. #################################
  558. ###
  559. ### FUNCTION: greaterOf
  560. ###
  561. #################################
  562. # get the greater of two items
  563. #
  564. # Args:
  565. # values to compare
  566. #
  567. # Returns:
  568. # greater value
  569. #
  570. greaterOf <- function (x,y) {
  571.  
  572. if (x>y | y==x){
  573. x
  574. } else {
  575. if (y>x) {
  576. y
  577. } else {
  578. NA
  579. }
  580. }
  581.  
  582. }
  583.  
  584.  
  585. #################################
  586. ###
  587. ### FUNCTION: lesserOf
  588. ###
  589. #################################
  590. # get the lesser of two items
  591. #
  592. # Args:
  593. # values to compare
  594. #
  595. # Returns:
  596. # lesser value
  597. #
  598. lesserOf <- function (x,y) {
  599.  
  600. if (x<y | y==x ){
  601. x
  602. } else {
  603. if (y<x) {
  604. y
  605. } else {
  606. NA
  607. }
  608. }
  609.  
  610. }
  611.  
  612.  
  613.  
  614. ### FUNCTION: isBetween
  615. ###
  616. #################################
  617. # is one value between two others
  618. #
  619. # Args:
  620. # 3 values
  621. #
  622. # Returns:
  623. # TRUE or FALSE
  624. #
  625. isBetween <- function (x,y,z) {
  626.  
  627. x>y & x<z
  628.  
  629. }
  630.  
  631.  
  632. #################################
  633. ###
  634. ### FUNCTION: initialCap
  635. ###
  636. #################################
  637. # Converts each string in a vector to initial upper case
  638. #
  639. # Args:
  640. # wordsToConvert: vector of genes symbols
  641. #
  642. # Returns:
  643. # vector with changed words
  644. #
  645. initialCap <- function(wordsToConvert) {
  646.  
  647. return_list<-NULL
  648. for (i in 1:length(wordsToConvert)){
  649. r<-tolower(wordsToConvert[i])
  650. s <- strsplit(r, " ")[[1]]
  651. return_list[i]=paste(toupper(substring(s, 1,1)), substring(s, 2), sep="", collapse=" ")
  652. }
  653. return(return_list)
  654. }
  655.  
  656. #################################
  657. ###
  658. ### FUNCTION: allIdentical
  659. ###
  660. #################################
  661. # tests whether each item in a vector is identical
  662. #
  663. # Args:
  664. # vectorToTest: vector
  665. #
  666. # Returns:
  667. # TRUE or FALSE
  668. #
  669.  
  670.  
  671. allIdentical <- function(vectorToTest){
  672. sum(! vectorToTest %in% vectorToTest[1])}
  673.  
  674.  
  675. #################################
  676. ###
  677. ### FUNCTION: set_up_plot
  678. ###
  679. #################################
  680. # Determines axis values from data
  681. # adds a time stamp
  682. #
  683. # Args:
  684. # x & y data
  685. # fPlotFromZero
  686. # fPlotEvenAxes
  687.  
  688. # Returns:
  689. # plot
  690. #
  691.  
  692.  
  693. set_up_plot <-function(x,y,fPlotFromZero=TRUE, fPlotEvenAxes=FALSE,fDateStamp=TRUE,stampText="",fOuter=FALSE,xAxLabel=colnames(x), yAxLabel=colnames(y), ...){
  694. #print(names(x))
  695. # if
  696. # xAxName=colnames(x)
  697. # yAxLabel=colnames(y)
  698. if(fPlotEvenAxes){
  699. x=c(min(c(x,y)),max(c(x,y)))
  700. y=x
  701. }
  702. if (fPlotFromZero) {
  703. plot_y_min=0
  704. plot_x_min=0
  705. } else {
  706. plot_y_min=min(y)
  707. plot_x_min=min(x)
  708. }
  709.  
  710. plot_y_max=max(y)
  711. plot_x_max=max(x)
  712.  
  713. plot(c(plot_x_min, plot_x_max),c(plot_y_min, plot_y_max), xlab= xAxLabel , ylab= yAxLabel , type="n", ... )
  714.  
  715. if (fDateStamp) stampPlot(stampText,fOuter= fOuter)
  716.  
  717. }
  718.  
  719.  
  720.  
  721.  
  722.  
  723. #################################
  724. ###
  725. ### FUNCTION: hbTimeStamp
  726. ###
  727. #################################
  728. # Get formatted time and date
  729. #
  730. # Args (all optional):
  731. # sepYMD
  732. # sepHMS
  733. # sepDateTime
  734.  
  735. # Returns:
  736. # character vector
  737. #
  738.  
  739.  
  740. hbTimeStamp<-function(sepYMD="-", sepHMS=".", sepDateTime="_", dateVars=c("Y","m","d"), timeVars=c("I", "M", "S")){
  741. dateSpec=paste(paste("%", dateVars, sep=""), collapse= sepYMD)
  742. timeSpec=paste(paste("%", timeVars, sep=""), collapse= sepHMS)
  743.  
  744. paste(format(Sys.time(), dateSpec),paste(format(Sys.time(), timeSpec),substring(format(Sys.time(), "%r"),10,11),sep=""),sep= sepDateTime)
  745. }
  746.  
  747.  
  748. #################################
  749. ###
  750. ### FUNCTION: breakPlotIntoPages
  751. ###
  752. #################################
  753. # Breaks data into page-size chunks and calls a plotting function for each chunk
  754. #
  755. # Args:
  756. # dataframe with plot data
  757. # optional: desired rows per page
  758.  
  759. # Returns:
  760. # multiple plots
  761. #
  762.  
  763. breakPlotIntoPages<-function(multipagePlotData,rowsPerPage=40,varList){
  764.  
  765. #if (! "myPlot" %in% varList) {
  766. # print("the 'myPlot' function must exist")
  767. # return()
  768. # }
  769.  
  770. plotRows=nrow(multipagePlotData)
  771. dataBreaks=unique(c(seq(from=1,to=plotRows,by=rowsPerPage),plotRows))
  772. breakCount=length(dataBreaks)
  773. penultimateBreakCount= breakCount-1
  774. lastPageRowCount= dataBreaks[breakCount ]-dataBreaks[penultimateBreakCount]
  775.  
  776.  
  777. for (i in 1:(length(dataBreaks)-1)){
  778. brokenData=multipagePlotData[dataBreaks[i]:dataBreaks[i+1],]
  779. xLimVals=c(0,rowsPerPage*1.6)
  780. barPos=myPlot(brokenData,xLimVals,1)
  781. }
  782.  
  783. }
  784.  
  785.  
  786.  
  787. ## a useful function: rev() for strings
  788. strReverse <- function(x)
  789. sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
  790. ##strReverse(c("abc", "Statistics"))
  791.  
  792.  
  793.  
  794. #make transparenbt
  795.  
  796.  
  797.  
  798.  
  799. ################################
  800. ##
  801. ## FUNCTION: makeTransparent
  802.  
  803. ##
  804. ################################
  805. #Add transparency to colors
  806.  
  807. # Args:
  808. # vector of colors in name or hex format, e.g. grey or #FFCDFF
  809. # desiredPctTranparent
  810.  
  811. # Returns:
  812. # same colors with transparency added
  813.  
  814. #
  815.  
  816. makeTransparent<-function(colorVector=c("#FFCDFF", "#C0108C", "#CB7600"), desiredPctTranparent=50){
  817.  
  818. #colorVector=c("grey", "blue")
  819. if (substr(colorVector[1],0,1)!="#") {
  820. colorVector =rgb(t(col2rgb(colorVector)/255))
  821. }
  822. s=seq(0,255,length.out=21)
  823. transparencyCodes=data.frame(pctTransparent=100*(1-(s/255)))
  824. t2=unlist(lapply(s,function(x) rgb(0,100,0,x,maxColorValue=255)))
  825. transparencyCodes$hexSuffix= substring(t2,8,9)
  826. paste(rgb(t(col2rgb(colorVector)),maxColorValue=255), transparencyCodes[transparencyCodes$pctTransparent== as.character(desiredPctTranparent),2],sep="")
  827.  
  828. }
  829.  
  830.  
  831.  
  832.  
  833. #################################
  834. ###
  835. ### FUNCTION: plotMultipleCors
  836. ###
  837. #################################
  838. # Plots multiple vectors against
  839. # a single vector and returns
  840. # correlations
  841.  
  842. #
  843. # Args:
  844. # plotData
  845. ## plot data should have x values in first col, and any number of cols after that can be Y
  846. ## column names determine legend text
  847. # vColors -- a vector of colors the length of each Y col in plotData
  848. # ylabText -- text for Y label
  849. # legendPos -- legend position, like "topleft"
  850. # fNormalizeY -- normalize each Y value so the max val is 100
  851.  
  852. # Returns:
  853. # plot
  854. #
  855.  
  856. plotMultipleCors <-function(plotData, vColors="",ylabText="", xlabText ="Breed average creatinine level",legendPos="topleft",fNormalizeY=TRUE,...){
  857. ## plot data should have x values in first col, and any number of cols after that can be Y
  858. #test plotData= gAcdDesc[,c("mean","se","sd","Height","Weight")]
  859. library(plotrix)
  860. x= plotData[,1]
  861. if (fNormalizeY) {
  862. plotData[,2:ncol(plotData)]=apply(plotData[,2:ncol(plotData)],2,function(y){
  863. # 100*y/max(y)
  864. rescale(y,c(0,100))
  865. })
  866. }
  867.  
  868. maxY=max(plotData[,2:ncol(plotData)],na.rm=TRUE)
  869. set_up_plot( x,0: maxY,xAxLabel= xlabText, yAxLabel=ylabText ,...)
  870.  
  871.  
  872. # if (vColors =""){
  873. nColorsNeeded= ncol(plotData)-1
  874. pal1=brewer.pal(8,"Set1")
  875. vColors=pal1[rep(1:length(pal1), nColorsNeeded)[1: nColorsNeeded]]
  876. # }
  877.  
  878. plotSettings=data.frame(dataName =colnames(plotData[,2:ncol(plotData)]),color= vColors)
  879. plotSettings$notNA=apply(plotData[,2:ncol(plotData)],2,function(x){
  880. sum(!is.na(x))
  881. })
  882. plotSettings$legendText= plotSettings$dataName
  883. plotSettings$pch=0:(nrow(plotSettings)-1)
  884. for (i in 1:nrow(plotSettings)){
  885. y=plotData[,(i+1)]
  886. points(x,y,col= plotSettings$color[i],pch=plotSettings$pch[i])
  887. abline(lm(y ~ x),col= plotSettings$color[i])
  888. rsq <-cor(x,y, use="complete.obs")
  889. plotSettings$corr_value[i] <- format(c(rsq, 0.123456789), digits=2)[1]
  890. ##text(mean(x),mean(y),substitute(paste("R"^{2}, " = "*x),list(x= corr_value) ),cex=1,col= vColors[i-1])
  891. ##legend(210, 110, bquote(r^2 ==.(format(summary(regression)$adj.r.squared,digits=3))))
  892. print(plotSettings$color[i])
  893. } #end for loop
  894. legend(legendPos, legend=pasteNS(plotSettings$legendText,", Rsq=",plotSettings$corr_value, ", n=",plotSettings$notNA), pch= plotSettings$pch, col= plotSettings$color)
  895.  
  896. # colnames(plotData[,2:ncol(plotData)]), lty=1, col=vColors)
  897.  
  898. } ## end function
  899.  
  900. #################################
  901. ###
  902. ### FUNCTION: plotCors
  903. ###
  904. #################################
  905. # Plots multiple vectors against
  906. # a single vector and returns
  907. # correlations
  908.  
  909. #
  910. # Args:
  911. # plotData
  912. ## plot data should have x values in first col, and any number of cols after that can be Y
  913. ## column names determine legend text
  914. # vColors -- a vector of colors the length of each Y col in plotData
  915. # ylabText -- text for Y label
  916. # legendPos -- legend position, like "topleft"
  917. # fNormalizeY -- normalize each Y value so the max val is 100
  918.  
  919. # Returns:
  920. # plot
  921. #
  922.  
  923. plotCors<-function(plotData, vColors="",ylabText=colnames(plotData)[2], xlabText =colnames(plotData)[1],legendPos="topleft",fNormalizeY=TRUE,...){
  924. ## plot data should have x values in first col, and any number of cols after that can be Y
  925. #test plotData= gAcdDesc[,c("mean","se","sd","Height","Weight")]
  926. #plotData=na.omit(plotData)
  927. x= plotData[,1]
  928. if (fNormalizeY) {
  929. plotData[,2]=100*plotData[,2]/max(plotData[,2])
  930. }
  931. y=plotData[,2]
  932.  
  933. set_up_plot( x,y,xAxLabel= xlabText, yAxLabel=ylabText ,...)
  934.  
  935. points(x,y)#,col= plotSettings$color[i])
  936. abline(lm(y ~ x)) #,col= plotSettings$color[i])
  937. rsq <-cor(x,y, use="complete.obs")
  938. rsqTxt <- format(c(rsq, 0.123456789), digits=2)[1]
  939. ##text(mean(x),mean(y),substitute(paste("R"^{2}, " = "*x),list(x= corr_value) ),cex=1,col= vColors[i-1])
  940. ##legend(210, 110, bquote(r^2 ==.(format(summary(regression)$adj.r.squared,digits=3))))
  941. #print(plotSettings$color[i])
  942. title(sub=pasteNS("Rsq=", rsqTxt ))
  943.  
  944. # colnames(plotData[,colSpec]), lty=1, col=vColors)
  945.  
  946. } ## end function
  947.  
  948.  
  949.  
  950.  
  951.  
  952. #################################
  953. ###
  954. ### FUNCTION: groupSNPsIntoLoci
  955. ###
  956. #################################
  957. #
  958. # find groups of nearby SNPs
  959. #
  960. # Args:
  961. # hitListSnpChrID any kind of chromosome identifier
  962. # hitListSnpPos position of snp, either a number of a snp id like chr15.44226659
  963. # maxInterSNPRange the maximum acceptable distance between SNPs
  964. #
  965. # Returns:
  966. # data.frame vector result of paste
  967. # data frame contains columns - c("chr", "minPos", "maxPos", "minPVal", "snpCount", "locID", "locusSize")
  968. #
  969. # find windows in snps
  970. groupSNPsIntoLoci<-function(hitListSnpChrID, hitListSnpPos, hitListSnpPVals, maxInterSNPRange=1E6){
  971. #test hitListSnpChrID=
  972. # hitListSnpChrID =KCsize_candidates$chrChar
  973. # hitListSnpPos=KCsize_candidates$pos
  974. # hitListSnpPVals=KCsize_candidates$KC.LOGP
  975. if (sum(grepl("chr", hitListSnpPos))>1){
  976. hitListSnpPos=as.numeric(gsub("^[^\\.]*\\.","", hitListSnpPos))
  977. }
  978.  
  979.  
  980. hitList=data.frame(cbind(hitListSnpChrID, hitListSnpPos, hitListSnpPVals))
  981. colnames(hitList)=c("chr","pos","pval")
  982. hitList$pos=as.numeric(hitList$pos)
  983. hitList$pval =as.numeric(hitList$pval)
  984. # set up variables and record info for first row
  985. #assocLocus columns: chr, minPos, minPos, minPVal, snpCount
  986. assocLocus<-NULL
  987. hitList= hitList[order(hitList$chr, hitList$pos),]
  988. currAssocLocus= data.frame(c(hitList[1,c("chr","pos","pos","pval")]))
  989. colnames(currAssocLocus)=c("chr", "minPos", "maxPos", "minPVal")
  990. currAssocLocus$snpCount[1]=1
  991. #round(hitList[1,"pos"]/1E6,0)
  992. hitList$locusID=NA
  993.  
  994. currAssocLocus$locID[1]=pasteNS(hitList[1,"chr"],".",round(hitList[1,"pos"]/1E6,0),"Mb")
  995. for (i in 2:nrow(hitList)){
  996. # check each snp
  997. if (hitList[i,"chr"]==currAssocLocus$chr[1] & (abs(hitList[i,"pos"]-currAssocLocus$minPos[1])<maxInterSNPRange | abs(hitList[i,"pos"]-currAssocLocus$minPos[1])<maxInterSNPRange)){
  998. # they are on the same chromosome and within maxInterSNPRange of each other
  999. # extend the current Association Locus values (update currAssocLocus)
  1000. currAssocLocus$snpCount[1]=currAssocLocus$snpCount[1]+1
  1001. if (hitList[i,"pval"]<currAssocLocus$minPVal[1]) currAssocLocus$minPVal[1]=hitList[i,"pval"]
  1002. if (hitList[i,"pos"]<currAssocLocus$minPos[1]) currAssocLocus$minPos[1]=hitList[i,"pos"]
  1003. if (hitList[i,"pos"]>currAssocLocus$maxPos[1]) currAssocLocus$maxPos[1]=hitList[i,"pos"]
  1004. if (i==nrow(hitList)) assocLocus<-rbind(assocLocus, currAssocLocus) ## add this last locus to the list
  1005.  
  1006. } else {
  1007. assocLocus<-rbind(assocLocus, currAssocLocus)
  1008. currAssocLocus[1,c("chr", "minPos", "maxPos", "minPVal")]=c(hitList[i,c("chr","pos","pos","pval")])
  1009. currAssocLocus$snpCount[1]=1
  1010. currAssocLocus$locID[1]=pasteNS(hitList[i,"chr"],".",round(hitList[i,"pos"]/1E6,0),"Mb")
  1011. }
  1012. hitList$locusID[i]=currAssocLocus$locID[1]
  1013. }
  1014.  
  1015. assocLocus=data.frame(assocLocus)
  1016. assocLocus$locusSize= assocLocus$maxPos-assocLocus$minPos+1
  1017. return(assocLocus)
  1018. }

Report this snippet  

You need to login to post a comment.