php 生存分析,科学网—生存分析案例 - 张洪磊的博文_weixin_39726873的博客-程序员宅基地

技术标签: php 生存分析  

#!/usr/bin/Rscript

library(survival)

library(limma)

file.create("survive.txt")

#files

tumors

for (tumor in tumors){

exp_file

#read in files

rna=read.table(exp_file,header=TRUE,row.names=1,sep="\t",stringsAsFactors=FALSE)

#get the index of the normal/tumor samples

t_index

n_index

#first I remove genes whose expression is == 0 in more than 80% of the samples:

rem

x

x

r

remove dim(x)[2]*0.8)

return(remove)

}

remove

rna

#calculate z-scores :(value - mean normal)/SD normal

#    scal

#        mean_n

#        sd_n

#        res

#        colnames(res)

#        rownames(res)

#        for(i in 1:dim(x)[1]){

#            for(j in 1:dim(x)[2]){

#                res[i,j]

#            }

#        }

#    return(res)

#    }

z_rna

#set the rownames and colnames

colnames(z_rna)

rm(rna)

print(paste(i,"-----------------finish first part",sep=""))

#######################################################################

# read in clinical data

clinical_file      

clinical          

clinical          

clinical$IDs      

rownames(clinical)

sum(clinical$IDs %in% colnames(z_rna))

#days_to_new_tumor_event_after_initial_treatment : isolate the max value

ind_keep

new_tum

new_tum_collapsed

for (k in 1:nrow(new_tum)){

if(sum(is.na(new_tum[k,])) < ncol(new_tum)){

m

new_tum_collapsed

} else {

new_tum_collapsed

}

}

#do the same to death: isolate the max value

ind_keep

death

death_collapsed

for (k in 1:nrow(death)){

if( sum(is.na(death[k,])) < ncol(death) ){

m

death_collapsed

} else {

death_collapsed

}

}

#days_to_last_followup: isolate the min value

ind_keep

fl

fl_collapsed

for (k in 1:nrow(fl)){

if( sum(is.na(fl[k,])) < ncol(fl) ){

m

fl_collapsed

} else {

fl_collapsed

}

}

#put everything together

all_clin

colnames(all_clin)

#now, to do survival analysis we need three main things:

#1- time: this is the time till an event happens

#2- status: this indicates which patients have to be kept for the analysis

#3- event: this tells, e.g., which patients have the gene up- or down-regulated or show no change in expression

#since we want to do censored analysis we need to have something to censor the data with.

#for example, if a patient has no death data BUT there is a date to last followup

#it means that after that day we know nothing about the patient, therefore after that day it

#cannot be used for calculations/Kaplan Meier plot anymore, therefore we censor it.

#so now we need to create vectors for both "time to new tumor" and "time to death" that contain

#also the data from censored individuals

# create vector with time to new tumor containing data to censor for new_tumor

all_clin$new_time

for (i in 1:length(as.numeric(as.character(all_clin$new_tumor_days)))){

all_clin$new_time[i]

as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$new_tumor_days))[i])

}

# create vector time to death containing values to censor for death

all_clin$new_death

for (i in 1:length(as.numeric(as.character(all_clin$death_days)))){

all_clin$new_death[i]

as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$death_days))[i])

}

#now we need to create the vector for censoring the data, which means telling R which patients are dead or have a new tumor. in this case,

#if a patient has a “days_to_death” value it will be assigned 1 and used in the corresponding analysis. the reason why we

#censor with death events even for recurrence is pretty important. a colleague pointed out to me that this is a competing

#risks problem: although a patient can recur and then die, a patient who is dead cannot recur, therefore it is

#more accurate to censor for death events.

#create vector for death censoring

all_clin$death_event

#finally add row.names to clinical

rownames(all_clin)

#create event vector for RNASeq data

#event_rna 1.96,1,ifelse(i< -1.96,0,2)))) #up and down  1:>1.96;0: -1.96 and <1.96

#event_rna 1.96,1,0))) # altered and not altered

event_rna

event_rna

colnames(event_rna)

rownames(event_rna)

med

for (m in 1:nrow(z_rna)){

for (n in 1:ncol(z_rna)){

event_rna[m,n] med[m],1,0)

}

}

#pick your gene

gene_list

genes    

print(paste("intersect genes",intersect(genes,rownames(event_rna))))

common_samples

#Calculate each gene

for (gene in genes){

factor

time

event

#survdiff

s

s1

pv

#HR

HR

up95  

low95

#high_num and low_num

high_num

low_num

#plot data

png(paste(tumor,gene,".survive.png",sep=""))

plot(s,col=c(1:3), frame=F, lwd=2, main=paste(tumor,gene,sep=":"))

# add lines for the median survival

x1

x2

if(x1 != "NA" & x2 != "NA"){

lines(c(0,x1),c(0.5,0.5),col="blue")

lines(c(x1,x1),c(0,0.5),col="black")

lines(c(x2,x2),c(0,0.5),col="red")

}

# add legend

legend(1800,0.995,legend=paste("p.value = ",pv[[1]],sep=""),bty="n",cex=1.4)

legend(0.5,1.5,legend=paste("HR = ",HR,sep=""),bty="n",cex=1.4)

legend(max(all_clin[common_samples,"new_death"],na.rm = T)*0.7,0.94,

legend=c(paste("high=",high_num),paste("low=",low_num )),bty="n",cex=1.3,lwd=3,col=c("black","red"))

dev.off()

sf_content

print(sf_content)

write.table(sf_content,"sf.survival",append=TRUE,quote=FALSE,col.names=FALSE,row.names=FALSE)

}

}

转载本文请联系原作者获取授权,同时请注明本文来自张洪磊科学网博客。

链接地址:http://blog.sciencenet.cn/blog-2609994-992077.html

上一篇:正则

下一篇:多变量计算不同水平的overlap

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/weixin_39726873/article/details/115756925

智能推荐

AST解析基础: 如何写一个简单的html语法分析库_weixin_34258782的博客-程序员宅基地

前言抽象语法树(Abstract Syntax Tree, AST)是解释器/编译器进行语法分析的基础, 也是众多前端编译工具的基础工具, 比如webpack, postcss, less等. 对于ECMAScript, 由于前端轮子众多, 人力过于充足, 早已经被人们玩腻了. 光是语法分析器就有 uglify , acorn , bablyo...

北京理工大学金旭亮老师C#(78—EmbedException)_weixin_42681308的博客-程序员宅基地

CLR“两轮遍历”的多层嵌套异常捕获处理流程try try try 在此书写测试代码 catch Exception A finally catch ExceptionB finallycatch ExceptionCcatch Exceptionfinally“扫描并查找相匹...

db2与oracle 使用差异_iteye_15105的博客-程序员宅基地

1、取前N条记录Oracle:Select * from TableName where rownum <= N;DB2:Select * from TableName fetch first N rows only;2、取得系统日期Oracle:Select sysdate from dual;DB2:Select current timestamp from sy...

WPF DevExpress Chart控件 界面绑定数据源,不通过C#代码进行绑定_weixin_34034670的博客-程序员宅基地

 <Grid x:Name="myGrid" Loaded="Grid_Loaded" DataContext="{Binding PartOneData}"> <dxc:ChartControl Name="chartControl1" DataSource="{Binding ComplainAnalysisList}">

linux查询默认权限命令,『无欲则无求』Linux常用命令 — 24、umask默认权限_weixin_39948824的博客-程序员宅基地

文章目录1、文件和目录的默认权限2、umask 默认权限(1)查看系统的umask权限(2)用八进制数值显示umask权限(3)umask权限的计算方法(4)注意:umask 默认权限的计算绝不是数字直接相减。总结:1、文件和目录的默认权限Linux系统的基本权限有6种。读写执行为基本权限umask 为默认权限后边还有4种权限,会在之后的权限管理部分详细讲解。我们先创建一个文件abc,然后查看ab...

《两日算法系列》之第五篇:SVM_小一的学习笔记的博客-程序员宅基地

1. 支持向量机理论支持向量机(Support Vector Machine:SVM)支持向量机的目的是什么?利用训练数据集的间隔最大化找到一个最优分离超平面你可能有两个名词不太懂,间隔?分离超平面?别紧张,没有人刚开始学习SVM的时候就知道这两个东西先来看个例子,现在有一些人的身高和体重数据,将它们绘制成散点图,是这样的:如果现在给你一个未知男女的身高和体重,你能分辨出性别吗?...

随便推点

apache hadoop 2.6.4 64bit 在windows8.1下直接安装指南(无需虚拟机和cygwin)_Father Abraham的博客-程序员宅基地

转载链接:官方文档首先需要下载Apache hadoop 2.6.4的tar.gz包,到本地解压缩到某个盘下,注意路径里不要带空格。否则你配置文件里需要用windows 8.3格式的路径!第二确保操作系统是64bit,已安装.netframework4.0以上版本,这个你懂的,微软的天下,没有这个怎么混!第三确保安装了64 bit 版本的JDK1.8,笔者使用的就是JD

有限状态机报错transition is invalid while previous transition is still in progress_烟雨星空的博客-程序员宅基地

在这之前,先来说下什么是有限状态机(Finite-state machine)背景我们在开发游戏,比如rpg游戏时,会涉及到玩家有各种状态,如攻击状态,等待状态等,如果用ifelse也可以实现,但是随着程序复杂度越来越高,这样肯定是不易于代码维护的,很容易出错,而且代码可读性比较差。这时,用状态机就可以很好的解决这些问题。我们可以通过状态机来记录它的各个状态(state)和状态之间的转换(tr...

mysql分组多个数据计算_一小步一大步的博客-程序员宅基地_mysql分组减法

当需要我们做分组并计算每组中的数据时,可以把查询的结果作为一个表,然后再进行运算。表aidcnameclass1000a11001b21002c3表bidsnameagecid2000小红1210002001小白

防空系统效能评估软件攻略_软件胃信pp637521的博客-程序员宅基地

防空系统效能评估软件防空系统效能评估软件是为解决武器装备效能评估问题新研发的一款软件,防空系统效能评估软件用于武器装备论证、研制、试验、使用等不同阶段的效能评估,防空系统效能评估软件为作战体系、装备体系评价和优化提供定量依据。装备系统效能评估分析软件能够使用仿真、靶试、演习等多种来源的实验数据,将效能评估贯穿武器装备全生命周期。1防空系统效能评估软件介绍2、互联网是个神奇的大网,大数据开发和...

Expressions and phrases for writing_zhaoyawei09的博客-程序员宅基地

Remarks for English expressionslags behind, with respect toit lags behind its non-stochastic counterparts with respect to the convergence rate.mitigate, shortcomingSVRG mitigates this shortcoming.invol

python测试框架 pytest_weixin_30457465的博客-程序员宅基地

一:pytest 是python的一套全功能的测试框架. 优点如下:1、操作简单,支持多组数据参数化, 支持用例的skip和xfail;2、支持简单的单元测试和复杂的功能测试,还可以做UI和接口自动化测试;3、pytest有很多第三方的插件并且支持定义扩展; 如失败重新执行, 断言失败也继续运行,自定义出错停止, 自定义mark标记灵活运行用例....4、可以很好的集...

推荐文章

热门文章

相关标签