技术标签: php 生存分析
#!/usr/bin/Rscript
library(survival)
library(limma)
# Create "survive.txt" in the working directory (file.create() truncates the
# file if it already exists).
# NOTE(review): the per-gene results written further down in this script are
# appended to "sf.survival", not to this file -- confirm which name is meant.
file.create("survive.txt")
# files
# NOTE(review): only the target name of this assignment survives in this copy
# of the script; `tumors` has to hold the values iterated over just below.
tumors
# Everything that follows runs once per element of `tumors`.
for (tumor in tumors){
# NOTE(review): right-hand side missing here as well; `exp_file` is the path
# that is passed to read.table() for the current `tumor`.
exp_file
# Read the expression table for the current tumor.
# The file is parsed as tab-delimited: the first line supplies the column
# names and the first column supplies the row names. The separator must be the
# escape sequence "\t"; a bare "t" would split every field on the letter t.
rna <- read.table(
  exp_file,
  header = TRUE,
  row.names = 1,
  sep = "\t",
  stringsAsFactors = FALSE
)
# get the index of the normal/tumor samples
# NOTE(review): in this copy of the script most assignments have lost their
# "<- ..." right-hand side, so the bare-name lines in this section only show
# which variable was being assigned.
t_index
n_index
# first I remove genes whose expression is == 0 in too many of the samples.
# NOTE(review): the original comment here said "more than 50% of the samples",
# but the surviving fragment below uses dim(x)[2]*0.8 (0.8 times the number of
# columns of x) -- confirm which cutoff is intended.
# NOTE(review): `rem` looks like a helper function whose header was lost; the
# return(remove) and the closing brace are what remains of its body.
rem
x
x
r
remove dim(x)[2]*0.8)
return(remove)
}
# NOTE(review): presumably the helper is applied to `rna` here and the flagged
# rows are dropped -- the actual expressions are not visible.
remove
rna
# calculate z-scores: (value - mean normal) / SD normal
# The commented-out lines below are the remains of a disabled helper (`scal`);
# they are kept exactly as found.
# scal
# mean_n
# sd_n
# res
# colnames(res)
# rownames(res)
# for(i in 1:dim(x)[1]){
# for(j in 1:dim(x)[2]){
# res[i,j]
# }
# }
# return(res)
# }
# NOTE(review): how `z_rna` is computed is not visible; it is used later as a
# matrix-like object with row and column names.
z_rna
# set the rownames and colnames
colnames(z_rna)
# The raw expression table is not referenced again below; remove it from the
# workspace.
rm(rna)
# Progress marker for the current tumor. The label is the loop variable
# `tumor`: `i` is not assigned anywhere before this point on the first pass
# (it is only a loop index further down), so printing `i` either fails or
# shows a stale row index from the previous tumor.
print(paste0(tumor, "-----------------finish first part"))
#######################################################################
# read in clinical data
# NOTE(review): the bare-name lines in this section are assignments whose
# right-hand side is missing from this copy of the script.
clinical_file
clinical
clinical
clinical$IDs
rownames(clinical)
# Counts how many clinical IDs also occur among the column names of z_rna.
# The value is neither stored nor printed (expressions inside a for loop are
# not auto-printed), so this line has no visible effect when run as a script.
sum(clinical$IDs %in% colnames(z_rna))
# days_to_new_tumor_event_after_initial_treatment: isolate the max value
ind_keep
new_tum
new_tum_collapsed
# Walk over the rows of new_tum one at a time.
# NOTE(review): 1:nrow(x) yields c(1, 0) when x has no rows; seq_len(nrow(x))
# would be the safe form.
for (k in 1:nrow(new_tum)){
# TRUE when row k has at least one non-NA entry.
if(sum(is.na(new_tum[k,])) < ncol(new_tum)){
m
new_tum_collapsed
} else {
# Row k is entirely NA.
new_tum_collapsed
}
}
# do the same to death: isolate the max value
ind_keep
death
death_collapsed
for (k in 1:nrow(death)){
# TRUE when row k has at least one non-NA entry.
if( sum(is.na(death[k,])) < ncol(death) ){
m
death_collapsed
} else {
# Row k is entirely NA.
death_collapsed
}
}
# days_to_last_followup: isolate the min value
ind_keep
fl
fl_collapsed
for (k in 1:nrow(fl)){
# TRUE when row k has at least one non-NA entry.
if( sum(is.na(fl[k,])) < ncol(fl) ){
m
fl_collapsed
} else {
# Row k is entirely NA.
fl_collapsed
}
}
# put everything together
# NOTE(review): the code below reads the columns new_tumor_days, death_days
# and followUp_days from all_clin, so the lost colnames() assignment
# presumably sets those names -- confirm.
all_clin
colnames(all_clin)
# now, to do survival analysis we need three main things:
# 1- time: this is the time till an event happens
# 2- status: this indicates which patients have to be kept for the analysis
# 3- event: this tells i.e. which patients have the gene up- or down-regulated
#    or have no changes in expression
# since we want to do censored analysis we need to have something to censor
# the data with. for example, if a patient has no death data BUT there is a
# date to last followup it means that after that day we know nothing about the
# patient, therefore after that day it cannot be used for calculations /
# Kaplan Meier plot anymore, therefore we censor it.
# so now we need to create vectors for both "time to new tumor" and
# "time to death" that contain also the data from censored individuals
# create vector with time to new tumor containing data to censor for new_tumor
# NOTE(review): the bare-name lines in this section are assignments whose
# right-hand side is missing from this copy of the script.
all_clin$new_time
# One pass per entry of all_clin$new_tumor_days.
for (i in 1:length(as.numeric(as.character(all_clin$new_tumor_days)))){
# NOTE(review): the next two lines are the remains of a single assignment to
# all_clin$new_time[i]; the surviving tail picks between the i-th followUp_days
# and the i-th new_tumor_days value, but the condition itself is not visible.
all_clin$new_time[i]
as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$new_tumor_days))[i])
}
# create vector time to death containing values to censor for death
all_clin$new_death
# One pass per entry of all_clin$death_days.
for (i in 1:length(as.numeric(as.character(all_clin$death_days)))){
# NOTE(review): same pattern as above, choosing between the i-th followUp_days
# and the i-th death_days value; the condition is not visible.
all_clin$new_death[i]
as.numeric(as.character(all_clin$followUp_days))[i],as.numeric(as.character(all_clin$death_days))[i])
}
# now we need to create the vector for censoring the data, which means telling
# R which patients are dead or have a new tumor. in this case, if a patient
# has a "days_to_death" it will be assigned 1, and used in the corresponding
# analysis. the reason why we censor with death events even for recurrence is
# pretty important. a colleague made me notice that this is a competitive risk
# problem, where, although a patient can recur and then die, if a patient is
# dead, it will not recur, therefore it is more accurate to censor for death
# events.
# create vector for death censoring
all_clin$death_event
# finally add row.names to clinical
rownames(all_clin)
# create event vector for RNASeq data
# The next two comment lines are the damaged remains of two alternative,
# disabled definitions of event_rna based on a +/-1.96 z-score cutoff; they
# are kept exactly as found.
#event_rna 1.96,1,ifelse(i< -1.96,0,2)))) #up and down 1:>1.96;0: -1.96 and <1.96
#event_rna 1.96,1,0))) # altered and not altered
# NOTE(review): the bare-name lines below are assignments whose right-hand
# side is missing from this copy of the script.
event_rna
event_rna
colnames(event_rna)
rownames(event_rna)
# NOTE(review): `med` is indexed by row (med[m]) in the loop below, so it
# presumably holds one value per row of z_rna -- the name suggests a median.
med
# Visit every cell of z_rna, row by row.
for (m in 1:nrow(z_rna)){
for (n in 1:ncol(z_rna)){
# NOTE(review): remains of an assignment to event_rna[m,n]; the surviving tail
# yields 1 or 0 depending on a comparison against med[m] that is not visible.
event_rna[m,n] med[m],1,0)
}
}
# pick your gene
# NOTE(review): the bare-name lines in this section are assignments whose
# right-hand side is missing from this copy of the script.
gene_list
genes
# Prints one "intersect genes <name>" string per requested gene that is also a
# row name of event_rna (paste() is vectorised over the intersection).
print(paste("intersect genes",intersect(genes,rownames(event_rna))))
common_samples
# Calculate each gene
for (gene in genes){
# NOTE(review): `factor` and `time` reuse the names of base R functions;
# function calls still resolve, but the names are easy to misread.
factor
time
event
# survdiff
s
s1
pv
# HR
HR
up95
low95
# high_num and low_num
high_num
low_num
# plot data
# Opens a PNG device; the file name is tumor and gene joined with no separator
# followed by ".survive.png". The device is closed by dev.off() further down.
png(paste(tumor,gene,".survive.png",sep=""))
# `s` is drawn with colours 1 to 3 and the title "<tumor>:<gene>".
plot(s,col=c(1:3), frame=F, lwd=2, main=paste(tumor,gene,sep=":"))
# add lines for the median survival
# NOTE(review): x1 and x2 are used below as x positions of the median guide
# lines; how they are derived is not visible.
x1
x2
# Draw the median-survival guide lines only when both medians are available.
# Comparing a real NA with the string "NA" yields NA, which makes `if` fail
# with "missing value where TRUE/FALSE needed"; test for missing values
# explicitly. The comparison with the literal "NA" is kept so that a median
# that arrives as the text "NA" is still skipped, as before.
medians <- c(x1, x2)
if (!anyNA(medians) && all(medians != "NA")) {
  # horizontal guide at survival probability 0.5, from time 0 to x1
  lines(c(0, x1), c(0.5, 0.5), col = "blue")
  # vertical drops from the 0.5 level down to the time axis at each median
  lines(c(x1, x1), c(0, 0.5), col = "black")
  lines(c(x2, x2), c(0, 0.5), col = "red")
}
# Annotate the open plot, then close the graphics device.
# p-value label, anchored at x = 1800.
legend(
  x = 1800,
  y = 0.995,
  legend = paste0("p.value = ", pv[[1]]),
  bty = "n",
  cex = 1.4
)
# Hazard-ratio label.
# NOTE(review): y = 1.5 may lie above the plotted range, in which case this
# label would not be visible -- confirm the intended position.
legend(
  x = 0.5,
  y = 1.5,
  legend = paste0("HR = ", HR),
  bty = "n",
  cex = 1.4
)
# Group-size key, placed at 70% of the largest non-missing new_death value
# among the common samples.
legend(
  x = max(all_clin[common_samples, "new_death"], na.rm = TRUE) * 0.7,
  y = 0.94,
  legend = c(paste("high=", high_num), paste("low=", low_num)),
  bty = "n",
  cex = 1.3,
  lwd = 3,
  col = c("black", "red")
)
dev.off()
# NOTE(review): assignment whose right-hand side is missing from this copy of
# the script; `sf_content` is what gets printed and written below.
sf_content
print(sf_content)
# Append the result to "sf.survival" without quotes, header or row names.
# Because append=TRUE is used and nothing visible in this script empties that
# file (the file.create() call at the top targets "survive.txt"), output from
# earlier genes, tumors and runs accumulates in it.
write.table(sf_content,"sf.survival",append=TRUE,quote=FALSE,col.names=FALSE,row.names=FALSE)
# end of the per-gene loop
}
# end of the per-tumor loop
}
转载本文请联系原作者获取授权,同时请注明本文来自张洪磊科学网博客。
链接地址:http://blog.sciencenet.cn/blog-2609994-992077.html
上一篇:正则
下一篇:多变量计算不同水平的overlap
前言:抽象语法树(Abstract Syntax Tree, AST)是解释器/编译器进行语法分析的基础, 也是众多前端编译工具的基础工具, 比如webpack, postcss, less等. 对于ECMAScript, 由于前端轮子众多, 人力过于充足, 早已经被人们玩腻了. 光是语法分析器就有 uglify , acorn , babylon...
CLR“两轮遍历”的多层嵌套异常捕获处理流程try try try 在此书写测试代码 catch Exception A finally catch ExceptionB finallycatch ExceptionCcatch Exceptionfinally“扫描并查找相匹...
1、取前N条记录 Oracle: Select * from TableName where rownum <= N; DB2: Select * from TableName fetch first N rows only; 2、取得系统日期 Oracle: Select sysdate from dual; DB2: Select current timestamp from sy...
<Grid x:Name="myGrid" Loaded="Grid_Loaded" DataContext="{Binding PartOneData}"> <dxc:ChartControl Name="chartControl1" DataSource="{Binding ComplainAnalysisList}">
文章目录1、文件和目录的默认权限2、umask 默认权限(1)查看系统的umask权限(2)用八进制数值显示umask权限(3)umask权限的计算方法(4)注意:umask 默认权限的计算绝不是数字直接相减。总结:1、文件和目录的默认权限Linux系统的基本权限有6种。读写执行为基本权限umask 为默认权限后边还有4种权限,会在之后的权限管理部分详细讲解。我们先创建一个文件abc,然后查看ab...
1. 支持向量机理论支持向量机(Support Vector Machine:SVM)支持向量机的目的是什么?利用训练数据集的间隔最大化找到一个最优分离超平面你可能有两个名词不太懂,间隔?分离超平面?别紧张,没有人刚开始学习SVM的时候就知道这两个东西先来看个例子,现在有一些人的身高和体重数据,将它们绘制成散点图,是这样的:如果现在给你一个未知男女的身高和体重,你能分辨出性别吗?...
转载链接:官方文档首先需要下载Apache hadoop 2.6.4的tar.gz包,到本地解压缩到某个盘下,注意路径里不要带空格。否则你配置文件里需要用windows 8.3格式的路径!第二确保操作系统是64bit,已安装.netframework4.0以上版本,这个你懂的,微软的天下,没有这个怎么混!第三确保安装了64 bit 版本的JDK1.8,笔者使用的就是JD
在这之前,先来说下什么是有限状态机(Finite-state machine)背景我们在开发游戏,比如rpg游戏时,会涉及到玩家有各种状态,如攻击状态,等待状态等,如果用ifelse也可以实现,但是随着程序复杂度越来越高,这样肯定是不易于代码维护的,很容易出错,而且代码可读性比较差。这时,用状态机就可以很好的解决这些问题。我们可以通过状态机来记录它的各个状态(state)和状态之间的转换(tr...
当需要我们做分组并计算每组中的数据时,可以把查询的结果作为一个表,然后再进行运算。表aidcnameclass1000a11001b21002c3表bidsnameagecid2000小红1210002001小白
防空系统效能评估软件防空系统效能评估软件是为解决武器装备效能评估问题新研发的一款软件,防空系统效能评估软件用于武器装备论证、研制、试验、使用等不同阶段的效能评估,防空系统效能评估软件为作战体系、装备体系评价和优化提供定量依据。装备系统效能评估分析软件能够使用仿真、靶试、演习等多种来源的实验数据,将效能评估贯穿武器装备全生命周期。1防空系统效能评估软件介绍2、互联网是个神奇的大网,大数据开发和...
Remarks for English expressions. "lags behind", "with respect to": it lags behind its non-stochastic counterparts with respect to the convergence rate. "mitigate", "shortcoming": SVRG mitigates this shortcoming. invol
一:pytest 是python的一套全功能的测试框架. 优点如下:1、操作简单,支持多组数据参数化, 支持用例的skip和xfail;2、支持简单的单元测试和复杂的功能测试,还可以做UI和接口自动化测试;3、pytest有很多第三方的插件并且支持定义扩展; 如失败重新执行, 断言失败也继续运行,自定义出错停止, 自定义mark标记灵活运行用例....4、可以很好的集...