使用Bioconductor下载GEO(Gene Expression Omnibus)上的数据
来源:互联网 发布:淘宝的运费险怎么用 编辑:程序博客网 时间:2024/06/05 04:31
library(GEOquery)
# Download the series-matrix file for GEO series GSE46106 (see the log below).
gset <- getGEO("GSE46106", GSEMatrix =TRUE)
Found 1 file(s)
GSE46106_series_matrix.txt.gz
trying URL 'ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/matrix/GSE46106_series_matrix.txt.gz'
ftp data connection made, file length 4110183 bytes
opened URL
==================================================
downloaded 3.9 Mb
File stored at:
/var/folders/n4/11sc2xz13k56hl85z_h2rgq00000gn/T//RtmpBFQdpL/GPL570.soft
# getGEO() handed back a container; check how many entries it holds
# (the download log above reports a single series-matrix file).
length(gset)
# Keep only the first entry for the rest of the session.
gset <- gset[[1]]
# Peek at the first five columns of the per-sample table returned by pData().
head(pData(gset)[,1:5])
# load NCBI platform annotation
# Platform identifier recorded on the series object
# (the download log mentions GPL570 files).
gpl <- annotation(gset)
# Fetch that platform's record from GEO, asking for the AnnotGPL version.
platf <- getGEO(gpl, AnnotGPL=TRUE)
File stored at:
/var/folders/n4/11sc2xz13k56hl85z_h2rgq00000gn/T//RtmpBFQdpL/GPL570.annot.gz
There were 30 warnings (use warnings() to see them)
# Pull the platform's data table out as a plain data frame.
ncbifd <- data.frame(attr(dataTable(platf), "table"))
# Expression values of the series (probe rows by sample columns, per the
# printout below).
eset <- exprs(gset)
# Show the first five sample columns for the first few probes.
head(eset[,1:5])
GSM1123782 GSM1123783 GSM1123784 GSM1123785 GSM1123786
1007_s_at 10.1689 10.5247 10.8179 10.3539 10.4964
1053_at 9.6002 7.9436 9.8653 9.9733 10.1960
117_at 5.6808 5.0301 3.7654 2.7751 2.8179
121_at 4.2268 4.6148 4.6147 4.4977 4.6147
1255_g_at 2.1869 2.1869 2.1869 2.1869 2.1869
1294_at 2.1874 2.1874 2.1874 2.1874 2.1874
# First five annotation columns: probe ID, gene title, gene symbol, gene ID
# and UniGene title (see the header printed below).
head(ncbifd[,1:5])
ID Gene.title Gene.symbol Gene.ID UniGene.title
1 1007_s_at discoidin domain receptor tyrosine kinase 1 DDR1 780 <NA>
2 1053_at replication factor C (activator 1) 2, 40kDa RFC2 5982 <NA>
3 117_at heat shock 70kDa protein 6 (HSP70B') HSPA6 3310 <NA>
4 121_at paired box 8 PAX8 7849 <NA>
5 1255_g_at guanylate cyclase activator 1A (retina) GUCA1A 2978 <NA>
6 1294_at ubiquitin-like modifier activating enzyme 7 UBA7 7318 <NA>
接着是第二种方法:用 getGEOSuppFiles 下载原始 CEL 文件,再自行做 RMA 标准化和探针注释。
library(GEOquery)
# Download the supplementary files of GSE46106; per the log below this is the
# raw-data tar archive plus filelist.txt.
getGEOSuppFiles("GSE46106")
[1] "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/suppl/"
trying URL 'ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/suppl//GSE46106_RAW.tar'
ftp data connection made, file length 399247360 bytes
opened URL
===============================================
downloaded 380.8 Mb
trying URL 'ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/suppl//filelist.txt'
ftp data connection made, file length 3308 bytes
opened URL
==================================================
downloaded 3308 bytes
# Switch into the directory that holds the downloaded supplementary files.
# NOTE(review): setwd() changes the working directory for the rest of the
# session; every relative path below depends on it.
setwd("GSE46106/")
# List what was downloaded.
dir()
[1] "filelist.txt" "GSE46106_RAW.tar"
# Unpack the raw-data archive.
untar("GSE46106_RAW.tar")
# Collect the extracted files whose names end in "gz".
files <- dir(pattern="gz$")
# Decompress each one; the printout below shows one number per input file.
sapply(files, gunzip)
GSM1123782_2147TG11_U133plus2.CEL.gz GSM1123783_2147TG15_U133plus2.CEL.gz GSM1123784_2147TG1_U133plus2.CEL.gz
33380229 32529207 33390958
GSM1123785_2147TG6_U133plus2.CEL.gz GSM1123786_2277TG1_U133plus2.CEL.gz GSM1123787_2277TG9_U133plus2.CEL.gz
33210730 33082826 33531908
GSM1123788_2665TG15_U133plus2.CEL.gz GSM1123789_2665TG1_U133plus2.CEL.gz GSM1123790_2665TG6_U133plus2.CEL.gz
32232774 33380782 33432878
GSM1123791_3104TG1_U133plus2.CEL.gz GSM1123792_3104TG3_U133plus2.CEL.gz GSM1123793_3107TG1_U133plus2.CEL.gz
32850644 32588606 32584342
GSM1123794_3107TG5_U133plus2.CEL.gz GSM1123795_3143TG1_U133plus2.CEL.gz GSM1123796_3143TG5_U133plus2.CEL.gz
33358932 32369391 32806108
GSM1123797_3204TG1_U133plus2.CEL.gz GSM1123798_3204TG5_U133plus2.CEL.gz GSM1123799_3561TG1_U133plus2.CEL.gz
32557782 33016324 32883029
GSM1123800_3561TG5_U133plus2.CEL.gz GSM1123801_3611TG1_U133plus2.CEL.gz GSM1123802_3611TG5_U133plus2.CEL.gz
33454274 32657648 33126603
GSM1123803_3613TG1_U133plus2.CEL.gz GSM1123804_3613TG5_U133plus2.CEL.gz GSM1123805_3807TG1_U133plus2.CEL.gz
32581902 33193826 32791917
GSM1123806_3807TG5_U133plus2.CEL.gz GSM1123807_3824TG2_U133plus2.CEL.gz GSM1123808_3887TG1_U133plus2.CEL.gz
33266865 33078422 32479022
GSM1123809_3887TG5_U133plus2.CEL.gz GSM1123810_3904TG1_U133plus2.CEL.gz GSM1123811_3904TG5_U133plus2.CEL.gz
33291287 32645537 33425796
GSM1123812_3936TG1_U133plus2.CEL.gz GSM1123813_3936TG5_U133plus2.CEL.gz GSM1123814_3963TG1_U133plus2.CEL.gz
32939923 33317196 32725410
GSM1123815_3963TG5_U133plus2.CEL.gz GSM1123816_4013TG1_U133plus2.CEL.gz GSM1123817_4169TG2_U133plus2.CEL.gz
33276082 33151406 33167461
GSM1123818_4175TG1_U133plus2.CEL.gz GSM1123819_4272TG1_U133plus2.CEL.gz GSM1123820_4400TG1_U133plus2.CEL.gz
33139829 33111635 33206021
GSM1123821_4400TG5_U133plus2.CEL.gz GSM1123822_4664TG1_U133plus2.CEL.gz GSM1123823_4849TG1_U133plus2.CEL.gz
33026806 33364390 33300210
GSM1123824_4888TG1_U133plus2.CEL.gz GSM1123825_4913TG1_U133plus2.CEL.gz
33299396 33207528
# Read the decompressed CEL files, run rma() on them, and look up a gene
# symbol and a UniGene ID for every probe set.

# Anchor on the literal ".CEL" extension so names that merely end in the
# letters "CEL" are not picked up.
filelist <- dir(pattern = "\\.CEL$")
library(affy)
library(annotate)
data <- ReadAffy(filenames = filelist)
# Name of the ".db" annotation package for the chip type stored on the batch.
affydb <- annPkgName(annotation(data), type = "db")
# library() stops with an error when the annotation package is missing;
# require() would only return FALSE and let the lookups below fail later.
library(affydb, character.only = TRUE)
eset <- rma(data, verbose = FALSE)
eset.e <- exprs(eset)
library(annaffy)
# Probe-set IDs, taken once from the expression matrix and reused below.
probe_ids <- rownames(eset.e)
symbols <- as.character(aafSymbol(probe_ids, affydb))
genes <- as.character(aafUniGene(probe_ids, affydb))
# Download the series-matrix file for GEO series GSE46106 (see the log below).
gset <- getGEO("GSE46106", GSEMatrix =TRUE)
Found 1 file(s)
GSE46106_series_matrix.txt.gz
trying URL 'ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/matrix/GSE46106_series_matrix.txt.gz'
ftp data connection made, file length 4110183 bytes
opened URL
==================================================
downloaded 3.9 Mb
File stored at:
/var/folders/n4/11sc2xz13k56hl85z_h2rgq00000gn/T//RtmpBFQdpL/GPL570.soft
# getGEO() handed back a container; check how many entries it holds
# (the download log above reports a single series-matrix file).
length(gset)
# Keep only the first entry for the rest of the session.
gset <- gset[[1]]
# Peek at the first five columns of the per-sample table returned by pData().
head(pData(gset)[,1:5])
# load NCBI platform annotation
# Platform identifier recorded on the series object
# (the download log mentions GPL570 files).
gpl <- annotation(gset)
# Fetch that platform's record from GEO, asking for the AnnotGPL version.
platf <- getGEO(gpl, AnnotGPL=TRUE)
File stored at:
/var/folders/n4/11sc2xz13k56hl85z_h2rgq00000gn/T//RtmpBFQdpL/GPL570.annot.gz
There were 30 warnings (use warnings() to see them)
# Pull the platform's data table out as a plain data frame.
ncbifd <- data.frame(attr(dataTable(platf), "table"))
# Expression values of the series (probe rows by sample columns, per the
# printout below).
eset <- exprs(gset)
# Show the first five sample columns for the first few probes.
head(eset[,1:5])
GSM1123782 GSM1123783 GSM1123784 GSM1123785 GSM1123786
1007_s_at 10.1689 10.5247 10.8179 10.3539 10.4964
1053_at 9.6002 7.9436 9.8653 9.9733 10.1960
117_at 5.6808 5.0301 3.7654 2.7751 2.8179
121_at 4.2268 4.6148 4.6147 4.4977 4.6147
1255_g_at 2.1869 2.1869 2.1869 2.1869 2.1869
1294_at 2.1874 2.1874 2.1874 2.1874 2.1874
# First five annotation columns: probe ID, gene title, gene symbol, gene ID
# and UniGene title (see the header printed below).
head(ncbifd[,1:5])
ID Gene.title Gene.symbol Gene.ID UniGene.title
1 1007_s_at discoidin domain receptor tyrosine kinase 1 DDR1 780 <NA>
2 1053_at replication factor C (activator 1) 2, 40kDa RFC2 5982 <NA>
3 117_at heat shock 70kDa protein 6 (HSP70B') HSPA6 3310 <NA>
4 121_at paired box 8 PAX8 7849 <NA>
5 1255_g_at guanylate cyclase activator 1A (retina) GUCA1A 2978 <NA>
6 1294_at ubiquitin-like modifier activating enzyme 7 UBA7 7318 <NA>
接着是第二种方法:用 getGEOSuppFiles 下载原始 CEL 文件,再自行做 RMA 标准化和探针注释。
library(GEOquery)
# Download the supplementary files of GSE46106; per the log below this is the
# raw-data tar archive plus filelist.txt.
getGEOSuppFiles("GSE46106")
[1] "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/suppl/"
trying URL 'ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/suppl//GSE46106_RAW.tar'
ftp data connection made, file length 399247360 bytes
opened URL
===============================================
downloaded 380.8 Mb
trying URL 'ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE46nnn/GSE46106/suppl//filelist.txt'
ftp data connection made, file length 3308 bytes
opened URL
==================================================
downloaded 3308 bytes
# Switch into the directory that holds the downloaded supplementary files.
# NOTE(review): setwd() changes the working directory for the rest of the
# session; every relative path below depends on it.
setwd("GSE46106/")
# List what was downloaded.
dir()
[1] "filelist.txt" "GSE46106_RAW.tar"
# Unpack the raw-data archive.
untar("GSE46106_RAW.tar")
# Collect the extracted files whose names end in "gz".
files <- dir(pattern="gz$")
# Decompress each one; the printout below shows one number per input file.
sapply(files, gunzip)
GSM1123782_2147TG11_U133plus2.CEL.gz GSM1123783_2147TG15_U133plus2.CEL.gz GSM1123784_2147TG1_U133plus2.CEL.gz
33380229 32529207 33390958
GSM1123785_2147TG6_U133plus2.CEL.gz GSM1123786_2277TG1_U133plus2.CEL.gz GSM1123787_2277TG9_U133plus2.CEL.gz
33210730 33082826 33531908
GSM1123788_2665TG15_U133plus2.CEL.gz GSM1123789_2665TG1_U133plus2.CEL.gz GSM1123790_2665TG6_U133plus2.CEL.gz
32232774 33380782 33432878
GSM1123791_3104TG1_U133plus2.CEL.gz GSM1123792_3104TG3_U133plus2.CEL.gz GSM1123793_3107TG1_U133plus2.CEL.gz
32850644 32588606 32584342
GSM1123794_3107TG5_U133plus2.CEL.gz GSM1123795_3143TG1_U133plus2.CEL.gz GSM1123796_3143TG5_U133plus2.CEL.gz
33358932 32369391 32806108
GSM1123797_3204TG1_U133plus2.CEL.gz GSM1123798_3204TG5_U133plus2.CEL.gz GSM1123799_3561TG1_U133plus2.CEL.gz
32557782 33016324 32883029
GSM1123800_3561TG5_U133plus2.CEL.gz GSM1123801_3611TG1_U133plus2.CEL.gz GSM1123802_3611TG5_U133plus2.CEL.gz
33454274 32657648 33126603
GSM1123803_3613TG1_U133plus2.CEL.gz GSM1123804_3613TG5_U133plus2.CEL.gz GSM1123805_3807TG1_U133plus2.CEL.gz
32581902 33193826 32791917
GSM1123806_3807TG5_U133plus2.CEL.gz GSM1123807_3824TG2_U133plus2.CEL.gz GSM1123808_3887TG1_U133plus2.CEL.gz
33266865 33078422 32479022
GSM1123809_3887TG5_U133plus2.CEL.gz GSM1123810_3904TG1_U133plus2.CEL.gz GSM1123811_3904TG5_U133plus2.CEL.gz
33291287 32645537 33425796
GSM1123812_3936TG1_U133plus2.CEL.gz GSM1123813_3936TG5_U133plus2.CEL.gz GSM1123814_3963TG1_U133plus2.CEL.gz
32939923 33317196 32725410
GSM1123815_3963TG5_U133plus2.CEL.gz GSM1123816_4013TG1_U133plus2.CEL.gz GSM1123817_4169TG2_U133plus2.CEL.gz
33276082 33151406 33167461
GSM1123818_4175TG1_U133plus2.CEL.gz GSM1123819_4272TG1_U133plus2.CEL.gz GSM1123820_4400TG1_U133plus2.CEL.gz
33139829 33111635 33206021
GSM1123821_4400TG5_U133plus2.CEL.gz GSM1123822_4664TG1_U133plus2.CEL.gz GSM1123823_4849TG1_U133plus2.CEL.gz
33026806 33364390 33300210
GSM1123824_4888TG1_U133plus2.CEL.gz GSM1123825_4913TG1_U133plus2.CEL.gz
33299396 33207528
# Read the decompressed CEL files, run rma() on them, and look up a gene
# symbol and a UniGene ID for every probe set.

# Anchor on the literal ".CEL" extension so names that merely end in the
# letters "CEL" are not picked up.
filelist <- dir(pattern = "\\.CEL$")
library(affy)
library(annotate)
data <- ReadAffy(filenames = filelist)
# Name of the ".db" annotation package for the chip type stored on the batch.
affydb <- annPkgName(annotation(data), type = "db")
# library() stops with an error when the annotation package is missing;
# require() would only return FALSE and let the lookups below fail later.
library(affydb, character.only = TRUE)
eset <- rma(data, verbose = FALSE)
eset.e <- exprs(eset)
library(annaffy)
# Probe-set IDs, taken once from the expression matrix and reused below.
probe_ids <- rownames(eset.e)
symbols <- as.character(aafSymbol(probe_ids, affydb))
genes <- as.character(aafUniGene(probe_ids, affydb))
1 0
- 使用Bioconductor下载GEO(Gene Expression Omnibus)上的数据
- 使用GEOquery下载GEO数据
- 将GEO的soft 数据转换为expression matrix
- Redis GEO地理使用(六)
- 使用百度地图SDK反geo检索失败的问题
- 打造自己的LINQ Provider(上):Expression Tree揭秘
- 打造自己的LINQ Provider(上):Expression Tree揭秘
- Redis GEO 的java实现(通过Jedis)
- 使用Expression Blend创建漂亮的按钮(Building Better Buttons in Expression Blend )
- Expression Tree Visualizer的使用
- EL表达式(Expression)的使用
- geo 原理的两篇文章
- 如何使用Github上下载的Demo
- github上下载的源码 如何使用 ?
- 安装 GitLab CE Omnibus package 到下面的操作系统
- vim编辑R代码的实现:::(R语言安装BIoconductor和BiocLite):R在linux的安装
- EL(Expression Language)的初次使用印象
- 关于使用Expression Blend的个人心得(附图)
- 机器学习入门:线性回归及梯度下降
- 2014WAP校园招聘笔试题
- Collections on Native Method for Java
- CentOS 7安装配置JDK
- [leetcode][栈] Valid Parentheses
- 使用Bioconductor下载GEO(Gene Expression Omnibus)上的数据
- 【提权】udf提权入侵
- Struts2中使用validate校验框架
- Struts与Spring整合Scope
- KANSEI ROBOT
- (C语言)字符串反转相关问题
- 绘制字符串与图像
- C# 多线程
- Android各国语言values命名规则