='/Volumes/T7/Dehingia_et_al_2025/data/'
data_directory='/Volumes/T7/Dehingia_et_al_2025/data/ChIP-and-ATAC-seq/'
chipseq_directory='/Volumes/T7/Dehingia_et_al_2025/data/RNA-seq/'
rnaseq_directory='/Volumes/T7/Dehingia_et_al_2025/data/Hi-C/'
hic_directory= '/Volumes/T7/Dehingia_et_al_2025/data/outputs/'
outputs_directory = '/Volumes/T7/Dehingia_et_al_2025/data/objects/'
objects_directory = '/Volumes/T7/Dehingia_et_al_2025/scripts/'
scripts_directory = '/Volumes/T7/Dehingia_et_al_2025/source_files/'
source_data_directory
library(rtracklayer)
## Warning: pakiet 'rtracklayer' został zbudowany w wersji R 4.1.1
## Warning: pakiet 'S4Vectors' został zbudowany w wersji R 4.1.1
## Warning: pakiet 'GenomeInfoDb' został zbudowany w wersji R 4.1.1
library(sf)
## Warning: pakiet 'sf' został zbudowany w wersji R 4.1.2
library(DESeq2)
## Warning: pakiet 'MatrixGenerics' został zbudowany w wersji R 4.1.1
library(org.Hs.eg.db)
library(LSD)
library(geneplotter)
library(biomaRt)
## Warning: pakiet 'biomaRt' został zbudowany w wersji R 4.1.1
library(gplots)
library(VennDiagram)
## Warning: pakiet 'VennDiagram' został zbudowany w wersji R 4.1.2
library(vsn)
library(ggVennDiagram)
library(GenomicRanges)
library(gwasrapidd)
library(fgsea)
library(goseq)
library(ggplot2)
library(RColorBrewer)
## Warning: pakiet 'RColorBrewer' został zbudowany w wersji R 4.1.2
library(pheatmap)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
## Warning: pakiet 'GenomicFeatures' został zbudowany w wersji R 4.1.1
library(dplyr)
library(tidyverse)
## Warning: pakiet 'tidyverse' został zbudowany w wersji R 4.1.2
## Warning: pakiet 'tibble' został zbudowany w wersji R 4.1.2
## Warning: pakiet 'forcats' został zbudowany w wersji R 4.1.2
library(scales)
library(smoothmest)
## Warning: pakiet 'smoothmest' został zbudowany w wersji R 4.1.2
## Warning: pakiet 'MASS' został zbudowany w wersji R 4.1.2
library(BSgenome.Mmusculus.UCSC.mm10)
library(Matrix)
library(ggpubr)
## Warning: pakiet 'ggpubr' został zbudowany w wersji R 4.1.2
set.seed(22)
=12000*1024^2
msoptions(future.globals.maxSize=ms)
= function( normCounts, metaTable, metaColumn, geneSymbol, mappings4genes ){
buildDfForGGBARPLOT # normCounts=ddsFdf; metaTable=as.data.frame(coldata);metaColumn="cell_type";geneSymbol="Aldh1a3";mappings4genes=genemap_unique
=which(colnames(metaTable)==metaColumn)
theColdata.frame( norm_counts = normCounts[rownames(normCounts) == unique(mappings4genes$gene_id[mappings4genes$geneName==geneSymbol])],
annot = metaTable[ match(colnames(normCounts),metaTable$sample), theCol ] )}
source( paste0(scripts_directory,'functions.R' ) )
= import.gff(paste0(data_directory,'Mus_musculus.GRCm38.101.gtf'))
gtf
= TxDb.Mmusculus.UCSC.mm10.knownGene
txdb <- c("tx_name", "gene_id","GENEID")
columns = promoters(txdb, upstream=200, downstream=200,columns=columns)
promGR = promoters(txdb, upstream=0, downstream=0,columns=columns)
TSSGR = promoters(txdb, upstream=2000, downstream=2000,columns=columns) promGRExt
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 5 out-of-bound ranges located on sequences
## chr4_GL456350_random, chr4_JH584293_random, chr4_JH584295_random,
## chr5_JH584296_random, and chrUn_GL456239. Note that ranges located on a
## sequence whose length is unknown (NA) or on a circular sequence are not
## considered out-of-bound (use seqlengths() and isCircular() to get the
## lengths and circularity flags of the underlying sequences). You can use
## trim() to trim these ranges. See ?`trim,GenomicRanges-method` for more
## information.
=trim(promGRExt)
promGRExt
= data.frame(tid=gtf$transcript_id,gid=gtf$gene_id,stringsAsFactors = FALSE)
transcrpit2ensg $transcipt1 = unlist(strsplit(promGRExt$tx_name,'\\.'))[seq(1,2*length(promGRExt),by=2)]
promGRExt$ensgeneid = transcrpit2ensg$gid[match(promGRExt$transcipt1,promGRExt$transcipt1)]
promGRExtseqlevelsStyle(promGR)='ucsc'
= data.frame(gene_id=gtf$gene_id,geneName=gtf$gene_name,stringsAsFactors = FALSE)
genemap_unique = genemap_unique[!duplicated(genemap_unique$gene_id),]
genemap_unique
= data.frame( chr=as.character(chrom(gtf)),
promoters_ap start=as.numeric(start(gtf)),
end=as.numeric(end(gtf)),
strand=as.character(strand(gtf)),
transcript_id=as.character(gtf$transcript_id),
gene_id=as.character(gtf$gene_id),
gene_name = as.character(gtf$gene_name),
gene_biotype = as.character(gtf$gene_biotype),
type = gtf$type,
stringsAsFactors = FALSE )
length(unique(promoters_ap$gene_id))
## [1] 55487
= promoters_ap[promoters_ap$type == "transcript",]
promoters_ap = split(promoters_ap,promoters_ap$gene_id)
promoters_sp
## for each gene find the 5-prime most TSS
= do.call('rbind', lapply( promoters_sp, function(x){
promoters_tss = ifelse( as.character(unique(x$strand))=="+",
tss which.min(x$start),'start'],
x[which.max(x$end),'end'] )
x[if( as.character(unique(x$strand))=="+") tp = x[which.min(x$start),] else tp = x[which.max(x$end),]
$tss = tss
tpreturn(tp)
}))
##
= GRanges(seqnames = promoters_tss$chr,
promoters_tss_gr ranges = IRanges(as.numeric(promoters_tss$tss)-500 ,
end=as.numeric(promoters_tss$tss) + 500,
names=promoters_tss$transcript_id),
strand = promoters_tss$strand,
gene_id = promoters_tss$gene_id,
gene_name = promoters_tss$gene_name,
gene_biotype = promoters_tss$gene_biotype,
tss = promoters_tss$tss)
seqlevelsStyle(promoters_tss_gr) = 'ucsc'
= readBed_filterChromsStraded( paste0(data_directory,'CTCF_HUMAN.H11MO.0.A.bed'),
ctcf_motif chroms=paste0('chr',c(1:19,'X','Y')), 5 )
= importBEDPE_Filter_Loops( paste0(hic_directory, 'Bonev/hiccups_output_esc/' ),
es_loops max_size=5000000, extend_anchors_by=0 )
= importBEDPE_Filter_Loops( paste0(hic_directory, 'Bonev/hiccups_output_npc/' ),
ns_loops max_size=5000000, extend_anchors_by=0 )
=es_loops$y2-es_loops$x1
es_loops_size=ns_loops$y2-ns_loops$x1
ns_loops_size
= getGR( es_loops$X.chr1, es_loops$x1, es_loops$x2,0 )
es_loops_left_anchor = getGR( es_loops$X.chr1, es_loops$y1, es_loops$y2,0 )
es_loops_right_anchor
= getGR( ns_loops$X.chr1, ns_loops$x1, ns_loops$x2,0 )
ns_loops_left_anchor = getGR( ns_loops$X.chr1, ns_loops$y1, ns_loops$y2,0 )
ns_loops_right_anchor
seqlevelsStyle(es_loops_left_anchor) = 'ucsc'
seqlevelsStyle(es_loops_right_anchor) = 'ucsc'
seqlevelsStyle(ns_loops_left_anchor) = 'ucsc'
seqlevelsStyle(ns_loops_right_anchor) = 'ucsc'
= c( es_loops_left_anchor, es_loops_right_anchor )
es_anchors_GR = c( ns_loops_left_anchor, ns_loops_right_anchor )
ns_anchors_GR
= compareLoops( es_loops, ns_loops, offset=5000, maxLoopSize=5000000 )
es_ns_loops = es_ns_loops$First_set_specific
es_spe_loops = es_ns_loops$Second_set_specific
ns_spe_loops = es_ns_loops$First_set_common
common_loops =es_spe_loops$y2-es_spe_loops$x1
es_spe_loops_size=ns_spe_loops$y2-ns_spe_loops$x1
ns_spe_loops_size
= getGR( es_spe_loops$X.chr1, es_spe_loops$x1, es_spe_loops$x2,0 )
es_spe_loops_left_anchor = getGR( es_spe_loops$X.chr1, es_spe_loops$y1, es_spe_loops$y2,0 )
es_spe_loops_right_anchor
= getGR( ns_spe_loops$X.chr1, ns_spe_loops$x1, ns_spe_loops$x2,0 )
ns_spe_loops_left_anchor = getGR( ns_spe_loops$X.chr1, ns_spe_loops$y1, ns_spe_loops$y2,0 )
ns_spe_loops_right_anchor
= getGR( common_loops$X.chr1, common_loops$x1, common_loops$x2,0 )
en_com_loops_left_anchor = getGR( common_loops$X.chr1, common_loops$y1, common_loops$y2,0 )
en_com_loops_right_anchor
= getGR( es_spe_loops$X.chr1, es_spe_loops$x1, es_spe_loops$y2,0 )
es_spe_loops_gr = getGR( ns_spe_loops$X.chr1, ns_spe_loops$x1, ns_spe_loops$y2,0 )
ns_spe_loops_gr = getGR( common_loops$X.chr1, common_loops$x1, common_loops$y2,0 )
cm_loops_gr
=width(es_spe_loops_gr)
es_spe_loops_size=width(ns_spe_loops_gr)
ns_spe_loops_size
= getGR( es_loops$X.chr1,es_loops$x1, es_loops$y2, 0 )
es_loops_gr = getGR( ns_loops$X.chr1,ns_loops$x1, ns_loops$y2, 0 )
ns_loops_gr
= getGR( es_spe_loops$X.chr1, es_spe_loops$x1+25000, es_spe_loops$y2-25000,0 )
es_spe_loops_domain_gr = getGR( ns_spe_loops$X.chr1, ns_spe_loops$x1+25000, ns_spe_loops$y2-25000,0 )
ns_spe_loops_domain_gr = getGR( common_loops$X.chr1, common_loops$x1+25000, common_loops$y2-25000,0 )
common_loops_domain_gr
seqlevelsStyle(es_spe_loops_left_anchor) = 'ucsc'
seqlevelsStyle(es_spe_loops_right_anchor) = 'ucsc'
seqlevelsStyle(ns_spe_loops_left_anchor) = 'ucsc'
seqlevelsStyle(ns_spe_loops_right_anchor) = 'ucsc'
seqlevelsStyle(en_com_loops_left_anchor) = 'ucsc'
seqlevelsStyle(en_com_loops_right_anchor) = 'ucsc'
seqlevelsStyle(es_spe_loops_gr) = 'ucsc'
seqlevelsStyle(ns_spe_loops_gr) = 'ucsc'
seqlevelsStyle(es_loops_gr) = 'ucsc'
seqlevelsStyle(ns_loops_gr) = 'ucsc'
seqlevelsStyle(es_spe_loops_domain_gr) = 'ucsc'
seqlevelsStyle(ns_spe_loops_domain_gr) = 'ucsc'
seqlevelsStyle(common_loops_domain_gr) = 'ucsc'
seqlevelsStyle(cm_loops_gr) = 'ucsc'
= es_spe_loops[-queryHits(findOverlaps(es_spe_loops_gr,c(en_com_loops_left_anchor,en_com_loops_right_anchor))),]
es_spe_loops_filt = ns_spe_loops[-queryHits(findOverlaps(ns_spe_loops_gr,c(en_com_loops_left_anchor,en_com_loops_right_anchor))),]
ns_spe_loops_filt = getGR(es_spe_loops_filt$X.chr1,es_spe_loops_filt$x1,es_spe_loops_filt$y2,0)
es_spe_loops_filt_gr = getGR(ns_spe_loops_filt$X.chr1,ns_spe_loops_filt$x1,ns_spe_loops_filt$y2,0) ns_spe_loops_filt_gr
Check differentiation of ES 46 and Nora ES cells
= data.frame(es1=read.delim(paste0(rnaseq_directory,'gene_counts_FEB_2022.txt'),skip=1)[,13],
rna_46C es2=read.delim(paste0(rnaseq_directory,'gene_counts_FEB_2022.txt'),skip=1)[,14],
ns1=read.delim(paste0(rnaseq_directory,'gene_counts_FEB_2022.txt'),skip=1)[,8],
ns2=read.delim(paste0(rnaseq_directory,'gene_counts_FEB_2022.txt'),skip=1)[,10],
row.names=read.delim(paste0(rnaseq_directory,'gene_counts_FEB_2022.txt'),skip=1)[,1])
= data.frame(cell_type=c('ES','ES','NS','NS'),genotype='46C',row.names=colnames(rna_46C))
coldata46c
load( paste0(rnaseq_directory,"CTCF_degron_countdata.RData") )
= requested_countdata[,1:14]
count_df_filt = requested_metadata[1:14,]
coldata $culture[9:14]='NS'
coldata$cell_type = paste(coldata$culture,coldata$condition,sep="_")
coldata= DESeqDataSetFromMatrix(countData = count_df_filt,
ddsF colData = coldata,
design= ~cell_type )
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
= DESeq(ddsF) ddsF
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
= counts(ddsF,normalized=TRUE ) ddsFdf
= DESeqDataSetFromMatrix(countData = count_df_filt[,c(3,4,7,8,10,11,14)],
Diff_Nora__DDS colData = coldata[c(3,4,7,8,10,11,14),],
design= ~0+culture )
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
= results(DESeq(Diff_Nora__DDS), contrast=c("culture", "2i", "NS") ) es2i_ns_comp
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
$padj_squished=es2i_ns_comp$padj
es2i_ns_comp$padj_squished[es2i_ns_comp$padj_squished<10^(-20)]=10^(-20)
es2i_ns_comp
par(pty="s")
plot(x=es2i_ns_comp$log2FoldChange,y=-log10(es2i_ns_comp$padj_squished),
col=ifelse(es2i_ns_comp$padj<0.01,"green4","gray"),
ylim=c(0,20),pch=19,cex=0.5)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
= data.frame(LFC=es2i_ns_comp$log2FoldChange,log10pval=-log10(es2i_ns_comp$padj_squished))
df write.table(df,file=paste0(source_data_directory,"volcano_differentiationNcells.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= DESeqDataSetFromMatrix(countData = count_df_filt[,c(3,4,7,8,10,11,14)],
Diff_Nora__DDS colData = coldata[c(3,4,7,8,10,11,14),],
design= ~0+culture )
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
= counts(DESeq(Diff_Nora__DDS),normalized=TRUE ) es2i_ns_comp_counts
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
= es2i_ns_comp[!is.na(es2i_ns_comp$padj),]
es2i_ns_comp = rep("gray",nrow(es2i_ns_comp_counts))
thisColV rownames(es2i_ns_comp_counts) %in% rownames(es2i_ns_comp[es2i_ns_comp$padj<0.01 & es2i_ns_comp$log2FoldChange>0,])] = "red"
thisColV[rownames(es2i_ns_comp_counts) %in% rownames(es2i_ns_comp[es2i_ns_comp$padj<0.01 & es2i_ns_comp$log2FoldChange<0,])] = "blue"
thisColV[
par(pty="s",bty="O")
plot(x=log2(0.5+rowMeans(es2i_ns_comp_counts[,1:4])),
y=log2(0.5+rowMeans(es2i_ns_comp_counts[,5:7])),
col=thisColV,xlab="ES", ylab="NS",
ylim=c(-2,20),xlim=c(-2,20),pch=19,cex=0.25)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
= data.frame(ES=log2(0.5+rowMeans(es2i_ns_comp_counts[,1:4])),NS=log2(0.5+rowMeans(es2i_ns_comp_counts[,5:7])))
df write.table(df,file=paste0(source_data_directory,"scatterplot_Differentiation_Ncells.txt"),row.names = FALSE, sep="\t",quote=FALSE)
=0;pval_thr=0.1
LFC_thr= DESeqDataSetFromMatrix(countData = count_df_filt[,1:8],
ti_DDS colData = coldata[1:8,],
design= ~cell_type )
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
=DESeq(ti_DDS) ti_DDS
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
= results(ti_DDS, contrast=c("cell_type", "2i_WT", "2i_IAA") )
ti
= DESeqDataSetFromMatrix(countData = count_df_filt[,9:14],
ns_DDS colData = coldata[9:14,],
design= ~0+cell_type )
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
= DESeq(ns_DDS) ns_DDS
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
= results(ns_DDS, contrast=c("cell_type", "NS_WT", "NS_IAA") )
ns
= merge( as.data.frame(ti), as.data.frame(ns), by='row.names')
es_ns_IAA_merged
= ti[!is.na(ti$padj),]
ti = ns[!is.na(ns$padj),]
ns
= ( ti[ti$padj<pval_thr & abs(ti$log2FoldChange)>LFC_thr,] )
TI_sig_genes = rownames( ti[ti$padj<pval_thr & ti$log2FoldChange<(-1*LFC_thr),] )
TI_sig_genes_up = rownames( ti[ti$padj<pval_thr & ti$log2FoldChange>LFC_thr,] )
TI_sig_genes_dn = rownames( ti[ti$padj>0.1 & abs(ti$log2FoldChange)<log2(1.25),] )
TI_common
= ( ns[ns$padj<pval_thr & abs(ns$log2FoldChange)>LFC_thr,] )
NS_sig_genes = rownames( ns[ns$padj<pval_thr & ns$log2FoldChange<(-1 *LFC_thr),] )
NS_sig_genes_up = rownames( ns[ns$padj<pval_thr & ns$log2FoldChange>LFC_thr,] )
NS_sig_genes_dn = rownames( ns[ns$padj>0.1 & abs(ns$log2FoldChange)<log2(1.25),] )
NS_common
write.table( TI_sig_genes_up, file=paste0(outputs_directory,'TI_sig_genes_up.txt'),
quote=FALSE, row.names=FALSE, col.names=FALSE, sep='\t' )
write.table( TI_sig_genes_dn, file=paste0(outputs_directory,'TI_sig_genes_dn.txt'),
quote=FALSE, row.names=FALSE, col.names=FALSE, sep='\t' )
write.table( NS_sig_genes_up, file=paste0(outputs_directory,'NS_sig_genes_up.txt'),
quote=FALSE, row.names=FALSE, col.names=FALSE, sep='\t' )
write.table( NS_sig_genes_dn, file=paste0(outputs_directory,'NS_sig_genes_dn.txt'),
quote=FALSE, row.names=FALSE, col.names=FALSE, sep='\t' )
length(unique(c(TI_sig_genes_up,TI_sig_genes_dn,NS_sig_genes_up,NS_sig_genes_dn)))
## [1] 1250
Volcano plots for the extended figure
= data.frame(Untreated=rowMeans(ddsFdf[,c('ES_2i_1','ES_2i_2','ES_2i_3','ES_2i_4')]),
TI IAA = rowMeans(ddsFdf[,c('ES_2i_IAA_1','ES_2i_IAA_2','ES_2i_IAA_3','ES_2i_IAA_4')]),
stringsAsFactors=FALSE )
par(mar=c(5,5,3,1))
plot(ti$log2FoldChange,-log10(ti$pvalue),
col=ifelse(abs(ti$log2FoldChange)>0 & ti$padj<pval_thr,'red3','gray'),
pch=19, cex=0.25,
xlab=expression('log'[2]*'(Untr/IAA)'),
ylab=expression('-log'[10]*'(P-val)'),
ylim=c(0,30),xlim=c(-3,3),
main='ES 2i untr. vs. IAA')
axis(1,lwd=3)
axis(2,lwd=3)
box(lwd=3,col='black')
= data.frame(LFC=ti$log2FoldChange,log10pval=-log10(ti$pvalue),col=ifelse(abs(ti$log2FoldChange)>0 & ti$padj<pval_thr,'red3','gray'))
df write.table(df,file=paste0(source_data_directory,"volcano_ES_IAA.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= data.frame(Untreated=rowMeans(ddsFdf[,c('NS_FBS_3','NS_FBS_5','NS_FBS_4')]),
NS IAA = rowMeans(ddsFdf[,c('NS_FBS_IAA_3','NS_FBS_IAA_5','NS_FBS_IAA_4')]),
stringsAsFactors=FALSE )
par(mar=c(5,5,3,1))
plot(ns$log2FoldChange,-log10(ns$pvalue),
col=ifelse(abs(ns$log2FoldChange)>0 & ns$padj<pval_thr,'red3','gray'),
pch=19, cex=0.25,
ylim=c(0,30),xlim=c(-3,3),
xlab=expression('log'[2]*'(Untr/IAA)'),
ylab=expression('-log'[10]*'(P-val)'),
main='NS')
axis(1,lwd=3)
axis(2,lwd=3)
box(lwd=3,col='black')
= data.frame(LFC=ns$log2FoldChange,log10pval=-log10(ns$pvalue),col=ifelse(abs(ns$log2FoldChange)>0 & ns$padj<pval_thr,'red3','gray'))
df write.table(df,file=paste0(source_data_directory,"volcano_NS_IAA.txt"),row.names = FALSE, sep="\t",quote=FALSE)
More up-regulated genes in the NS cells
=0.1
pval_thr_plot= ti[ti$padj<pval_thr_plot,]
x = ns[ns$padj<pval_thr_plot,]
y =rbind(ES_2i=table( x$log2FoldChange>0 ),
mNS=table( y$log2FoldChange>0 ) )
m
## FALSE TRUE
## ES_2i 357 418
## NS 358 198
fisher.test(m)
##
## Fisher's Exact Test for Count Data
##
## data: m
## p-value = 4.223e-11
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.3751452 0.5944608
## sample estimates:
## odds ratio
## 0.4726291
par(mar=c(5,10,5,10),mfrow=c(1,1))
barplot(100*(m/rowSums(m)),
beside=T, ylim=c(0,80),
col=c('red','blue'), ylab='%',
names=c('Up','Down'),lwd=1.5)
fisher.test(matrix(c(49,51,65,35),2,2))
##
## Fisher's Exact Test for Count Data
##
## data: matrix(c(49, 51, 65, 35), 2, 2)
## p-value = 0.03188
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2815690 0.9484736
## sample estimates:
## odds ratio
## 0.519077
axis(1,lwd=3,at=c(2,5),c('Up','Down'))
axis(2,lwd=3)
Direction of change
= es_ns_IAA_merged[! is.na(es_ns_IAA_merged$padj.x),]
es_ns_IAA_merged_filt = es_ns_IAA_merged_filt[! is.na(es_ns_IAA_merged_filt$padj.y),]
es_ns_IAA_merged_filt = es_ns_IAA_merged_filt[rowSums(es_ns_IAA_merged_filt[,c('padj.x','padj.y')]<0.1)>0,]
es_ns_IAA_merged_filt $geneName = genemap_unique$geneName[match( es_ns_IAA_merged_filt$Row.names,genemap_unique$gene_id ) ]
es_ns_IAA_merged_filt
$col = 'black'
es_ns_IAA_merged_filt$col[es_ns_IAA_merged_filt$padj.x<0.1 & es_ns_IAA_merged_filt$padj.y<0.1] = 'green4'
es_ns_IAA_merged_filt$col[es_ns_IAA_merged_filt$padj.x<0.1 & es_ns_IAA_merged_filt$padj.y>0.1] = 'red'
es_ns_IAA_merged_filt$col[es_ns_IAA_merged_filt$padj.x>0.1 & es_ns_IAA_merged_filt$padj.y<0.1] = 'blue'
es_ns_IAA_merged_filt
$size = 0.5
es_ns_IAA_merged_filt$size[es_ns_IAA_merged_filt$padj.x<0.1 & es_ns_IAA_merged_filt$padj.y<0.1] = 0.5
es_ns_IAA_merged_filt
par(mar=c(5,5,3,1))
plot(-1*es_ns_IAA_merged_filt$log2FoldChange.x,
-1*es_ns_IAA_merged_filt$log2FoldChange.y,
col=es_ns_IAA_merged_filt$col,
pch=19, cex=es_ns_IAA_merged_filt$size,
ylim=c(-4,4),xlim=c(-4,4),
xlab=expression('ES [log'[2]*'(IAA/Untreated)]'),
ylab=expression('NS [log'[2]*'(IAA/Untreated)]'),
main='Without versus with CTCF')
abline( v=0,col='gray',lwd=3)
abline( h=0,col='gray',lwd=3)
axis(1,lwd=3)
axis(2,lwd=3)
box(col='black',lwd=3)
points( es_ns_IAA_merged_filt$log2FoldChange.x[es_ns_IAA_merged_filt$Row.names == 'ENSMUSG00000024268'],
$log2FoldChange.y[es_ns_IAA_merged_filt$Row.names == 'ENSMUSG00000024268'], pch=19, cex=0.5, col='black') es_ns_IAA_merged_filt
cor.test(es_ns_IAA_merged_filt$log2FoldChange.x, es_ns_IAA_merged_filt$log2FoldChange.y)
##
## Pearson's product-moment correlation
##
## data: es_ns_IAA_merged_filt$log2FoldChange.x and es_ns_IAA_merged_filt$log2FoldChange.y
## t = 13.692, df = 979, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3469996 0.4521298
## sample estimates:
## cor
## 0.4008836
= es_ns_IAA_merged_filt[!is.na(es_ns_IAA_merged_filt$geneName),] es_ns_IAA_merged_filt_genes
= data.frame(ES=-1*es_ns_IAA_merged_filt$log2FoldChange.x,NS=-1*es_ns_IAA_merged_filt$log2FoldChange.y,col=es_ns_IAA_merged_filt$col)
df write.table(df,file=paste0(source_data_directory,"scatterplot_IAAimpact_Ncells.txt"),row.names = FALSE, sep="\t",quote=FALSE)
Plots for Figure 10
=buildDfForGGBARPLOT( ddsFdf,as.data.frame(coldata),
tp"cell_type",
"Sox9",
genemap_unique )$annot = factor(tp$annot,levels = c("NS_WT","NS_IAA","2i_WT","2i_IAA"))
tpggbarplot(tp, x = "annot", y = "norm_counts",
color="annot",
add = c("mean_se", "jitter"), size=1.25,
palette=c('blue','blue4','red','coral3'),
title="Sox9") + theme( axis.line = element_line(colour = 'black', size = 0.75))
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
tp
## norm_counts annot
## 1 5.448699 2i_IAA
## 2 1.689030 2i_IAA
## 3 3.136609 2i_WT
## 4 4.230981 2i_WT
## 5 7.634598 2i_IAA
## 6 5.358430 2i_IAA
## 7 4.846634 2i_WT
## 8 5.989234 2i_WT
## 9 978.291257 NS_IAA
## 10 1530.401815 NS_WT
## 11 1868.185338 NS_WT
## 12 890.618161 NS_IAA
## 13 889.752300 NS_IAA
## 14 1371.565757 NS_WT
=buildDfForGGBARPLOT( ddsFdf,as.data.frame(coldata),
tp"cell_type",
"Ngfr",
genemap_unique )$annot = factor(tp$annot,levels = c("NS_WT","NS_IAA","2i_WT","2i_IAA"))
tpggbarplot(tp, x = "annot", y = "norm_counts",
color="annot",
add = c("mean_se", "jitter"), size=1.25,
palette=c('blue','blue4','red','coral3'),
title="Ngfr") + theme( axis.line = element_line(colour = 'black', size = 0.75)) + ylim(c(0,125))
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
tp
## norm_counts annot
## 1 530.34002 2i_IAA
## 2 913.76539 2i_IAA
## 3 564.58968 2i_WT
## 4 829.27229 2i_WT
## 5 329.67584 2i_IAA
## 6 306.62127 2i_IAA
## 7 428.44241 2i_WT
## 8 387.05422 2i_WT
## 9 101.76855 NS_IAA
## 10 47.31081 NS_WT
## 11 44.81172 NS_WT
## 12 110.09226 NS_IAA
## 13 94.72400 NS_IAA
## 14 55.29920 NS_WT
=buildDfForGGBARPLOT( ddsFdf,as.data.frame(coldata),
tp"cell_type",
"Camk2a",
genemap_unique )$annot = factor(tp$annot,levels = c("NS_WT","NS_IAA","2i_WT","2i_IAA"))
tpggbarplot(tp, x = "annot", y = "norm_counts",
color="annot",
add = c("mean_se", "jitter"), size=1.25,
palette=c('blue','blue4','red','coral3'),
title="Camk2a") + theme( axis.line = element_line(colour = 'black', size = 0.75)) + ylim(c(0,600))
tp
## norm_counts annot
## 1 187.0720 2i_IAA
## 2 167.2140 2i_IAA
## 3 196.0381 2i_WT
## 4 205.2026 2i_WT
## 5 263.7407 2i_IAA
## 6 270.8984 2i_IAA
## 7 268.5035 2i_WT
## 8 243.3126 2i_WT
## 9 591.7349 NS_IAA
## 10 165.2450 NS_WT
## 11 285.8679 NS_WT
## 12 338.0397 NS_IAA
## 13 595.7813 NS_IAA
## 14 344.1648 NS_WT
library(dplyr)
library(ggplot2)
library(ggpubr)
theme_set(theme_bw())
= read.delim(paste0(data_directory,"Aldh1a3_ES-NS qPCR.txt"),header=TRUE)
mm = mm[-1,]
mm $Genotype=factor(mm$Genotype,levels=c("Wild_type","KO_1","KO_2","KO_3"))
mm
= mm[mm$Cell_type=="ES",]
mmES = ggplot(mmES, aes(x = Genotype, y = Expression))
m
+ geom_jitter(
m aes(shape = Genotype, color = Genotype),
position = position_jitter(0.2),
size = 5 ) +
stat_summary(
aes(color = Genotype),
fun.data="mean_sdl", fun.args = list(mult=1),
geom = "pointrange", size = 0.7) + scale_color_manual(values = c("red","coral3", "coral3", "coral3")) + ylim(0,1) + theme( axis.line = element_line(colour = "black",
linewidth = 1, linetype = "solid"))
= mm[mm$Cell_type=="NS",]
mmNS = ggplot(mmNS, aes(x = Genotype, y = Expression))
m
+ geom_jitter(
m aes(shape = Genotype, color = Genotype),
position = position_jitter(0.1),
size = 5 ) +
stat_summary(
aes(color = Genotype),
fun.data="mean_sdl", fun.args = list(mult=1),
geom = "pointrange", size = 0.7 ) + scale_color_manual(values = c("blue","steelblue3", "steelblue3", "steelblue3")) + ylim(0,0.5) + theme( axis.line = element_line(colour = "black",
linewidth = 1, linetype = "solid"))
t.test( mmNS$Expression[mmNS$Genotype=="Wild_type"],mmNS$Expression[mmNS$Genotype=="KO_1"])
##
## Welch Two Sample t-test
##
## data: mmNS$Expression[mmNS$Genotype == "Wild_type"] and mmNS$Expression[mmNS$Genotype == "KO_1"]
## t = -4.7588, df = 3.6289, p-value = 0.0113
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.35052505 -0.08552556
## sample estimates:
## mean of x mean of y
## 0.07367433 0.29169964
Check if the differentiation between ES2i and NS worked in the Nora and the 46C cells.
=c("Prdm14","Nanog","Fgf4","Nodal","Lin28a","Zfp42","Dppa2","Esrrb","Tbx3","Klf4","Pou5f1","Phc1","Tet1","Dll1","Pax6","Notch1","Rxra","Hes6","Wnt5a","Fzd2","Fzd1","Nes","Bmi1","Hes5","Ascl1","Olig1","Olig2")
marker_genes
= data.frame(gtf$gene_name,gtf$gene_id)
ens2gene = ens2gene[!duplicated(ens2gene$gtf.gene_id),]
ens2gene = data.frame(gene_names=marker_genes,
marker_genes gene_id = ens2gene$gtf.gene_id[match(marker_genes,ens2gene$gtf.gene_name)])
$lfc = -1*es2i_ns_comp$log2FoldChange[match(marker_genes$gene_id,rownames(es2i_ns_comp))]
marker_genes$padj = es2i_ns_comp$padj[match(marker_genes$gene_id,rownames(es2i_ns_comp))]
marker_genes= marker_genes[order(marker_genes$lfc,decreasing=FALSE),]
marker_genes
barplot(marker_genes$lfc,col=ifelse(marker_genes$lfc<0,"red","blue"),
names=marker_genes$gene_names,las=2,ylim=c(-20,20),cex.axis = 1.5)
= data.frame(LFC=marker_genes$lfc,col=ifelse(marker_genes$lfc<0,"red","blue"))
df write.table(df,file=paste0(source_data_directory,"barplot_ES__NS_differentiation.txt"))
= read.delim(paste0(rnaseq_directory,'gene_counts_FEB_2022.txt'),skip=1)[,c(1,6)]
rnL = data.frame( ES = rowSums( requested_countdata[,c(3,4,7,8)] ),
requested_countdata_tpm NS = rowSums( requested_countdata[,c(10,11,14)] ),
Length = rnL$Length[match(rownames(requested_countdata),rnL$Geneid)] )
= GetTPM(requested_countdata_tpm,1:2,rownames(requested_countdata_tpm))
requested_countdata_tpm
par(mfrow=c(1,2),bty='n')
boxplot( requested_countdata_tpm[rownames(requested_countdata_tpm) %in% TI_sig_genes_up, "ES"],
rownames(requested_countdata_tpm) %in% TI_sig_genes_dn, "ES"],
requested_countdata_tpm[outline=FALSE,col='white', ylab="Expression (TPM)",
border=c('red','red4'),lwd=3,ylim=c(0,100))
axis(1,lwd=3,at=c(1,2),c("Up","Down"))
axis(2,lwd=3)
boxplot( requested_countdata_tpm[rownames(requested_countdata_tpm) %in% NS_sig_genes_up, "NS"],
rownames(requested_countdata_tpm) %in% NS_sig_genes_dn, "NS"],
requested_countdata_tpm[outline=FALSE,col='white', border=c('blue','steelblue3'),
ylab="Expression (TPM)",
lwd=3,ylim=c(0,100))
axis(1,lwd=3,at=c(1,2),c("Up","Down"))
axis(2,lwd=3)
t.test(requested_countdata_tpm[rownames(requested_countdata_tpm) %in% TI_sig_genes_up, "ES"],
rownames(requested_countdata_tpm) %in% TI_sig_genes_dn, "ES"]) requested_countdata_tpm[
##
## Welch Two Sample t-test
##
## data: requested_countdata_tpm[rownames(requested_countdata_tpm) %in% TI_sig_genes_up, "ES"] and requested_countdata_tpm[rownames(requested_countdata_tpm) %in% TI_sig_genes_dn, "ES"]
## t = -5.2704, df = 441.46, p-value = 2.134e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -102.59209 -46.86077
## sample estimates:
## mean of x mean of y
## 12.29998 87.02641
t.test(requested_countdata_tpm[rownames(requested_countdata_tpm) %in% NS_sig_genes_up, "NS"],
rownames(requested_countdata_tpm) %in% NS_sig_genes_dn, "NS"]) requested_countdata_tpm[
##
## Welch Two Sample t-test
##
## data: requested_countdata_tpm[rownames(requested_countdata_tpm) %in% NS_sig_genes_up, "NS"] and requested_countdata_tpm[rownames(requested_countdata_tpm) %in% NS_sig_genes_dn, "NS"]
## t = -2.5177, df = 210.1, p-value = 0.01256
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -83.12087 -10.11752
## sample estimates:
## mean of x mean of y
## 21.75334 68.37254
= max(c(length(TI_sig_genes_up),length(TI_sig_genes_dn),length(NS_sig_genes_up),length(NS_sig_genes_dn)))
theNtimes = data.frame( ES_up_by_IAA = rep(NA,theNtimes), ES_dn_by_IAA = rep(NA,theNtimes),
df NS_up_by_IAA = rep(NA,theNtimes), NS_dn_by_IAA = rep(NA,theNtimes) )
$ES_up_by_IAA[1:length(TI_sig_genes_up)] = requested_countdata_tpm[rownames(requested_countdata_tpm) %in% TI_sig_genes_up, "ES"]
df$ES_dn_by_IAA[1:length(TI_sig_genes_dn)] = requested_countdata_tpm[rownames(requested_countdata_tpm) %in% TI_sig_genes_dn, "ES"]
df$NS_up_by_IAA[1:length(NS_sig_genes_up)] = requested_countdata_tpm[rownames(requested_countdata_tpm) %in% NS_sig_genes_up, "ES"]
df$NS_dn_by_IAA[1:length(NS_sig_genes_dn)] = requested_countdata_tpm[rownames(requested_countdata_tpm) %in% NS_sig_genes_dn, "ES"]
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_TPM.txt"),row.names = FALSE, sep="\t",quote=FALSE)
ATAC peaks +/- CTCF
= readBed_filterChroms(paste0(chipseq_directory,'ES_2i_PLNOV_merged_filtered.bam_peaks.narrowPeak'),
ti_atac_peaks_Nora_unt chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms(paste0(chipseq_directory,'ES_2i_IAA_PLNOV_merged_filtered.bam_peaks.narrowPeak'),
ti_atac_peaks_Nora_iaa chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms(paste0(chipseq_directory,'NPC_PLDEC_merged_filtered.bam_peaks.narrowPeak'),
ns_atac_peaks_Nora_unt chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms(paste0(chipseq_directory,'NPC_IAA_PLDEC_merged_filtered.bam_peaks.narrowPeak'),
ns_atac_peaks_Nora_iaa chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readNarrowPeak2getSummit(paste0(chipseq_directory,'ES_2i_PLNOV_merged_filtered.bam_peaks.narrowPeak'),
ti_atac_peaks_Nora_unt_summit chroms=paste0('chr',c(1:19,'X','Y')), 5)
= readNarrowPeak2getSummit(paste0(chipseq_directory,'ES_2i_IAA_PLNOV_merged_filtered.bam_peaks.narrowPeak'),
ti_atac_peaks_Nora_iaa_summit chroms=paste0('chr',c(1:19,'X','Y')), 5)
= readNarrowPeak2getSummit(paste0(chipseq_directory,'NPC_PLDEC_merged_filtered.bam_peaks.narrowPeak'),
ns_atac_peaks_Nora_unt_summit chroms=paste0('chr',c(1:19,'X','Y')), 5)
= readNarrowPeak2getSummit(paste0(chipseq_directory,'NPC_IAA_PLDEC_merged_filtered.bam_peaks.narrowPeak'),
ns_atac_peaks_Nora_iaa_summit chroms=paste0('chr',c(1:19,'X','Y')), 5)
=findOverlaps(ti_atac_peaks_Nora_unt,ti_atac_peaks_Nora_iaa)
overlapsdraw.pairwise.venn(area1=queryLength(overlaps),
area2=subjectLength(overlaps),
cross.area=length(overlaps),
category=c('',''), col=c('red','coral4'),
fill=rep('white',2),cat.cex=1.2,lwd=8)
## (polygon[GRID.polygon.374], polygon[GRID.polygon.375], polygon[GRID.polygon.376], polygon[GRID.polygon.377], text[GRID.text.378], text[GRID.text.379], text[GRID.text.380], text[GRID.text.381], text[GRID.text.382])
=findOverlaps(ns_atac_peaks_Nora_unt,ns_atac_peaks_Nora_iaa)
overlapsdraw.pairwise.venn(area1=queryLength(overlaps),
area2=subjectLength(overlaps),
cross.area=length(overlaps),
category=c('',''), col=c('blue','blue4'),
fill=rep('white',2),cat.cex=1.2,lwd=8)
## (polygon[GRID.polygon.383], polygon[GRID.polygon.384], polygon[GRID.polygon.385], polygon[GRID.polygon.386], text[GRID.text.387], text[GRID.text.388], text[GRID.text.389], text[GRID.text.390], text[GRID.text.391])
H3K27ac peaks
= readBed_filterChroms(paste0(chipseq_directory,'ChIP_Seq_H3K27ac_05-22_MusMus_ESC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_Nora_2i_Rep_1_peaks.narrowPeak'),
ti_k27ac_peaks_Nora_unt chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms(paste0(chipseq_directory,'ChIP_Seq_H3K27ac_05-22_MusMus_ESC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_Nora_2i_IAA_Rep_1_peaks.narrowPeak'),
ti_k27ac_peaks_Nora_iaa chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms(paste0(chipseq_directory,'ChIP_Seq_H3K27ac_05-22_MusMus_es-NPC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_Nora_Rep_1_peaks.narrowPeak'),
ns_k27ac_peaks_Nora_unt chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms(paste0(chipseq_directory,'ChIP_Seq_H3K27ac_05-22_MusMus_es-NPC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_Nora_IAA_Rep_1_peaks.narrowPeak'),
ns_k27ac_peaks_Nora_iaa chroms=paste0('chr',c(1:19,'X','Y')), 7)
=findOverlaps(ti_k27ac_peaks_Nora_unt,ti_k27ac_peaks_Nora_iaa)
overlapsdraw.pairwise.venn(area1=queryLength(overlaps),
area2=subjectLength(overlaps),
cross.area=length(overlaps),
category=c('',''), col=c('red','coral4'),
fill=rep('white',2),cat.cex=1.2,lwd=8)
## (polygon[GRID.polygon.392], polygon[GRID.polygon.393], polygon[GRID.polygon.394], polygon[GRID.polygon.395], text[GRID.text.396], text[GRID.text.397], text[GRID.text.398], text[GRID.text.399], text[GRID.text.400])
=findOverlaps(ns_k27ac_peaks_Nora_unt,ns_k27ac_peaks_Nora_iaa)
overlapsdraw.pairwise.venn(area1=queryLength(overlaps),
area2=subjectLength(overlaps),
cross.area=length(overlaps),
category=c('',''), col=c('blue','blue4'),
fill=rep('white',2),cat.cex=1.2,lwd=8)
## (polygon[GRID.polygon.401], polygon[GRID.polygon.402], polygon[GRID.polygon.403], polygon[GRID.polygon.404], text[GRID.text.405], text[GRID.text.406], text[GRID.text.407], text[GRID.text.408], text[GRID.text.409])
= GenomicRanges::resize(promoters_tss_gr[promoters_tss_gr$gene_id %in% TI_sig_genes_up],500000,fix='center')
es_up_proms = GenomicRanges::resize(promoters_tss_gr[promoters_tss_gr$gene_id %in% TI_sig_genes_dn],500000,fix='center')
es_dn_proms
= GenomicRanges::resize(promoters_tss_gr[promoters_tss_gr$gene_id %in% NS_sig_genes_up],500000,fix='center')
ns_up_proms = GenomicRanges::resize(promoters_tss_gr[promoters_tss_gr$gene_id %in% NS_sig_genes_dn],500000,fix='center')
ns_dn_proms
par(mfrow=c(1,4),mar=c(6,5,1,1),pty="m")
= data.frame(unt=countOverlaps(es_up_proms,ti_k27ac_peaks_Nora_unt),
es_up_promc iaa=countOverlaps(es_up_proms,ti_k27ac_peaks_Nora_iaa))
= data.frame(unt=countOverlaps(es_dn_proms,ti_k27ac_peaks_Nora_unt),
es_dn_promc iaa=countOverlaps(es_dn_proms,ti_k27ac_peaks_Nora_iaa))
boxplot( es_dn_promc$iaa-es_dn_promc$unt,
$iaa-es_up_promc$unt,
es_up_promcborder=c("red4","coral4"),
outline=FALSE, col="white",names=c("Down","Up"),
ylim=c(-10,10),lwd=2, ylab="IAA-veh")
write.table(es_up_promc,file=paste0(source_data_directory,"es_up_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
write.table(es_dn_promc,file=paste0(source_data_directory,"es_dn_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
= data.frame(unt=countOverlaps(es_up_proms,ti_atac_peaks_Nora_unt),
es_up_promc iaa=countOverlaps(es_up_proms,ti_atac_peaks_Nora_iaa))
= data.frame(unt=countOverlaps(es_dn_proms,ti_atac_peaks_Nora_unt),
es_dn_promc iaa=countOverlaps(es_dn_proms,ti_atac_peaks_Nora_iaa))
boxplot( es_dn_promc$iaa-es_dn_promc$unt,
$iaa-es_up_promc$unt,
es_up_promcborder=c("red4","coral4"),
outline=FALSE, col="white", names=c("Down","Up"),
ylim=c(-10,10), lwd=2, ylab="IAA-veh" )
write.table(es_up_promc,file=paste0(source_data_directory,"es_up_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
write.table(es_dn_promc,file=paste0(source_data_directory,"es_dn_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
= data.frame(unt=countOverlaps(ns_dn_proms,ns_k27ac_peaks_Nora_unt),
ns_dn_promc iaa=countOverlaps(ns_dn_proms,ns_k27ac_peaks_Nora_iaa))
= data.frame(unt=countOverlaps(ns_up_proms,ns_k27ac_peaks_Nora_unt),
ns_up_promc iaa=countOverlaps(ns_up_proms,ns_k27ac_peaks_Nora_iaa))
boxplot( ns_dn_promc$unt-ns_dn_promc$iaa,
$unt-ns_up_promc$iaa,
ns_up_promcborder=c("blue4","steelblue3"),
outline=FALSE, col="white",names=c("Down","Up"),
ylim=c(-10,10), lwd=2, ylab="veh-IAA" )
write.table(ns_dn_promc,file=paste0(source_data_directory,"ns_dn_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
write.table(ns_up_promc,file=paste0(source_data_directory,"ns_up_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
= data.frame(unt=countOverlaps(ns_dn_proms,ns_atac_peaks_Nora_unt),
ns_dn_promc iaa=countOverlaps(ns_dn_proms,ns_atac_peaks_Nora_iaa))
= data.frame(unt=countOverlaps(ns_up_proms,ns_atac_peaks_Nora_unt),
ns_up_promc iaa=countOverlaps(ns_up_proms,ns_atac_peaks_Nora_iaa))
boxplot( ns_dn_promc$unt-ns_dn_promc$iaa,
$unt-ns_up_promc$iaa,
ns_up_promcborder=c("blue4","steelblue3"),
outline=FALSE, col="white",names=c("Down","Up"),
ylim=c(-10,10), lwd=2, ylab="veh-IAA" )
write.table(ns_dn_promc,file=paste0(source_data_directory,"ns_dn_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
write.table(ns_up_promc,file=paste0(source_data_directory,"ns_up_promc.txt"),quote=FALSE,row.names = FALSE, sep="\t")
CTCF controls - we define peaks in untreated cells and look at CTCF signal in untreated and treated cells
= readNarrowPeak2getSummit( paste0(chipseq_directory, 'ChIP_Seq_CTCF_07-22_MusMus_ESC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_2i_Rep_1_peaks.narrowPeak'),
tictmm10Nora_peak chroms=paste0('chr',c(1:19,'X','Y')), 5)
= readNarrowPeak2getSummit( paste0(chipseq_directory, 'ChIP_Seq_CTCF_05-22_MusMus_es-NPC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_Nora_Rep_1_peaks.narrowPeak'),
nsctmm10Nora_peak chroms=paste0('chr',c(1:19,'X','Y')), 5)
= import.bw(paste0(chipseq_directory,'ChIP_Seq_CTCF_07-22_MusMus_ESC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_2i_Rep_1_RPGC.bw'))
ti_nora_ctcf_untreated_bp = import.bw(paste0(chipseq_directory,'ChIP_Seq_CTCF_07-22_MusMus_ESC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_2i-IAA_Rep_1_RPGC.bw'))
ti_nora_ctcf_iaa_bp = import.bw(paste0(chipseq_directory,'ChIP_Seq_CTCF_05-22_MusMus_es-NPC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_Nora_Rep_1_RPGC.bw'))
ns_nora_ctcf_untreated_bp = import.bw(paste0(chipseq_directory,'ChIP_Seq_CTCF_05-22_MusMus_es-NPC_AID_KI_CTCF-AID-GFP_OsTIR_TIGRE_Nora_IAA_Rep_1_RPGC.bw'))
ns_nora_ctcf_iaa_bp
seqlevelsStyle(ti_nora_ctcf_untreated_bp)='ucsc'
seqlevelsStyle(ns_nora_ctcf_untreated_bp)='ucsc'
seqlevelsStyle(ti_nora_ctcf_iaa_bp)='ucsc'
seqlevelsStyle(ns_nora_ctcf_iaa_bp)='ucsc'
= do.call('rbind', lapply( as.list(seq(1:length(tictmm10Nora_peak))), function(i){
Ctcf_nora_summit_ranges_ES = tictmm10Nora_peak[i]
g = start(g)
tss = as.character( chrom(g) )
chr =i
peakreturn( data.frame( chr = rep( (chr), 200),
starts = seq( tss-1000, tss+990, length.out=200),
ends = seq( tss-1000, tss+990, length.out=200)+9,
peak = rep( peak, 200) ) )
} ) )= do.call('rbind', lapply( as.list(seq(1:length(nsctmm10Nora_peak))), function(i){
Ctcf_nora_summit_ranges_NS = nsctmm10Nora_peak[i]
g = start(g)
tss = as.character( chrom(g) )
chr =i
peakreturn( data.frame( chr = rep( (chr), 200),
starts = seq( tss-1000, tss+990, length.out=200),
ends = seq( tss-1000, tss+990, length.out=200)+9,
peak = rep( peak, 200) ) )
} ) )
= GRanges(seqnames = Rle(Ctcf_nora_summit_ranges_ES$chr),
Ctcf_nora_summit_ranges_ES ranges = IRanges(as.numeric(Ctcf_nora_summit_ranges_ES$starts),
end = as.numeric(Ctcf_nora_summit_ranges_ES$ends),
names = seq(1, nrow(Ctcf_nora_summit_ranges_ES))),
strand = Rle(rep("*", nrow(Ctcf_nora_summit_ranges_ES))),
peak = Ctcf_nora_summit_ranges_ES$peak )
= GRanges(seqnames = Rle(Ctcf_nora_summit_ranges_NS$chr),
Ctcf_nora_summit_ranges_NS ranges = IRanges(as.numeric(Ctcf_nora_summit_ranges_NS$starts),
end = as.numeric(Ctcf_nora_summit_ranges_NS$ends),
names = seq(1, nrow(Ctcf_nora_summit_ranges_NS))),
strand = Rle(rep("*", nrow(Ctcf_nora_summit_ranges_NS))),
peak = Ctcf_nora_summit_ranges_NS$peak )
save(Ctcf_nora_summit_ranges_ES,file=paste0(objects_directory,'Ctcf_nora_summit_ranges_ES.RData'))
save(Ctcf_nora_summit_ranges_NS,file=paste0(objects_directory,'Ctcf_nora_summit_ranges_NS.RData'))
= getSignalInBins( Ctcf_nora_summit_ranges_ES, ti_nora_ctcf_untreated_bp, 1 )
Ctcf_nora_summit_ranges_ES_untreated = getSignalInBins( Ctcf_nora_summit_ranges_ES, ti_nora_ctcf_iaa_bp, 1 )
Ctcf_nora_summit_ranges_ES_iaa
= getSignalInBins( Ctcf_nora_summit_ranges_NS, ns_nora_ctcf_untreated_bp, 1 )
Ctcf_nora_summit_ranges_NS_untreated = getSignalInBins( Ctcf_nora_summit_ranges_NS, ns_nora_ctcf_iaa_bp, 1 )
Ctcf_nora_summit_ranges_NS_iaa
save( Ctcf_nora_summit_ranges_ES_untreated,
Ctcf_nora_summit_ranges_ES_iaa,
Ctcf_nora_summit_ranges_NS_untreated,
Ctcf_nora_summit_ranges_NS_iaa,file=paste0(objects_directory,'Ctcf_signal_Nora_CTCF_peaks.RData'))
Display the effect of IAA on CTCF level.
load(paste0(objects_directory,'Ctcf_signal_Nora_CTCF_peaks.RData'))
=log2((0.1+Ctcf_nora_summit_ranges_ES_iaa)/(0.1+Ctcf_nora_summit_ranges_ES_untreated))
x=log2((0.1+Ctcf_nora_summit_ranges_NS_iaa)/(0.1+Ctcf_nora_summit_ranges_NS_untreated))
y
par(mfrow=c(1,1),mar=c(5,5,1,1))
plot(density(x[,100]),col='red',lwd=3,ty='l',
xlim=c(-10,10),main='',axes=F,ylim=c(0,0.5))
axis(1,lwd=3)
axis(2,lwd=3)
lines(density(y[,100]),col='blue',lwd=3 )
abline(v=0,lwd=3,col='black',lty=2)
t.test(x[,100])
##
## One Sample t-test
##
## data: x[, 100]
## t = -185.78, df = 13849, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -1.985657 -1.944194
## sample estimates:
## mean of x
## -1.964926
t.test(y[,100])
##
## One Sample t-test
##
## data: y[, 100]
## t = -213.78, df = 18626, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -2.215984 -2.175719
## sample estimates:
## mean of x
## -2.195851
= data.frame(ES=density(x[,100])$y,NS=density(y[,100])$y)
df write.table(df,file=paste0(source_data_directory,"ChIP-seq_FC_IAA.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= readBed_filterChroms(paste0(chipseq_directory,'ATAC_Seq_03-22_MusMus_ESC_MOD_SOX1-GFP_46C_2i_Rep_1_peaks.narrowPeak'),
ti_atac_peaks chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms(paste0(chipseq_directory,'ATAC_Seq_05-22_MusMus_es-NPC_MOD_SOX1-GFP_46C_Rep_1_peaks.narrowPeak'),
ns_atac_peaks chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readNarrowPeak2getSummit(paste0(chipseq_directory,'ATAC_Seq_03-22_MusMus_ESC_MOD_SOX1-GFP_46C_2i_Rep_1_peaks.narrowPeak'),
ti_atac_summits chroms=paste0('chr',c(1:19,'X','Y')), 5)
= readNarrowPeak2getSummit(paste0(chipseq_directory,'ATAC_Seq_05-22_MusMus_es-NPC_MOD_SOX1-GFP_46C_Rep_1_peaks.narrowPeak'),
ns_atac_summits chroms=paste0('chr',c(1:19,'X','Y')), 5)
$presence = 1
ctcf_motif
= readBed_filterChroms(paste0(chipseq_directory,'ChIP_Seq_12-21_MusMus_es-ESC_H3K27ac_MOD_SOX1-GFP_2i_Rep_1.narrowPeak'),
K27peaks_ES chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readBed_filterChroms( paste0(chipseq_directory, 'ChIP_Seq_12-21_MusMus_es-NPC_H3K27ac_MOD_SOX1-GFP_Rep_1.narrowPeak'),
K27peaks_NS chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readNarrowPeak2getSummit(paste0(chipseq_directory, 'ChIP_Seq_12-21_MusMus_es-ESC_H3K27ac_MOD_SOX1-GFP_2i_Rep_1.narrowPeak'),
K27peaks_summit_ES chroms=paste0('chr',c(1:19,'X','Y')), 5)
= readNarrowPeak2getSummit(paste0(chipseq_directory, 'ChIP_Seq_12-21_MusMus_es-NPC_H3K27ac_MOD_SOX1-GFP_Rep_1.narrowPeak'),
K27peaks_summit_NS chroms=paste0('chr',c(1:19,'X','Y')), 5)
= K27peaks_ES[-queryHits(findOverlaps(K27peaks_ES,promGRExt))]
ti_enhancers = K27peaks_NS[-queryHits(findOverlaps(K27peaks_NS,promGRExt))]
ns_enhancers
= ti_atac_peaks[queryHits(findOverlaps(ti_atac_peaks,ti_enhancers))]
ti_enhancers_atac = ns_atac_peaks[queryHits(findOverlaps(ns_atac_peaks,ns_enhancers))] ns_enhancers_atac
Induced genes are closer to each other than reduced genes
par(mfrow=c(1,2),mar=c(5,5,1,1),cex.axis=1)
boxplot(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)]))$distance/1000,
elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)]))$distance/1000,
outline=FALSE,border=c('green4','orange3'),col='white',names=c("Up","Down"),ylab="distance (kbp)")
ks.test(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)]))$distance,
elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)]))$distance)
## Warning in
## ks.test(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id
## %in% : p-value will be approximate in the presence of ties
##
## Two-sample Kolmogorov-Smirnov test
##
## data: elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)]))$distance and elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)]))$distance
## D = 0.24865, p-value = 3.431e-07
## alternative hypothesis: two-sided
## distance to nearest enhancer
boxplot(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)],ns_enhancers_atac))$distance/1000,
elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)],ns_enhancers_atac))$distance/1000,
outline=FALSE,border=c('green4','orange3'),col='white',names=c("Up","Down"))
Boxplot data
= max( c(length(NS_sig_genes_up),length(NS_sig_genes_dn)))
theNtimes = data.frame( Prom_prom_up = rep(NA,theNtimes),
df Prom_prom_dn = rep(NA,theNtimes),
Prom_enh_up = rep(NA,theNtimes),
Prom_enh_dn = rep(NA,theNtimes) )
$Prom_prom_up[1:length(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)]))$distance/1000)] = elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)]))$distance/1000
df$Prom_prom_dn[1:length(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)]))$distance/1000)] = elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)]))$distance/1000
df
$Prom_enh_up[1:length(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)],ns_enhancers_atac))$distance/1000)] = elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)],ns_enhancers_atac))$distance/1000
df$Prom_enh_dn[1:length(elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)],ns_enhancers_atac))$distance/1000)] = elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)],ns_enhancers_atac))$distance/1000
df
write.table(df,file=paste0(source_data_directory,"boxplots_Prom_enh_Prom_Prom_distance.txt"),row.names = FALSE, sep="\t",quote=FALSE)
Do we see that architectural CTCF peaks separate the induced genes from enhancers more often than reduced genes?
Find the smallest loop domain for each up and down gene, identify regions that flank the insulators (500 upstream), find the region with the highest number of enhancers and compare for induced and reduced genes. We will also check the number of CTCF peaks.
= readBed_filterChroms( paste0(chipseq_directory, 'NS_CTCF_ChIP_NIAMS_merged_filtered_peaks.narrowPeak'),
nsctmm10peak chroms=paste0('chr',c(1:19,'X','Y')), 7)
= readNarrowPeak2getSummit( paste0(chipseq_directory,'NS_CTCF_ChIP_NIAMS_merged_filtered_peaks.narrowPeak'),
nsctmm10 chroms=paste0('chr',c(1:19,'X','Y')), 5)
$score = nsctmm10peak$score
nsctmm10= appendMotifInformation( nsctmm10peak, ctcf_motif,summitFile=nsctmm10 )
nsctmm10peak
= nsctmm10peak[which(nsctmm10peak$motif_strand != "*")]
ctcf_peaks_ns_mm10_strand strand(ctcf_peaks_ns_mm10_strand) = ctcf_peaks_ns_mm10_strand$motif_strand
sum( elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)],ctcf_peaks_ns_mm10_strand,ignore.strand=TRUE))$distance == 0)/length(NS_sig_genes_up)
## [1] 0.1731844
sum( elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)],ctcf_peaks_ns_mm10_strand,ignore.strand=TRUE))$distance == 0)/length(NS_sig_genes_dn)
## [1] 0.4545455
=matrix(c(sum( elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_up)],ctcf_peaks_ns_mm10_strand,ignore.strand=TRUE))$distance == 0),
mlength(NS_sig_genes_up),
sum( elementMetadata(distanceToNearest(promoters_tss_gr[which(promoters_tss_gr$gene_id %in% NS_sig_genes_dn)],ctcf_peaks_ns_mm10_strand,ignore.strand=TRUE))$distance == 0),
length(NS_sig_genes_dn)),2,2)
fisher.test(m)
##
## Fisher's Exact Test for Count Data
##
## data: m
## p-value = 2.369e-07
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2591761 0.5589507
## sample estimates:
## odds ratio
## 0.3815533
par(mfrow=c(1,1))
barplot(100*m[1,]/m[2,],col=c('green4','orange3'),
ylab="%",ylim=c(0,50),names=c("up","down"))
axis(2,lwd=2)
m
## [,1] [,2]
## [1,] 62 90
## [2,] 358 198
How many enhancers flank the loop domains that contain different classes of genes?
= c()
dis2anchor_induced = data.frame()
induced_intervals_left = data.frame()
induced_intervals_right = c()
induced_smallest_loop = c()
induced_largest_loop
for( gene in NS_sig_genes_up ){
=promoters_tss_gr[which(promoters_tss_gr$gene_id %in% gene)]
thisGene= findOverlaps(ns_loops_gr,thisGene)
tp if(length(tp)>0){
= ns_loops_gr[queryHits(tp)]
loops = loops[which.max(width(loops))]
loops2 = loops[which.min(width(loops))]
loops
= rbind(induced_smallest_loop,
induced_smallest_loop data.frame(chrom(loops),start(loops),end(loops)))
= rbind(induced_largest_loop, data.frame(chrom(loops2),start(loops2),end(loops2)))
induced_largest_loop
= c(dis2anchor_induced,elementMetadata(distanceToNearest(thisGene,c(ns_loops_left_anchor[loops$loop],ns_loops_right_anchor[loops$loop])))$distance)
dis2anchor_induced = rbind(induced_intervals_left,data.frame(chrom(loops),start(loops)-500000,start(loops)))
induced_intervals_left = rbind(induced_intervals_right,data.frame(chrom(loops),end(loops),end(loops)+500000))}
induced_intervals_right
}= getGR(induced_smallest_loop[,1],induced_smallest_loop[,2],induced_smallest_loop[,3],0)
induced_smallest_loop_gr
= getGR(induced_intervals_left[,1],induced_intervals_left[,2],induced_intervals_left[,3],0)
induced_intervals_left_gr = getGR(induced_intervals_right[,1],induced_intervals_right[,2],induced_intervals_right[,3],0)
induced_intervals_right_gr = getGR(induced_smallest_loop[,1],induced_smallest_loop[,2],induced_smallest_loop[,3],0)
induced_smallest_loop_gr = getGR(induced_largest_loop[,1],induced_largest_loop[,2],induced_largest_loop[,3],0)
induced_largest_loop_gr
## reduced genes
= c()
dis2anchor_reduced = data.frame()
reduced_intervals_left = data.frame()
reduced_intervals_right = c()
reduced_smallest_loop = c()
reduced_largest_loop
for( gene in NS_sig_genes_dn ){
=promoters_tss_gr[which(promoters_tss_gr$gene_id %in% gene)]
thisGene= findOverlaps(ns_loops_gr,thisGene)
tp if(length(tp)>0){
= ns_loops_gr[queryHits(tp)]
loops = loops[which.max(width(loops))]
loops2 = loops[which.min(width(loops))]
loops = rbind(reduced_smallest_loop,
reduced_smallest_loop data.frame(chrom(loops),start(loops),end(loops)))
= rbind(reduced_largest_loop,
reduced_largest_loop data.frame(chrom(loops2),start(loops2),end(loops2)))
= c(dis2anchor_reduced,elementMetadata(distanceToNearest(thisGene,c(ns_loops_left_anchor[loops$loop],ns_loops_right_anchor[loops$loop])))$distance)
dis2anchor_reduced = rbind(reduced_intervals_left,data.frame(chrom(loops),start(loops)-500000,start(loops)))
reduced_intervals_left = rbind(reduced_intervals_right,data.frame(chrom(loops),end(loops),end(loops)+500000))}
reduced_intervals_right
}
= getGR(reduced_intervals_left[,1],reduced_intervals_left[,2],reduced_intervals_left[,3],0)
reduced_intervals_left_gr = getGR(reduced_intervals_right[,1],reduced_intervals_right[,2],reduced_intervals_right[,3],0)
reduced_intervals_right_gr = getGR(reduced_smallest_loop[,1],reduced_smallest_loop[,2],reduced_smallest_loop[,3],0)
reduced_smallest_loop_gr = getGR(reduced_largest_loop[,1],reduced_largest_loop[,2],reduced_largest_loop[,3],0)
reduced_largest_loop_gr
## ------------
= c()
dis2anchor_random = data.frame()
random_intervals_left = data.frame()
random_intervals_right = c()
random_smallest_loop = c()
random_largest_loop
for( gene in promoters_tss_gr[sample(seq(1,length(promoters_tss_gr)),2000,replace = FALSE)]$gene_id ){
=promoters_tss_gr[which(promoters_tss_gr$gene_id %in% gene)]
thisGene= findOverlaps(ns_loops_gr,thisGene)
tp if(length(tp)>0){
= ns_loops_gr[queryHits(tp)]
loops = loops[which.max(width(loops))]
loops2 = loops[which.min(width(loops))]
loops = rbind(random_smallest_loop,
random_smallest_loop data.frame(chrom(loops),start(loops),end(loops)))
= rbind(random_largest_loop,
random_largest_loop data.frame(chrom(loops2),start(loops2),end(loops2)))
= c(dis2anchor_random,elementMetadata(distanceToNearest(thisGene,c(ns_loops_left_anchor[loops$loop],ns_loops_right_anchor[loops$loop])))$distance)
dis2anchor_random = rbind(random_intervals_left,data.frame(chrom(loops),start(loops)-500000,start(loops)))
random_intervals_left = rbind(random_intervals_right,data.frame(chrom(loops),end(loops),end(loops)+500000))}
random_intervals_right
}
= getGR(random_intervals_left[,1],random_intervals_left[,2],random_intervals_left[,3],0)
random_intervals_left_gr = getGR(random_intervals_right[,1],random_intervals_right[,2],random_intervals_right[,3],0)
random_intervals_right_gr = getGR(random_smallest_loop[,1],random_smallest_loop[,2],random_smallest_loop[,3],0)
random_smallest_loop_gr = getGR(random_largest_loop[,1],random_largest_loop[,2],random_largest_loop[,3],0)
random_largest_loop_gr
## ----------
= data.frame(left=countOverlaps(induced_intervals_left_gr,
induced_left_right
ns_enhancers_atac),
right = countOverlaps(induced_intervals_right_gr,
ns_enhancers_atac))
= data.frame(left=countOverlaps(reduced_intervals_left_gr,
reduced_left_right
ns_enhancers_atac),
right = countOverlaps(reduced_intervals_right_gr,
ns_enhancers_atac))
= data.frame(left=countOverlaps(random_intervals_left_gr,
random_left_right
ns_enhancers_atac),
right = countOverlaps(random_intervals_right_gr,
ns_enhancers_atac))
Check the landscape in the loops
$loop = paste(chrom(induced_largest_loop_gr),start(induced_largest_loop_gr),sep="_")
induced_largest_loop_gr$loop = paste(chrom(reduced_largest_loop_gr),start(reduced_largest_loop_gr),sep="_")
reduced_largest_loop_gr= induced_largest_loop_gr[which(! duplicated(induced_largest_loop_gr$loop))]
induced_largest_loop_gr_filt = reduced_largest_loop_gr[which(! duplicated(reduced_largest_loop_gr$loop))]
reduced_largest_loop_gr_filt
boxplot(rowSums(induced_left_right[which(! duplicated(induced_largest_loop_gr$loop)),]),
rowSums(reduced_left_right[which(! duplicated(reduced_largest_loop_gr$loop)),]),
rowSums(random_left_right),col="white",border=c("green4","orange3","gray60"),
names=c("up","down","random"),ylab="number of enhancers in flanks")
axis(1,lwd=2, at=c(1,2,3),c("up","down","random"))
axis(2,lwd=2)
box(col='black',lwd=2)
t.test(rowSums(induced_left_right[which(! duplicated(induced_largest_loop_gr$loop)),]),
rowSums(reduced_left_right[which(! duplicated(reduced_largest_loop_gr$loop)),]))
##
## Welch Two Sample t-test
##
## data: rowSums(induced_left_right[which(!duplicated(induced_largest_loop_gr$loop)), ]) and rowSums(reduced_left_right[which(!duplicated(reduced_largest_loop_gr$loop)), ])
## t = 4.6081, df = 313.74, p-value = 5.919e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.053708 5.114292
## sample estimates:
## mean of x mean of y
## 12.000 8.416
t.test(rowSums(random_left_right[which(! duplicated(reduced_largest_loop_gr$loop)),]),rowSums(reduced_left_right))
##
## Welch Two Sample t-test
##
## data: rowSums(random_left_right[which(!duplicated(reduced_largest_loop_gr$loop)), ]) and rowSums(reduced_left_right)
## t = -2.8088, df = 243.77, p-value = 0.005376
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.6673615 -0.6439448
## sample estimates:
## mean of x mean of y
## 7.064000 9.219653
= max( length(rowSums(induced_left_right[which(! duplicated(induced_largest_loop_gr$loop)),])),
theNtimes length(rowSums(reduced_left_right[which(! duplicated(reduced_largest_loop_gr$loop)),])),
length(rowSums(random_left_right)) )
= data.frame( induced = rep(NA,theNtimes),
df reduced = rep(NA,theNtimes),
random = rep(NA,theNtimes) )
$induced[1:length(rowSums(induced_left_right[which(! duplicated(induced_largest_loop_gr$loop)),]))] = rowSums(induced_left_right[which(! duplicated(induced_largest_loop_gr$loop)),])
df$reduced[1:length(rowSums(reduced_left_right[which(! duplicated(reduced_largest_loop_gr$loop)),]))] = rowSums(reduced_left_right[which(! duplicated(reduced_largest_loop_gr$loop)),])
df$random[1:length(rowSums(random_left_right))] = rowSums(random_left_right)
df
write.table(df,file=paste0(source_data_directory,"boxplots_flanks.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= read.delim(paste0(data_directory,"PLA_microscopy/Fig-RNaseA-PLA-Ddx5-CTCF_Fus-CTCF-WT-ES_NS.txt"),
rnase header=TRUE)
$Cond=factor(paste(rnase$Cell.type, rnase$Treatment),
rnaselevels=c("ESC Untreated","ESC RNaseA", "NSC Untreated", "NSC RNaseA"))
par(bty="n",mfrow=c(2,1),mar=c(3,3,2,1))
boxplot( split(rnase[rnase$PLA=="Ddx5-Ctcf",2],
$Cond[rnase$PLA=="Ddx5-Ctcf"]),
rnasecol="white",border=c("red","red","blue","blue"),
lwd=2,ylim=c(0,20),axes=FALSE,main="Ddx5-Ctcf")
axis(1,c(1,2,3,4),
c("","", "", ""),
lwd=2)
axis(2,lwd=2)
boxplot( split(rnase[rnase$PLA=="Fus-Ctcf",2],
$Cond[rnase$PLA=="Fus-Ctcf" ]),
rnasecol="white",border=c("red","red","blue","blue"),
lwd=2,ylim=c(0,15),main="Fus-Ctcf",axes=FALSE)
axis(1,c(1,2,3,4),
c("ESC Unt.","ESC RNaseA", "NSC Unt.", "NSC RNaseA"),
lwd=2)
axis(2,lwd=2)
= read.delim(paste0(data_directory,"PLA_microscopy/Fig-PLA-Nono-Ctcf-WT-ES_NS.txt"),
Ctcf_nono header=TRUE)
= split(Ctcf_nono[,2],Ctcf_nono$Cell.type)
x
par(bty="n",mfrow=c(1,1),mar=c(3,3,2,1),pty="m")
boxplot( x,
col="white",border=c("red","blue"),
lwd=2,ylim=c(0,40),axes=FALSE,main="Ctcf-Nono")
axis(1,c(1,2), c("ES","NS"), lwd=2)
axis(2,lwd=2)
t.test(x[[1]],x[[2]])
##
## Welch Two Sample t-test
##
## data: x[[1]] and x[[2]]
## t = -3.0945, df = 33.582, p-value = 0.003958
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -13.552177 -2.804965
## sample estimates:
## mean of x mean of y
## 13.00000 21.17857
= read.delim(paste0(data_directory,"PLA_microscopy/Fig-PLA-Ddx5-Fus-WT-ES_NS.txt"),
ddx_fus header=TRUE)
par(bty="n",mfrow=c(1,1),mar=c(3,3,2,1))
boxplot( split(ddx_fus[,2],
$Cell.type),
ddx_fuscol="white",border=c("red","blue"),
lwd=2,ylim=c(0,40),axes=FALSE,main="Ddx5-Fus")
axis(1,c(1,2),
c("ES","NS"),
lwd=2)
axis(2,lwd=2)
= read.delim(paste0(data_directory,"PLA_microscopy/PLA_Ddx5-CTCF_end_Dec_2023.txt"),
pantr1 header=TRUE)
$Genotype = factor(pantr1$Genotype,levels=c("Wildtype-A2","Wildtype-A3",
pantr1"Pantr1-KO-PB6","Pantr1-KO-PE3"))
= split(pantr1[,2],pantr1$Genotype)
pantr1s par(bty="n",mfrow=c(1,1),mar=c(3,3,2,1))
boxplot( pantr1s[c(1,2,4,3)],
col="white",border=c("blue","blue","gray60","gray60"),
lwd=2,ylim=c(0,100),axes=FALSE,main="CTCF-Ddx5")
axis(1,c(1,2,3,4),
c("Wt1","Wt2",
"PE3","PB6"),
lwd=2)
axis(2,lwd=2)
t.test(c(pantr1s[[1]],pantr1s[[2]]), c(pantr1s[[3]],pantr1s[[4]]))
##
## Welch Two Sample t-test
##
## data: c(pantr1s[[1]], pantr1s[[2]]) and c(pantr1s[[3]], pantr1s[[4]])
## t = 18.201, df = 129.76, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 38.40084 47.76715
## sample estimates:
## mean of x mean of y
## 66.28947 23.20548
= read.delim(paste0(data_directory,"PLA_microscopy/PLA-Fus-CTCF-Pantr-KO-new.txt"),
pantr1_fus header=TRUE)
$Genotype = factor(pantr1_fus$Genotype,
pantr1_fuslevels=c("Wildtype-A2","Wildtype-A3",
"Pantr1-KO-PB6","Pantr1-KO-PE3"))
= pantr1_fus[pantr1_fus$Genotype %in% c("Wildtype-A2","Wildtype-A3","Pantr1-KO-PB6","Pantr1-KO-PE3"),]
pantr1_fus $Genotype = droplevels(pantr1_fus$Genotype)
pantr1_fus
= split(pantr1_fus[,2],pantr1_fus$Genotype)
pantr1s par(bty="n",mfrow=c(1,1),mar=c(3,3,2,1))
boxplot( pantr1s,
col="white",border=c("blue","blue","gray60","gray60"),
lwd=2,ylim=c(0,100),axes=FALSE,main="CTCF-FUS")
axis(1,c(1,2,3,4),
c("Wt1","Wt2",
"PE3","PB6"),
lwd=2)
axis(2,lwd=2)
t.test(c(unlist(pantr1s[c(1,2)])),c(unlist(pantr1s[c(3,4)])))
##
## Welch Two Sample t-test
##
## data: c(unlist(pantr1s[c(1, 2)])) and c(unlist(pantr1s[c(3, 4)]))
## t = 6.2177, df = 120.78, p-value = 7.483e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 10.27250 19.87033
## sample estimates:
## mean of x mean of y
## 58.63768 43.56627
= c(0.34,0,0.01,0,0.01,0)
pantr1_qPCR names(pantr1_qPCR) = c("Wt +","Wt -","PB6+","PB6-","PE3+","PE3-")
barplot(pantr1_qPCR,col=c("blue","blue",rep("gray",4)),ylim=c(0,0.4) )
axis(2,lwd=2)
= read.delim(paste0(data_directory,"PLA_microscopy/Figure_CTCF-Clusters-WT_ES_NS.txt"),header=TRUE)
ctcf_clusters
par(bty="n",mfrow=c(1,1),mar=c(3,3,2,1))
=split(ctcf_clusters[,1],ctcf_clusters$Cell.type)
xboxplot( x, col="white",border=c("red","blue"),
lwd=2,ylim=c(0,200),axes=FALSE,main="CTCF cluster size")
axis(1,c(1,2),
c('ES','NS'),
lwd=2)
axis(2,lwd=2)
t.test(x[[1]],x[[2]])
##
## Welch Two Sample t-test
##
## data: x[[1]] and x[[2]]
## t = -5.9133, df = 64.346, p-value = 1.403e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -67.54590 -33.43454
## sample estimates:
## mean of x mean of y
## 19.67186 70.16207
Overwhelming majority of CTCF peaks intersects a motif.
= readBed_filterChromsStraded( paste0(data_directory,'CTCF_HUMAN.H11MO.0.A.bed'),
ctcf_motif chroms=paste0('chr',c(1:19,'X','Y')), 5 )
= import.bed( paste0(chipseq_directory,'ChIP_SEQ_CTCF_Merged_rpgc_narrow_summits.bed'))
ctcf_wt_NS_peaks seqlevelsStyle(ctcf_wt_NS_peaks)="ucsc"
= ctcf_wt_NS_peaks[which(chrom(ctcf_wt_NS_peaks) %in% paste0("chr",c(1:19,"x")))]
ctcf_wt_NS_peaks
= GenomicRanges::resize(ctcf_wt_NS_peaks,width=200,fix="center")
ctcf_wt_NS_peaks = appendMotifInformation( ctcf_wt_NS_peaks, ctcf_motif,summitFile=ctcf_wt_NS_peaks )
ctcf_wt_NS_peaks table(ctcf_wt_NS_peaks$motif_strand)
##
## * + -
## 5038 18472 18722
= GenomicRanges::resize(ctcf_wt_NS_peaks,fix="center",1)
ctcf_wt_NS_peaks_summit names(ctcf_wt_NS_peaks_summit) = seq(1:length(ctcf_wt_NS_peaks_summit))
= GenomicRanges::resize( ctcf_wt_NS_peaks_summit, fix="center",width=500 )
ctcf_wt_NS_peaks_resized
write.table( ctcf_wt_NS_peaks,
file=paste0(data_directory,"ctcf_wt_NS_peaks.txt"),
sep='\t',quote=FALSE,row.names=FALSE,col.names=TRUE )
= import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_06-23_MusMus_es-NPC_MOD_CTCF-Cterm_HALO_A3_Control_Rep_1_RPGC.bw"))
a3_1_ctcf_rpgc = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_08-23_MusMus_es-NPC_MOD_CTCF-Cterm_HALO_A3_Control_Rep_2_RPGC.bw"))
a3_2_ctcf_rpgc = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_03-23_MusMus_es-NPC_DDX5_KO_CTCF-Cterm_HALO_CB1_Rep_1_RPGC.bw"))
cb1_ctcf_rpgc = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_03-23_MusMus_es-NPC_DDX5_KO_CTCF-Cterm_HALO_CE10_Rep_1_RPGC.bw"))
ce10_ctcf_rpgc seqlevelsStyle(a3_1_ctcf_rpgc) = "ucsc"
seqlevelsStyle(a3_2_ctcf_rpgc) = "ucsc"
seqlevelsStyle(cb1_ctcf_rpgc) = "ucsc"
seqlevelsStyle(ce10_ctcf_rpgc) = "ucsc"
= import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_06-23_MusMus_es-NPC_MOD_CTCF-Cterm_HALO_A3_Control_Rep_1_filtered.bw"))
a3_1_ctcf_raw = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_08-23_MusMus_es-NPC_MOD_CTCF-Cterm_HALO_A3_Control_Rep_2_filtered.bw"))
a3_2_ctcf_raw = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_03-23_MusMus_es-NPC_DDX5_KO_CTCF-Cterm_HALO_CB1_Rep_1_filtered.bw"))
cb1_ctcf_raw = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_03-23_MusMus_es-NPC_DDX5_KO_CTCF-Cterm_HALO_CE10_Rep_1_filtered.bw"))
ce10_ctcf_raw seqlevelsStyle(a3_1_ctcf_raw) = "ucsc"
seqlevelsStyle(a3_2_ctcf_raw) = "ucsc"
seqlevelsStyle(cb1_ctcf_raw) = "ucsc"
seqlevelsStyle(ce10_ctcf_raw) = "ucsc"
= do.call('rbind', lapply( as.list(seq(1:length(ctcf_wt_NS_peaks_summit))),
ctcf_NS_ranges function(i){
= ctcf_wt_NS_peaks_summit[i]
g = start(g)
tss = as.character( chrom(g) )
chr =names(ctcf_wt_NS_peaks_summit)[i]
peakreturn( data.frame( chr = rep( (chr), 200),
starts = seq( tss-1000, tss+990, length.out=200),
ends = seq( tss-1000, tss+990, length.out=200)+9,
peak = rep( peak, 200) ) ) } ) )
= GRanges(seqnames = Rle(ctcf_NS_ranges$chr),
ctcf_NS_ranges ranges = IRanges(as.numeric(ctcf_NS_ranges$starts),
end = as.numeric(ctcf_NS_ranges$ends),
names = seq(1, nrow(ctcf_NS_ranges))),
strand = Rle(rep("*", nrow(ctcf_NS_ranges))),
peak = ctcf_NS_ranges$peak )
save(ctcf_NS_ranges,file=paste0(objects_directory,'ctcf_NS_ranges.RData'))
= getSignalInBins(ctcf_NS_ranges,a3_1_ctcf_rpgc,1)
a3_1_ctcf_rpgc_ctcf_AP = getSignalInBins(ctcf_NS_ranges,a3_2_ctcf_rpgc,1)
a3_2_ctcf_rpgc_ctcf_AP = getSignalInBins(ctcf_NS_ranges,cb1_ctcf_rpgc,1)
cb1_ctcf_rpgc_ctcf_AP = getSignalInBins(ctcf_NS_ranges,ce10_ctcf_rpgc,1)
ce10_ctcf_rpgc_ctcf_AP rownames(a3_1_ctcf_rpgc_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
rownames(a3_2_ctcf_rpgc_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
rownames(cb1_ctcf_rpgc_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
rownames(ce10_ctcf_rpgc_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
save(a3_1_ctcf_rpgc_ctcf_AP,
a3_2_ctcf_rpgc_ctcf_AP,
cb1_ctcf_rpgc_ctcf_AP,
ce10_ctcf_rpgc_ctcf_AP,file=paste0(objects_directory,"CTCF_Ddx5ko_NS_A3_RPGC.RData"))
= getSignalInBins(ctcf_NS_ranges,a3_1_ctcf_raw,1)
a3_1_ctcf_raw_ctcf_AP = getSignalInBins(ctcf_NS_ranges,a3_2_ctcf_raw,1)
a3_2_ctcf_raw_ctcf_AP = getSignalInBins(ctcf_NS_ranges,cb1_ctcf_raw,1)
cb1_ctcf_raw_ctcf_AP = getSignalInBins(ctcf_NS_ranges,ce10_ctcf_raw,1)
ce10_ctcf_raw_ctcf_AP rownames(a3_1_ctcf_raw_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
rownames(a3_2_ctcf_raw_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
rownames(cb1_ctcf_raw_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
rownames(ce10_ctcf_raw_ctcf_AP) = names(ctcf_wt_NS_peaks_summit)
save(a3_1_ctcf_raw_ctcf_AP,
a3_2_ctcf_raw_ctcf_AP,
cb1_ctcf_raw_ctcf_AP,
ce10_ctcf_raw_ctcf_AP,file=paste0(objects_directory,"CTCF_Ddx5ko_NS_A3_RAW.RData"))
load(paste0(objects_directory,"CTCF_Ddx5ko_NS_A3_RAW.RData"))
load(paste0(objects_directory,'ctcf_NS_ranges.RData'))
=90; b=110
a
= data.frame( a3_1 = ( rowSums(a3_1_ctcf_raw_ctcf_AP[,a:b])),
elbat_ctcf a3_2 = ( rowSums(a3_2_ctcf_raw_ctcf_AP[,a:b])),
cb1 = ( rowSums(cb1_ctcf_raw_ctcf_AP[,a:b])),
ce10 = ( rowSums(ce10_ctcf_raw_ctcf_AP[,a:b])) )
= data.frame( genotype = c("wt","wt","Ddx5","Ddx5"),
metadata_CTCF_peaks clone = c("a3_1","a3_2","cb1","ce10"),
row.names = colnames(elbat_ctcf))
= DESeqDataSetFromMatrix(
Ddx5_ctcf_Deseq2_RPGC countData = elbat_ctcf,
colData = metadata_CTCF_peaks,
design = ~ genotype )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
= DESeq(Ddx5_ctcf_Deseq2_RPGC, fitType = 'local') Ddx5_ctcf_Deseq2_RPGC
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
= results(Ddx5_ctcf_Deseq2_RPGC,
res_Ddx5_ctcf_Deseq2_RPGC contrast = c("genotype", "Ddx5", "wt"))
summary( res_Ddx5_ctcf_Deseq2_RPGC )
##
## out of 42232 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 343, 0.81%
## LFC < 0 (down) : 398, 0.94%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 7)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
= counts( Ddx5_ctcf_Deseq2_RPGC,normalized=TRUE ) ctcf_normalized
heatscatter( x=res_Ddx5_ctcf_Deseq2_RPGC$baseMean,
y=res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange,
pch=19, cex=0.1, add.contour = T,
xlim=c(0,1200),ylim=c(-2,2),
xlab=expression(Log[2] ~ (Signal)),
ylab=expression(Log[2] ~ (LFC)),
main="WT mutant")
abline( h=0 )
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
= data.frame( M=res_Ddx5_ctcf_Deseq2_RPGC$baseMean,A=res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange)
df write.table(df,file=paste0(source_data_directory,"MA_ddx5KO.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= res_Ddx5_ctcf_Deseq2_RPGC[!is.na(res_Ddx5_ctcf_Deseq2_RPGC$pvalue),]
res_Ddx5_ctcf_Deseq2_RPGC = res_Ddx5_ctcf_Deseq2_RPGC[!is.na(res_Ddx5_ctcf_Deseq2_RPGC$padj),]
res_Ddx5_ctcf_Deseq2_RPGC
= res_Ddx5_ctcf_Deseq2_RPGC[!is.na(res_Ddx5_ctcf_Deseq2_RPGC$padj),]
res_Ddx5_ctcf_Deseq2_RPGC $col = "gray90"
res_Ddx5_ctcf_Deseq2_RPGC
$col[res_Ddx5_ctcf_Deseq2_RPGC$padj<0.25 & res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange>0]="#049494"
res_Ddx5_ctcf_Deseq2_RPGC$col[res_Ddx5_ctcf_Deseq2_RPGC$padj<0.25 & res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange<0 ]="#305494"
res_Ddx5_ctcf_Deseq2_RPGC$pvalue[res_Ddx5_ctcf_Deseq2_RPGC$pvalue<0.0000000001] = 0.0000000001
res_Ddx5_ctcf_Deseq2_RPGC
par(mfrow=c(1,1),mar=c(5,5,5,5))
plot(res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange,
-log10(res_Ddx5_ctcf_Deseq2_RPGC$pvalue),
col=res_Ddx5_ctcf_Deseq2_RPGC$col,pch=19, cex=0.25,
ylab=expression(-Log[10] ~ (P-val.)),
xlab=expression(Log[2] ~ (Ddx5/WT)),
axes=FALSE, xlim=c(-3,3),cex.lab=1.5 )
axis(1,lwd=2,cex.axis=1.5)
axis(2,lwd=2,cex.axis=1.5)
box(lwd=2, col='black')
points( x=res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange[res_Ddx5_ctcf_Deseq2_RPGC$col=="#049494"],
y=-log10(res_Ddx5_ctcf_Deseq2_RPGC$pvalue[res_Ddx5_ctcf_Deseq2_RPGC$col=="#049494"]),
col="#049494")
points( x=res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange[res_Ddx5_ctcf_Deseq2_RPGC$col=="#305494"],
y=-log10(res_Ddx5_ctcf_Deseq2_RPGC$pvalue[res_Ddx5_ctcf_Deseq2_RPGC$col=="#305494"]),
col="#305494")
table(res_Ddx5_ctcf_Deseq2_RPGC$col)
##
## #049494 #305494 gray90
## 932 1099 40201
= data.frame( LFC=res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange,
df P=res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange)
write.table(df,file=paste0(source_data_directory,"MA_ddx5KO.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= rownames(res_Ddx5_ctcf_Deseq2_RPGC[res_Ddx5_ctcf_Deseq2_RPGC$padj<0.2 & res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange<0,])
lost_peaks = rownames(res_Ddx5_ctcf_Deseq2_RPGC[res_Ddx5_ctcf_Deseq2_RPGC$padj<0.2 & res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange>0,])
gained_peaks
= ctcf_wt_NS_peaks_resized[which(names(ctcf_wt_NS_peaks_resized) %in% lost_peaks)]
lost_Ddx5 = ctcf_wt_NS_peaks_resized[which(names(ctcf_wt_NS_peaks_resized) %in% gained_peaks)]
gained_Ddx5
write.table( res_Ddx5_ctcf_Deseq2_RPGC,
file=paste0(objects_directory,"res_Ddx5_ctcf_Deseq2_RPGC.txt"),
sep='\t',quote=FALSE,row.names=FALSE,col.names=TRUE )
load(paste0(objects_directory,"CTCF_Ddx5ko_NS_A3_RPGC.RData"))
par(mfrow=c(1,2),mar=c(5,5,5,3))
plot( x=seq(-1000,1000,length.out=200),
y=colMeans(a3_1_ctcf_rpgc_ctcf_AP[rownames(a3_1_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
ty="l", col="blue3", ylim=c(0,100),
ylab="RPGC", xlab="Distance from CTCF peak summit")
lines(x=seq(-1000,1000,length.out=200,lwd=2),
y=colMeans(a3_2_ctcf_rpgc_ctcf_AP[rownames(a3_2_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
col="blue3",lwd=2 )
## Warning: W poleceniu 'seq.default(-1000, 1000, length.out = 200, lwd = 2)':
## dodatkowy argument 'lwd' zostanie odrzucony
lines(x=seq(-1000,1000,length.out=200),
y=colMeans(cb1_ctcf_rpgc_ctcf_AP[rownames(cb1_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
col="steelblue3",lwd=2 )
lines(x=seq(-1000,1000,length.out=200),
y=colMeans(ce10_ctcf_rpgc_ctcf_AP[rownames(ce10_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
col="steelblue3",lwd=2 )
plot( x=seq(-1000,1000,length.out=200),
y=colMeans(a3_1_ctcf_rpgc_ctcf_AP[rownames(a3_1_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]),
ty="l", col="blue3", ylim=c(0,100), ylab="RPGC", xlab="Distance from CTCF peak summit",lwd=2)
lines(x=seq(-1000,1000,length.out=200),
y=colMeans(a3_2_ctcf_rpgc_ctcf_AP[rownames(a3_2_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]),
col="blue3",lwd=2 )
lines(x=seq(-1000,1000,length.out=200),
y=colMeans(cb1_ctcf_rpgc_ctcf_AP[rownames(cb1_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]),
col="steelblue3",lwd=2 )
lines(x=seq(-1000,1000,length.out=200),
y=colMeans(ce10_ctcf_rpgc_ctcf_AP[rownames(ce10_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]),
col="steelblue3",lwd=2 )
= data.frame( location=seq(-1000,1000,length.out=200),
df lost_wt1=colMeans(a3_1_ctcf_rpgc_ctcf_AP[rownames(a3_1_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
lost_wt2=colMeans(a3_2_ctcf_rpgc_ctcf_AP[rownames(a3_2_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
lost_cb1=colMeans(cb1_ctcf_rpgc_ctcf_AP[rownames(cb1_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
lost_ce10=colMeans(ce10_ctcf_rpgc_ctcf_AP[rownames(ce10_ctcf_rpgc_ctcf_AP) %in% lost_peaks,]),
gained_wt1=colMeans(a3_1_ctcf_rpgc_ctcf_AP[rownames(a3_1_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]),
gained_wt2=colMeans(a3_2_ctcf_rpgc_ctcf_AP[rownames(a3_2_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]),
gained_cb1=colMeans(cb1_ctcf_rpgc_ctcf_AP[rownames(cb1_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]),
gained_ce10=colMeans(ce10_ctcf_rpgc_ctcf_AP[rownames(ce10_ctcf_rpgc_ctcf_AP) %in% gained_peaks,]))
write.table(df,file=paste0(source_data_directory,"AP_CTCF_wt_ddx5KO.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_DMSO-Rep1_Rep_1_RPGC.bw"))
DMSO1_ctcf_rpgc = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_dTAG13-Rep1_Rep_1_RPGC.bw"))
dTAG1_ctcf_rpgc = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_DMSO-Rep2_Rep_1_RPGC.bw"))
DMSO2_ctcf_rpgc = import.bw(paste0(chipseq_directory,"ChIP_Seq_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_dTAG13-Rep2_Rep_1_RPGC.bw"))
dTAG2_ctcf_rpgc seqlevelsStyle(DMSO1_ctcf_rpgc) = "ucsc"
seqlevelsStyle(dTAG1_ctcf_rpgc) = "ucsc"
seqlevelsStyle(DMSO2_ctcf_rpgc) = "ucsc"
seqlevelsStyle(dTAG2_ctcf_rpgc) = "ucsc"
= getSignalInBins(ctcf_NS_ranges,DMSO1_ctcf_rpgc,1)
DMSO1_ctcf_rpgc_AP = getSignalInBins(ctcf_NS_ranges,dTAG1_ctcf_rpgc,1)
dTAG1_ctcf_rpgc_AP = getSignalInBins(ctcf_NS_ranges,DMSO2_ctcf_rpgc,1)
DMSO2_ctcf_rpgc_AP = getSignalInBins(ctcf_NS_ranges,dTAG2_ctcf_rpgc,1)
dTAG2_ctcf_rpgc_AP rownames(DMSO1_ctcf_rpgc_AP) = ctcf_wt_NS_peaks_summit$name
rownames(dTAG1_ctcf_rpgc_AP) = ctcf_wt_NS_peaks_summit$name
rownames(DMSO2_ctcf_rpgc_AP) = ctcf_wt_NS_peaks_summit$name
rownames(dTAG2_ctcf_rpgc_AP) = ctcf_wt_NS_peaks_summit$name
save(DMSO1_ctcf_rpgc_AP,
dTAG1_ctcf_rpgc_AP,
DMSO2_ctcf_rpgc_AP,
dTAG2_ctcf_rpgc_AP,file=paste0(objects_directory,"CTCF_4F11_NS_RPGC.RData"))
load(paste0(objects_directory,"CTCF_4F11_NS_RPGC.RData"))
=90;b=110
a= data.frame( dmso1 = ( rowSums(DMSO1_ctcf_rpgc_AP[,a:b])),
c4f11_ctcf dmso2 = ( rowSums(DMSO2_ctcf_rpgc_AP[,a:b])),
dTAG1 = ( rowSums(dTAG1_ctcf_rpgc_AP[,a:b])),
dTAG2 = ( rowSums(dTAG2_ctcf_rpgc_AP[,a:b])),
row.names = rownames(DMSO1_ctcf_rpgc_AP))
Check with raw signal like Ddx5
load(paste0(objects_directory,'ctcf_NS_ranges.RData'))
= import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_DMSO-Rep1_Rep_1_filtered_unnormalized.bw"))
DMSO1_ctcf_raw = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_dTAG13-Rep1_Rep_1_filtered_unnormalized.bw"))
dTAG1_ctcf_raw = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_DMSO-Rep2_Rep_1_filtered_unnormalized.bw"))
DMSO2_ctcf_raw = import.bw(paste0(chipseq_directory,"ChIP_Seq_11-24_MusMus_es-NPC_DDX5_FKBP_KI_CTCF-Cterm_HALO+DDX5-FKBP-KI_NPC_Ddx5-FKBP_KI_4F11_dTAG13-Rep2_Rep_1_filtered_unnormalized.bw"))
dTAG2_ctcf_raw seqlevelsStyle(DMSO1_ctcf_raw) = "ucsc"
seqlevelsStyle(dTAG1_ctcf_raw) = "ucsc"
seqlevelsStyle(DMSO2_ctcf_raw) = "ucsc"
seqlevelsStyle(dTAG2_ctcf_raw) = "ucsc"
= getSignalInBins(ctcf_NS_ranges,DMSO1_ctcf_raw,1)
DMSO1_ctcf_raw_AP = getSignalInBins(ctcf_NS_ranges,dTAG1_ctcf_raw,1)
dTAG1_ctcf_raw_AP = getSignalInBins(ctcf_NS_ranges,DMSO2_ctcf_raw,1)
DMSO2_ctcf_raw_AP = getSignalInBins(ctcf_NS_ranges,dTAG2_ctcf_raw,1)
dTAG2_ctcf_raw_AP rownames(DMSO1_ctcf_raw_AP) = ctcf_wt_NS_peaks_summit$name
rownames(dTAG1_ctcf_raw_AP) = ctcf_wt_NS_peaks_summit$name
rownames(DMSO2_ctcf_raw_AP) = ctcf_wt_NS_peaks_summit$name
rownames(dTAG2_ctcf_raw_AP) = ctcf_wt_NS_peaks_summit$name
save(DMSO1_ctcf_raw_AP,
dTAG1_ctcf_raw_AP,
DMSO2_ctcf_raw_AP,
dTAG2_ctcf_raw_AP,file=paste0(objects_directory,"CTCF_4F11_NS_RAW.RData"))
= data.frame( dmso1 = ( rowSums(DMSO1_ctcf_raw_AP[,a:b])),
dTAGD_ctcf dmso2 = ( rowSums(DMSO2_ctcf_raw_AP[,a:b])),
dtag1 = ( rowSums(dTAG1_ctcf_raw_AP[,a:b])),
dtag2 = ( rowSums(dTAG2_ctcf_raw_AP[,a:b])) )
= data.frame( treatment = c("dmso","dmso","dtag","dtag"),
metadata_dTAG row.names = colnames(dTAGD_ctcf))
= DESeqDataSetFromMatrix(
dTAG_ctcf_Deseq2 countData = dTAGD_ctcf,
colData = metadata_dTAG,
design = ~ treatment )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
= DESeq(dTAG_ctcf_Deseq2, fitType = 'local') dTAG_ctcf_Deseq2
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
= results(dTAG_ctcf_Deseq2,
res_dTAG_ctcf_Deseq2 contrast = c("treatment", "dtag", "dmso"))
summary( res_dTAG_ctcf_Deseq2 )
##
## out of 42232 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 287, 0.68%
## LFC < 0 (down) : 256, 0.61%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
= counts( dTAG_ctcf_Deseq2,normalized=TRUE ) dtag_normalized
heatscatter( x=res_dTAG_ctcf_Deseq2$baseMean,
y=res_dTAG_ctcf_Deseq2$log2FoldChange,
pch=19, cex=0.1, add.contour = T,
xlim=c(0,1200),ylim=c(-2,2),
xlab=expression(Log[2] ~ (Signal)),
ylab=expression(Log[2] ~ (LFC)),
main="dTAG13/DMSO")
abline( h=0 )
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
= data.frame( M=res_dTAG_ctcf_Deseq2$baseMean,A=res_dTAG_ctcf_Deseq2$log2FoldChange)
df write.table(df,file=paste0(source_data_directory,"MA_dTAG.txt"),row.names = FALSE, sep="\t",quote=FALSE)
Peaks lost/gained upon dTAG13 and in genetic loss of Ddx5
= which(1.25*c4f11_ctcf$dmso1<c4f11_ctcf$dTAG1 & 1.25*c4f11_ctcf$dmso2<c4f11_ctcf$dTAG2 & res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange>0 & res_Ddx5_ctcf_Deseq2_RPGC$padj<0.25 )
gained_all = which(c4f11_ctcf$dmso1>1.25*c4f11_ctcf$dTAG1 & c4f11_ctcf$dmso2<1.25*c4f11_ctcf$dTAG2 & res_Ddx5_ctcf_Deseq2_RPGC$log2FoldChange<0 & res_Ddx5_ctcf_Deseq2_RPGC$padj<0.25 )
lost_all length(gained_all)
## [1] 124
length(lost_all)
## [1] 251
= rownames(c4f11_ctcf)[which(1.25*c4f11_ctcf$dmso1<c4f11_ctcf$dTAG1 & 1.25*c4f11_ctcf$dmso2<c4f11_ctcf$dTAG2 )]
gained_4F11 = rownames(c4f11_ctcf)[which(c4f11_ctcf$dmso1>1.25*c4f11_ctcf$dTAG1 & c4f11_ctcf$dmso2<1.25*c4f11_ctcf$dTAG2 )]
lost_4F11 length(gained_4F11)
## [1] 4581
length(lost_4F11)
## [1] 8022
= which(1.25*c4f11_ctcf$dmso1<c4f11_ctcf$dTAG1 & 1.25*c4f11_ctcf$dmso2<c4f11_ctcf$dTAG2 )
gained_4F11_id = which(c4f11_ctcf$dmso1>1.25*c4f11_ctcf$dTAG1 & c4f11_ctcf$dmso2<1.25*c4f11_ctcf$dTAG2 )
lost_4F11_id length(gained_4F11_id)
## [1] 4581
length(lost_4F11_id)
## [1] 8022
= ctcf_wt_NS_peaks_resized[which(ctcf_wt_NS_peaks_resized$name %in% lost_4F11 )]
acute_loss_ctcf = ctcf_wt_NS_peaks_resized[which(ctcf_wt_NS_peaks_resized$name %in% gained_4F11 )]
acute_gain_ctcf
export.bed(ctcf_NS_ranges[gained_all],
con = paste0( data_directory,"gained_all.bed" ) )
export.bed(ctcf_NS_ranges[lost_all],
con = paste0( data_directory,"lost_all.bed" ) )
Coordinates of gained and lost peaks for further analyses.
= ctcf_wt_NS_peaks_resized[lost_all]
lost_Ddx5 = ctcf_wt_NS_peaks_resized[ gained_all ]
gained_Ddx5
= Biostrings::getSeq(BSgenome.Mmusculus.UCSC.mm10,lost_Ddx5) lost_ctcf_sequence
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': chr1_GL456210_random, chr1_GL456211_random, chr1_GL456212_random, chr1_GL456213_random, chr1_GL456221_random, chr4_GL456216_random, chr4_GL456350_random, chr4_JH584292_random, chr4_JH584293_random, chr4_JH584294_random, chr4_JH584295_random, chr5_GL456354_random, chr5_JH584296_random, chr5_JH584297_random, chr5_JH584298_random, chr5_JH584299_random, chr7_GL456219_random, chrX_GL456233_random, chrY_JH584300_random, chrY_JH584301_random, chrY_JH584302_random, chrY_JH584303_random, chrUn_GL456239, chrUn_GL456359, chrUn_GL456360, chrUn_GL456366, chrUn_GL456367, chrUn_GL456368, chrUn_GL456370, chrUn_GL456372, chrUn_GL456378, chrUn_GL456379, chrUn_GL456381, chrUn_GL456382, chrUn_GL456383, chrUn_GL456385, chrUn_GL456387, chrUn_GL456389, chrUn_GL456390, chrUn_GL456392, chrUn_GL456393, chrUn_GL456394, chrUn_GL456396, chrUn_JH584304
## - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456216.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456359.1, GL456366.1, GL456367.1, GL456368.1, GL456370.1, GL456372.1, GL456378.1, GL456383.1, GL456385.1, GL456389.1, GL456390.1, GL456392.1, GL456394.1, GL456396.1, JH584293.1, JH584294.1, JH584299.1, JH584304.1
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
= Biostrings::getSeq(BSgenome.Mmusculus.UCSC.mm10,gained_Ddx5) gained_ctcf_sequence
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': chr1_GL456210_random, chr1_GL456211_random, chr1_GL456212_random, chr1_GL456213_random, chr1_GL456221_random, chr4_GL456216_random, chr4_GL456350_random, chr4_JH584292_random, chr4_JH584293_random, chr4_JH584294_random, chr4_JH584295_random, chr5_GL456354_random, chr5_JH584296_random, chr5_JH584297_random, chr5_JH584298_random, chr5_JH584299_random, chr7_GL456219_random, chrX_GL456233_random, chrY_JH584300_random, chrY_JH584301_random, chrY_JH584302_random, chrY_JH584303_random, chrUn_GL456239, chrUn_GL456359, chrUn_GL456360, chrUn_GL456366, chrUn_GL456367, chrUn_GL456368, chrUn_GL456370, chrUn_GL456372, chrUn_GL456378, chrUn_GL456379, chrUn_GL456381, chrUn_GL456382, chrUn_GL456383, chrUn_GL456385, chrUn_GL456387, chrUn_GL456389, chrUn_GL456390, chrUn_GL456392, chrUn_GL456393, chrUn_GL456394, chrUn_GL456396, chrUn_JH584304
## - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456216.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456359.1, GL456366.1, GL456367.1, GL456368.1, GL456370.1, GL456372.1, GL456378.1, GL456383.1, GL456385.1, GL456389.1, GL456390.1, GL456392.1, GL456394.1, GL456396.1, JH584293.1, JH584294.1, JH584299.1, JH584304.1
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
write.table( c4f11_ctcf,file=paste0(data_directory,"c4f11_ctcf.txt"),
sep='\t',quote=FALSE,row.names=FALSE,col.names=TRUE )
write.table( elbat_ctcf,file=paste0(data_directory,"elbat_ctcf.txt"),
sep='\t',quote=FALSE,row.names=FALSE,col.names=TRUE )
write.table( res_Ddx5_ctcf_Deseq2_RPGC,
file=paste0(data_directory,"res_Ddx5_ctcf_Deseq2_RPGC.txt"),
sep='\t',quote=FALSE,row.names=FALSE,col.names=TRUE )
write.table( res_dTAG_ctcf_Deseq2,file=paste0(data_directory,"res_dTAG_ctcf_Deseq2.txt"),
sep='\t',quote=FALSE,row.names=FALSE,col.names=TRUE )
= import.bed(paste0(chipseq_directory,'ChIP_Seq_CTCF_07-22_MusMus_ESC_MOD_CTCF-Cterm_HALO_Control_merged_summits.bed'))
ctcf_wt_ES_peaks seqlevelsStyle(ctcf_wt_ES_peaks)="ucsc"
= do.call('rbind', lapply( as.list(seq(1:length(ctcf_wt_ES_peaks))), function(i){
ctcf_wt_ES_Halo_ranges = ctcf_wt_ES_peaks[i]
g = start(g)
tss = as.character( chrom(g) )
chr =i
peakreturn( data.frame( chr = rep( (chr), 200),
starts = seq( tss-1000, tss+990, length.out=200),
ends = seq( tss-1000, tss+990, length.out=200)+9,
peak = rep( peak, 200) ) ) } ) )
= GRanges(seqnames = Rle(ctcf_wt_ES_Halo_ranges$chr),
ctcf_wt_ES_Halo_ranges ranges = IRanges(as.numeric(ctcf_wt_ES_Halo_ranges$starts),
end = as.numeric(ctcf_wt_ES_Halo_ranges$ends),
names = seq(1, nrow(ctcf_wt_ES_Halo_ranges))),
strand = Rle(rep("*", nrow(ctcf_wt_ES_Halo_ranges))),
peak = ctcf_wt_ES_Halo_ranges$peak )
seqlevelsStyle(ctcf_wt_ES_Halo_ranges) = 'ucsc'
save(ctcf_wt_ES_Halo_ranges,file=paste0(objects_directory,'ctcf_wt_ES_Halo_ranges.RData'))
= import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_07-22_MusMus_ESC_MOD_CTCF-Cterm_HALO_Control_Rep_1_RPGC.bw"))
es_cterm_wt_ctcf1 = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_07-22_MusMus_ESC_MOD_CTCF-Cterm_HALO_Control_Rep_2_RPGC.bw"))
es_cterm_wt_ctcf2
= import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_07-22_MusMus_ESC_DDX5_KO_CTCF-Cterm_HALO_CB1_Rep_1_RPGC.bw"))
es_cterm_cb1_ctcf = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_07-22_MusMus_ESC_DDX5_KO_CTCF-Cterm_HALO_CE10_Rep_1_RPGC.bw"))
es_cterm_ce10_ctcf seqlevelsStyle(es_cterm_wt_ctcf1) = "ucsc"
seqlevelsStyle(es_cterm_wt_ctcf2) = "ucsc"
seqlevelsStyle(es_cterm_cb1_ctcf) = "ucsc"
seqlevelsStyle(es_cterm_ce10_ctcf) = "ucsc"
= getSignalInBins(ctcf_wt_ES_Halo_ranges,es_cterm_wt_ctcf1,1)
es_cterm_wt_ctcf_AP1 = getSignalInBins(ctcf_wt_ES_Halo_ranges,es_cterm_wt_ctcf2,1)
es_cterm_wt_ctcf_AP2
= getSignalInBins(ctcf_wt_ES_Halo_ranges,es_cterm_cb1_ctcf,1)
es_cterm_cb1_ctcf_AP = getSignalInBins(ctcf_wt_ES_Halo_ranges,es_cterm_ce10_ctcf,1)
es_cterm_ce10_ctcf_AP
save(es_cterm_wt_ctcf_AP1,es_cterm_wt_ctcf_AP2,
es_cterm_cb1_ctcf_AP,es_cterm_ce10_ctcf_AP,file=paste0(outputs_directory,"CTCF_Ddx5ko_ES.RData"))
Plot
load(paste0(outputs_directory,"CTCF_Ddx5ko_ES.RData"))
load(paste0(objects_directory,"CTCF_Ddx5ko_NS_A3_RPGC.RData"))
= (es_cterm_wt_ctcf_AP1+es_cterm_wt_ctcf_AP2)/2
es_cterm_ddx5_wt = (es_cterm_ce10_ctcf_AP+es_cterm_cb1_ctcf_AP)/2
es_cterm_ddx5_ko = (a3_1_ctcf_rpgc_ctcf_AP+a3_2_ctcf_rpgc_ctcf_AP)/2
ns_cterm_ddx5_wt = (cb1_ctcf_rpgc_ctcf_AP+ce10_ctcf_rpgc_ctcf_AP)/2
ns_cterm_ddx5_ko
boxplot(log10(rowSums(es_cterm_ddx5_ko[,95:105]))-log10(rowSums(es_cterm_ddx5_wt[,95:105])),
log10(rowSums(ns_cterm_ddx5_ko[,95:105]))-log10(rowSums(ns_cterm_ddx5_wt[,95:105])),
outline=FALSE, border=c("red","blue"),col="white",names=c("ES","NS") )
axis(1,lwd=2,at=c(1,2),c("ES","NS"))
axis(2,lwd=2)
box(col="black",lwd=2)
= max(c(nrow(es_cterm_ddx5_ko),nrow(ns_cterm_ddx5_ko)))
N = data.frame( ES = rep(NA, N), NS=rep(NA,N))
df $ES[1:nrow(es_cterm_ddx5_ko)] = log10(rowSums(es_cterm_ddx5_ko[,95:105]))-log10(rowSums(es_cterm_ddx5_wt[,95:105]))
df$NS[1:nrow(ns_cterm_ddx5_ko)] = log10(rowSums(ns_cterm_ddx5_ko[,95:105]))-log10(rowSums(ns_cterm_ddx5_wt[,95:105]))
df
write.table(df,file=paste0(source_data_directory,"boxplot_CTCF_level.txt"),row.names = FALSE, sep="\t",quote=FALSE)
In 500bp window centered at the peak summit.
library(motifbreakR)
## Ładowanie wymaganego pakietu: MotifDb
## See system.file("LICENSE", package="MotifDb") for use restrictions.
## Warning: zastępowanie poprzedniego importu 'S4Vectors::as.data.frame' przez
## 'motifStack::as.data.frame' podczas ładowanie przestrzeni nazw 'motifbreakR'
##
## Dołączanie pakietu: 'motifbreakR'
## Następujący obiekt został zakryty z 'package:LSD':
##
## homer
data(hocomoco)
= function( this_seq, hcdb ){
findAllHOCOMOCO # this_seq = lost_ctcf_sequence[[10]]; hcdb = hocomoco; i=1
do.call("c",lapply(as.list(seq(1,length(hcdb))), function(x){
countPWM( hcdb[[x]], this_seq, min.score="80%" )
}))
}
= do.call("rbind",lapply(as.list(1:length(lost_ctcf_sequence)),function(x){findAllHOCOMOCO(lost_ctcf_sequence[[x]],hocomoco)}))
lost_ctcf_motif = do.call("rbind",lapply(as.list(1:length(gained_ctcf_sequence)),function(x){findAllHOCOMOCO(gained_ctcf_sequence[[x]],hocomoco)})) gained_ctcf_motif
= colMeans(lost_ctcf_motif>0)
lost_tfs_freq = colMeans(gained_ctcf_motif>0)
gained_tfs_freq
= colSums(lost_ctcf_motif>0)
lost_tfs_N = colSums(gained_ctcf_motif>0)
gained_tfs_N
= do.call("rbind",lapply(as.list(1:length(lost_tfs_N)), function(x){
res_Fisher = matrix(c(lost_tfs_N[x],nrow(lost_ctcf_motif),
m nrow(gained_ctcf_motif)),2,2)
gained_tfs_N[x],= fisher.test(m)
m return( data.frame(pval = round(m$p.value,5), est = m$estimate) )
}))$padj = fdrtool::fdrtool( res_Fisher$pval, statistic="pvalue", plot=FALSE )$qval res_Fisher
## Step 1... determine cutoff point
## Step 2... estimate parameters of null distribution and eta0
## Step 3... compute p-values and estimate empirical PDF/CDF
## Step 4... compute q-values and local fdr
rownames(res_Fisher) = names(hocomoco)
= rep( "gray", nrow(res_Fisher) )
colVector $padj<0.1 & res_Fisher$est>1.25] = "purple4"
colVector[res_Fisher$padj<0.1 & res_Fisher$est<1/1.25] = "orange3"
colVector[res_Fisher
= function(x){
extract_TF = unlist(strsplit( x,"Hsapiens-HOCOMOCO-"))
tp = tp[seq(2,length(tp),by=2)]
tp = unlist(strsplit( tp,"_"))
tp return( tp[seq(1,length(tp),by=2)] )
}
= extract_TF(rownames(res_Fisher[colVector=="purple4",]))
rich_Lost = extract_TF(rownames(res_Fisher[colVector=="orange3",]))
rich_Gained
par(pty="s",mar=c(4,4,4,4),mfrow=c(1,1))
plot( x=lost_tfs_freq,
y=gained_tfs_freq,
xlim=c(0,1),ylim=c(0,1),
xlab="Lost",ylab="Gained",
pch=19, cex=0.75,
col=colVector )
abline(a=0,b=1,col="gray",lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
text( x=lost_tfs_freq[colVector=="purple4"]+0.013,
y=gained_tfs_freq[colVector=="purple4"]-0.013,
cex=0.4)
rich_Lost,text( x=lost_tfs_freq[colVector=="orange3"]+0.013,
y=gained_tfs_freq[colVector=="orange3"]-0.013,
cex=0.4) rich_Gained,
write.table( res_Fisher,file=paste0(data_directory,"res_Fisher.txt"),
sep='\t',quote=FALSE,row.names=FALSE,col.names=TRUE )
MAZ is an example. This does not make any sense. Ddx5 loss should stabilise G4, these sould fister CTCF binding… We see that loss of CTCF is observed at sites prone to forming G4 quadruplexes.
rich_Lost
## [1] "AP2A" "AP2B" "AP2C" "AP2D" "ARNT2" "ARNT" "ATF6A" "BHE41" "CTCF"
## [10] "E2F2" "E2F3" "E2F4" "EGR1" "EGR2" "EGR4" "ELK1" "GABPA" "GLIS3"
## [19] "HES1" "HESX1" "HIF1A" "HTF4" "INSM1" "KLF1" "KLF4" "KLF6" "MAZ"
## [28] "MBD2" "MECP2" "MYOG" "NFKB1" "NR0B1" "NRF1" "P73" "PLAG1" "PLAL1"
## [37] "SP1" "SP1" "SP2" "SP3" "STAT3" "WT1" "ZBT7A" "ZFX" "ZIC1"
## [46] "ZIC3" "ZN148"
rich_Gained
## [1] "ALX1" "ARI3A" "ARI3A" "BARX2" "DLX2" "DLX3" "EVI1" "FOXP3" "GATA5"
## [10] "HNF6" "HXA10" "HXA5" "HXB6" "HXB7" "HXB8" "HXD13" "IRF4" "LHX3"
## [19] "MEF2A" "MEF2D" "NFIL3" "NKX31" "PBX1" "PDX1" "PIT1" "PO2F1" "PO3F2"
## [28] "PO4F2" "SOX2" "TAL1" "TBP" "THB" "ZFHX3"
par(mfrow=c(1,1),mar=c(5,5,1,1),pty="m")
boxplot( ctcf_wt_NS_peaks[lost_all]$motif_score,
$motif_score,
ctcf_wt_NS_peaks[gained_all]outline=FALSE, col="white",border=c("purple4","orange3"),
names=c("lost","gained"),xlab="CTCF peaks",lwd=2,
ylab="CTCF motif score" )
t.test( ctcf_wt_NS_peaks[lost_all]$motif_score, ctcf_wt_NS_peaks[gained_all]$motif_score ) # a significant difference
##
## Welch Two Sample t-test
##
## data: ctcf_wt_NS_peaks[lost_all]$motif_score and ctcf_wt_NS_peaks[gained_all]$motif_score
## t = 6.7954, df = 225.54, p-value = 9.517e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.533429 6.419644
## sample estimates:
## mean of x mean of y
## 15.77321 10.79667
axis(1,lwd=2,at=c(1,2),c("lost","gained"))
axis(2,lwd=2)
box(col="black",lwd=2)
= max(c(length(lost_all),length(gained_all)))
theNtimes = data.frame( lost_score = rep(NA,theNtimes),
df gained_score = rep(NA,theNtimes) )
$lost_score[1:length(lost_all)] = ctcf_wt_NS_peaks[lost_all]$motif_score
df$gained_score[1:length(gained_all)] = ctcf_wt_NS_peaks[gained_all]$motif_score
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_CTCF_motif_strength.txt"),row.names = FALSE, sep="\t",quote=FALSE)
library(ChIPseeker)
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## Registered S3 method overwritten by 'ggtree':
## method from
## identify.gg ggfun
## ChIPseeker v1.28.3 For help: https://guangchuangyu.github.io/software/ChIPseeker
##
## If you use ChIPseeker in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Qing-Yu He. ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization. Bioinformatics 2015, 31(14):2382-2383
##
## Dołączanie pakietu: 'ChIPseeker'
## Następujący obiekt został zakryty z 'package:ggVennDiagram':
##
## overlap
library(sigminer)
## Registered S3 methods overwritten by 'registry':
## method from
## print.registry_field proxy
## print.registry_entry proxy
## Registered S3 method overwritten by 'sigminer':
## method from
## print.bytes Rcpp
## sigminer version 2.3.1
## - Star me at https://github.com/ShixiangWang/sigminer
## - Run hello() to see usage and citation.
library(pqsfinder)
= lost_Ddx5[which(chrom(lost_Ddx5) %in% paste0("chr",c(1:19,"X")))]
lost_Ddx5 = gained_Ddx5[which(chrom(gained_Ddx5) %in% paste0("chr",c(1:19,"X")))]
gained_Ddx5
= Biostrings::getSeq(BSgenome.Mmusculus.UCSC.mm10,lost_Ddx5) lost_ctcf_sequence
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': chr1_GL456210_random, chr1_GL456211_random, chr1_GL456212_random, chr1_GL456213_random, chr1_GL456221_random, chr4_GL456216_random, chr4_GL456350_random, chr4_JH584292_random, chr4_JH584293_random, chr4_JH584294_random, chr4_JH584295_random, chr5_GL456354_random, chr5_JH584296_random, chr5_JH584297_random, chr5_JH584298_random, chr5_JH584299_random, chr7_GL456219_random, chrX_GL456233_random, chrY_JH584300_random, chrY_JH584301_random, chrY_JH584302_random, chrY_JH584303_random, chrUn_GL456239, chrUn_GL456359, chrUn_GL456360, chrUn_GL456366, chrUn_GL456367, chrUn_GL456368, chrUn_GL456370, chrUn_GL456372, chrUn_GL456378, chrUn_GL456379, chrUn_GL456381, chrUn_GL456382, chrUn_GL456383, chrUn_GL456385, chrUn_GL456387, chrUn_GL456389, chrUn_GL456390, chrUn_GL456392, chrUn_GL456393, chrUn_GL456394, chrUn_GL456396, chrUn_JH584304
## - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456216.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456359.1, GL456366.1, GL456367.1, GL456368.1, GL456370.1, GL456372.1, GL456378.1, GL456383.1, GL456385.1, GL456389.1, GL456390.1, GL456392.1, GL456394.1, GL456396.1, JH584293.1, JH584294.1, JH584299.1, JH584304.1
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
= Biostrings::getSeq(BSgenome.Mmusculus.UCSC.mm10,gained_Ddx5) gained_ctcf_sequence
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': chr1_GL456210_random, chr1_GL456211_random, chr1_GL456212_random, chr1_GL456213_random, chr1_GL456221_random, chr4_GL456216_random, chr4_GL456350_random, chr4_JH584292_random, chr4_JH584293_random, chr4_JH584294_random, chr4_JH584295_random, chr5_GL456354_random, chr5_JH584296_random, chr5_JH584297_random, chr5_JH584298_random, chr5_JH584299_random, chr7_GL456219_random, chrX_GL456233_random, chrY_JH584300_random, chrY_JH584301_random, chrY_JH584302_random, chrY_JH584303_random, chrUn_GL456239, chrUn_GL456359, chrUn_GL456360, chrUn_GL456366, chrUn_GL456367, chrUn_GL456368, chrUn_GL456370, chrUn_GL456372, chrUn_GL456378, chrUn_GL456379, chrUn_GL456381, chrUn_GL456382, chrUn_GL456383, chrUn_GL456385, chrUn_GL456387, chrUn_GL456389, chrUn_GL456390, chrUn_GL456392, chrUn_GL456393, chrUn_GL456394, chrUn_GL456396, chrUn_JH584304
## - in 'y': GL456210.1, GL456211.1, GL456212.1, GL456216.1, GL456221.1, GL456233.1, GL456350.1, GL456354.1, GL456359.1, GL456366.1, GL456367.1, GL456368.1, GL456370.1, GL456372.1, GL456378.1, GL456383.1, GL456385.1, GL456389.1, GL456390.1, GL456392.1, GL456394.1, GL456396.1, JH584293.1, JH584294.1, JH584299.1, JH584304.1
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
= DNAString("CG")
CpG = vcountPattern(CpG, lost_ctcf_sequence)
lost_CpG = vcountPattern(CpG, gained_ctcf_sequence)
gained_CpG
par(mfrow=c(1,1),mar=c(5,5,1,1),pty="m")
boxplot( lost_CpG,
gained_CpG, outline=FALSE, col="white",border=c("purple4","orange3"),
names=c("lost","gained"),xlab="CTCF peaks",lwd=2,
ylab="number of CpGs/peak" )
t.test( lost_CpG, gained_CpG ) # a significant difference
##
## Welch Two Sample t-test
##
## data: lost_CpG and gained_CpG
## t = 11.237, df = 369.69, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 12.87545 18.33777
## sample estimates:
## mean of x mean of y
## 21.187251 5.580645
axis(1,lwd=2,at=c(1,2),c("lost","gained"))
axis(2,lwd=2)
box(col="black",lwd=2)
= max(c(length(lost_all),length(gained_all)))
theNtimes = data.frame( lost_score = rep(NA,theNtimes),
df gained_score = rep(NA,theNtimes) )
$lost_score[1:length(lost_all)] = lost_CpG
df$gained_score[1:length(gained_all)] = gained_CpG
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_CpG_motif_strength.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= lapply( as.list(1:length(lost_ctcf_sequence)),
lost_ctcf_sequence_G4 function(le){
pqsfinder(lost_ctcf_sequence[le][[1]],
max_defects = 0,
min_score = 20)
})= lapply( as.list(1:length(gained_ctcf_sequence)),
gained_ctcf_sequence_G4 function(le){
pqsfinder(gained_ctcf_sequence[le][[1]],
max_defects = 0,
min_score = 20)})
save( lost_ctcf_sequence_G4,gained_ctcf_sequence_G4,
file=paste0(objects_directory,"G4_analysis.RData"))
load(paste0(objects_directory,"G4_analysis.RData"))
= unlist(lapply(lost_ctcf_sequence_G4,function(x){length(score(x))}))
lost_G4_general = unlist(lapply(gained_ctcf_sequence_G4,function(x){length(score(x))}))
gained_G4_general
par(mfrow=c(1,1),mar=c(5,5,1,1),pty="m")
boxplot( lost_G4_general,
gained_G4_general, outline=FALSE, col="white",border=c("purple4","orange3"),
names=c("lost","gained"),xlab="CTCF peaks",lwd=2,
ylab="number of putative G4" )
t.test( lost_G4_general, gained_G4_general ) # a significant difference
##
## Welch Two Sample t-test
##
## data: lost_G4_general and gained_G4_general
## t = 8.5811, df = 360.38, p-value = 2.842e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.748968 2.788945
## sample estimates:
## mean of x mean of y
## 4.099602 1.830645
axis(1,lwd=2,at=c(1,2),c("lost","gained"))
axis(2,lwd=2)
box(col="black",lwd=2)
= max(c(length(lost_all),length(gained_all)))
theNtimes = data.frame( lost_score = rep(NA,theNtimes),
df gained_score = rep(NA,theNtimes) )
$lost_score[1:length(lost_all)] = lost_G4_general
df$gained_score[1:length(gained_all)] = gained_G4_general
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_number_G4.txt"),row.names = FALSE, sep="\t",quote=FALSE)
load(paste0(objects_directory,"G4_analysis.RData"))
= function( sr ){
extractMaxScore =score(sr)
tpif(length(tp)>0) {
=tp[is.finite(tp)]
tpreturn( max(tp) )}
}= unlist(lapply(lost_ctcf_sequence_G4,extractMaxScore))
lost_G4_general = unlist(lapply(gained_ctcf_sequence_G4,extractMaxScore))
gained_G4_general
par(mfrow=c(1,1),mar=c(5,5,1,1),pty="m")
boxplot( lost_G4_general,
gained_G4_general, outline=FALSE, col="white",border=c("purple4","orange3"),
names=c("lost","gained"),xlab="CTCF peaks",lwd=2,
ylab="Max G4 score" )
t.test( lost_G4_general, gained_G4_general ) # a significant difference
##
## Welch Two Sample t-test
##
## data: lost_G4_general and gained_G4_general
## t = 3.2901, df = 228.44, p-value = 0.00116
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.460778 13.795382
## sample estimates:
## mean of x mean of y
## 47.77391 39.14583
axis(1,lwd=2,at=c(1,2),c("lost","gained"))
axis(2,lwd=2)
box(col="black",lwd=2)
= max(c(length(lost_all),length(gained_all)))
theNtimes = data.frame( lost_score = rep(NA,theNtimes),
df gained_score = rep(NA,theNtimes) )
$lost_score[1:length(lost_G4_general)] = lost_G4_general
df$gained_score[1:length(gained_G4_general)] = gained_G4_general
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_max_G4_score.txt"),row.names = FALSE, sep="\t",quote=FALSE)
load(paste0(objects_directory,"all_CTCF_sequence_G4.RData"))
## average profile of G4
= function(G5res,peak_width, scoreThr){
getAPvector_G4 # G5res=all_CTCF_sequence_G4[[4]];peak_width=1000;scoreThr=20
= IRanges(seq(1,peak_width,by=5),seq(5,peak_width,by=5))
tpgr = G5res@ranges[which(G5res@elementMetadata$score>scoreThr)]
theser if(length(theser)>0){res = countOverlaps(tpgr,theser)}
if(length(theser)==0){res = rep(0, length(tpgr))}
return(res)
}
= do.call("rbind",lapply(all_CTCF_sequence_G4,
G4_all_CTCF_AP function(x){getAPvector_G4(x,peak_width=2000,scoreThr=10)}))
= function(chr,start,end,offset){
getGR = GRanges(seqnames = Rle(chr),
tp ranges = IRanges(start-offset,
end = end+offset),
strand = Rle(rep("*", length(start))),
loop = seq(1, length(start)) )
seqlevelsStyle(tp)='ucsc'
return(tp)}
= BSgenome.Mmusculus.UCSC.mm10
genome = seqinfo(genome)
si
= binAnno( si, as.list(paste0('chr', c(1:19,'X','Y'))), 10000)
ga $binid = unlist(lapply( split(ga,ga$chr)[paste0('chr', c(1:19,'X','Y'))], function(x){seq(1,nrow(x))}) )
ga
= GRanges(seqnames = Rle(ga$chr),
gagr ranges = IRanges(as.numeric(ga$start),
end = as.numeric(ga$end),
names = seq(1, nrow(ga))),
strand = Rle(rep("*", nrow(ga))),
binid = ga$binid)
= binAnno( si, as.list(paste0('chr', c(1:19,'X','Y'))), 5000)
ga5 $binid = unlist(lapply( split(ga5,ga5$chr)[paste0('chr', c(1:19,'X','Y'))], function(x){seq(1,nrow(x))}) )
ga5= GRanges(seqnames = Rle(ga5$chr),
gagr5 ranges = IRanges(as.numeric(ga5$start),
end = as.numeric(ga5$end),
names = seq(1, nrow(ga5))),
strand = Rle(rep("*", nrow(ga5))),
binid = ga5$binid,
seqlengths = seqlengths(BSgenome.Mmusculus.UCSC.mm10))
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 21 out-of-bound ranges located on sequences
## chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11,
## chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chrX, and chrY.
## Note that ranges located on a sequence whose length is unknown (NA) or
## on a circular sequence are not considered out-of-bound (use
## seqlengths() and isCircular() to get the lengths and circularity flags
## of the underlying sequences). You can use trim() to trim these ranges.
## See ?`trim,GenomicRanges-method` for more information.
= trim(gagr5)
gagr5 names(gagr5) = paste(chrom(gagr5),names(gagr5),sep="_")
= read.delim( paste0(hic_directory,"A1_A3_merged_loops.bedpe"),
loops_a1_a3 as.is=TRUE )
= loops_a1_a3[-1,]
loops_a1_a3 = do.call("rbind",lapply(as.list(paste0("chr",c(1:19,"X","Y"))),function(chr){
loops_a1_a3 = loops_a1_a3[loops_a1_a3$X.chr1==chr,]
tp = tp[order(tp$x1,decreasing=FALSE),]
tp return(tp)
}))
rownames(loops_a1_a3) = paste0( loops_a1_a3$X.chr1,":",loops_a1_a3$x1,"-",loops_a1_a3$y2 )
= GRanges( seqnames = Rle(loops_a1_a3$X.chr1),
loops_A1_A3_left ranges = IRanges(start=loops_a1_a3$centroid1,
end=loops_a1_a3$centroid1),
names=rownames(loops_a1_a3) )
= GRanges( seqnames = Rle(loops_a1_a3$chr2),
loops_A1_A3_right ranges = IRanges(start=loops_a1_a3$centroid2,
end=loops_a1_a3$centroid2),
names=rownames(loops_a1_a3) )
$left_binID = gagr$binid[ subjectHits(findOverlaps(loops_A1_A3_left,gagr))]
loops_a1_a3$right_binID = gagr$binid[ subjectHits(findOverlaps(loops_A1_A3_right,gagr))]
loops_a1_a3
$left_binID_random = gagr$binid[ subjectHits(findOverlaps(loops_A1_A3_left,gagr))]+8 #+8
loops_a1_a3$right_binID_random = gagr$binid[ subjectHits(findOverlaps(loops_A1_A3_right,gagr))]+8 #-8
loops_a1_a3$size = loops_a1_a3$y2-loops_a1_a3$x2
loops_a1_a3
$left_CTCF = overlapsAny( GenomicRanges::resize( loops_A1_A3_left, 20000,fix="center"), ctcf_wt_NS_peaks[which(ctcf_wt_NS_peaks$motif_strand=="+")])
loops_a1_a3$right_CTCF = overlapsAny( GenomicRanges::resize( loops_A1_A3_right, 20000,fix="center"), ctcf_wt_NS_peaks[which(ctcf_wt_NS_peaks$motif_strand=="-")])
loops_a1_a3
## ------
= which( loops_a1_a3$size>200000 & rowSums(loops_a1_a3[,c("left_CTCF","right_CTCF")])==2 )
loops_to_consider = GenomicRanges::resize(loops_A1_A3_left,fix="center",width=20000)
anchors_left_big = GenomicRanges::resize(loops_A1_A3_right,fix="center",width=20000)
anchors_right_big
= loops_a1_a3
random_loops $left_binID = random_loops$left_binID_random
random_loops$right_binID = random_loops$right_binID_random
random_loops$size = 10000 * (random_loops$right_binID-random_loops$left_binID) random_loops
= unique(queryHits(findOverlaps(ctcf_wt_NS_peaks,c(GenomicRanges::resize( loops_A1_A3_left, 20000,fix="center"),GenomicRanges::resize( loops_A1_A3_right, 20000,fix="center")))))
ctcf_wt_NS_peaks_at_loop_anchors = (1:length(ctcf_wt_NS_peaks))[-unique(queryHits(findOverlaps(ctcf_wt_NS_peaks,c(GenomicRanges::resize( loops_A1_A3_left, 20000,fix="center"),GenomicRanges::resize( loops_A1_A3_right, 20000,fix="center")))))]
ctcf_wt_NS_peaks_not_at_loop_anchors
= ctcf_wt_NS_peaks[ which(ctcf_wt_NS_peaks$motif_strand=="+")]
ctcf_wt_NS_peaks_correct_ori_left = ctcf_wt_NS_peaks_correct_ori_left[unique(queryHits(findOverlaps(ctcf_wt_NS_peaks_correct_ori_left,GenomicRanges::resize( loops_A1_A3_left, 10000,fix="center"))))]
ctcf_wt_NS_peaks_correct_ori_left
= ctcf_wt_NS_peaks[ which(ctcf_wt_NS_peaks$motif_strand=="-")]
ctcf_wt_NS_peaks_correct_ori_right = ctcf_wt_NS_peaks_correct_ori_right[unique(queryHits(findOverlaps(ctcf_wt_NS_peaks_correct_ori_right,GenomicRanges::resize( loops_A1_A3_right, 10000,fix="center"))))] ctcf_wt_NS_peaks_correct_ori_right
par(mfrow=c(1,1),mar=c(5,5,5,3))
plot( x=seq(-1000,1000,length.out=200),
y=colMeans(a3_1_ctcf_rpgc_ctcf_AP[which(ctcf_wt_NS_peaks$name %in% c(ctcf_wt_NS_peaks_correct_ori_left$name,ctcf_wt_NS_peaks_correct_ori_right$name)),]),
ty="l", col="green4", ylim=c(0,60),
ylab="RPGC", xlab="Distance from CTCF peak summit", main="",lwd=2)
lines(x=seq(-1000,1000,length.out=200),
y=colMeans(a3_1_ctcf_rpgc_ctcf_AP[ctcf_wt_NS_peaks_not_at_loop_anchors,]),
lwd=2,
col="brown" )
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
= data.frame( location=seq(-1000,1000,length.out=200),
df loop=colMeans(a3_1_ctcf_rpgc_ctcf_AP[which(ctcf_wt_NS_peaks$name %in% c(ctcf_wt_NS_peaks_correct_ori_left$name,ctcf_wt_NS_peaks_correct_ori_right$name)),]),
not_loop =colMeans(a3_1_ctcf_rpgc_ctcf_AP[ctcf_wt_NS_peaks_not_at_loop_anchors,]) )
write.table(df,file=paste0(source_data_directory,"AP_CTCF_loop_no_loop.txt"),row.names = FALSE, sep="\t",quote=FALSE)
library(forecast)
## Warning: pakiet 'forecast' został zbudowany w wersji R 4.1.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Dołączanie pakietu: 'forecast'
## Następujący obiekt został zakryty z 'package:ggpubr':
##
## gghistogram
load(paste0(objects_directory,"all_CTCF_sequence_G4.RData"))
= function(G5res,peak_width, scoreThr){
getAPvector_G4_Pos # G5res=all_CTCF_sequence_G4[[4]];peak_width=1000;scoreThr=20
= IRanges(seq(1,peak_width,by=20),seq(20,peak_width,by=20))
tpgr = G5res@ranges[which(G5res@elementMetadata$score>scoreThr & G5res@elementMetadata$strand=="+")]
theser if(length(theser)>0){res = countOverlaps(tpgr,theser)}
if(length(theser)==0){res = rep(0, length(tpgr))}
return(res) }
= function(G5res,peak_width, scoreThr){
getAPvector_G4_Neg # G5res=all_CTCF_sequence_G4[[4]];peak_width=1000;scoreThr=20
= IRanges(seq(1,peak_width,by=20),seq(20,peak_width,by=20))
tpgr = G5res@ranges[which(G5res@elementMetadata$score>scoreThr & G5res@elementMetadata$strand=="-")]
theser if(length(theser)>0){res = countOverlaps(tpgr,theser)}
if(length(theser)==0){res = rep(0, length(tpgr))}
return(res) }
= do.call("rbind",lapply(all_CTCF_sequence_G4,
G4_all_CTCF_AP_Pos function(x){getAPvector_G4_Pos(x,peak_width=2000,scoreThr=20)}))
= do.call("rbind",lapply(all_CTCF_sequence_G4,
G4_all_CTCF_AP_Neg function(x){getAPvector_G4_Neg(x,peak_width=2000,scoreThr=20)}))
=TTR::SMA((colMeans(G4_all_CTCF_AP_Pos[which(ctcf_wt_NS_peaks$name %in% c(ctcf_wt_NS_peaks_correct_ori_left$name)),]>0)+rev(colMeans(G4_all_CTCF_AP_Neg[which(ctcf_wt_NS_peaks$name %in% c(ctcf_wt_NS_peaks_correct_ori_right$name)),]>0)))/2,n=3)
sma1=TTR::SMA((colMeans(G4_all_CTCF_AP_Neg[which(ctcf_wt_NS_peaks$name %in% c(ctcf_wt_NS_peaks_correct_ori_left$name)),]>0)+rev(colMeans(G4_all_CTCF_AP_Pos[which(ctcf_wt_NS_peaks$name %in% c(ctcf_wt_NS_peaks_correct_ori_right$name)),]>0)))/2,n=3)
sma2
par(mfrow=c(1,1),mar=c(5,4,4,1),pty="m")
plot(x=seq(-1000,1000,
length.out=length(sma1)),
sma1, ty="l",lwd=2,
ylab="presence of G4q",
xlab="Distance from CTCF summit (bp)", col="black",
ylim=c(0.05,0.1))
lines( x=seq(-1000,1000,length.out=length(sma1)),
col="orange2",lwd=2 )
sma2, box(col="black",lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
abline(v=0,lwd=1.5,col="gray")
abline(h=mean(c(sma1[1:50],sma2[1:50]),na.rm=TRUE),
lty=2,lwd=0.75,col="gray")
abline(h=mean(c(sma1[51:100],sma2[51:100]),na.rm=TRUE),
lty=2,lwd=0.75,col="gray")
= data.frame( location=seq(-1000,1000,
df length.out=length(sma1)),
pos=sma1,
neg=sma2)
write.table(df,file=paste0(source_data_directory,"G4dist.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= matrix(c(sum(gained_all %in% ctcf_wt_NS_peaks_not_at_loop_anchors ),
m sum(gained_all %in% ctcf_wt_NS_peaks_at_loop_anchors ),
sum(lost_all %in% ctcf_wt_NS_peaks_not_at_loop_anchors ),
sum(lost_all %in% ctcf_wt_NS_peaks_at_loop_anchors ) ),2,2,byrow = T)
barplot(m, beside=TRUE, col= c("orange3","purple4"),
lwd=2,ylim=c(0,200), names=c("Other","Anchor") )
fisher.test(m)
##
## Fisher's Exact Test for Count Data
##
## data: m
## p-value = 6.906e-14
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 3.484954 9.770175
## sample estimates:
## odds ratio
## 5.774276
load( paste0( hic_directory,"wt_A3_full.RData" ) )
load(paste0( hic_directory,"A1_10kb_hic.RData" ) ) # NT1
load(paste0( hic_directory,"cb1_lib28_10kb.RData" ) ) # cb1_lib28_10kb
load(paste0( hic_directory,"ce10_10kb.RData" ) ) # ce10_10kb
= function( ipf_chr_object, binSize, distance2consider ){
getDD # tp = NT1$chr1$balanced;binSize=10000;distance2consider=10000000
= as.data.frame(summary(ipf_chr_object))
tp $distance = binSize*(abs(tp$j-tp$i))
tp= tp[ tp$distance<distance2consider,]
tp = data.frame( seq(0,distance2consider,by=binSize) )
distances colnames(distances) = "distance"
= split(tp,tp$distance)
tps = lapply(tps, function(x){ median(x$x) })
res = do.call("rbind",res)
res $x = NA
distances$x[match(as.numeric(rownames(res)), distances$distance)] = res[,1]
distancesreturn(distances) }
= lapply( NT1, function(x){
wt1_dd =x$balanced
ipf_chr_objectreturn( getDD(ipf_chr_object,10000,50000000))})
= lapply( wt_A3, function(x){
wt3_dd =x$balanced
ipf_chr_objectreturn( getDD(ipf_chr_object,10000,50000000))})
= lapply( cb1_lib28_10kb, function(x){
cb1_dd =x$balanced
ipf_chr_objectreturn( getDD(ipf_chr_object,10000,50000000))})
= lapply( ce10_10kb, function(x){
ce10_dd =x$balanced
ipf_chr_objectreturn( getDD(ipf_chr_object,10000,50000000))})
We can apply mean
=c(1,2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19)
chroms
= do.call("cbind",lapply( wt1_dd[chroms],function(x){x$x}))
wt1_DD = do.call("cbind",lapply( wt3_dd[chroms],function(x){x$x}))
wt3_DD = do.call("cbind",lapply( cb1_dd[chroms],function(x){x$x}))
cb1_DD = do.call("cbind",lapply( ce10_dd[chroms],function(x){x$x}))
ce10_DD
par(mfrow=c(1,1),mar=c(5,5,5,2))
plot( x=log10( wt1_dd[[1]]$distance ),
y=log10(rowMeans(cbind(wt1_DD,wt3_DD))),
ty="l", col="blue3", main="",
xlab = expression(Log[10] ~ (Distance (bp))),
ylab = expression(Log[10] ~ (Median (HiC))),
ylim=c(-4,-1.5), lwd=2, cex.lab=1.5, cex.axis=1.5 )
lines( x=log10( cb1_dd[[1]]$distance ),
y=log10(rowMeans(cbind(cb1_DD,ce10_DD))),col="steelblue3",lwd=2)
axis(1,lwd=2,cex.axis=1.5)
axis(2,lwd=2,cex.axis=1.5)
box(col="black",lwd=2)
= data.frame( distance=log10( wt1_dd[[1]]$distance ),
df wt=log10(rowMeans(cbind(wt1_DD,wt3_DD))),
ddx5_KO=log10(rowMeans(cbind(cb1_DD,ce10_DD))))
write.table(df,file=paste0(source_data_directory,"DD_wt_mut.txt"),row.names = FALSE, sep="\t",quote=FALSE)
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
=9;j=13
i
= unlist(lapply(APA_A1_IPF_sig,function(x){sum(x[i:j,i:j],
A1_loop_IPF_signal na.rm = TRUE)}))
= unlist(lapply(APA_A3_IPF_sig,function(x){sum(x[i:j,i:j],
A3_loop_IPF_signal na.rm = TRUE)}))
= unlist(lapply(APA_CB1_IPF_sig,function(x){sum(x[i:j,i:j],
CB1_loop_IPF_signal na.rm = TRUE)}))
= unlist(lapply(APA_CE10_IPF_sig,function(x){sum(x[i:j,i:j],
CE10_loop_IPF_signal na.rm = TRUE)}))
= data.frame(a1=A1_loop_IPF_signal,
ipf a3=A3_loop_IPF_signal,
cb1 = CB1_loop_IPF_signal,
ce10 = CE10_loop_IPF_signal)
Prepare IPF, loop signal and LFM signal.
= data.frame( a1 = getLoopSignal(APA_A1_IPF_sig,2),
loop_signal a3 = getLoopSignal(APA_A3_IPF_sig,2),
cb1 = getLoopSignal(APA_CB1_IPF_sig,2),
ce10 = getLoopSignal(APA_CE10_IPF_sig,2))
= rownames( loop_signal[ rowMeans(loop_signal)>1,] )
real_loops = real_loops[real_loops %in% rownames(loops_a1_a3[rowSums(loops_a1_a3[,c("left_CTCF","right_CTCF")])==2 ,])]
real_loops
= rowMeans( loop_signal[rownames(loop_signal) %in% real_loops,c("a1","a3")])
wt_ls = rowMeans( loop_signal[rownames(loop_signal) %in% real_loops,c("cb1","ce10")])
mt_ls
= mt_ls-wt_ls
ko_ls_minus_wt_ls save( loop_signal, real_loops, wt_ls, mt_ls, ko_ls_minus_wt_ls,
file=paste0(objects_directory,"loop_singal_Ddx5ko_Wt.RData"))
Differential loops - both the IPF signal and loop signal gained in all the possible combinations.
load( paste0(objects_directory,"loop_singal_Ddx5ko_Wt.RData") )
= ipf[ipf$a1>ipf$cb1 & ipf$a1>ipf$ce10 & ipf$a3>ipf$cb1 & ipf$a3>ipf$ce10 & loop_signal$a1>loop_signal$cb1 & loop_signal$a1>loop_signal$ce10 & loop_signal$a3>loop_signal$cb1 & loop_signal$a3>loop_signal$ce10,]
lost_loops = ipf[ipf$cb1>ipf$a1 & ipf$cb1>ipf$a3 & ipf$ce10>ipf$a1 & ipf$cb1>ipf$a3 & loop_signal$a1<loop_signal$cb1 & loop_signal$a1<loop_signal$ce10 & loop_signal$a3<loop_signal$cb1 & loop_signal$a3<loop_signal$ce10,]
gained_loops
dim(lost_loops)
## [1] 1381 4
dim(gained_loops)
## [1] 73 4
par(mfrow=c(1,1),mar=c(5,5,4,1),pty="m")
barplot( c(nrow(lost_loops),nrow(gained_loops)),
col = c("gray40","green3"), ylim=c(0,1500),
names=c("lost","gained"),main="Loop change upon Ddx5 loss")
axis(2,lwd=2)
= rownames(lost_loops)
loop4analysis = loop4analysis[loop4analysis %in% rownames(loops_a1_a3[loops_a1_a3$size>100000,])]
loop4analysis length(loop4analysis)
## [1] 1173
= Reduce("+",APA_A1_IPF_sig[names(APA_A1_IPF_sig) %in% loop4analysis])/length(loop4analysis)
A1_loops_A1_sig_AP = Reduce("+",APA_A3_IPF_sig[names(APA_A3_IPF_sig) %in% loop4analysis])/length(loop4analysis)
A1_loops_A3_sig_AP
= Reduce("+",APA_CB1_IPF_sig[names(APA_CB1_IPF_sig) %in% loop4analysis])/length(loop4analysis)
A1_loops_CB1_sig_AP = Reduce("+",APA_CE10_IPF_sig[names(APA_CE10_IPF_sig) %in% loop4analysis])/length(loop4analysis)
A1_loops_CE10_sig_AP
= ( A1_loops_A1_sig_AP + A1_loops_A3_sig_AP )/2
loops_wt_sig_AP = ( A1_loops_CB1_sig_AP + A1_loops_CE10_sig_AP )/2
loops_Ddx5_sig_AP
= t(matrix(apply(loops_wt_sig_AP,2,rev),21,21))
loops_wt_sig_AP = t(matrix(apply(loops_Ddx5_sig_AP,2,rev),21,21))
loops_Ddx5_sig_AP
sum( loops_wt_sig_AP[10:12,10:12] ) / sum( loops_wt_sig_AP[16:18,4:6] )
## [1] 3.055245
sum( loops_Ddx5_sig_AP[10:12,10:12] ) / sum( loops_Ddx5_sig_AP[16:18,4:6] )
## [1] 2.534127
= matrix( cut(as.numeric(loops_wt_sig_AP),
loops_wt_sig_AP c(seq(0.00005,0.0075,length.out=255),Inf),labels = FALSE), 21,21 )
= matrix( cut(as.numeric(loops_Ddx5_sig_AP),
loops_Ddx5_sig_AP c(seq(0.00005,0.0075,length.out=255),Inf),labels = FALSE), 21,21 )
1,1]=256
loops_wt_sig_AP[21,21]=1
loops_wt_sig_AP[
1,1]=256
loops_Ddx5_sig_AP[21,21]=1
loops_Ddx5_sig_AP[
par(mfrow=c(1,2), pty='s',mar=c(2,2,2,2) )
image( loops_wt_sig_AP,
col=colorRampPalette(c("black","white","orange","red"))(256),
axes=FALSE, main="Wild type")
box(col="black")
image( loops_Ddx5_sig_AP,
col=colorRampPalette(c("black","white","orange","red"))(256),
axes=FALSE, main="Ddx5-/-")
box(col="black")
= GetCentroidSignal( random_loops,
ran_A1_IPF 10,
NT1, paste0("chr",c(1:19) ),
"balanced" )
= ProcessLoops( ran_A1_IPF )
ran_A1_IPF_sig
= GetCentroidSignal( random_loops,
ran_A3_IPF 10,
wt_A3, paste0("chr",c(1:19) ),
"balanced" )
= ProcessLoops( ran_A3_IPF )
ran_A3_IPF_sig
= GetCentroidSignal( random_loops,
ran_CB1_IPF 10,
cb1_lib28_10kb, paste0("chr",c(1:19) ),
"balanced" )
= ProcessLoops( ran_CB1_IPF )
ran_CB1_IPF_sig
= GetCentroidSignal( random_loops,
ran_CE10_IPF 10,
ce10_10kb, paste0("chr",c(1:19) ),
"balanced" )
= ProcessLoops( ran_CE10_IPF )
ran_CE10_IPF_sig
=9;j=13
i= unlist(lapply(ran_A1_IPF_sig,function(x){sum(x[i:j,i:j],
ran_A1_loop_IPF_signal na.rm = TRUE)}))
= unlist(lapply(ran_A3_IPF_sig,function(x){sum(x[i:j,i:j],
ran_A3_loop_IPF_signal na.rm = TRUE)}))
= unlist(lapply(ran_CB1_IPF_sig,function(x){sum(x[i:j,i:j],
ran_CB1_loop_IPF_signal na.rm = TRUE)}))
= unlist(lapply(ran_CE10_IPF_sig,function(x){sum(x[i:j,i:j],
ran_CE10_loop_IPF_signal na.rm = TRUE)}))
= data.frame(a1=ran_A1_loop_IPF_signal,
ran a3=ran_A3_loop_IPF_signal,
cb1 = ran_CB1_loop_IPF_signal,
ce10 = ran_CE10_loop_IPF_signal)
= data.frame( a1 = getLoopSignal(ran_A3_IPF_sig,2),
rand_signal a3 = getLoopSignal(ran_A3_IPF_sig,2),
cb1 = getLoopSignal(ran_CB1_IPF_sig,2),
ce10 = getLoopSignal(ran_CE10_IPF_sig,2))
save( ran, rand_signal, ran_A1_loop_IPF_signal, ran_A3_loop_IPF_signal, ran_CB1_loop_IPF_signal,
ran_CE10_loop_IPF_signal,
ran_A3_IPF_sig, ran_A3_IPF_sig, ran_CB1_IPF_sig,
ran_CE10_IPF_sig,rand_signal,file=paste0(data_directory,"loops_signals_all.RData"))
Big list
load(paste0(data_directory,"loops_signals_all.RData"))
$dist_bin = cut(loops_a1_a3$size,c(0,100000,
loops_a1_a3200000,400000,
800000, 1600000,
100000000),labels=FALSE)
= split(rownames(loops_a1_a3),loops_a1_a3$dist_bin)
loops_categories
$dist_bin = cut(random_loops$size,c(0,100000,200000,
random_loops400000,800000,
1600000,
100000000),labels=FALSE)
= split(rownames(random_loops),loops_a1_a3$dist_bin) loops_categories_ran
Merge them into one thing
= which( rownames(loop_signal) %in% real_loops )
id = rowMeans(loop_signal[id,c("cb1","ce10")])-rowMeans(loop_signal[id,c("a1","a3")])
ratio_ipf_mut_wt
= rowMeans(rand_signal[,c("cb1","ce10")])-rowMeans(rand_signal[,c("a1","a3")])
ratio_ipf_mut_wt_ran
= vector("list",2*length(loops_categories))
resl c(1,3,5,7,9,11)] = lapply( loops_categories_ran, function(ll){ratio_ipf_mut_wt_ran[names(ratio_ipf_mut_wt_ran) %in% ll]})
resl[c(2,4,6,8,10,12)] = lapply( loops_categories, function(ll){ratio_ipf_mut_wt[names(ratio_ipf_mut_wt) %in% ll]})
resl[= lapply(resl,function(x){tp=x[!is.na(x)];tp=tp[is.finite(tp)];return(tp)})
resl
par(mfrow=c(1,1),mar=c(4,4,4,1))
boxplot(resl,outline=FALSE, notch=TRUE,
ylim=c(-1,1), col="white", border=c("gray","purple4"),
cex.lab=1,cex.axis=1,
ylab=expression(Log[2] ~ (Ddx5/WT)),lwd=2)
axis(1,lwd=2,at=1:12,cex.axis=1)
axis(2,lwd=2,cex.axis=1)
box(col="black",lwd=2)
abline(h=0,lwd=2,col="red4")
t.test(resl[[1]],resl[[2]])
##
## Welch Two Sample t-test
##
## data: resl[[1]] and resl[[2]]
## t = -0.80122, df = 40.539, p-value = 0.4277
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.17251789 0.07453693
## sample estimates:
## mean of x mean of y
## -0.053574988 -0.004584509
t.test(resl[[3]],resl[[4]])
##
## Welch Two Sample t-test
##
## data: resl[[3]] and resl[[4]]
## t = -2.8019, df = 38.71, p-value = 0.007892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3476802 -0.0561136
## sample estimates:
## mean of x mean of y
## -0.09321102 0.10868587
t.test(resl[[5]],resl[[6]])
##
## Welch Two Sample t-test
##
## data: resl[[5]] and resl[[6]]
## t = 7.2078, df = 1474.6, p-value = 9.048e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.05395082 0.09429570
## sample estimates:
## mean of x mean of y
## -0.01644028 -0.09056354
t.test(resl[[7]],resl[[8]])
##
## Welch Two Sample t-test
##
## data: resl[[7]] and resl[[8]]
## t = 12.325, df = 2688.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.09545635 0.13157593
## sample estimates:
## mean of x mean of y
## 0.004569777 -0.108946363
t.test(resl[[9]],resl[[10]])
##
## Welch Two Sample t-test
##
## data: resl[[9]] and resl[[10]]
## t = 11.114, df = 1742.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1177987 0.1682873
## sample estimates:
## mean of x mean of y
## 0.01828049 -0.12476249
t.test(resl[[11]],resl[[12]])
##
## Welch Two Sample t-test
##
## data: resl[[11]] and resl[[12]]
## t = 6.0776, df = 796.74, p-value = 1.89e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1108845 0.2166815
## sample estimates:
## mean of x mean of y
## 0.04621846 -0.11756454
unlist(lapply(resl,length))
## [1] 2695 40 2264 38 2742 768 2535 909 1516 565 683 198
= max(unlist(lapply(resl,length)))
theNtimes = data.frame( gr1_matched=rep(NA,theNtimes),
df gr1_loops=rep(NA,theNtimes),
gr2_matched=rep(NA,theNtimes),
gr2_loops=rep(NA,theNtimes),
gr3_matched=rep(NA,theNtimes),
gr3_loops=rep(NA,theNtimes),
gr4_matched=rep(NA,theNtimes),
gr4_loops=rep(NA,theNtimes),
gr5_matched=rep(NA,theNtimes),
gr5_loops=rep(NA,theNtimes),
gr6_matched=rep(NA,theNtimes),
gr6_loops=rep(NA,theNtimes))
$gr1_matched[1:length(resl[[1]])] = resl[[1]]
df$gr1_loops[1:length(resl[[2]])] = resl[[2]]
df$gr2_matched[1:length(resl[[3]])] = resl[[3]]
df$gr2_loops[1:length(resl[[4]])] = resl[[4]]
df$gr3_matched[1:length(resl[[5]])] = resl[[5]]
df$gr3_loops[1:length(resl[[6]])] = resl[[6]]
df$gr4_matched[1:length(resl[[7]])] = resl[[7]]
df$gr4_loops[1:length(resl[[8]])] = resl[[8]]
df$gr5_matched[1:length(resl[[9]])] = resl[[9]]
df$gr5_loops[1:length(resl[[10]])] = resl[[10]]
df$gr6_matched[1:length(resl[[11]])] = resl[[11]]
df$gr6_loops[1:length(resl[[12]])] = resl[[12]]
df
write.table(df,file=paste0(source_data_directory,"boxplots_loop_change_wt_Ddx5_distance.txt"),row.names = FALSE, sep="\t",quote=FALSE)
rm(list=c("wt_A3","NT1","cb1_lib28_10kb","ce10_10kb"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 26686194 1425.2 46187740 2466.7 NA 57734675 3083.4
## Vcells 1296007590 9887.8 4377481799 33397.6 131072 5471852248 41747.0
load(paste0( hic_directory,"ko_10kb.RData"))
load(paste0( hic_directory,"wt_A1_A3_10kb.RData"))
names(wt_10kb) = paste0("chr",c(1:19,"X"))
names(ko_10kb) = paste0("chr",c(1:19,"X"))
par(mfrow=c(1,1),mar=c(3,3,3,3), pty="s")
=0;chr="chr10";s=102200000;en=103200000
w= getMatrixForARegion( lowertri=wt_10kb,
TOP uppertri=ko_10kb,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga,
upperLimit=0.012 )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
image( TOP,axes=FALSE, col=colorRampPalette(c("white","red"))(256))
box(col="black",lwd=3)
par(mfrow=c(1,1),mar=c(3,3,3,3), pty="s")
=0;chr="chr12";s=15600000;en=16800000
w= getMatrixForARegion( lowertri=wt_10kb,
TOP uppertri=ko_10kb,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga,
upperLimit=0.012 )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
image( TOP,axes=FALSE, col=colorRampPalette(c("white","red"))(256))
box(col="black",lwd=3)
par(mfrow=c(1,1),mar=c(3,3,3,3), pty="s")
=0;chr="chr2";s=37600000;en=38600000
w
= getMatrixForARegion( lowertri=wt_10kb,
TOP uppertri=ko_10kb,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga,
upperLimit=0.012 )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
image( TOP,axes=FALSE, col=colorRampPalette(c("white","red"))(256))
box(col="black",lwd=3)
par(mfrow=c(1,1),mar=c(3,3,3,3), pty="s",bty="O")
=0;chr="chr7";s=142200000;en=142800000
w
= getMatrixForARegion( lowertri=wt_10kb,
TOP uppertri=ko_10kb,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga,
upperLimit=0.012 )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
image( TOP,axes=FALSE, col=colorRampPalette(c("white","red"))(256))
box(col="black",lwd=2)
= GetCentroidSignal( loops_a1_a3,
wt_IPF 11,
wt_10kb, paste0("chr",c(1:19) ),
"balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( wt_IPF )
wt_IPF_sig
= GetCentroidSignal( loops_a1_a3,
ko_IPF 11,
ko_10kb, paste0("chr",c(1:19) ),
"balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( ko_IPF ) ko_IPF_sig
rm(list=c("wt_10kb","ko_10kb"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 27869940 1488.5 46187740 2466.7 NA 57734675 3083.4
## Vcells 1367615760 10434.1 4377481799 33397.6 131072 5471852248 41747.0
load(paste0( hic_directory,"4F11_DMSO_10kb_hic.RData" ) ) # DMSO_10kb
load(paste0( hic_directory,"4F11_dTAG_10kb_hic.RData" ) ) # dTAG_10kb
= GetCentroidSignal( loops_a1_a3,
DMSO_4F11_loops_IPF 10,
DMSO_10kb, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( DMSO_4F11_loops_IPF )
DMSO_4F11_loops_IPF_sig
= GetCentroidSignal( loops_a1_a3,
dTAG_4F11_loops_IPF 10,
dTAG_10kb, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( dTAG_4F11_loops_IPF )
dTAG_4F11_loops_IPF_sig
= GetCentroidSignal( random_loops,
DMSO_4F11_random_IPF 10,
DMSO_10kb, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( DMSO_4F11_random_IPF )
DMSO_4F11_random_IPF_sig
= GetCentroidSignal( random_loops,
dTAG_4F11_random_IPF 10,
dTAG_10kb, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( dTAG_4F11_random_IPF ) dTAG_4F11_random_IPF_sig
=9;j=13
i
= unlist(lapply(DMSO_4F11_loops_IPF_sig,function(x){sum(x[i:j,i:j],
APA_4F11_DMSO_signal na.rm = TRUE)}))
= unlist(lapply(dTAG_4F11_loops_IPF_sig,function(x){sum(x[i:j,i:j],
APA_4F11_dTAG_signal na.rm = TRUE)}))
= unlist(lapply(DMSO_4F11_random_IPF_sig,function(x){sum(x[i:j,i:j],
ran_4F11_DMSO_signal na.rm = TRUE)}))
= unlist(lapply(dTAG_4F11_random_IPF_sig,function(x){sum(x[i:j,i:j],
ran_4F11_dTAG_signal na.rm = TRUE)}))
= data.frame(dmso = APA_4F11_DMSO_signal,
degron_ipf dtag = APA_4F11_dTAG_signal)
= data.frame(dmso = ran_4F11_DMSO_signal,
random_ipf dtag = ran_4F11_dTAG_signal)
This plot is nice. Here we look at loops with anchors intersecting CTCF peaks losign signal upon dTAG13 treatment.
= unique(c(anchors_left_big[unique(queryHits(findOverlaps(anchors_left_big,acute_loss_ctcf)))]$names,anchors_right_big[unique(queryHits(findOverlaps(anchors_right_big,acute_loss_ctcf)))]$names))
loop4analysis length(loop4analysis)
## [1] 5648
The fold change of loop signal in the DMSO and dTAG13 treated cells for the loops which lose CTCF
= log2((degron_ipf$dtag)/(degron_ipf$dmso))
fc_dTAG = cut( degron_ipf$dmso,
sig_dTAG c(0,0.04,0.08,0.16,0.32,Inf),
labels=FALSE )
table(sig_dTAG)
## sig_dTAG
## 1 2 3 4 5
## 799 2222 4162 4108 1260
= split(fc_dTAG,sig_dTAG)
fcs par(pty="m")
boxplot(fcs,outline=FALSE,notch=TRUE, col="white",
border=colorRampPalette(c("green4","blue4"))(5),lwd=2)
abline(h=0,lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
t.test(fcs[[1]])
##
## One Sample t-test
##
## data: fcs[[1]]
## t = 1.2039, df = 798, p-value = 0.229
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01127725 0.04705101
## sample estimates:
## mean of x
## 0.01788688
t.test(fcs[[2]])
##
## One Sample t-test
##
## data: fcs[[2]]
## t = -8.9669, df = 2221, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03910128 -0.02506767
## sample estimates:
## mean of x
## -0.03208447
t.test(fcs[[3]])
##
## One Sample t-test
##
## data: fcs[[3]]
## t = -20.027, df = 4161, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.05603898 -0.04604542
## sample estimates:
## mean of x
## -0.0510422
t.test(fcs[[4]])
##
## One Sample t-test
##
## data: fcs[[4]]
## t = -46.118, df = 4107, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.07787825 -0.07152685
## sample estimates:
## mean of x
## -0.07470255
t.test(fcs[[5]])
##
## One Sample t-test
##
## data: fcs[[5]]
## t = -22.315, df = 1259, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.08480700 -0.07110044
## sample estimates:
## mean of x
## -0.07795372
t.test(fcs[[1]],fcs[[5]])
##
## Welch Two Sample t-test
##
## data: fcs[[1]] and fcs[[5]]
## t = 6.2795, df = 886.95, p-value = 5.313e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06588575 0.12579544
## sample estimates:
## mean of x mean of y
## 0.01788688 -0.07795372
= max(unlist(lapply(fcs,length)))
theNtimes = data.frame( gr1=rep(NA,theNtimes),
df gr2=rep(NA,theNtimes),
gr3=rep(NA,theNtimes),
gr4=rep(NA,theNtimes),
gr5=rep(NA,theNtimes))
$gr1[1:length(fcs[[1]])] = fcs[[1]]
df$gr2[1:length(fcs[[2]])] = fcs[[2]]
df$gr3[1:length(fcs[[3]])] = fcs[[3]]
df$gr4[1:length(fcs[[4]])] = fcs[[4]]
df$gr5[1:length(fcs[[5]])] = fcs[[5]]
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_Loop_signal_FC_dTAG.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= anchors_left_big[which(anchors_left_big$names %in% rownames(lost_loops))]
lost_loop_anchors_left = anchors_right_big[which(anchors_right_big$names %in% rownames(lost_loops))]
lost_loop_anchors_right all(lost_loop_anchors_left$names==lost_loop_anchors_right$names)
## [1] TRUE
= data.frame( lost_left_lost_ctcf = countOverlaps(lost_loop_anchors_left,acute_loss_ctcf),
l lost_right_lost_ctcf = countOverlaps(lost_loop_anchors_right,acute_loss_ctcf),
lost_left_lost_ctcf_ko = countOverlaps(lost_loop_anchors_left,ctcf_wt_NS_peaks[as.numeric(rownames(res_Ddx5_ctcf_Deseq2_RPGC[res_Ddx5_ctcf_Deseq2_RPGC$col=="#305494",]))]),
lost_right_lost_ctcf_ko = countOverlaps(lost_loop_anchors_right,ctcf_wt_NS_peaks[as.numeric(rownames(res_Ddx5_ctcf_Deseq2_RPGC[res_Ddx5_ctcf_Deseq2_RPGC$col=="#305494",]))]),
row.names = lost_loop_anchors_right$names )
## ko_ls_minus_wt_ls --> all the architectural loops
## rownames(l[rowSums(l[,1:2])>0,]) --> architectural loops whereby I see a loss of CTCF binding in dTAG13 treated cells
## ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,3:4])>0,])]
par(mfrow=c(1,1),pty="m",mar=c(5,5,5,1),bty="n")
boxplot( ko_ls_minus_wt_ls,
names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,1:2])>0,])],
ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,3:4])>0,])],
ko_ls_minus_wt_ls[names=c("all loops","dTAG","KO"),
notch=TRUE, outline=FALSE,ylim=c(-1,1),
col=c("white"), border=c("black","gray40","steelblue3"),lwd=2,
ylab="Loop strength change LFC Mut vs Wt")
axis(1,lwd=2,at=c(1,2,3),c("all loops","dTAG","KO"))
axis(2,lwd=2)
abline(h=0,lwd=2,lty=2)
t.test( ko_ls_minus_wt_ls, ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,1:2])>0,])] )
##
## Welch Two Sample t-test
##
## data: ko_ls_minus_wt_ls and ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[, 1:2]) > 0, ])]
## t = 20.853, df = 283.56, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2039634 0.2464822
## sample estimates:
## mean of x mean of y
## -0.1026239 -0.3278467
t.test( ko_ls_minus_wt_ls, ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,3:4])>0,])] )
##
## Welch Two Sample t-test
##
## data: ko_ls_minus_wt_ls and ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[, 3:4]) > 0, ])]
## t = 10.936, df = 47.16, p-value = 1.579e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1893869 0.2747665
## sample estimates:
## mean of x mean of y
## -0.1026239 -0.3347006
length(ko_ls_minus_wt_ls)
## [1] 2518
length( ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,1:2])>0,])])
## [1] 195
length(ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,3:4])>0,])])
## [1] 44
= length(ko_ls_minus_wt_ls)
theNtimes = data.frame( gr1=rep(NA,theNtimes),
df gr2=rep(NA,theNtimes),
gr3=rep(NA,theNtimes))
$gr1[1:length(ko_ls_minus_wt_ls)] = ko_ls_minus_wt_ls
df$gr2[1:length(ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,1:2])>0,])])] = ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,1:2])>0,])]
df$gr3[1:length(ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,3:4])>0,])])] = ko_ls_minus_wt_ls[names(ko_ls_minus_wt_ls) %in% rownames(l[rowSums(l[,3:4])>0,])]
df
write.table(df,file=paste0(source_data_directory,"boxplots_Loop_signal_FC_ctcf_ko_dTAG.txt"),
row.names = FALSE, sep="\t",quote=FALSE)
load(paste0( hic_directory,"4F11_DMSO_5kb_hic.RData" ) )
load(paste0( hic_directory,"4F11_dTAG_5kb_hic.RData" ) )
par(mfrow=c(1,1),mar=c(3,3,3,3), pty="s",bty="O")
="chr3";s=123160000;en=123355000;w=150000
chr
= getMatrixForARegion( lowertri=DMSO_5kb,
TOP uppertri=dTAG_5kb,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga5,
upperLimit=0.012 )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
image( TOP,axes=FALSE, col=colorRampPalette(c("white","red"))(256))
box(col="black",lwd=3)
Loop signals for tables
= cbind(ipf,degron_ipf)
loop_singals_Ddx5 write.table( loop_singals_Ddx5, file=paste0(data_directory,"loop_singals_Ddx5.txt"),
quote=FALSE, row.names=TRUE, col.names = TRUE,
sep="\t" )
rm(list=c("DMSO_5kb","dTAG_5kb"))
load(paste0( hic_directory,"A3_rep2_10kb_hic.RData" ) ) # A3_rep2
load(paste0( hic_directory,"PB6_10kb_hic.RData" ) ) # pb6
load(paste0( hic_directory,"PE3_10kb_hic.RData" ) ) # Treated
=Treated
pe3rm(list=c("Treated"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 28026990 1496.9 46187740 2466.7 NA 57734675 3083.4
## Vcells 4777823025 36451.9 6317706219 48200.3 131072 5471852248 41747.0
Distance decline
= function( ipf_chr_object, binSize, distance2consider ){
getDD # tp = NT1$chr1$balanced;binSize=10000;distance2consider=10000000
= as.data.frame(summary(ipf_chr_object))
tp $distance = binSize*(abs(tp$j-tp$i))
tp= tp[ tp$distance<distance2consider,]
tp = data.frame( seq(0,distance2consider,by=binSize) )
distances colnames(distances) = "distance"
= split(tp,tp$distance)
tps = lapply(tps, function(x){ median(x$x) })
res = do.call("rbind",res)
res $x = NA
distances$x[match(as.numeric(rownames(res)), distances$distance)] = res[,1]
distancesreturn(distances) }
= lapply( A3_rep2, function(x){
a3_2_dd =x$balanced
ipf_chr_objectreturn( getDD(ipf_chr_object,10000,50000000))})
= lapply( pb6, function(x){
pb6_dd =x$balanced
ipf_chr_objectreturn( getDD(ipf_chr_object,10000,50000000))})
= lapply( pe3, function(x){
pe3_dd =x$balanced
ipf_chr_objectreturn( getDD(ipf_chr_object,10000,50000000))})
save( a3_2_dd, pb6_dd, pe3_dd,
file=paste0(objects_directory,"Pantr1_DD.RData") )
We can apply mean
=c(2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19)
chromsload(paste0(objects_directory,"Pantr1_DD.RData"))
= do.call("cbind",lapply( wt1_dd[chroms],function(x){x$x}))
wt1_DD = do.call("cbind",lapply( wt3_dd[chroms],function(x){x$x}))
wt3_DD = do.call("cbind",lapply( a3_2_dd[chroms],function(x){x$x}))
a3_2_DD = do.call("cbind",lapply( pb6_dd[chroms],function(x){x$x}))
pb6_DD = do.call("cbind",lapply( pe3_dd[chroms],function(x){x$x}))
pe3_DD
par(mfrow=c(1,1))
plot( x=log10( a3_2_dd$chr1$distance ),
y=log10(rowMeans(a3_2_DD)),
ty="l", col="blue3", main="",
xlab = expression(Log[10] ~ (Distance (bp))),
ylab = expression(Log[10] ~ (Median (HiC))),
ylim=c(-4,-1.5), lwd=2 )
lines( x=log10( a3_2_dd$chr1$distance ),
y=log10(rowMeans(wt1_DD)),col="blue3",lwd=2)
lines( x=log10( a3_2_dd$chr1$distance ),
y=log10(rowMeans(wt3_DD)),col="blue3",lwd=2)
lines( x=log10( a3_2_dd$chr1$distance ),
y=log10(rowMeans(pb6_DD)),col="deeppink4",lwd=2)
lines( x=log10( a3_2_dd$chr1$distance ),
y=log10(rowMeans(pe3_DD)),col="deeppink4",lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
= data.frame( distance=log10( a3_2_dd$chr1$distance ),
df wt1=log10(rowMeans(a3_2_DD)),
wt2=log10(rowMeans(wt1_DD)),
wt3=log10(rowMeans(wt3_DD)),
pantr1_ko_pb6=log10(rowMeans(pb6_DD)),
pantr1_ko_pe3=log10(rowMeans(pe3_DD)) )
write.table(df,file=paste0(source_data_directory,"DD_wt_mutPantr1.txt"),
row.names = FALSE, sep="\t",quote=FALSE)
Example loci
="chr5";s=99200000;en=100050000;w=300000;ul=0.02 # in the fig.
chr="chr1";s=125775000;en=126425000;w=300000;ul=0.01
chr
= getMatrixForARegion( lowertri=A3_rep2,
TO1 uppertri=A3_rep2,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga,
upperLimit=ul )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
= getMatrixForARegion( lowertri=pe3,
TO2 uppertri=pe3,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga,
upperLimit=ul )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
= getMatrixForARegion( lowertri=pb6,
TO3 uppertri=pb6,
CHROM=chr,
START=s-w,
END=en+w,
ga=ga,
upperLimit=ul )
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
par(mfrow=c(1,3),mar=c(1,1,2,1), pty="s",bty="O")
image( TO1,axes=FALSE, col=colorRampPalette(c("white","red"))(256),
main="Wild type")
box(col="black",lwd=3)
image( TO2,axes=FALSE, col=colorRampPalette(c("white","red"))(256),
main="Pantr1 - PE3")
box(col="black",lwd=3)
image( TO3,axes=FALSE, col=colorRampPalette(c("white","red"))(256),
main="Pantr1 - PB6")
box(col="black",lwd=3)
Loop signal
=9;j=13
i
= GetCentroidSignal( loops_a1_a3,
a3_2_loops_IPF 10,
A3_rep2, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( a3_2_loops_IPF )
a3_2_loops_IPF_sig
= GetCentroidSignal( loops_a1_a3,
pe3_loops_IPF 10,
pe3, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( pe3_loops_IPF )
pe3_loops_IPF_sig
= GetCentroidSignal( loops_a1_a3,
pb6_loops_IPF 10,
pb6, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( pb6_loops_IPF )
pb6_loops_IPF_sig
= unlist(lapply(a3_2_loops_IPF_sig,function(x){sum(x[i:j,i:j],
A3_loop_signal2 na.rm = TRUE)}))
= unlist(lapply(pe3_loops_IPF_sig,function(x){sum(x[i:j,i:j],
PE3_loop_IPF_signal na.rm = TRUE)}))
= unlist(lapply(pb6_loops_IPF_sig,function(x){sum(x[i:j,i:j],
PB6_loop_IPF_signal na.rm = TRUE)}))
all(names(A3_loop_signal2)==names(PE3_loop_IPF_signal) & names(PE3_loop_IPF_signal)==names(PB6_loop_IPF_signal))
## [1] TRUE
= data.frame(a3=A3_loop_signal2,
ipf_pantr1 pe3 = PE3_loop_IPF_signal,
pb6 = PB6_loop_IPF_signal,
row.names = names(A3_loop_signal2))
## random intervals
= GetCentroidSignal( random_loops,
a3_2_random_loops_IPF 10,
A3_rep2, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( a3_2_random_loops_IPF )
a3_2_random_loops_IPF_sig
= GetCentroidSignal( random_loops,
pe3_random_loops_IPF 10,
pe3, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( pe3_random_loops_IPF )
pe3_random_loops_IPF_sig
= GetCentroidSignal( random_loops,
pb6_random_loops_IPF 10,
pb6, paste0("chr",c(1:19) ),
which_file="balanced" )
## [1] "chr1"
## [1] "chr2"
## [1] "chr3"
## [1] "chr4"
## [1] "chr5"
## [1] "chr6"
## [1] "chr7"
## [1] "chr8"
## [1] "chr9"
## [1] "chr10"
## [1] "chr11"
## [1] "chr12"
## [1] "chr13"
## [1] "chr14"
## [1] "chr15"
## [1] "chr16"
## [1] "chr17"
## [1] "chr18"
## [1] "chr19"
= ProcessLoops( pb6_random_loops_IPF )
pb6_random_loops_IPF_sig = unlist(lapply(a3_2_random_loops_IPF_sig,function(x){sum(x[i:j,i:j],
A3_loop_random_signal2 na.rm = TRUE)}))
= unlist(lapply(pe3_random_loops_IPF_sig,function(x){sum(x[i:j,i:j],
PE3_loop_random_signal na.rm = TRUE)}))
= unlist(lapply(pb6_random_loops_IPF_sig,function(x){sum(x[i:j,i:j],
PB6_loop_random_signal na.rm = TRUE)}))
= data.frame(a3=A3_loop_random_signal2,
ipf_pantr1_ran pe3 = PE3_loop_random_signal,
pb6 = PB6_loop_random_signal,
row.names = names(A3_loop_random_signal2))
write.table( ipf_pantr1_ran, file=paste0(data_directory,"ipf_pantr1_ran.txt"),
quote=FALSE, row.names=TRUE, col.names = TRUE,
sep="\t" )
write.table( ipf_pantr1, file=paste0(data_directory,"ipf_pantr1.txt"),
quote=FALSE, row.names=TRUE, col.names = TRUE,
sep="\t" )
Loop signal - impact of Pantr1
= data.frame( a3 = getLoopSignal(a3_2_loops_IPF_sig,2),
loop_signal_pantr1 pe3 = getLoopSignal(pe3_loops_IPF_sig,2),
pb6 = getLoopSignal(pb6_loops_IPF_sig,2))
= rownames( loop_signal_pantr1[loop_signal_pantr1$a3 >1,] )
real_loops_pantr1 = real_loops_pantr1[real_loops_pantr1 %in% rownames(loops_a1_a3[rowSums(loops_a1_a3[,c("left_CTCF","right_CTCF")])==2 ,])]
real_loops_pantr1
= loop_signal_pantr1[match( real_loops_pantr1,rownames(loop_signal_pantr1)),"a3"]
a3_ls = rowMeans( loop_signal_pantr1[match( real_loops_pantr1,rownames(loop_signal_pantr1)),c("pe3","pb6")])
pantr1_ls
= ipf_pantr1[which( ipf_pantr1$pe3>ipf_pantr1$a3 & ipf_pantr1$pb6>ipf_pantr1$a3 & loop_signal_pantr1$pe3>loop_signal_pantr1$a3 & loop_signal_pantr1$pb6>loop_signal_pantr1$a3),]
loops_gained_pantr
= ipf_pantr1[which( ipf_pantr1$pe3<ipf_pantr1$a3 & ipf_pantr1$pb6<ipf_pantr1$a3 & loop_signal_pantr1$pe3<loop_signal_pantr1$a3 & loop_signal_pantr1$pb6<loop_signal_pantr1$a3),]
loops_lost_pantr
par(mfrow=c(1,1),mar=c(5,5,4,1),pty="m")
barplot( c(nrow(loops_lost_pantr),nrow(loops_gained_pantr)),
col = c("gray40","green3"), ylim=c(0,4000),
names=c("lost","gained"),main="Loop change upon Pantr1 loss")
axis(2,lwd=2)
save( loop_signal_pantr1, real_loops_pantr1, a3_ls, pantr1_ls,
file=paste0(data_directory,"pantr1_loops_singals.RData") ) loops_gained_pantr, loops_lost_pantr,
write.table( loops_lost_pantr,file=paste0(data_directory,"loops_lost_pantr.txt"),
quote=FALSE,row.names=TRUE, col.names = TRUE,
sep="\t" )
write.table( loops_gained_pantr,file=paste0(data_directory,"loops_gained_pantr.txt"),
quote=FALSE,row.names=TRUE, col.names = TRUE,
sep="\t" )
load(paste0(data_directory,"pantr1_loops_singals.RData"))
= log2(rowMeans(ipf_pantr1[match(real_loops_pantr1,rownames(ipf_pantr1)),2:3 ])/((ipf_pantr1$a3[match(real_loops_pantr1,rownames(ipf_pantr1)) ])))
fc_Pantr1 = cut( ipf_pantr1$a3[match(real_loops_pantr1,rownames(ipf_pantr1))],
sig_Pantr c(0,0.04,0.08,0.16,0.32,Inf),
labels=FALSE )
table(sig_Pantr)
## sig_Pantr
## 1 2 3 4 5
## 192 663 1212 677 140
= split(fc_Pantr1,sig_Pantr)
fcs2 save(fc_Pantr1,sig_Pantr, fcs2, file=paste0(data_directory,"Pantr1_arch_loop_strength.RData"))
load(paste0(data_directory,"Pantr1_arch_loop_strength.RData"))
par(pty="m")
boxplot(fcs2,outline=FALSE,notch=TRUE, col="white",
border=colorRampPalette(c("green4","blue4"))(5),lwd=2,
ylim=c(-1,1), ylab="LFC (Mut vs. Pantr1-/-)")
abline(h=0,lwd=1,lty=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
t.test(fcs2[[1]])
##
## One Sample t-test
##
## data: fcs2[[1]]
## t = 1.0192, df = 191, p-value = 0.3094
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02758709 0.08657211
## sample estimates:
## mean of x
## 0.02949251
t.test(fcs2[[2]])
##
## One Sample t-test
##
## data: fcs2[[2]]
## t = -13.473, df = 662, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1970322 -0.1469062
## sample estimates:
## mean of x
## -0.1719692
t.test(fcs2[[3]])
##
## One Sample t-test
##
## data: fcs2[[3]]
## t = -44.041, df = 1211, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.3044588 -0.2784897
## sample estimates:
## mean of x
## -0.2914742
t.test(fcs2[[4]])
##
## One Sample t-test
##
## data: fcs2[[4]]
## t = -74.595, df = 676, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.4474995 -0.4245455
## sample estimates:
## mean of x
## -0.4360225
t.test(fcs2[[5]])
##
## One Sample t-test
##
## data: fcs2[[5]]
## t = -23.774, df = 139, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.4491845 -0.3802065
## sample estimates:
## mean of x
## -0.4146955
t.test(fcs2[[1]],fcs2[[5]])
##
## Welch Two Sample t-test
##
## data: fcs2[[1]] and fcs2[[5]]
## t = 13.146, df = 300.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.3776949 0.5106812
## sample estimates:
## mean of x mean of y
## 0.02949251 -0.41469552
unlist(lapply(fcs2, length))
## 1 2 3 4 5
## 192 663 1212 677 140
= max(unlist(lapply(fcs2,length)))
theNtimes = data.frame( gr1=rep(NA,theNtimes),
df gr2=rep(NA,theNtimes),
gr3=rep(NA,theNtimes),
gr4=rep(NA,theNtimes),
gr5=rep(NA,theNtimes))
$gr1[1:length(fcs2[[1]])] = fcs2[[1]]
df$gr2[1:length(fcs2[[2]])] = fcs2[[2]]
df$gr3[1:length(fcs2[[3]])] = fcs2[[3]]
df$gr4[1:length(fcs2[[4]])] = fcs2[[4]]
df$gr5[1:length(fcs2[[5]])] = fcs2[[5]]
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_arch_Loop_signal_FC_Pantr1.txt"),
row.names = FALSE, sep="\t",quote=FALSE)
load(paste0(objects_directory,'ctcf_NS_ranges.RData'))
= import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_10-24_MusMus_es-NPC_PANTR1_KO_CTCF-Cterm_HALO_Pantr1_KO_NPC_Pantr1_KO_PE3_Rep_1_RPGC.bw"))
pe3_1_ctcf_rpgc = import.bw(paste0(chipseq_directory,"ChIP_Seq_CTCF_10-24_MusMus_es-NPC_PANTR1_KO_CTCF-Cterm_HALO_Pantr1_KO_NPC_Pantr1_KO_PB6_Rep_1_RPGC.bw"))
pb6_2_ctcf_rpgc seqlevelsStyle(pe3_1_ctcf_rpgc) = "ucsc"
seqlevelsStyle(pb6_2_ctcf_rpgc) = "ucsc"
= getSignalInBins(ctcf_NS_ranges,pe3_1_ctcf_rpgc,1)
pe3_1_ctcf_rpgc_AP = getSignalInBins(ctcf_NS_ranges,pb6_2_ctcf_rpgc,1)
pb6_1_ctcf_rpgc_AP
=90;b=110
a= data.frame( wt1 = ( rowSums(a3_1_ctcf_rpgc_ctcf_AP[,a:b])),
pantr_ctcf wt2 = ( rowSums(a3_2_ctcf_rpgc_ctcf_AP[,a:b])),
pe3 = ( rowSums(pe3_1_ctcf_rpgc_AP[,a:b])),
pb6 = ( rowSums(pb6_1_ctcf_rpgc_AP[,a:b])),
row.names = rownames(a3_1_ctcf_rpgc_ctcf_AP))
= which( pantr_ctcf$wt1>1.25*pantr_ctcf$pe3 & pantr_ctcf$wt2>1.25*pantr_ctcf$pe3 & pantr_ctcf$wt1>1.25*pantr_ctcf$pb6 & pantr_ctcf$wt2>1.25*pantr_ctcf$pb6 )
peaks_lost_in_Pantr
= which( 1.25*pantr_ctcf$wt1<pantr_ctcf$pe3 & 1.25*pantr_ctcf$wt2<pantr_ctcf$pe3 & 1.25*pantr_ctcf$wt1<pantr_ctcf$pb6 & 1.25*pantr_ctcf$wt2<pantr_ctcf$pb6 )
peaks_gained_in_Pantr
= unique( c(unique(queryHits(findOverlaps(ctcf_wt_NS_peaks,anchors_left_big[which(anchors_left_big$names %in% rownames(loops_lost_pantr))]))),
peaks_at_loops_lost_in_Pantr unique(queryHits(findOverlaps(ctcf_wt_NS_peaks,anchors_right_big[which(anchors_right_big$names %in% rownames(loops_lost_pantr))])))))
par(mfrow=c(1,1),mar=c(5,5,4,1),pty="m")
barplot( c(length(peaks_lost_in_Pantr),length(peaks_gained_in_Pantr)),
col = c("gray40","green3"), ylim=c(0,4000),
names=c("lost","gained"),main="CTCF peak change")
axis(2,lwd=2)
boxplot( rowSums(pantr_ctcf),
rowSums(pantr_ctcf[peaks_lost_in_Pantr,]),
rowSums(pantr_ctcf[peaks_gained_in_Pantr,]),
outline=FALSE,
notch=TRUE, col="white", border=c("black","gray","green3"),
lwd=2)
axis(1,lwd=2, at=c(1,2,3))
axis(2,lwd=2)
box(col="black",lwd=2)
= max(c(length(rowSums(pantr_ctcf)),
theNtimes length(rowSums(pantr_ctcf[peaks_lost_in_Pantr,])),
length(rowSums(pantr_ctcf[peaks_gained_in_Pantr,]))))
= data.frame( All=rep(NA,theNtimes),
df Lost=rep(NA,theNtimes),
Gained=rep(NA,theNtimes))
$All[1:length((rowSums(pantr_ctcf)))] = (rowSums(pantr_ctcf))
df$Lost[1:length((rowSums(pantr_ctcf[peaks_lost_in_Pantr,])))] = (rowSums(pantr_ctcf[peaks_lost_in_Pantr,]))
df$Gained[1:length((rowSums(pantr_ctcf[peaks_gained_in_Pantr,])))] = (rowSums(pantr_ctcf[peaks_gained_in_Pantr,]))
df
write.table(df,file=paste0(source_data_directory,"boxplots_CTCF_signal__Pantr1KO.txt"),row.names = FALSE, sep="\t",quote=FALSE)
write.table( pantr_ctcf, file=paste0(data_directory,"pantr_ctcf.txt"),
quote=FALSE, row.names=FALSE, col.names = TRUE,
sep="\t" )
sum(rownames(lost_loops) %in% rownames(loops_lost_pantr))
## [1] 502
sum(rownames(gained_loops) %in% rownames(loops_gained_pantr))
## [1] 11
sum(peaks_lost_in_Pantr %in% peaks_at_loops_lost_in_Pantr)
## [1] 530
= data.frame( left=countOverlaps(anchors_left_big[which(anchors_left_big$names %in% rownames(loops_lost_pantr))],ctcf_wt_NS_peaks[ peaks_lost_in_Pantr]),
lost_peal_lost_loop_Pantr right=countOverlaps(anchors_right_big[which(anchors_right_big$names %in% rownames(loops_lost_pantr))],ctcf_wt_NS_peaks[peaks_lost_in_Pantr]))
= which( ipf$a1>ipf$cb1 & ipf$a1>ipf$ce10 & ipf$a3>ipf$cb1 & ipf$a3>ipf$ce10 & ipf_pantr1$a3>ipf_pantr1$pe3 & ipf_pantr1$a3>ipf_pantr1$pb6 & degron_ipf$dmso>degron_ipf$dtag )
lostInAll
= which( ipf$a1<ipf$cb1 & ipf$a1<ipf$ce10 & ipf$a3<ipf$cb1 & ipf$a3<ipf$ce10 & ipf_pantr1$a3<ipf_pantr1$pe3 & ipf_pantr1$a3<ipf_pantr1$pb6 & degron_ipf$dmso<degron_ipf$dtag )
gainedInAll
write.table( rownames(ipf)[lostInAll], file=paste0(data_directory,"lostInAll.txt"),
quote=FALSE, row.names=FALSE, col.names = TRUE,
sep="\t" )
write.table( rownames(ipf)[gainedInAll], file=paste0(data_directory,"gainedInAll.txt"),
quote=FALSE, row.names=FALSE, col.names = TRUE,
sep="\t" )
Strong loops are affected by Pantr1/Ddx5
load(paste0(data_directory,"IPF_loops.RData"))
= rowMeans(cbind(ipf$a1,ipf$a3,ipf_pantr1$a3))
loop_strength names(loop_strength) = rownames(ipf)
boxplot( loop_strength[names(loop_strength) %in% rownames(ipf)[lostInAll]],
names(loop_strength) %in% rownames(ipf)[gainedInAll]],
loop_strength[names(loop_strength) %in% rownames(ipf)[-c(lostInAll,gainedInAll)]],
loop_strength[outline=FALSE, notch=TRUE,
names=c("Lost","Gained","Other"),
col="white",
border=c("gray","green3","black"),lwd=2,ylab="Hi-C IPF signal")
axis(1,lwd=1,at=c(1,2,3),c("Lost","Gained","Other"))
axis(2,lwd=1)
length(loop_strength[names(loop_strength) %in% rownames(ipf)[lostInAll]])
## [1] 1916
length(loop_strength[names(loop_strength) %in% rownames(ipf)[gainedInAll]])
## [1] 21
length(loop_strength[names(loop_strength) %in% rownames(ipf)[-c(lostInAll,gainedInAll)]])
## [1] 10617
= max(c(length(lostInAll),length(gainedInAll), length(rownames(ipf)[-c(lostInAll,gainedInAll)])))
theNtimes = data.frame( lost=rep(NA,theNtimes),
df gained=rep(NA,theNtimes),
other=rep(NA,theNtimes))
$lost[1:length(loop_strength[names(loop_strength) %in% rownames(ipf)[lostInAll]])] = loop_strength[names(loop_strength) %in% rownames(ipf)[lostInAll]]
df$gained[1:length(loop_strength[names(loop_strength) %in% rownames(ipf)[gainedInAll]])] = loop_strength[names(loop_strength) %in% rownames(ipf)[gainedInAll]]
df$other[1:length(loop_strength[names(loop_strength) %in% rownames(ipf)[-c(lostInAll,gainedInAll)]])] = loop_strength[names(loop_strength) %in% rownames(ipf)[-c(lostInAll,gainedInAll)]]
dfwrite.table(df,file=paste0(source_data_directory,"boxplots_Loop_signal__loops_affected_by_Pantr1KO.txt"),
row.names = FALSE, sep="\t",quote=FALSE)
= function( B, D, A, M ){
getSUMMEDsignal4bins # B=bins-HOWFAR; D=distance; A=Area; M=thism
= unlist(lapply(as.list(B), function(b){
rows rep( ( (b-D) + seq(-A,A)) , (1+2*A))}))
= unlist(lapply(as.list(B), function(b){
cols rep( ( (b+D) + seq(-A,A) ), each=(1+2*A))}))
= cbind(rows, cols)
ids = M[ids]
m = rep( names(B), each = (1+2*A)^2 )
IDS # print(c(min(ids),max(ids)))
= split( m, IDS )
matrices return( unlist(lapply(matrices,function(m){sum(m, na.rm=T)})) )}
= function( bin, mat, GAGR, distance, Area, HOWFAR ){
InsulationScore ## mat=ko_10kb; distance=5; Area=3;chr="chr1";GAGR=gagr;HOWFAR=10
## bin = gagr[which(chrom(gagr)!="chrY")]
## rm(list=c("mat","distance","Area","chr","GAGR","HOWFAR","B","D","A","M"))
do.call("rbind",lapply( as.list(unique(as.character(chrom(bin)))),function(chr){
print(paste0("processing ",chr))
= mat[[chr]]$balanced
thism = GenomicRanges::resize( bin[which(chrom(bin)==chr)], 1, fix="center")
theseb
= GAGR$binid[queryHits(findOverlaps(GAGR,theseb))]
bins names(bins) = names(theseb[subjectHits(findOverlaps(GAGR,theseb))])
= bins[bins-distance-HOWFAR-Area>0 & bins+distance+HOWFAR+Area<(length(GAGR[which(chrom(GAGR)==chr)]))]
bins
return(data.frame( left=getSUMMEDsignal4bins(B=bins-HOWFAR, D=distance, A=Area, M=thism ),
middle=getSUMMEDsignal4bins(B=bins, D=distance, A=Area, M=thism ),
right=getSUMMEDsignal4bins(B=bins+HOWFAR, D=distance, A=Area, M=thism ) )) } )) }
= function( bin, mat, GAGR, distance, Area, HOWFAR ){
InsulationScorePublishedKR ## mat=ko_10kb; distance=5; Area=3;chr="chr1";GAGR=gagr;HOWFAR=10
## bin = gagr[which(chrom(gagr)!="chrY")]
## rm(list=c("mat","distance","Area","chr","GAGR","HOWFAR","B","D","A","M"))
do.call("rbind",lapply( as.list(unique(as.character(chrom(bin)))),function(chr){
print(paste0("processing ",chr))
= mat[[chr]]
thism = GenomicRanges::resize( bin[which(chrom(bin)==chr)], 1, fix="center")
theseb
= GAGR$binid[queryHits(findOverlaps(GAGR,theseb))]
bins names(bins) = names(theseb[subjectHits(findOverlaps(GAGR,theseb))])
= bins[bins-distance-HOWFAR-Area>0 & bins+distance+HOWFAR+Area<(length(GAGR[which(chrom(GAGR)==chr)]))]
bins
return(data.frame( left=getSUMMEDsignal4bins(B=bins-HOWFAR, D=distance, A=Area, M=thism ),
middle=getSUMMEDsignal4bins(B=bins, D=distance, A=Area, M=thism ),
right=getSUMMEDsignal4bins(B=bins+HOWFAR, D=distance, A=Area, M=thism ) )) } )) }
= function( IS, GAGR ){
processIS = GAGR
res $binid=NULL
res$score = 0
res$score[match(rownames(IS),names(res))] = log2( rowMeans((0.001+IS[,c(1,3)]))/(0.001+IS[,2] ) )
resreturn(res) }
= function(x){
cleanIS is.na(x)]=0
x[!is.finite(x)]=0
x[return(x) }
Genome wide insulation score - prepare gagr again
= GRanges(seqnames = Rle(ga$chr),
gagr ranges = IRanges(as.numeric(ga$start),
end = as.numeric(ga$end),
names = seq(1, nrow(ga))),
strand = Rle(rep("*", nrow(ga))),
binid = ga$binid,
seqlengths = seqlengths(BSgenome.Mmusculus.UCSC.mm10))
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 21 out-of-bound ranges located on sequences
## chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11,
## chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chrX, and chrY.
## Note that ranges located on a sequence whose length is unknown (NA) or
## on a circular sequence are not considered out-of-bound (use
## seqlengths() and isCircular() to get the lengths and circularity flags
## of the underlying sequences). You can use trim() to trim these ranges.
## See ?`trim,GenomicRanges-method` for more information.
= trim(gagr)
gagr names(gagr) = paste(chrom(gagr),names(gagr),sep="_")
load(paste0( hic_directory,"ko_10kb.RData"))
load(paste0( hic_directory,"wt_A1_A3_10kb.RData"))
names(wt_10kb) = paste0("chr",c(1:19,"X"))
names(ko_10kb) = paste0("chr",c(1:19,"X"))
# 5,1,10
= InsulationScore( gagr[which(chrom(gagr)!="chrY")],
genome_wide_IS_wt
wt_10kb, 5, 1, 15 )
gagr, = InsulationScore( gagr[which(chrom(gagr)!="chrY")],
genome_wide_IS_ko 5, 1, 15 )
ko_10kb, gagr,
save(genome_wide_IS_wt,file=paste0(objects_directory,"genome_wide_IS__wt.RData"))
save(genome_wide_IS_ko,file=paste0(objects_directory,"genome_wide_IS__ko.RData"))
Insulation at loop anchors - is it affected?
load(paste0(objects_directory,"genome_wide_IS__wt.RData"))
load(paste0(objects_directory,"genome_wide_IS__ko.RData"))
= log2(rowMeans(genome_wide_IS_wt[,c(1,3)])/genome_wide_IS_wt[,2])
wt_is_genome_wide = log2(rowMeans(genome_wide_IS_ko[,c(1,3)])/genome_wide_IS_ko[,2])
ko_is_genome_wide
= cleanIS(wt_is_genome_wide)
wt_is_genome_wide = cleanIS(ko_is_genome_wide)
ko_is_genome_wide
= gagr[which(chrom(gagr)!='chrY')]
wt_is_genome_wide_gr $score=0
wt_is_genome_wide_gr$score[match(names(wt_is_genome_wide),names(wt_is_genome_wide_gr))] = wt_is_genome_wide
wt_is_genome_wide_gr$binid=NULL
wt_is_genome_wide_gr
= gagr[which(chrom(gagr)!='chrY')]
ko_is_genome_wide_gr $score=0
ko_is_genome_wide_gr$score[match(names(ko_is_genome_wide),names(ko_is_genome_wide_gr))] = ko_is_genome_wide
ko_is_genome_wide_gr$binid=NULL
ko_is_genome_wide_gr
export.bw( wt_is_genome_wide_gr,con=paste0(objects_directory,"wt_is_genome_wide_gr.bw"))
export.bw( ko_is_genome_wide_gr,con=paste0(objects_directory,"ko_is_genome_wide_gr.bw"))
IS at peaks of insulation in the wild type cells
= wt_is_genome_wide_gr[which(wt_is_genome_wide_gr$score>0.75)]
is_peaks = GenomicRanges::reduce(is_peaks)
is_peaks_filt = is_peaks_filt[which(width(is_peaks_filt)>2)]
is_peaks_filt = as.data.frame( findOverlaps(is_peaks_filt,gagr) )
is_peaks_filt_binid
$wt = wt_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits]
is_peaks_filt_binid$ko = ko_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits]
is_peaks_filt_binid$wt_rand = wt_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits-10]
is_peaks_filt_binid$ko_rand = ko_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits-10]
is_peaks_filt_binid
= split(is_peaks_filt_binid,is_peaks_filt_binid$queryHits)
is_peaks_filt_binid_s save(is_peaks_filt_binid_s,file=paste0(objects_directory,'is_peaks_filt_binid_s.RData'))
load(paste0(objects_directory,'is_peaks_filt_binid_s.RData'))
= unlist(lapply(is_peaks_filt_binid_s,function(x){mean(x$ko-x$wt)}))
wt_mut_peaks = unlist(lapply(is_peaks_filt_binid_s,function(x){mean(x$ko_rand-x$wt_rand)}))
wt_mut_random
par(mfrow=c(1,2),pty='m',mar=c(7,5,1,1))
hist(wt_mut_peaks,n=100,
xlim=c(-3,3),col="steelblue", ylim=c(0,2000),main='',
xlab='change in insulation\nDdx5 vs.Wt')
axis(1,lwd=2)
axis(2,lwd=2)
abline(v=0,col="red4",lwd=2,lty=2)
boxplot( wt_mut_peaks, wt_mut_random, outline=FALSE,
border=c('blue3','gray'), col='white',notch=TRUE,
names=c('Peaks','random'),
ylab='change in insulation (Ddx5 vs. Wt)',las=2)
axis(1,lwd=2,at=c(1,2))
axis(2,lwd=2,las=2)
box(col="black",lwd=2)
abline(h=0,col="red4",lwd=2,lty=2)
length(wt_mut_peaks)
## [1] 6132
length(wt_mut_random)
## [1] 6132
= data.frame( wt_mut_peaks=wt_mut_peaks,
df wt_mut_random=wt_mut_random)
write.table(df,file=paste0(source_data_directory,"insulation_supplement.txt"),row.names = FALSE, sep="\t",quote=FALSE)
= read.hic_files( hic_directory, "bonev_KR_dump/chr7_5kb_NS_KR_matrix_",".txt",
ns_5kb_chr7 "chr7" )
ga5,
= InsulationScorePublishedKR( gagr5[which(chrom(gagr5)=="chr7")],
genome_wide_IS_ns
ns_5kb_chr7, 5, 3, 10 )
gagr5, = log2(genome_wide_IS_ns[,2]/rowMeans(genome_wide_IS_ns[,c(1,3)]))
ns_is = cleanIS(ns_is)
ns_is
= gagr5[which(chrom(gagr5)=='chr7')]
ns_is_gr $score=0
ns_is_gr$score[match(names(ns_is),names(ns_is_gr))] = ns_is
ns_is_gr$binid=NULL
ns_is_grexport.bw( ns_is_gr, con=paste0( hic_directory,"bonev_KR_dump/IS_chr7_NS_5kb1.bw" ))
= import.bw(paste0( hic_directory,"bonev_KR_dump/IS_chr7_NS_5kb1.bw" ))
ns_is_gr plot(start(ns_is_gr[(65700/5):(66900/5)]),
65700/5):(66900/5)]$score,
ns_is_gr[(ty="l",xaxs="i",ylab="Insulation",xlab="chr7")
abline(h=0)
rm(list=c("wt_10kb","ko_10kb"))
## Warning in rm(list = c("wt_10kb", "ko_10kb")): nie znaleziono obiektu 'wt_10kb'
## Warning in rm(list = c("wt_10kb", "ko_10kb")): nie znaleziono obiektu 'ko_10kb'
load(paste0( hic_directory,"A3_rep2_10kb_hic.RData" ) ) # A3_rep2
load(paste0( hic_directory,"PB6_10kb_hic.RData" ) ) # pb6
load(paste0( hic_directory,"PE3_10kb_hic.RData" ) ) # Treated
=Treated
pe3rm(list=c("Treated"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 28629340 1529.0 88257021 4713.5 NA 88257021 4713.5
## Vcells 5133133149 39162.7 7581327462 57841.0 131072 7422216756 56627.1
= GRanges(seqnames = Rle(ga$chr),
gagr ranges = IRanges(as.numeric(ga$start),
end = as.numeric(ga$end),
names = seq(1, nrow(ga))),
strand = Rle(rep("*", nrow(ga))),
binid = ga$binid,
seqlengths = seqlengths(BSgenome.Mmusculus.UCSC.mm10))
= trim(gagr)
gagr names(gagr) = paste(chrom(gagr),names(gagr),sep="_")
= InsulationScore( gagr[which(chrom(gagr)!="chrY")],
genome_wide_IS_a3
A3_rep2, 5, 1, 15 )
gagr, = InsulationScore( gagr[which(chrom(gagr)!="chrY")],
genome_wide_IS_p3
pe3, 5, 1, 15 )
gagr, = InsulationScore( gagr[which(chrom(gagr)!="chrY")],
genome_wide_IS_p6
pb6, 5, 1, 15 )
gagr,
save(genome_wide_IS_a3,file=paste0(objects_directory,"genome_wide_IS_a3.RData"))
save(genome_wide_IS_p3,file=paste0(objects_directory,"genome_wide_IS_p3.RData"))
save(genome_wide_IS_p6,file=paste0(objects_directory,"genome_wide_IS_p6.RData"))
= log2(rowMeans(genome_wide_IS_a3[,c(1,3)])/genome_wide_IS_a3[,2])
a3_is_genome_wide = log2(rowMeans(genome_wide_IS_p3[,c(1,3)])/genome_wide_IS_p3[,2])
p3_is_genome_wide = log2(rowMeans(genome_wide_IS_p6[,c(1,3)])/genome_wide_IS_p6[,2])
p6_is_genome_wide
= cleanIS(a3_is_genome_wide)
a3_is_genome_wide = cleanIS(p3_is_genome_wide)
p3_is_genome_wide = cleanIS(p6_is_genome_wide)
p6_is_genome_wide
= function( iso, binannoF ){
getbwFile = binannoF[which(chrom(binannoF)!='chrY')]
tp $score=0
tp$score[match(names(iso),names(tp))] = iso
tp$binid=NULL
tpreturn(tp)
}
= getbwFile( a3_is_genome_wide, gagr )
a3_is_genome_wide_gr = getbwFile( p3_is_genome_wide, gagr )
p3_is_genome_wide_gr = getbwFile( p6_is_genome_wide, gagr )
p6_is_genome_wide_gr
export.bw( a3_is_genome_wide_gr,con=paste0(objects_directory,"a3_is_genome_wide_gr.bw"))
export.bw( p3_is_genome_wide_gr,con=paste0(objects_directory,"p3_is_genome_wide_gr.bw"))
export.bw( p6_is_genome_wide_gr,con=paste0(objects_directory,"p6_is_genome_wide_gr.bw"))
save( a3_is_genome_wide_gr, p3_is_genome_wide_gr, p6_is_genome_wide_gr,gagr,
file = paste0( objects_directory,"is_Pantr1.RData" ) )
Let’s check if insulation changes at insulators
load( paste0( objects_directory,"is_Pantr1.RData" ) )
= a3_is_genome_wide_gr[which(a3_is_genome_wide_gr$score>0.75)]
is_peaks = GenomicRanges::reduce(is_peaks)
is_peaks_filt = is_peaks_filt[which(width(is_peaks_filt)>2)]
is_peaks_filt = as.data.frame( findOverlaps(is_peaks_filt,gagr) )
is_peaks_filt_binid
$a3 = a3_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits]
is_peaks_filt_binid$pe3 = p3_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits]
is_peaks_filt_binid$pb6 = p6_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits]
is_peaks_filt_binid$a3_rand = a3_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits-10]
is_peaks_filt_binid$pe3_rand = p3_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits-10]
is_peaks_filt_binid$pb6_rand = p6_is_genome_wide_gr$score[is_peaks_filt_binid$subjectHits-10]
is_peaks_filt_binid
= split(is_peaks_filt_binid,is_peaks_filt_binid$queryHits)
is_peaks_filt_binid_s
= unlist(lapply(is_peaks_filt_binid_s,function(x){mean(x$pe3-x$a3)}))
a3_p3_peaks = unlist(lapply(is_peaks_filt_binid_s,function(x){mean(x$pe3_rand-x$a3_rand)}))
a3_p3_random = unlist(lapply(is_peaks_filt_binid_s,function(x){mean(x$pb6-x$a3)}))
a3_p6_peaks = unlist(lapply(is_peaks_filt_binid_s,function(x){mean(x$pb6_rand-x$a3_rand)}))
a3_p6_random
par(mfrow=c(1,1),pty='m',
mar=c(7,5,3,3))
boxplot( a3_p3_peaks,
a3_p3_random,
a3_p6_peaks,
a3_p6_random,outline=FALSE,
border=c('purple4','gray'),
col='white',notch=TRUE,
names=c('Insulators','random','Insulators','random'),
ylab='Pantr1 vs. Wt',las=2,lwd=2)
box(col="black",lwd=2)
axis(1,lwd=2,at=c(1,2,3,4),
c('Insulators','random','Insulators','random'),las=2)
axis(2,lwd=2,las=2)
abline(h=0,col="red4",lwd=2,lty=2)
= read.delim( paste0(data_directory,"ES_SICAP_EMPAIMORETHAN1.txt"))
es_sicap = read.delim( paste0(data_directory,"NS_SICAP_EMPAIMORETHAN1.txt"))
ns_sicap
= es_sicap[es_sicap$Benjamini<0.1,]
es_sicap = es_sicap[unlist(lapply(strsplit(es_sicap$Term,":"),function(x){x[[1]]=="GO"})),]
es_sicap
par(mar=c(5,20,1,1))
barplot( rev(-log10(es_sicap$Benjamini[1:30])),
names=rev(es_sicap$Term[1:30]),las=2,
horiz=TRUE, col="red")
axis(1,lwd=2,las=2,cex.lab=0.3)
par(mar=c(5,20,1,1))
barplot( rev(-log10(ns_sicap$Benjamini)[1:10]),
names=rev(ns_sicap$Term[1:10]),las=2,
horiz=TRUE, col="blue")
axis(1,lwd=2,las=2)
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] pl_PL.UTF-8/pl_PL.UTF-8/pl_PL.UTF-8/C/pl_PL.UTF-8/pl_PL.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] forecast_8.21
## [2] pqsfinder_2.8.0
## [3] sigminer_2.3.1
## [4] ChIPseeker_1.28.3
## [5] motifbreakR_2.6.1
## [6] MotifDb_1.34.0
## [7] ggpubr_0.6.0
## [8] Matrix_1.5-4
## [9] BSgenome.Mmusculus.UCSC.mm10_1.4.0
## [10] BSgenome_1.60.0
## [11] Biostrings_2.60.2
## [12] XVector_0.32.0
## [13] smoothmest_0.1-3
## [14] MASS_7.3-58.3
## [15] scales_1.3.0
## [16] lubridate_1.9.4
## [17] forcats_1.0.0
## [18] stringr_1.5.1
## [19] purrr_1.0.2
## [20] readr_2.1.5
## [21] tidyr_1.3.1
## [22] tibble_3.2.1
## [23] tidyverse_2.0.0
## [24] dplyr_1.1.4
## [25] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
## [26] GenomicFeatures_1.44.2
## [27] pheatmap_1.0.12
## [28] RColorBrewer_1.1-3
## [29] ggplot2_3.5.1
## [30] goseq_1.44.0
## [31] geneLenDataBase_1.28.0
## [32] BiasedUrn_2.0.12
## [33] fgsea_1.18.0
## [34] gwasrapidd_0.99.17
## [35] ggVennDiagram_1.5.2
## [36] vsn_3.60.0
## [37] VennDiagram_1.7.3
## [38] futile.logger_1.4.3
## [39] gplots_3.2.0
## [40] biomaRt_2.48.3
## [41] geneplotter_1.70.0
## [42] annotate_1.70.0
## [43] XML_3.99-0.18
## [44] lattice_0.22-6
## [45] LSD_4.1-0
## [46] org.Hs.eg.db_3.13.0
## [47] AnnotationDbi_1.54.1
## [48] DESeq2_1.32.0
## [49] SummarizedExperiment_1.22.0
## [50] Biobase_2.52.0
## [51] MatrixGenerics_1.4.3
## [52] matrixStats_1.5.0
## [53] sf_1.0-12
## [54] rtracklayer_1.52.1
## [55] GenomicRanges_1.44.0
## [56] GenomeInfoDb_1.28.4
## [57] IRanges_2.26.0
## [58] S4Vectors_0.30.2
## [59] BiocGenerics_0.38.0
##
## loaded via a namespace (and not attached):
## [1] Hmisc_5.0-1
## [2] class_7.3-23
## [3] Rsamtools_2.8.0
## [4] lmtest_0.9-40
## [5] foreach_1.5.2
## [6] crayon_1.5.3
## [7] nlme_3.1-162
## [8] backports_1.5.0
## [9] GOSemSim_2.18.1
## [10] rlang_1.1.4
## [11] limma_3.48.3
## [12] filelock_1.0.3
## [13] BiocParallel_1.26.2
## [14] rjson_0.2.23
## [15] bit64_4.5.2
## [16] glue_1.8.0
## [17] rngtools_1.5.2
## [18] motifStack_1.36.1
## [19] classInt_0.4-9
## [20] DOSE_3.18.3
## [21] tidyselect_1.2.1
## [22] zoo_1.8-12
## [23] GenomicAlignments_1.28.0
## [24] xtable_1.8-4
## [25] magrittr_2.0.3
## [26] evaluate_1.0.1
## [27] quantmod_0.4.27
## [28] cli_3.6.3
## [29] zlibbioc_1.38.0
## [30] rstudioapi_0.17.1
## [31] furrr_0.3.1
## [32] bslib_0.8.0
## [33] rpart_4.1.24
## [34] fastmatch_1.1-6
## [35] ensembldb_2.16.4
## [36] lambda.r_1.2.4
## [37] treeio_1.16.2
## [38] xfun_0.50
## [39] cluster_2.1.8
## [40] urca_1.3-3
## [41] caTools_1.18.3
## [42] tidygraph_1.2.3
## [43] KEGGREST_1.32.0
## [44] ggrepel_0.9.6
## [45] biovizBase_1.40.0
## [46] ape_5.8-1
## [47] listenv_0.9.1
## [48] TFMPvalue_0.0.9
## [49] png_0.1-8
## [50] future_1.34.0
## [51] withr_3.0.2
## [52] bitops_1.0-9
## [53] ggforce_0.4.2
## [54] plyr_1.8.9
## [55] AnnotationFilter_1.16.0
## [56] e1071_1.7-16
## [57] pillar_1.10.1
## [58] cachem_1.1.0
## [59] fs_1.6.5
## [60] TTR_0.24.3
## [61] xts_0.13.1
## [62] vctrs_0.6.5
## [63] generics_0.1.3
## [64] NMF_0.28
## [65] tools_4.1.0
## [66] foreign_0.8-87
## [67] munsell_0.5.1
## [68] tweenr_2.0.3
## [69] proxy_0.4-27
## [70] DelayedArray_0.18.0
## [71] fastmap_1.2.0
## [72] compiler_4.1.0
## [73] abind_1.4-8
## [74] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [75] Gviz_1.36.2
## [76] GenomeInfoDbData_1.2.6
## [77] gridExtra_2.3
## [78] deldir_1.0-6
## [79] BiocFileCache_2.0.0
## [80] jsonlite_1.8.9
## [81] affy_1.70.0
## [82] tidytree_0.4.6
## [83] carData_3.0-5
## [84] genefilter_1.74.1
## [85] lazyeval_0.2.2
## [86] tseries_0.10-54
## [87] car_3.1-3
## [88] doParallel_1.0.17
## [89] latticeExtra_0.6-30
## [90] splitstackshape_1.4.8
## [91] checkmate_2.3.2
## [92] rmarkdown_2.29
## [93] cowplot_1.1.3
## [94] dichromat_2.0-0.1
## [95] igraph_1.4.2
## [96] survival_3.8-3
## [97] yaml_2.3.10
## [98] plotrix_3.8-4
## [99] htmltools_0.5.8.1
## [100] memoise_2.0.1
## [101] VariantAnnotation_1.38.0
## [102] BiocIO_1.2.0
## [103] locfit_1.5-9.10
## [104] quadprog_1.5-8
## [105] graphlayouts_1.2.1
## [106] viridisLite_0.4.2
## [107] digest_0.6.37
## [108] rappdirs_0.3.3
## [109] futile.options_1.0.1
## [110] registry_0.5-1
## [111] units_0.8-1
## [112] RSQLite_2.3.9
## [113] yulab.utils_0.1.9
## [114] data.table_1.16.4
## [115] fracdiff_1.5-2
## [116] blob_1.2.4
## [117] preprocessCore_1.54.0
## [118] splines_4.1.0
## [119] Formula_1.2-5
## [120] labeling_0.4.3
## [121] ProtGenerics_1.24.0
## [122] RCurl_1.98-1.16
## [123] broom_1.0.7
## [124] hms_1.1.3
## [125] colorspace_2.1-1
## [126] base64enc_0.1-3
## [127] BiocManager_1.30.25
## [128] aplot_0.1.10
## [129] nnet_7.3-20
## [130] sass_0.4.9
## [131] Rcpp_1.0.13-1
## [132] enrichplot_1.12.3
## [133] tzdb_0.4.0
## [134] parallelly_1.41.0
## [135] R6_2.5.1
## [136] lifecycle_1.0.4
## [137] formatR_1.14
## [138] curl_6.1.0
## [139] ggsignif_0.6.4
## [140] affyio_1.62.0
## [141] jquerylib_0.1.4
## [142] DO.db_2.9
## [143] qvalue_2.24.0
## [144] iterators_1.0.14
## [145] htmlwidgets_1.6.4
## [146] polyclip_1.10-7
## [147] shadowtext_0.1.4
## [148] timechange_0.3.0
## [149] gridGraphics_0.5-1
## [150] mgcv_1.9-1
## [151] globals_0.16.3
## [152] htmlTable_2.4.3
## [153] patchwork_1.3.0
## [154] codetools_0.2-20
## [155] GO.db_3.13.0
## [156] gtools_3.9.5
## [157] prettyunits_1.2.0
## [158] dbplyr_2.5.0
## [159] gridBase_0.4-7
## [160] gtable_0.3.6
## [161] DBI_1.2.3
## [162] ggfun_0.0.9
## [163] httr_1.4.7
## [164] KernSmooth_2.23-20
## [165] stringi_1.8.4
## [166] progress_1.2.3
## [167] reshape2_1.4.4
## [168] farver_2.1.2
## [169] viridis_0.6.5
## [170] fdrtool_1.2.18
## [171] timeDate_4041.110
## [172] ggtree_3.0.4
## [173] xml2_1.3.6
## [174] boot_1.3-31
## [175] restfulr_0.0.15
## [176] interp_1.1-4
## [177] ade4_1.7-22
## [178] ggplotify_0.1.2
## [179] bit_4.5.0.1
## [180] scatterpie_0.2.4
## [181] jpeg_0.1-10
## [182] ggraph_2.2.1
## [183] pkgconfig_2.0.3
## [184] rstatix_0.7.2
## [185] knitr_1.49