Preprocessing functions

getGR = function(chr,start,end,offset){
  tp = GRanges(seqnames = Rle(chr),
               ranges = IRanges(start-offset,
                                end = end+offset),
               strand = Rle(rep("*", length(start))),
               loop = seq(1, length(start)) ) 
  seqlevelsStyle(tp)='ucsc' 
  return(tp)}


binAnno = function( chrSizes, chroms, binSize ){
  
  do.call('rbind', lapply(chroms, function(chr){
    chr.length = seqlengths(chrSizes[chr])
    dongling = chr.length %% binSize
    if( dongling > 0 ) chr.length = chr.length + (binSize-dongling) + 1 
    data.frame( chr=chr,
                start=seq(1, chr.length-binSize, by=binSize),
                end = seq(binSize, chr.length, by=binSize),
                stringsAsFactors=FALSE )
  }))
  
}

## helper to vectorize the read dumped interaction file
transformToSparseMatrix=function( df,row.annotation,col.annotation ){
  # row.annotation=rowAnnotation; col.annotation=colAnnotation
  sm = Matrix( 0, nrow=nrow(row.annotation), 
                  ncol=nrow(col.annotation) )
  tp = cbind( row=match(as.numeric(df[,1]+1),row.annotation[,2]),
              col=match(as.numeric(df[,2]+1),col.annotation[,2]) )
  sm[tp] = df[,3]
  return( sm )
  
}  

## function to read matrices dumped from .hic files
read.hic_files = function( directory, prefix, suffix, binAnnotation, chroms ){
  
  chrlist = as.list(chroms)
  names(chrlist)= chroms
  
  # prefix="ES1_10kb/ES1_30_"; suffix=".txt"; directory=dumped_dir; binAnnotation=ga
  # prefix=""; suffix=".matrix.txt"; directory=dumped_directory_ele; binAnnotation=ga;chrlist= paste0("chr",c(1:22,"X"))
  lapply( as.list(chrlist), function(chromosome){
    # left = unlist( strsplit( chromosome, ' ' ) )[1] 
    # right = unlist( strsplit( chromosome, ' ' ) )[2]
    # newChromosome = paste(left,right,sep='_')
    # chromosome = "chr1"
    df = read.delim( paste0(directory, prefix, chromosome, suffix ), as.is=T, header=F )
    df = df[!is.na(df$V3),]
    df[,1] = as.numeric(df[,1])
    rowAnnotation = binAnnotation[binAnnotation$chr==chromosome,]
    colAnnotation = binAnnotation[binAnnotation$chr==chromosome,]
    print(chromosome)
    # print(chromosome)
    return( transformToSparseMatrix(df, 
                                    row.annotation = rowAnnotation,
                                    col.annotation = colAnnotation ) )
  } )
}

## wrap all and create a Genome-wide matrix of interactions
getGWmatrix=function( list.of.sm, ga ){
  
  nummSM = Matrix(0, nrow=nrow(ga), ncol=nrow(ga) )
  
  ids = do.call('rbind', lapply( as.list(names(list.of.sm)), function(chrom_comb){
    print( paste0('including ', chrom_comb) )
    chrom = unlist(strsplit(chrom_comb, " "))
    if( chrom[1] == 'chr1' & chrom[2] == 'chr1') {start.row = 0
    start.col = 0}
    if( chrom[1] == 'chr1' & chrom[2] != 'chr1') {start.row = 0
    start.col = min(which( ga$chr==chrom[2] ))-1}
    if( chrom[1] != 'chr1') {start.row = min(which( ga$chr==chrom[1] ))-1
    start.col = min(which( ga$chr==chrom[2] ))-1 }
    tp = as.data.frame(summary(list.of.sm[[chrom_comb]]),stringsAsFactors=F )
    tp$i = tp$i + start.row
    tp$j = tp$j + start.col
    return(tp)
  }))
  print( 'assembling the matrix, depending on the resolution this might take a while' )
  nummSM[ cbind(ids[,1],ids[,2])]=ids[,3]
  return( nummSM )
  
}

## wrap all and create a Genome-wide matrix of interactions
## this function is foreseen to use the list of objects returned by IPF
getGWmatrix_dedicated=function( list.of.sm, ga, which.object ){
  
  nummSM = Matrix(0, nrow=nrow(ga), ncol=nrow(ga) )
  
  if( which.object == 'LFM' ) {i=1}
  if( which.object == 'balanced' ) {i=2}
  
  ids = do.call('rbind', lapply( as.list(names(list.of.sm)), function(chrom){
    print( paste0('including ', chrom) )
    if( chrom == 'chr1' ) {start.row = 0; start.col = 0}
    if( chrom != 'chr1')  {start.row = min(which( ga$chr==chrom ))-1
                           start.col = min(which( ga$chr==chrom ))-1 }
    tp = as.data.frame(summary(list.of.sm[[chrom]][[i]]),stringsAsFactors=F )
    tp$i = tp$i + start.row
    tp$j = tp$j + start.col
    return(tp)
  }))
  print( 'assembling the matrix, depending on the resolution this might take a while' )
  nummSM[ cbind(ids[,1],ids[,2])]=ids[,3]
  return( nummSM )
  
}

getGWmatrix_intra=function( list.of.sm, ga, which.object ){
  if( which.object == 'LFM' ) {i=1}
  if( which.object == 'balanced' ) {i=2}
  
  nummSM = Matrix(0, nrow=nrow(ga), ncol=nrow(ga) )

  ids = do.call('rbind', lapply( as.list(names(list.of.sm)), function(chrom){
    print( paste0('including ', chrom) )
    if( chrom == 'chr1' ) {start.row = 0; start.col = 0}
    if( chrom != 'chr1')  {start.row = min(which( ga$chr==chrom ))-1
    start.col = min(which( ga$chr==chrom ))-1 }
    tp = as.data.frame(summary(list.of.sm[[chrom]][[i]]),stringsAsFactors=F )
    tp$i = tp$i + start.row
    tp$j = tp$j + start.col
    return(tp)
  }))
  print( 'assembling the matrix, depending on the resolution this might take a while' )
  nummSM[ cbind(ids[,1],ids[,2])]=ids[,3]
  return( nummSM )
  
}

Hi-C normalisation

## compute progress of the normalisation
loss.function = function(mat)
{tmp = rowSums(mat)
tmp = tmp[tmp > 0]
rs = abs(tmp - 1)

tmp = colSums(mat)
tmp = tmp[tmp > 0]
cs = abs(tmp - 1)

return( 0.5*sum(c(rs,cs)))  
}

## put the two triangles together
Create_complete_matrix = function(x){
  
  t.sum = as.data.frame(summary(x), stringsAsFactors=FALSE)
  test = sparseMatrix(i = t.sum$i,
                      j = t.sum$j,
                      x = t.sum$x,
                      dims = dim(x),
                      symmetric = TRUE)
  rownames(test) = rownames(x)
  colnames(test) = colnames(x)
  return( test )
  
}

## iterative proportional fitting algorithm
IPF_alg = function(lfm, numberOfIterations){
  
  d =  dim(lfm)[2]
  
  x = matrix(1, nrow = d, ncol = numberOfIterations) 
  y =  matrix(1, nrow = d, ncol = numberOfIterations)
  lf = numeric(numberOfIterations)
  counter = 1  
  
  while( counter <= numberOfIterations) 
  {
    s.temp = rowSums(lfm)
    x[, counter] = ifelse(s.temp > 0,s.temp,1)
    lfm = lfm / x[,counter]
    
    s.temp = colSums(lfm)
    y[, counter] = ifelse(s.temp > 0,s.temp,1)
    
    lfm = t(t(lfm) / y[,counter])
    
    
    lf = c(lf, loss.function(lfm))
    print(paste("loss function: ", lf[length(lf)], "counter: ",  counter))
    counter = counter + 1   
  }
  
  return(list(lfm = lfm, x = x, y=y, lf = lf))
}

## iterate IPF
normalize_LFM_iteratively = function(x, numberOfIterations){
  
  cm = Create_complete_matrix(x)
  tp = IPF_alg(cm, numberOfIterations = numberOfIterations)
  
  x = tp$x
  y = tp$y
  tp.x = data.frame(x)
  test.x = exp(Reduce('+',log(tp.x)))
  
  tp.y = data.frame(y)
  test.y = exp(Reduce('+',log(tp.y)))
  
  fac = smhuber(test.x/test.y)$mu
  
  van = rowSums(cm) == 0 
  test.x[van] = fac
  
  test.x = test.x/sqrt(fac)
  
  tp = cm / test.x
  tp = t(t(tp) / test.x)
  
  return( list( lfm = tp, x=x, y=y ))
  
}

## wrap all into one function
IPF = function(x, numberOfIterations){
  
  twm = normalize_LFM_iteratively(x, numberOfIterations=numberOfIterations)
  biases = twm[2:3]
  corrected = twm[[1]]
  res = vector("list", 3)
  res[[1]] = x
  res[[2]] = corrected
  res[[3]] = biases
  names(res) = c("LFM", "balanced", "IC_biases")
  return(res)
  
  
}

Function - specific tasks in HiC

getPixelsAround = function(ROW,COL,SIZE,ID){
  data.frame( rows = rep( seq(ROW-SIZE,ROW+SIZE),length(seq(ROW-SIZE,ROW+SIZE))),
              col = rep( seq(COL-SIZE,COL+SIZE),each=length(seq(ROW-SIZE,ROW+SIZE))),
              id = ID ) }
GetCentroidSignal = function( loops, hic, size,CHROMS,which_file ){
  # loops = loops_a3; hic=ko_10kb; size=1;chr="chr1"
  lapply( as.list( CHROMS ),function(chr){
    print(chr)
    theseloops = loops[loops$X.chr1==chr,]
    if(which_file=="balanced") {this_hic = hic[[chr]]$balanced}
    if(which_file=="raw") {this_hic = hic[[chr]]$LFM}
    theseloops = theseloops[theseloops$left_binID-size>1,]
    theseloops = theseloops[theseloops$right_binID+size<nrow(this_hic),]
    coords = do.call("rbind",lapply( split(theseloops,rownames(theseloops)), function(l){
      getPixelsAround(ROW=l$left_binID,
                      COL=l$right_binID,
                      SIZE=size,
                      ID=rownames(l))} ))
    coords$hic_signal = this_hic[ cbind(coords$rows,coords$col)]
    return( coords ) } ) } 
ProcessLoops = function( lo ){
  # lo=human_loops_ele30; pixelSize=3
  tp = do.call("rbind",lo)
  tp = split(tp,tp$id)
  tp = lapply( tp, function(x){matrix(x$hic_signal,nrow=sqrt(nrow(x)),ncol=sqrt(nrow(x)))})
  return( tp )
}
GetCentroidSignalGW = function( loops, hic, size,CHROMS,ga ){
  # loops = loops_a3; hic=ko_10kb; size=1;chr="chr1"
  lapply( as.list( CHROMS ),function(chr){
    print(chr)
    this_hic = hic[ min(which(ga$chr==chr)):max(which(ga$chr==chr)), min(which(ga$chr==chr)):max(which(ga$chr==chr))]
    theseloops = loops[loops$X.chr1==chr,]
    theseloops = theseloops[theseloops$left_binID-size>1,]
    theseloops = theseloops[theseloops$right_binID+size<nrow(this_hic),]
    coords = do.call("rbind",lapply( split(theseloops,rownames(theseloops)), function(l){
      getPixelsAround(ROW=l$left_binID,
                      COL=l$right_binID,
                      SIZE=size,
                      ID=rownames(l))} ))
    coords$hic_signal = this_hic[ cbind(coords$rows,coords$col)]
    return( coords ) } ) } 
readBed_filterChromsStraded = function(filePath, chroms, scoreCol){
  # filePath='/Volumes/T7/CTCF/ATAC_ES_2i_nov/peaks/ATACseq40_ES_2i_IAA_1_MM.narrowPeak'
  x= read.delim(filePath,as.is=T,header=F)
  x[,1] = paste0('chr',x[,1])
  x = x[x[,1] %in% chroms,]
  return( GRanges( seqnames = Rle(x[,1]),
                   ranges = IRanges(as.numeric(x[,2]),
                                    end=as.numeric(x[,3]),
                                    names=rownames(x)),
                   strand = x[,6],
                   score=x[,scoreCol]) ) } 
readNarrowPeak2getSummit = function(filePath, chroms, scoreCol){
  # filePath='/Volumes/T7/CTCF/ATAC_ES_2i_nov/peaks/ATACseq40_ES_2i_IAA_1_MM.narrowPeak'
  x= read.delim(filePath,as.is=T,header=F)
  x[,1] = paste0('chr',x[,1])
  x = x[x[,1] %in% chroms,]
  return( GRanges( seqnames = Rle(x[,1]),
                   ranges = IRanges(as.numeric(x[,2]) + as.numeric(x[,10]) ,
                                    end=as.numeric(x[,2]) + as.numeric(x[,10]),
                                    names=rownames(x)),
                   score=x[,scoreCol]) ) }
appendMotifInformation = function( peakFile, motifFile,summitFile ){
  ## we will only consider peaks which have motif close to the summit
  ## peakFile=ctcf_wt_NS_peaks; motifFile=ctcf_motif; summitFile=ctcf_wt_NS_peaks
  start(summitFile) = start(summitFile)-25
  end(summitFile) = end(summitFile)+25
  peakFile$number_of_motifs=countOverlaps(summitFile,motifFile)
  peakFile$motif_score = 0
  peakFile$motif_strand = factor(rep('*',length(peakFile)),levels = c("*",'+','-'))
  motifFile$realStart = start(motifFile)
  motifFile$realStart[which(strand(motifFile)=='-')] = end(motifFile[which(strand(motifFile)=='-')])
  motifFile$motifID = 0
  
  peak_motif = findOverlaps(summitFile,motifFile)
  tp = data.frame(score=motifFile$score,
                  strand=strand(motifFile),
                  motif_start=motifFile$realStart,
                  motif_id = 1:length(motifFile),
                  stringsAsFactors=FALSE)
  tp = split(tp[subjectHits(peak_motif),],queryHits(peak_motif))
  th = lapply(tp,function(x){x[which.max(x$score),]})
  ## checks:
  # table(unlist(lapply(tp,nrow)))
  # table(unlist(lapply(th)))
  peakFile$motif_score[as.numeric(names(th))] = unlist(lapply(th,function(f){f$score}))
  peakFile$motif_strand[as.numeric(names(th))] = unlist(lapply(th,function(f){f$strand}))
  peakFile$motif_start[as.numeric(names(th))] = unlist(lapply(th,function(f){f$motif_start}))
  peakFile$motifID[as.numeric(names(th))] = unlist(lapply(th,function(f){f$motif_id}))
  return(peakFile) }
compute_putative_loop_signal = function(ctcf_ctcf_loop_signal,central_sq) {
  # central_sq - half window size
  signal_samples = c()
  # Get signal from the central 5X5 square of the matrix
  core_range = (ceiling(ncol(ctcf_ctcf_loop_signal)/2) - central_sq):(ceiling(ncol(ctcf_ctcf_loop_signal)/2) + central_sq)
  # Calculate mean
  core_signal = mean(ctcf_ctcf_loop_signal[core_range,core_range])
  # Set boundaries for random selection to avoid the central 5X5 square
  no_go_range = (min(core_range) - 4):max(core_range)
  check_indexs = 1:(ncol(ctcf_ctcf_loop_signal) - 4)
  check_indexs = check_indexs[!(check_indexs %in% no_go_range)]
  # Get 15 sample points to grab a new 5X5 square from around the center
  sample_rows = sample(check_indexs, 15,replace=TRUE)
  sample_cols = sample(check_indexs, 15,replace=TRUE)
  # For each random sample point calculate the 5X5 region and get mean signal
  for(i in 1:15) {
    signal_samples = c(signal_samples,mean(ctcf_ctcf_loop_signal[sample_rows[i]:(sample_rows[i]+4),sample_cols[i]:(sample_cols[i]+4)]))
  }
  # Compute and return
  # the central signal value,
  # the log2 FC for the core signal vs the average of the sample sites,
  # p-value from t-test assuming the core value to be the distribution mean
  return(c('interact_signal' = core_signal, 'l2fc_v.bg' = log2(core_signal/mean(signal_samples)), t.test(signal_samples, mu = core_signal)[3]))
}
getMatrixForARegion = function(lowertri,uppertri,CHROM,START,END,ga,upperLimit){
  # lowertri=wt_A3;uppertri=ko_10kb;CHROM="chr1";START=89900000-w;END=91000000+w;upperLimit=0.015
  bins = ga[ga$chr==CHROM,]
  bins = which(bins$start>START & bins$end<END)
  a = lowertri[[CHROM]]$balanced
  b = uppertri[[CHROM]]$balanced
  a = a[min(bins):max(bins),min(bins):max(bins)]
  b = b[min(bins):max(bins),min(bins):max(bins)]
  ab = a
  ab[upper.tri(ab)] = b[upper.tri(b)]
  diag(ab) = 0
  ab = t(matrix(apply(ab,2,rev),nrow(ab),ncol(ab)))
  ab[ab>upperLimit] = upperLimit
  return(ab) }
getSignalInBins = function( bins, modification, featureColumn ){
  PsDFvsX = findOverlaps(bins,modification)
  PsDFvsX_score = split( as.numeric(elementMetadata(modification)[,featureColumn][subjectHits(PsDFvsX)]), queryHits(PsDFvsX) )
  PsDFvsX_score = unlist(lapply(PsDFvsX_score,function(x){max(x,na.rm=T)}))
  
  bm = rep( 0, length(bins) )
  bm[as.numeric(names(PsDFvsX_score))]=PsDFvsX_score
  bm = matrix(bm,nrow=length(bins)/sum(bins$peak==1), ncol=sum(bins$peak==1), byrow = T )
  rownames(bm) = names(bins)[seq(1,length(bins),by=sum(bins$peak==1))]
  return(bm)
}
getLoopSignal = function(ll,D){
  # ll = APA_A1_IPF_sig; D=2; thr_lfc=0.59; pval_thr=0.01
  tp = lapply(ll,function(x){compute_putative_loop_signal(x,D)})
  return(unlist(lapply(tp,function(x){x[[2]]})))
}
importBEDPE_Filter_Loops = function(loopdir, max_size, extend_anchors_by){
  # loopdir = '/Volumes/T7/CTCF/CTCF_paper/CTCF_paper/data/hiccups_output_esc/'
  return( do.call( 'rbind', lapply(as.list(list.files(loopdir)), function(x){
    tp=read.delim( paste0(loopdir,x,'/merged_loops.bedpe'), header=TRUE )
    tp=tp[ -1, ]
    tp$loop_span=tp$y2-tp$x1
    tp = tp[ tp$loop_span < max_size,]
    tp$x1 = tp$x1-extend_anchors_by
    tp$x2 = tp$x2+extend_anchors_by
    tp$y1 = tp$y1-extend_anchors_by
    tp$y2 = tp$y2+extend_anchors_by
    return(tp)
  })) )
}
compareLoops = function(loops1,loops2,offset,maxLoopSize){
  # offset=0;loops1=es_loops;loops2=ns_loops
  loops1 = loops1[loops1$loop_span<maxLoopSize,]
  loops2 = loops2[loops2$loop_span<maxLoopSize,]
  leftGR1 = getGR(loops1[,1],loops1[,2],loops1[,3],offset=offset)
  leftGR2 = getGR(loops2[,1],loops2[,2],loops2[,3],offset=offset)
  rightGR1 = getGR(loops1[,4],loops1[,5],loops1[,6],offset=offset)
  rightGR2 = getGR(loops2[,4],loops2[,5],loops2[,6],offset=offset)
  
  leftOV = findOverlaps(leftGR1,leftGR2)
  rightOV = findOverlaps(rightGR1,rightGR2)
  
  overlapping1 = table(c(paste(queryHits(leftOV),subjectHits(leftOV),sep='_'),
                         paste(queryHits(rightOV),subjectHits(rightOV),sep='_')))
  overlapping_loops1 = unique(as.numeric( unlist(strsplit(names(overlapping1[overlapping1==2]),"_"))[seq(1,2*length(overlapping1[overlapping1==2]),by=2)]))
  overlapping_loops1 = overlapping_loops1[order(overlapping_loops1)]
  loops1_common = loops1[overlapping_loops1,]
  loops1_specific = loops1[-overlapping_loops1,]
  
  leftOV = findOverlaps(leftGR2,leftGR1)
  rightOV = findOverlaps(rightGR2,rightGR1)
  
  overlapping2 = table(c(paste(queryHits(leftOV),subjectHits(leftOV),sep='_'),
                         paste(queryHits(rightOV),subjectHits(rightOV),sep='_')))
  overlapping_loops2 = unique(as.numeric( unlist(strsplit(names(overlapping2[overlapping2==2]),"_"))[seq(1,2*length(overlapping2[overlapping2==2]),by=2)]))
  loops2_common = loops2[overlapping_loops2,]
  loops2_specific = loops2[-overlapping_loops2,]
  
  res=vector('list',length=4)
  res[[1]] = loops1_common
  res[[2]] = loops1_specific
  res[[3]] = loops2_common
  res[[4]] = loops2_specific
  
  names(res) = c('First_set_common','First_set_specific','Second_set_common',
                 'Second_set_specific')
  
  return(res)
  
}

## Insulation analysis
getSUMMEDsignal4bins = function( B, D, A, M ){
  # B=bins-HOWFAR; D=distance; A=Area; M=thism
  rows = unlist(lapply(as.list(B), function(b){
    rep( ( (b-D) + seq(-A,A)) , (1+2*A))}))
  
  cols = unlist(lapply(as.list(B), function(b){
    rep( ( (b+D) + seq(-A,A) ), each=(1+2*A))}))
  ids = cbind(rows, cols)
  m = M[ids]
  IDS = rep( names(B), each = (1+2*A)^2 )
  # print(c(min(ids),max(ids)))
  matrices = split( m, IDS )
  return( unlist(lapply(matrices,function(m){sum(m, na.rm=T)})) )}
InsulationScore = function( bin, mat, GAGR, distance, Area, HOWFAR ){
  ## mat=ko_10kb; distance=5; Area=3;chr="chr1";GAGR=gagr;HOWFAR=10
  ## bin = gagr[which(chrom(gagr)!="chrY")]
  ## rm(list=c("mat","distance","Area","chr","GAGR","HOWFAR","B","D","A","M"))
  do.call("rbind",lapply( as.list(unique(as.character(chrom(bin)))),function(chr){
    print(paste0("processing ",chr))
    thism = mat[[chr]]$balanced
    theseb = GenomicRanges::resize( bin[which(chrom(bin)==chr)], 1, fix="center")
    
    bins = GAGR$binid[queryHits(findOverlaps(GAGR,theseb))]
    names(bins) = names(theseb[subjectHits(findOverlaps(GAGR,theseb))])
    
    bins = bins[bins-distance-HOWFAR-Area>0 & bins+distance+HOWFAR+Area<(length(GAGR[which(chrom(GAGR)==chr)]))]
    
    return(data.frame( left=getSUMMEDsignal4bins(B=bins-HOWFAR, D=distance, A=Area, M=thism ),
                       middle=getSUMMEDsignal4bins(B=bins, D=distance, A=Area, M=thism ),
                       right=getSUMMEDsignal4bins(B=bins+HOWFAR, D=distance, A=Area, M=thism ) )) } )) }
InsulationScorePublishedKR = function( bin, mat, GAGR, distance, Area, HOWFAR ){
  ## mat=ko_10kb; distance=5; Area=3;chr="chr1";GAGR=gagr;HOWFAR=10
  ## bin = gagr[which(chrom(gagr)!="chrY")]
  ## rm(list=c("mat","distance","Area","chr","GAGR","HOWFAR","B","D","A","M"))
  do.call("rbind",lapply( as.list(unique(as.character(chrom(bin)))),function(chr){
    print(paste0("processing ",chr))
    thism = mat[[chr]]
    theseb = GenomicRanges::resize( bin[which(chrom(bin)==chr)], 1, fix="center")
    
    bins = GAGR$binid[queryHits(findOverlaps(GAGR,theseb))]
    names(bins) = names(theseb[subjectHits(findOverlaps(GAGR,theseb))])
    
    bins = bins[bins-distance-HOWFAR-Area>0 & bins+distance+HOWFAR+Area<(length(GAGR[which(chrom(GAGR)==chr)]))]
    
    return(data.frame( left=getSUMMEDsignal4bins(B=bins-HOWFAR, D=distance, A=Area, M=thism ),
                       middle=getSUMMEDsignal4bins(B=bins, D=distance, A=Area, M=thism ),
                       right=getSUMMEDsignal4bins(B=bins+HOWFAR, D=distance, A=Area, M=thism ) )) } )) }
processIS = function( IS, GAGR ){
  res = GAGR
  res$binid=NULL
  res$score = 0
  res$score[match(rownames(IS),names(res))] = log2( rowMeans((0.001+IS[,c(1,3)]))/(0.001+IS[,2] ) )
  return(res) }
cleanIS = function(x){
  x[is.na(x)]=0
  x[!is.finite(x)]=0
  return(x) }

RNA-seq and ChIP-seq functions

getSignalInBins400 = function( bins, modification, featureColumn ){
  PsDFvsX = findOverlaps(bins,modification)
  PsDFvsX_score = split( as.numeric(elementMetadata(modification)[,featureColumn][subjectHits(PsDFvsX)]), queryHits(PsDFvsX) )
  PsDFvsX_score = unlist(lapply(PsDFvsX_score,function(x){mean(x,na.rm=T)}))
  
  bm = rep( 0, length(bins) )
  bm[as.numeric(names(PsDFvsX_score))]=PsDFvsX_score
  bm = matrix(bm,nrow=length(bins)/401, ncol=401, byrow = T )
  rownames(bm) = (bins$peak)[seq(1,length(bins),by=401)]
  return(bm)
}
reorderMatrixBasedOnStrand = function( m, peaks ){
  m_fwd = m[which(as.character(peaks$motif_strand)=='+'),]
  m_rev = t(apply( m[which(as.character(peaks$motif_strand)=='-'),],1,rev))
  return(rbind(m_fwd,m_rev))
}
GetTPM = function( m,cols2take,gn ){
  # m=PL_Dat; cols2take=c(7,8,14,15,9,10,11,12,13)
  mb = m[,cols2take]
  cs = colSums( mb )
  f1 = do.call( "cbind", lapply(1:ncol(mb), function(x){
    rpk = 1000*(mb[,x]/m$Length)
    SF = sum(rpk)/1000000
    return( rpk/SF ) } )  )
  colnames(f1) = colnames( mb )
  rownames(f1) = gn
  return(f1)
}
readBed_filterChroms = function(filePath, chroms, scoreCol){
  # filePath='/Volumes/T7/CTCF/CTCF_HUMAN.H11MO.0.A.bed'
  x= read.delim(filePath,as.is=T,header=F)
  x[,1] = paste0('chr',x[,1])
  x = x[x[,1] %in% chroms,]
  return( GRanges( seqnames = Rle(x[,1]),
                   ranges = IRanges(as.numeric(x[,2]),
                                    end=as.numeric(x[,3]),
                                    names=rownames(x)),
                   score=x[,scoreCol]) ) } 
readBed_filterChromsStraded = function(filePath, chroms, scoreCol){
  # filePath='/Volumes/T7/CTCF/ATAC_ES_2i_nov/peaks/ATACseq40_ES_2i_IAA_1_MM.narrowPeak'
  x= read.delim(filePath,as.is=T,header=F)
  x[,1] = paste0('chr',x[,1])
  x = x[x[,1] %in% chroms,]
  return( GRanges( seqnames = Rle(x[,1]),
                   ranges = IRanges(as.numeric(x[,2]),
                                    end=as.numeric(x[,3]),
                                    names=rownames(x)),
                   strand = x[,6],
                   score=x[,scoreCol]) ) } 
readNarrowPeak2getSummit = function(filePath, chroms, scoreCol){
  # filePath='/Volumes/T7/CTCF/ATAC_ES_2i_nov/peaks/ATACseq40_ES_2i_IAA_1_MM.narrowPeak'
  x= read.delim(filePath,as.is=T,header=F)
  x[,1] = paste0('chr',x[,1])
  x = x[x[,1] %in% chroms,]
  return( GRanges( seqnames = Rle(x[,1]),
                   ranges = IRanges(as.numeric(x[,2]) + as.numeric(x[,10]) ,
                                    end=as.numeric(x[,2]) + as.numeric(x[,10]),
                                    names=rownames(x)),
                   score=x[,scoreCol]) ) }

Functions

Preprocessing functions

Hi-C normalisation

Function - specific tasks in HiC

RNA-seq and ChIP-seq functions