## R/sraConvert.R

## Convert SRA accessions of one entity type into the related accessions of
## other entity types by querying the `sra` table through `sra_con`.
##
## Arguments:
##   in_acc    Character vector of accessions such as 'SRA003625' or
##             'SRR013350'. All elements must belong to the same entity type
##             (submission, study, sample, experiment or run). Leading and
##             trailing whitespace is ignored.
##   out_type  Entity types to look up. 'sra' is shorthand for all five
##             types. The type of `in_acc` itself is removed from this list
##             and selected first instead.
##   sra_con   Connection object handed unchanged to dbGetQuery().
##
## Value: whatever dbGetQuery() returns for the generated SELECT, with the
## '_accession' suffix stripped from the column names.
##
## Errors: stops when an accession is not three letters followed by digits,
## when its prefix is not a known SRA/ERA/DRA-style prefix, or when the
## input mixes several entity types (an empty `in_acc` also ends up here).
sraConvert <-
function (in_acc,
          out_type=c('sra','submission','study','sample','experiment','run'),
          sra_con)
{
  out_type <- tolower(out_type)
  out_type <- match.arg(out_type, several.ok = TRUE)
  if ('sra' %in% out_type) {
    out_type <- c('submission', 'study', 'sample', 'experiment', 'run')
  }

  ## Accession prefix -> entity type. The names double as the list of
  ## accepted prefixes.
  valid_in_type <-
    c('SRA'='submission', 'ERA'='submission', 'DRA'='submission',
      'SRP'='study', 'ERP'='study', 'DRP'='study', 'SRS'='sample',
      'ERS'='sample', 'DRS'='sample', 'SRX'='experiment',
      'ERX'='experiment', 'DRX'='experiment', 'SRR'='run',
      'ERR'='run', 'DRR'='run')
  valid_in_acc_type <- names(valid_in_type)

  ## Trim leading AND trailing whitespace. gsub() is required here: sub()
  ## replaces only the first match, which left trailing blanks in place
  ## whenever leading blanks were present too.
  in_acc <- gsub('^\\s+|\\s+$', '', in_acc, perl = TRUE)

  ## Every accession must be exactly three letters followed by digits.
  ## NA elements do not match and are therefore rejected as well.
  if (!all(grepl('^[A-Za-z]{3}[0-9]+$', in_acc, perl = TRUE)))
    stop("invalid input SRA accession(s), right ones are like 'SRA003625' or 'SRP000403', or 'SRS001834', 'SRR013350', or 'SRX002512'")

  ## The three-letter prefix (compared case-insensitively) decides the type.
  in_acc_type <- toupper(unique(sub('\\d+$', '', in_acc, perl = TRUE)))
  if (!all(in_acc_type %in% valid_in_acc_type))
    stop("Input type shuld be in '",
         paste(valid_in_acc_type, collapse = "' '"),
         "'")

  ## All accessions have to map to one single entity type.
  in_type <- unique(unname(valid_in_type[in_acc_type]))
  if (length(in_type) != 1)
    stop("Only one type of SRA accession(s) is allowed in an input accession vector, either 'submission','study','sample','experiment' or 'run'")

  ## The input type is selected first; it is dropped from out_type so it is
  ## not selected twice. If nothing else was requested, only the input
  ## column is queried.
  out_type <- out_type[out_type != in_type]
  select_type <- c(in_type, out_type)

  ## The values interpolated below were validated above to consist of
  ## letters and digits only, so no quoting or escaping is needed.
  ## NOTE(review): lower-case accessions pass validation but are sent to
  ## the database unchanged -- confirm whether the comparison in the `sra`
  ## table is case-sensitive.
  in_acc_sql <- paste0("'", paste(in_acc, collapse = "','"), "'")
  select_type_sql <- paste0(select_type, '_accession', collapse = ',')
  sql <- paste0('SELECT DISTINCT ', select_type_sql,
                ' FROM sra WHERE ', in_type,
                '_accession IN (', in_acc_sql, ')')

  sra_acc <- dbGetQuery(sra_con, sql)
  names(sra_acc) <- sub('_accession', '', names(sra_acc))
  sra_acc
}
## zhujack/SRAdb documentation built on Dec. 6, 2024, 2:15 a.m.