SpliceMap

Latest News: SpliceMap 3.3.5.2 -- Faster and more accurate ... read more

.cfg file format

The new .cfg file format replaces the command line input parameters. Previously, you would have to type:

runSpliceMap ../genome/hg18 L-reads_list1 L-reads_list2 M2 R-refFlat.txt -cuff -ud -E

in order to run SpliceMap. Now, simply edit the run.cfg file and type

runSpliceMap run.cfg

and that's it! An example run.cfg file is included with every SpliceMap download and descriptions of each field can be found in the comments within the file. Detailed descriptions of each field are also given below.

.cfg syntax

There are only three types of syntax in a .cfg file.
Comment
Any line beginning with # is a comment. # must be the first character on the line.
Single Value
A single value written in the following format
tag = value
For example,
genome_dir = ../../genome/hg18
Multiple Values
Multiple values are grouped by > and < in the following format
> tag
value_1
value_2
...
<

Example .cfg file

The following is an example .cfg file. It is included in every SpliceMap download or you can download it here run.cfg (right-click and save as...). The values which are optional are noted in the comments and you may comment them out.

##
###################################################
#
# This configuration file contains all settings for a run
# of SpliceMap.
#
# lines beginning with '#' are comments
# lists begin with '> tag' and end with '<' on separate lines
#
###################################################
##

#########################
## Required Settings
##


##
# Directory of the chromosome files in FASTA format
# Each chromosome should be in a separate file (can be concatenated)
# ie. chr1.fa, chr2.fa, ...
# (single value)

genome_dir = ~/genome/hg18/


##
# These are the two lists of sequencer reads files.
# "reads_list2" can be commented out if reads are not paired-end.
# Make sure the order of both lists is the same!
# Also, "reads_list1" must be the first pair. 
# Note: pair-reads should be in the "forward-reverse" format. 
# (multiple values)

> reads_list1
fastq_100k_1_1.txt
fastq_100k_3_1.txt
<

> reads_list2
fastq_100k_1_2.txt
fastq_100k_3_2.txt
<


##
# Format of the sequencer reads, also make sure reads are 
# not split over multiple lines.
# Choices are: FASTA, FASTQ, RAW
# (single value)

read_format = FASTQ

##
# Format of the quality string if FASTQ is used
# Choices are: 
#   phred-33 -- Phred base 33 (same as Sanger format) [default]
#   phred-64 -- Phred base 64 (same as Illumina 1.3+)
#   solexa   -- Format used by solexa machines
# (single value)


quality_format = phred-33


##
# The short reads mapper used
# choices are "bowtie", "eland" or "seqmap"
# Bowtie is recommended
# this can be commented out if the mapping has already been done and the 
# appropriate ".t" files exist in the temp directory [advanced]. 
# (single value)

mapper = bowtie


#########################
## Optional Settings
##


##
# these are annotations used to find novel junctions, in ref or bed format. 
# (optional)
# (single value)

#annotations = all.gene.refFlat.txt

##
# Directory name of the directory that stores temporary files
# (optional) Default = temp
# (single value)

temp_path = temp


##
# Directory name of the directory that stores the output files
# (optional) Default = output
# (single value)

out_path = output



##
# Maximum intron size, this is absolute 99th-percentile maximum.
# Introns beyond this size will be ignored. 
# (optional) If you don't set this, we will assume a mammalian genome (400,000)
# (single value)

max_intron = 400000



##
# 25th-percentile intron size, i.e. the lower 25th percentile of intron sizes
# This is not the smallest size that SpliceMap will search. That is about ~25bp. 
# (optional) If you don't set this, we will assume a mammalian genome (20,000)
# (single value)

min_intron = 20000

##
# Maximum number of multi-hits
# If a 25-mer seed has more than this many multi-hits, it will be discarded.
# (optional) Default is 10
# (single value)

max_multi_hit = 10


##
# Full read length
# SpliceMap will only use the first "full_read_length" bp for mapping.
# If the read is shorter than "full_read_length", the full read will be used before head clip. 
# If you comment this out SpliceMap will use as many as possible.
# This is for the case where the reads might have N's at the end. 
# It is always desirable to cut off the N's
# (optional)
# (single value)

# full_read_length = 70



##
# Number of bases to clip off the head of the read
# This clipping is applied after "full_read_length"
# (optional)
# (single value)

# head_clip_length = 0



##
# Maximum number of mismatches allowed in seeding for junction search
# Choices are 0,1(default) or 2
# (optional)
# (single value)

seed_mismatch = 1

##
# Maximum number of mismatches allowed in entire read
# No limit on value, however SpliceMap can only identify reads with
# a maximum of 2 mismatches per 25bp. 
# Default is 2. 
# (optional)
# (single value)

read_mismatch = 2


##
# Maximum number of bases allowed to be soft clipped from the ends of reads during
# alignment. This is required as mismatches near junctions could cause parts of a
# read to not map.
# Default is 40.
# (optional)
# (single value)

#max_clip_allowed = 40


##
# Generate a SAM file
# choices are
#   "cuff" -- Generate Cufflinks compatible SAM file
#   "sam"  -- Generate regular SAM file
#
# If this is commented out, no SAM file will be generated
# (optional)
# (single value)

sam_file = cuff



##
# Name of the chromosome file with wildcards
# If none is given the default of "chr*.fa" will be used [this is compatible with the UCSC genome files]
# If your genome file is concatenated, just type the file name
# (optional)
# (single value)

chromosome_wildcard = chr*.fa

##
# Number of chromosomes to process at once, to take advantage of multi-core systems.
# This is not threading, so it will take extra memory. However, running 2 at a time should be fine.
# (optional) Default = 1
# (single value) 

num_chromosome_together = 2



##
###################################################
#
#  Bowtie specific options
#   these have no meaning if another mapper is used
# 
###################################################
##

#########################
## Required Settings
##


##
# Base of bowtie index, this should be the same genome as the
# chromosome files
# eg. if your bowtie files are "genome/hg18/genome.1.ebwt", ...
# then your base dir is "genome/hg18/genome"
# (single value)

bowtie_base_dir = ~/genome/hg18/genome



#########################
## Optional Settings
##

##
# Number of threads to use for mapping (SpliceMap may use this option in future)
# Default value is 2
# (optional)
# (single value)

num_threads = 2


##
# Try hard?
# choices are "yes" or "no"
# Default value is yes. (it is not much slower, about 15%)
# I'm unsure if this is required, feel free to try with 
# this option off and let me know your results. 
# (optional)
# (single value)

try_hard = yes