#!/usr/bin/python
import sys
import struct
import os
from numpy import *
import datetime
from re import *
from copy import *
import threading
import string

################################################################################

def GetPathAndName(pathfilename):
    """Split a '/'-separated path into its directory part and its file name.

    Parameters:
        pathfilename: path string that uses '/' as the separator.

    Returns:
        (path, filename). ``path`` always ends with '/', so that
        ``path + filename`` addresses the same file as ``pathfilename``.
        A bare file name (no '/' at all) yields ``'./'`` as its path.
    """
    head, sep, filename = pathfilename.rpartition('/')
    if not sep:
        # No directory component. Joining an empty list and appending '/'
        # used to give '/', i.e. the filesystem root, which made callers
        # that prepend the path look for files in the wrong place.
        return './', filename
    return head + '/', filename

def Readcfgfile(cfg_filename):
    """Parse a ``key = value`` configuration file into a dict of strings.

    Blank lines and lines whose first non-blank character is '#' are
    ignored. Keys and values are stripped of surrounding whitespace.
    Every parsed line is echoed to stdout as the list of its '='-separated
    fields.

    Parameters:
        cfg_filename: path of the configuration file to read.

    Returns:
        dict mapping each key to its value (both ``str``). When a line
        contains more than one '=', a warning is printed and only the text
        between the first and second '=' is used as the value. A line
        without any '=' is reported and skipped.
    """
    results = {}
    # The with-block closes the file even if reading fails part-way.
    with open(cfg_filename, 'r') as cfg:
        for line in cfg:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            ls = line.split('=')
            print(ls)
            if len(ls) < 2:
                # Previously ls[1] raised IndexError here and aborted the run.
                print('warning: no = in cfg line, skipped: ' + line)
                continue
            if len(ls) > 2:
                print('warning: too many = in cfg file')
            results[ls[0].strip()] = ls[1].strip()
    return results

################################################################################
# The cfg file name is the only command-line argument the script reads;
# without it, show the usage text and stop with exit status 1.
if len(sys.argv) < 2:
    print("usage: python runLSC.py run.cfg")
    print("or ./runLSC.py run.cfg")
    sys.exit(1)

# argv[0] is the path this script was started with; it is split further
# down to locate the helper scripts that are run from the same folder.
run_pathfilename, cfg_filename = sys.argv[0], sys.argv[1]
################################################################################

# Default settings; each can be overridden by an entry in the cfg file.

# Number of pieces the short-read (SR) file is split into, one worker
# thread per piece.
Nthread1 = 8
# Number of pieces the LR_SR.map file is split into for the correction step.
Nthread2 = 8
# Passed as strings on the poolchr.py command line (LgapInpseudochr is also
# passed to convertNAV.py).
Lpseudochr = '50000000'
LgapInpseudochr = '100'
# Input files: long reads (LR) and short reads (SR).
LR_pathfilename = ''
SR_pathfilename = ''
# Working folder for intermediate files and folder for the final output.
temp_foldername = 'temp3'
output_foldername = 'output3'
# "Y" runs RemoveBothTails.py on the long reads first (cfg key: RemoveBothTails).
I_RemoveBothTails = "Y"
# Passed to compressFASTA.py as -MinNonN= and -MaxN= for the short reads.
MinNumberofNonN = "40"
MaxN = "1"
# "N" makes the script sort and uniq the short reads before splitting them.
# This setting used to have no default, so a cfg file without an
# I_nonredundant entry ended in a NameError at the first use. Defaulting to
# "N" always runs the sort/uniq step unless the cfg says otherwise.
I_nonredundant = "N"
################################################################################

# Read the cfg file and let its entries replace the built-in settings.
# Keys that are not listed here are ignored.
cfg_dt = Readcfgfile(cfg_filename)

# The two thread counts are the only settings converted to int.
Nthread1 = int(cfg_dt.get("Nthread1", Nthread1))
Nthread2 = int(cfg_dt.get("Nthread2", Nthread2))

# Everything else stays a string, because it is only ever pasted into
# command lines or compared against "Y"/"N".
Lpseudochr = cfg_dt.get("Lpseudochr", Lpseudochr)
LgapInpseudochr = cfg_dt.get("LgapInpseudochr", LgapInpseudochr)
LR_pathfilename = cfg_dt.get("LR_pathfilename", LR_pathfilename)
SR_pathfilename = cfg_dt.get("SR_pathfilename", SR_pathfilename)
temp_foldername = cfg_dt.get("temp_foldername", temp_foldername)
output_foldername = cfg_dt.get("output_foldername", output_foldername)
# Note the cfg key has no "I_" prefix, unlike the variable it sets.
I_RemoveBothTails = cfg_dt.get("RemoveBothTails", I_RemoveBothTails)
MinNumberofNonN = cfg_dt.get("MinNumberofNonN", MinNumberofNonN)
MaxN = cfg_dt.get("MaxN", MaxN)

# Assigned only when the cfg file provides it.
if "I_nonredundant" in cfg_dt:
    I_nonredundant = cfg_dt["I_nonredundant"]

################################################################################

# Create the working and output folders if they do not exist yet.
# os.makedirs also creates missing parent folders and raises OSError when
# a folder cannot be created, so the run stops here instead of failing
# later in every step. The former shell `mkdir` complained on every re-run
# and its failures were ignored.
for _folder in (temp_foldername, output_foldername):
    if not os.path.isdir(_folder):
        os.makedirs(_folder)

# bin_path is the folder this script was started from; the helper scripts
# (compressFASTA.py, poolchr.py, ...) are invoked from that same folder.
bin_path, run_filename = GetPathAndName(run_pathfilename)
LR_path, LR_filename = GetPathAndName(LR_pathfilename)
SR_path, SR_filename = GetPathAndName(SR_pathfilename)

# Later code builds file names with `folder + name`, so both folder names
# must end with a slash.
if not temp_foldername.endswith('/'):
    temp_foldername = temp_foldername + '/'
if not output_foldername.endswith('/'):
    output_foldername = output_foldername + '/'

################################################################################
# Start time; every progress line below prints the time elapsed since here.
t0 = datetime.datetime.now()

##########################################

# When the short reads are not yet non-redundant, collapse identical
# sequences first and continue with the collapsed FASTA file.
if I_nonredundant == "N":
    print("=== sort and uniq SR data ===")

    # Intermediate files, all inside the temp folder.
    seq_file = temp_foldername + "SR.seq"
    sorted_file = temp_foldername + "SR_sorted.seq"
    uniq_file = temp_foldername + "SR_uniq.seq"
    uniq_fasta = temp_foldername + "SR_uniq.fa"

    pipeline = [
        # keep every second line of the SR file
        " ".join(["awk '{if(NR%2==0)print $0}'", SR_pathfilename, ">", seq_file]),
        # sort, using the temp folder for sort's own scratch files
        " ".join(["sort", "-T", temp_foldername, seq_file, ">", sorted_file]),
        # collapse duplicates, prefixing each line with its count
        " ".join(["uniq", "-c", sorted_file, ">", uniq_file]),
        # convert the counted sequences to FASTA
        " ".join([bin_path + "uniqseq2fasta.py", uniq_file, ">", uniq_fasta]),
    ]
    for step_cmd in pipeline:
        os.system(step_cmd)

    print(str(datetime.datetime.now() - t0))

    # From here on the collapsed file is the short-read input.
    SR_pathfilename = uniq_fasta
    os.system(" ".join(["rm", uniq_file, sorted_file, seq_file]))
##########################################
# Count the lines of the short-read file to decide how large each piece
# handed to a worker has to be.
SR_NR = 0
with open(SR_pathfilename, 'r') as SR:
    for line in SR:
        SR_NR += 1

if SR_NR == 0:
    # Nothing to split: no chunk file would exist and every later step
    # would run on missing input.
    print("error: no short reads found in " + SR_pathfilename)
    sys.exit(1)

# Lines per piece, rounded up to an even number
# (presumably so a two-line FASTA record is never cut in half - TODO confirm).
# `//` keeps this an integer regardless of the Python version.
Nsplitline = 1 + (SR_NR // Nthread1)
if Nsplitline % 2 == 1:
    Nsplitline += 1

# `split` writes ceil(SR_NR / Nsplitline) files, which can be fewer than
# Nthread1 because of the rounding above. Only list suffixes of files that
# will really exist, otherwise later steps are started on missing chunks.
Nchunk = (SR_NR + Nsplitline - 1) // Nsplitline

# Suffixes in the order `split` generates them: .aa, .ab, ..., .az, .ba, ...
# ascii_lowercase is used because string.lowercase depends on the locale.
letters = string.ascii_lowercase
ext_ls = ['.' + letters[n // 26] + letters[n % 26] for n in range(Nchunk)]

print("===split SR:===")
splitSR_cmd = "split -l " + str(Nsplitline) + " " + SR_pathfilename + " " + temp_foldername + "SR.fa."
# The pieces are named SR.fa.<suffix> inside the temp folder.
SR_filename = "SR.fa"
os.system(splitSR_cmd)

print(str(datetime.datetime.now() - t0))
##########################################
print("===compress SR.aa:===")

# Run compressFASTA.py on every SR piece, one thread per piece; each thread
# only waits for its shell command to finish.
T_compressFASTA_SR_ls = []
for ext in ext_ls:
    piece = temp_foldername + SR_filename + ext
    compress_cmd = " ".join([
        bin_path + "compressFASTA.py",
        "-MinNonN=" + MinNumberofNonN,
        "-MaxN=" + MaxN,
        piece,        # input piece
        piece + ".",  # output prefix
    ])
    worker = threading.Thread(target=os.system, args=(compress_cmd,))
    worker.start()
    T_compressFASTA_SR_ls.append(worker)

# Wait until all pieces have been processed.
for worker in T_compressFASTA_SR_ls:
    worker.join()

print(str(datetime.datetime.now() - t0))

####################
# The uncompressed pieces are no longer needed; delete them in the
# background.
for ext in ext_ls:
    os.system("rm " + temp_foldername + SR_filename + ext + " &")
####################

##########################################change output from compressFASTA.py and poolchr.py 

remove_tails = (I_RemoveBothTails == "Y")
# Output of RemoveBothTails.py; only created and used when remove_tails is set.
notails_file = temp_foldername + "Notwotails_" + LR_filename

if remove_tails:
    print("===RemoveBothTails in LR:===")
    os.system(bin_path + "RemoveBothTails.py " + LR_pathfilename + " " + notails_file)
    print(str(datetime.datetime.now() - t0))

print("===compress LR:===")

# FASTA2fa.py reads the tail-trimmed file if there is one, otherwise the
# original long-read file, and writes LR.fa into the temp folder.
fasta2fa_input = notails_file if remove_tails else LR_pathfilename
FASTA2fa_cmd = bin_path + "FASTA2fa.py " + fasta2fa_input + " " + temp_foldername + "LR.fa"
print(FASTA2fa_cmd)
os.system(FASTA2fa_cmd)

if remove_tails:
    # The tail-trimmed intermediate has been converted and can go.
    deltempLR_cmd = "rm " + notails_file
    print(deltempLR_cmd)
    os.system(deltempLR_cmd)

# All later steps refer to the long reads by this name.
LR_filename = "LR.fa"
print(str(datetime.datetime.now() - t0))

print("===compress LR:===")

lr_file = temp_foldername + LR_filename

# Long reads are compressed with fixed thresholds (MinNonN=0, MaxN=10000)
# instead of the configurable short-read ones.
compressFASTA_LR_cmd = bin_path + "compressFASTA.py -MinNonN=0 -MaxN=10000 " + lr_file + " " + lr_file + "."
print(compressFASTA_LR_cmd)
os.system(compressFASTA_LR_cmd)

####################
# Delete the uncompressed LR.fa.
delLR_cmd = "rm " + temp_foldername + "LR.fa"
print(delLR_cmd)
os.system(delLR_cmd)
####################

print(str(datetime.datetime.now() - t0))

print("===poolchr LR:===")
# poolchr.py gets the compressed long reads plus the gap and pseudo-chromosome
# length settings.
os.system(" ".join([bin_path + "poolchr.py", lr_file + ".cps", LgapInpseudochr, Lpseudochr]))

print(str(datetime.datetime.now() - t0))

print("===novoindex pseudochr:===")
# novoindex takes the index file to write first, then the sequence file.
pseudochr_file = temp_foldername + "pseudochr_" + LR_filename + ".cps"
os.system("novoindex " + pseudochr_file + ".nix " + pseudochr_file)

print(str(datetime.datetime.now() - t0))


##########################################
print("===novoalign SR.aa.cps:===")

# Align every compressed SR piece against the pseudo-chromosome index,
# one novoalign process per piece, each started from its own thread.
nix_file = temp_foldername + "pseudochr_" + LR_filename + ".cps.nix"
T_novoalign_ls = []
for ext in ext_ls:
    piece_cps = temp_foldername + SR_filename + ext + ".cps"
    align_cmd = "novoalign -r All -F FA -d " + nix_file + " -f " + piece_cps + " > " + piece_cps + ".nav"
    worker = threading.Thread(target=os.system, args=(align_cmd,))
    worker.start()
    T_novoalign_ls.append(worker)

# Wait for all alignments to finish.
for worker in T_novoalign_ls:
    worker.join()

print(str(datetime.datetime.now() - t0))

##########################################

sr_base = temp_foldername + SR_filename

print("===cat SR.aa.cps:===")
# Concatenate the per-piece .cps files into one SR.fa.cps ...
cps_pieces = [sr_base + ext + ".cps" for ext in ext_ls]
os.system("cat " + ' '.join(cps_pieces) + " > " + sr_base + ".cps")
print(str(datetime.datetime.now() - t0))

####################
# ... then delete the pieces in the background.
for piece in cps_pieces:
    os.system("rm " + piece + " &")
####################

print("===cat SR.aa.idx:===")
# Same for the per-piece .idx files.
idx_pieces = [sr_base + ext + ".idx" for ext in ext_ls]
os.system("cat " + ' '.join(idx_pieces) + " > " + sr_base + ".idx")
print(str(datetime.datetime.now() - t0))

####################
for piece in idx_pieces:
    os.system("rm " + piece + " &")
####################

####################

# The pseudo-chromosome file and its novoalign index are removed here,
# after the alignment step.
pseudochr_file = temp_foldername + "pseudochr_" + LR_filename + ".cps"
for obsolete in (pseudochr_file, pseudochr_file + ".nix"):
    os.system("rm " + obsolete)

####################

##########################################

# Earlier shell invocations of this step, kept for reference:
#find -name "SR.fa.??.cps.nav" -print -exec ./bin/convertNAV.sp {} \;
#bin/convertNAV.py dist_LR.fa.cps 100 $1 > $1.convertNAV.log &
print("===convertNAV SR.aa.cps.nav:===")

# Run convertNAV.py on every novoalign output, one thread per SR piece.
# Its stdout goes to a per-piece log file.
dist_file = temp_foldername + "dist_" + LR_filename + ".cps"
T_convertNAV_ls = []
for ext in ext_ls:
    piece_cps = temp_foldername + SR_filename + ext + ".cps"
    convert_cmd = " ".join([
        bin_path + "convertNAV.py",
        dist_file,
        LgapInpseudochr,
        piece_cps + ".nav",
        ">",
        piece_cps + ".convertNAV.log",
    ])
    worker = threading.Thread(target=os.system, args=(convert_cmd,))
    worker.start()
    T_convertNAV_ls.append(worker)

for worker in T_convertNAV_ls:
    worker.join()

print(str(datetime.datetime.now() - t0))

####################

# Remove the dist_ file that was passed to every convertNAV.py call above.
os.system("rm " + dist_file)

####################
# The .nav files have been converted; delete them in the background.
for ext in ext_ls:
    os.system("rm " + temp_foldername + SR_filename + ext + ".cps.nav &")
####################

##########################################
print("===merge_mapping_file SR.aa.cps.nav.map:===")

# merge_mapping_file.py receives all per-piece .map files followed by the
# name of the merged file, LR_SR.map.
map_pieces = [temp_foldername + SR_filename + ext + ".cps.nav.map" for ext in ext_ls]
merge_args = map_pieces + [temp_foldername + "LR_SR.map"]
os.system(bin_path + "merge_mapping_file.py " + ' '.join(merge_args))

print(str(datetime.datetime.now() - t0))

####################
# Delete the per-piece map files in the background.
for piece in map_pieces:
    os.system("rm " + piece + " &")
####################

##########################################
print("===split LR_SR.map:===")

# Count the lines of the merged map by streaming through the file.
# readlines() was used before, which held the complete map in memory only
# to take its length.
LR_SR_map_NR = 0
with open(temp_foldername + "LR_SR.map", 'r') as LR_SR_map:
    for line in LR_SR_map:
        LR_SR_map_NR += 1

# Lines per piece; `//` keeps this an integer regardless of the Python
# version, as required by `split -l`.
Nsplitline = 1 + (LR_SR_map_NR // Nthread2)

# One suffix per correction thread, in the order `split` generates them:
# .aa, .ab, ..., .az, .ba, ...
# ascii_lowercase is used because string.lowercase depends on the locale.
# The list is deliberately kept at Nthread2 entries: writetmp.py is given
# Nthread2 as well (presumably it produces one _tmp file per entry - TODO
# confirm).
letters = string.ascii_lowercase
ext2_ls = ['.' + letters[n // 26] + letters[n % 26] for n in range(Nthread2)]

splitLR_SR_map_cmd = "split -l " + str(Nsplitline) + " " + temp_foldername + "LR_SR.map" + ' ' + temp_foldername + "LR_SR.map" + "."
os.system(splitLR_SR_map_cmd)

print(str(datetime.datetime.now() - t0))
##########################################

print("===write LR_SR.map.??_tmp:===")

map_file = temp_foldername + "LR_SR.map"

# writetmp.py arguments: the map file, the LR and SR base names inside the
# temp folder, and the number of correction threads.
os.system(' '.join([
    bin_path + "writetmp.py",
    map_file,
    temp_foldername + LR_filename,
    temp_foldername + SR_filename,
    str(Nthread2),
]))
print(str(datetime.datetime.now() - t0))

####################
# Delete the split map pieces in the background.
for ext in ext2_ls:
    os.system("rm " + map_file + ext + " &")
####################

##########################################change correct_while_piece.py  to correct_for_piece.py 
print("===correct.py LR_SR.map.??_tmp :===")

# Run correct_nonredundant.py on every LR_SR.map.<suffix>_tmp file, one
# thread per piece; its stdout is kept in LR_SR.map_emtry_ls.<suffix>.
map_file = temp_foldername + "LR_SR.map"
T_correct_for_piece_ls = []
for ext in ext2_ls:
    correct_cmd = ' '.join([
        bin_path + "correct_nonredundant.py",
        map_file + ext + "_tmp",
        ">",
        map_file + "_emtry_ls" + ext,
    ])
    worker = threading.Thread(target=os.system, args=(correct_cmd,))
    worker.start()
    T_correct_for_piece_ls.append(worker)

for worker in T_correct_for_piece_ls:
    worker.join()

print(str(datetime.datetime.now() - t0))

####################
# Delete the _tmp input files in the background.
for ext in ext2_ls:
    os.system("rm " + map_file + ext + "_tmp &")
####################

##########################################

# The three per-piece result sets are handled identically: the pieces are
# concatenated into <kind>_LR_SR.map.fa in the output folder.
result_kinds = ("full", "corrected", "uncorrected")

for kind in result_kinds:
    merged_name = kind + "_LR_SR.map.fa"
    print("===cat " + merged_name + " :===")
    pieces = [temp_foldername + kind + "_LR_SR.map" + ext + "_tmp" for ext in ext2_ls]
    os.system("cat " + ' '.join(pieces) + " > " + output_foldername + merged_name)

####################
# Delete the per-piece result files in the background, piece by piece.
for ext in ext2_ls:
    for kind in result_kinds:
        os.system("rm " + temp_foldername + kind + "_LR_SR.map" + ext + "_tmp &")
####################
####################

####################
# Collect the per-piece log files in <temp folder>/log.
log_dir = temp_foldername + "log"
os.system("mkdir " + log_dir)

# convertNAV logs (one per SR piece) first, then the correction logs
# (one per map piece).
log_files = [temp_foldername + "SR.fa" + ext + ".cps.convertNAV.log" for ext in ext_ls]
log_files += [temp_foldername + "LR_SR.map_emtry_ls" + ext for ext in ext2_ls]
for log_file in log_files:
    os.system("mv " + log_file + " " + log_dir)

####################

##########################################
