#!/usr/bin/python

import sys
import os
from numpy import *
import datetime

# Aligner label; not read anywhere in the visible part of this script.
aligner = "bowtie"

# Three positional arguments are required: dist_file, L and nav_file.
# Anything shorter prints the usage text and exits with status 1.
if len(sys.argv) < 4:
    print("usage: python convertNAV.py dist_file L nav_file")
    print("or ./convertNAV.py dist_file L nav_file")
    sys.exit(1)

dist_filename = sys.argv[1]
L = int(sys.argv[2])  # integer offset subtracted from hit positions later on
nav_filename = sys.argv[3]

################################################################################
def getstrand(flag):
    """Return the strand sign (1 or -1) encoded by ``flag``.

    ``flag`` is either the literal string "+" or "-", or any value that
    ``int()`` accepts (an integer or a numeric string).  For numeric flags
    the result is -1 when the bit with value 16 is set and 1 otherwise.
    (Presumably this is the SAM FLAG "reverse strand" bit -- confirm
    against the aligner output this script is fed.)

    The result is always an ``int``; the previous implementation returned
    a float for numeric flags and an int for "+"/"-".

    Raises:
        ValueError: if ``flag`` is a string that is neither "+", "-" nor
            a valid integer literal.
    """
    if flag == "+":
        return 1
    if flag == "-":
        return -1

    flag = int(flag)
    # Test the bit directly instead of indexing into the text of bin().
    # Non-positive values keep returning 1, as they always did.
    if flag > 0 and flag & 16:
        return -1
    return 1

################################################################################
# Load the dist file.  Every non-blank line is split on tabs and read as:
#   field 0 -> read_name, field 1 -> chr_name,
#   field 2 -> end_pt (int), field 3 -> start_pt (int)
read_name_dict = {}         # chr_name -> read names, in file order
dist_sortedls_dict = {}     # chr_name -> end points (other phases append to and sort this list)
LR_dist_sortedls_dict = {}  # chr_name -> end points only, parallel to read_name_dict
low_chr_dict = {}           # chr_name -> {read_name: start_pt}
LR_SR_mapping = {}          # read_name -> list, created empty here

# The with-block guarantees the handle is closed even if a line fails to parse.
with open(dist_filename, 'r') as dist:
    # Joined with a space to reproduce the separator the original
    # two-argument print statement emitted; works on Python 2 and 3.
    print(" ".join(("loading dist file: ", dist_filename)))
    t0 = datetime.datetime.now()

    for line in dist:
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line used to abort with IndexError.
            continue
        line_list = line.split("\t")
        read_name = line_list[0]
        chr_name = line_list[1]
        end_pt = int(line_list[2])
        start_pt = int(line_list[3])
        if chr_name not in dist_sortedls_dict:
            # First record for this chromosome: create all per-chromosome containers.
            dist_sortedls_dict[chr_name] = []
            LR_dist_sortedls_dict[chr_name] = []
            read_name_dict[chr_name] = []
            low_chr_dict[chr_name] = {}
        dist_sortedls_dict[chr_name].append(end_pt)
        LR_dist_sortedls_dict[chr_name].append(end_pt)
        read_name_dict[chr_name].append(read_name)
        low_chr_dict[chr_name][read_name] = start_pt
        LR_SR_mapping.setdefault(read_name, [])

print("finish " + dist_filename)
print("#####get chr_name, start_pt and and end_pt from dist file: " + str(datetime.datetime.now()-t0))

for chr_name in dist_sortedls_dict:
    print("the number of LONG READS in each pseduo chromsome " + chr_name + ":\t" + str(len(dist_sortedls_dict[chr_name])))

################################################################################
#30003   0       Pac1    2168303 36      9M26I18M        *       0       0       NAGCTGCTGCACATGCATCTGCTCAGCTCAGTACATCTGCTCTAGCACTGCAG   *       NM:i:27 XM:i:1  XO:i:1  XG:i:26 H0:i:1  H1:i:2  XL:i:50 XR:i:0  XD:i:7  XT:A:U  MD:Z:1T25   XA:Z:Pac1,-29241423,14M2D22M11I6M,21;Pac2,-29122083,8M22I17M1I5M,27;
################################################################################

# First pass over the nav file: for every usable record, append its hit
# position to the position list of its chromosome (if that chromosome is
# already known in dist_sortedls_dict).
# The with-block guarantees the handle is closed even if a record fails to parse.
with open(nav_filename, 'r') as nav:
    # Joined with a space to reproduce the separator the original
    # two-argument print statement emitted; works on Python 2 and 3.
    print(" ".join(("loading nav file: ", nav_filename)))
    t0 = datetime.datetime.now()

    hit_dict = {}  # never read in the visible code; name kept in case other code expects it
    for line in nav:
        if line.startswith('#'):
            # Header/comment lines carry no hit.
            continue
        line_list = line.strip().split()
        if not line_list:
            # A blank line used to abort with IndexError.
            continue
        if line_list[-1] in ("NM", "QC"):
            # Records whose last field is "NM" or "QC" are ignored.
            continue
        chr_name = line_list[7].strip('>')
        pos = int(line_list[8])
        if chr_name in dist_sortedls_dict:
            dist_sortedls_dict[chr_name].append(pos)

print("finish " + nav_filename)
print("#####add hits from nav:" + str(datetime.datetime.now()-t0))

################################################################################

# Sort every per-chromosome position list in place and report how long it took.
t0 = datetime.datetime.now()

for chr_name, positions in dist_sortedls_dict.items():
    positions.sort()

print("#####sort hits + end_pt:" + str(datetime.datetime.now() - t0))

################################################################################

# Build chr_hit_LRname_dict: chr_name -> {position: long-read name}.
#
# For each chromosome the positions in dist_sortedls_dict are walked in
# list order while a cursor moves through the long reads in the order they
# were recorded.  The cursor advances by exactly one long read whenever a
# position is greater than the current long read's end point.
# NOTE(review): this relies on dist_sortedls_dict being sorted ascending and
# on the recorded end points being in ascending order as well -- confirm for
# real dist files.  A position beyond the last end point raises IndexError.
t0 = datetime.datetime.now()
chr_hit_LRname_dict = {}

for chr_name in LR_dist_sortedls_dict:
    lr_end_points = LR_dist_sortedls_dict[chr_name]
    lr_names = read_name_dict[chr_name]
    pos_to_lr_name = {}
    chr_hit_LRname_dict[chr_name] = pos_to_lr_name

    cursor = 0
    current_name = lr_names[cursor]
    current_end = lr_end_points[cursor]
    for pos in dist_sortedls_dict[chr_name]:
        if pos > current_end:
            cursor += 1
            current_name = lr_names[cursor]
            current_end = lr_end_points[cursor]
        pos_to_lr_name[pos] = current_name

print("#####get LONG READ names:" + str(datetime.datetime.now() - t0))

################################################################################

# Second pass over the nav file: echo header lines, and for every usable
# record whose chromosome is known, record the short read under the long
# read that owns its hit position, with a position made relative to that
# long read (pos - L - long read start).
# NOTE(review): this pass splits records on tabs only, while the position
# lookup table was filled from a whitespace split of the same file; confirm
# both yield the same fields for real inputs.
# The with-block guarantees the handle is closed even if a record fails to parse.
with open(nav_filename, 'r') as nav:
    # Joined with a space to reproduce the separator the original
    # two-argument print statement emitted; works on Python 2 and 3.
    print(" ".join(("loading nav file: ", nav_filename)))
    t0 = datetime.datetime.now()

    hit_dict = {}  # never read in the visible code; name kept in case other code expects it
    for line in nav:
        if line.startswith('#'):
            print(line.strip())
            continue

        stripped = line.strip()
        if not stripped:
            # A blank line used to abort with IndexError.
            continue
        line_list = stripped.split('\t')
        if line_list[-1] in ("NM", "QC"):
            # Records whose last field is "NM" or "QC" are ignored.
            continue

        chr_name = line_list[7].strip('>')
        pos = int(line_list[8])
        # Field 9 equal to 'R' selects the negative strand; anything else is positive.
        strand = -1 if line_list[9] == 'R' else 1

        if chr_name not in chr_hit_LRname_dict:
            print("no " + chr_name)
            print(line.strip())
            continue

        # Short-read name without '>' markers, prefixed with '-' on the negative strand.
        short_read = line_list[0].strip('>')
        if strand < 0:
            short_read = '-' + short_read
        line_list[0] = short_read

        # Replace chromosome/position by long-read name and relative position.
        line_list[7] = chr_hit_LRname_dict[chr_name][pos]
        line_list[8] = str(pos - L - low_chr_dict[chr_name][line_list[7]])
        if line_list[8][0] == '-':
            print("5' extension mapping")

        # A last field of '.' is stored as an empty string.
        extra = "" if line_list[-1] == '.' else line_list[-1]
        LR_SR_mapping[line_list[7]].append([line_list[0], line_list[8], extra])

print("#####write changed chr_name + pos:" + str(datetime.datetime.now()-t0))

################################################################################
# Write the long-read -> short-read mapping to "<nav_filename>.map".
# One line per long read:
#   LR_name <TAB> entry;entry;...      with entry = name,position,extra
# Long reads without any recorded entry still get a line (name, tab, newline).
LR_SR_mapping_filename = nav_filename + ".map"
t0 = datetime.datetime.now()

# The with-block closes (and flushes) the output even if a write fails.
with open(LR_SR_mapping_filename, 'w') as LR_SR_mapping_file:
    for LR in LR_SR_mapping:
        temp_SR_ls = [SR[0] + ',' + str(SR[1]) + ',' + SR[2]
                      for SR in LR_SR_mapping[LR]]
        LR_SR_mapping_file.write(LR + '\t' + ';'.join(temp_SR_ls) + '\n')

print("#####write LR_SR_mapping to file:" + str(datetime.datetime.now()-t0))

