# Add CE and UTR status to CrypSplice results #
# Pass the CDS, 5U and 3U bed files, in that order, as the three command-line arguments #

import os, sys, glob
import pandas as pd
import numpy as np

def check_CDS_UTR(row,rdf):
	"""Return 1 if a flank of the row's interval lies inside a region of its gene, else 0.

	Args:
		row: a record supporting ``row['Gene']``, ``row['chr_st']`` and
			``row['chr_ed']``.
		rdf: DataFrame of regions with at least the columns ``gene``,
			``chr_st`` and ``chr_ed``.

	Returns:
		int: 1 when ``row['chr_st'] - 1`` or ``row['chr_ed'] + 1`` falls within
		the inclusive range ``[chr_st, chr_ed]`` of at least one region whose
		``gene`` equals ``row['Gene']``; 0 otherwise, including when the gene
		has no regions at all.
	"""
	# Only regions annotated with the same gene are candidates.
	trdf=rdf[rdf['gene']==row['Gene']]
	if trdf.shape[0] < 1:
		return 0
	# The positions tested are the bases immediately outside the interval.
	left=row['chr_st']-1
	right=row['chr_ed']+1
	region_st=trdf['chr_st']
	region_ed=trdf['chr_ed']
	# Column-wise comparison: one pass over all candidate regions instead of a
	# Python-level call per region.
	left_hit=(region_st <= left) & (left <= region_ed)
	right_hit=(region_st <= right) & (right <= region_ed)
	return 1 if (left_hit | right_hit).any() else 0

def scanRDF(row2,st,ed):
	"""Return 1 if ``st - 1`` or ``ed + 1`` lies inside the region ``row2``, else 0.

	Args:
		row2: a record supporting ``row2['chr_st']`` and ``row2['chr_ed']``,
			read as the inclusive bounds of a region.
		st: start coordinate of the query interval.
		ed: end coordinate of the query interval.

	Returns:
		int: 1 when either flanking position is within
		``[row2['chr_st'], row2['chr_ed']]``, otherwise 0.
	"""
	upstream=st-1
	downstream=ed+1
	# Position just before the interval start.
	if row2['chr_st'] <= upstream <= row2['chr_ed']:
		return 1
	# Position just after the interval end.
	if row2['chr_st'] <= downstream <= row2['chr_ed']:
		return 1
	return 0

def add_CDS_UTR(df,ce,u5,u3):
	"""Return a copy of ``df`` with 0/1 columns ``CDS``, ``5U`` and ``3U`` appended.

	Each of the three files is read as a headerless, tab-separated table with
	the columns chr, chr_st, chr_ed, gene, isoform, strand, and every row of
	``df`` is checked against it with ``check_CDS_UTR``.

	Args:
		df: results table; its rows must provide whatever ``check_CDS_UTR``
			reads from them. The input frame itself is not modified.
		ce: path of the region file used for the ``CDS`` column.
		u5: path of the region file used for the ``5U`` column.
		u3: path of the region file used for the ``3U`` column.

	Returns:
		pandas.DataFrame: the copy of ``df`` with the three int64 flag columns
		added in the order CDS, 5U, 3U.
	"""
	bed_columns=['chr','chr_st','chr_ed','gene','isoform','strand']
	tdf=df.copy()
	for label,bed_path in (('CDS',ce),('5U',u5),('3U',u3)):
		rdf=pd.read_csv(bed_path,sep="\t",index_col=None,header=None,names=bed_columns)
		# Build the flags row by row rather than with DataFrame.apply(axis=1):
		# on a table with no rows apply hands back a DataFrame instead of a
		# Series, and that cannot be assigned to a single column.
		flags=[check_CDS_UTR(row,rdf) for _,row in tdf.iterrows()]
		# Positional assignment with a fixed dtype, so an empty table still
		# gets an integer column.
		tdf[label]=np.asarray(flags,dtype='int64')
	return tdf

# For each CrypSplice results file (*DEIn.2.txt) in the current directory, add
# the CDS/5U/3U columns and write the table to a sibling file ending in ".3.txt".
# sys.argv[1], sys.argv[2], sys.argv[3] are passed on as the CDS, 5U and 3U files.
files=glob.glob("*DEIn.2.txt")
for file in files:
	df=pd.read_csv(file,sep="\t",index_col=None)
	df=add_CDS_UTR(df,sys.argv[1],sys.argv[2],sys.argv[3])
	# Missing values are written out as the literal text NA.
	df.fillna('NA', inplace=True)
	# Swap only the trailing ".txt" (guaranteed by the glob pattern) for ".3.txt".
	# str.replace would also rewrite any earlier ".txt" in the name, and would
	# leave the name unchanged -- overwriting the input -- if the suffix differed
	# in case.
	df.to_csv(file[:-len(".txt")]+".3.txt",sep="\t",index=False)
