#!/usr/bin/python

import re, sys, time
import os
# Work from the directory that holds this script, because the input and output
# files are opened by bare file name.
# abspath() is required: when the script is started as "python script.py",
# __file__ has no directory part, dirname() returns '' and os.chdir('') fails.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

class GetData:
    """Interactively collect the settings for one turnaround-time (TAT) analysis.

    Attributes set by the constructor:
        target   -- target turnaround time in minutes; 0 means "no target".
        maxtime  -- upper time limit (minutes) for the distribution; >= 1.
        binsize  -- width (minutes) of each TAT subgroup; >= 1, not larger
                    than ``maxtime`` and an exact divisor of both ``target``
                    and ``maxtime``.
    """

    def __init__(self, ask=None):
        """Prompt until three mutually consistent values have been entered.

        ask -- optional callable that takes the prompt text and returns the
               reply as a string. Defaults to the console prompt
               (``raw_input`` on Python 2, ``input`` on Python 3).

        Raises SystemExit when the subgroup width entered is below 1; the
        prompt advertises 0 as the way to quit.
        """
        if ask is None:
            # Python 2's input() evaluates what the user types, so the plain
            # text reader is used on both interpreter generations.
            try:
                ask = raw_input
            except NameError:
                ask = input

        # 0 is a legal target ("no target"), negative values are not.
        target = self._ask_whole_number(
            ask,
            "   Enter the target turnaround time in minutes (0 for no target): ",
            0,
            "Enter only a whole number, 0 or larger (integer)")
        self.target = target

        # The limit must be at least 1, otherwise no subgroup width can fit.
        maxtime = self._ask_whole_number(
            ask,
            "   Enter the upper time limit for TAT distributions (minutes):    ",
            1,
            "Enter only a whole number larger than 0 (integer)")
        self.maxtime = maxtime

        while 1:
            try:
                binsize = int(ask("   Enter the width of the TAT subgroups in minutes (0 to quit):   "))
            except ValueError:
                print("Enter only a whole number (integer)")
                continue
            if binsize < 1:
                sys.exit()
            if binsize > maxtime:
                print("\nThe upper time limit for distributions must be equal to or larger")
                print("than the subgroup size. Please try another subgroup size or quit")
                print("to re-enter starting data.")
                continue
            if target % binsize:
                print("\nThe width of the subgroups should divide evenly into the lab turnaroud time target")
                print("Please try another group width or quit to re-enter starting data")
                continue
            if maxtime % binsize:
                print("\nThe width of the subgroups does not divide evenly into the")
                print("distribution time limit entered above. Please try another")
                print("subgroup width or quit to re-enter the starting data.")
                continue
            break
        self.binsize = binsize

    def _ask_whole_number(self, ask, prompt, lowest, complaint):
        """Prompt until the reply is an integer >= lowest and return it.

        ``complaint`` is printed after every rejected reply.
        """
        while 1:
            try:
                value = int(ask(prompt))
            except ValueError:
                value = None
            if value is not None and value >= lowest:
                return value
            print(complaint)
		

class TAT_distribution:
    """Histogram of turnaround times (TATs) for one analysis and time-of-day slot.

    ``tatList`` holds one list per bin. The first element of every bin is its
    printable title; every following element is a data row (a sequence of
    strings) whose TAT fell into that bin. The last bin collects every TAT
    larger than ``maxtime``.
    """

    def __init__(self, data):
        """Copy target, maxtime and binsize from ``data`` and build empty bins.

        data -- any object exposing ``target``, ``maxtime`` and ``binsize``
                attributes (minutes), such as a GetData instance.
        """
        self.target = data.target
        self.maxtime = data.maxtime
        self.binsize = data.binsize
        self.title = ""                # filled in by setTitle()
        self.TAT_index = 0             # column of the data rows that holds the TAT
        self.avg_in_target = 0
        self.num_in_target = 0
        self.avg_overall = 0
        self.num_overall = 0
        self.tatList = []
        self.number = 1                # for time groupings, the number in the sequence of groupings
        self.time_grouping = 24        # the width of the time grouping (hours)
        self.setupTATList()

    def setTitle(self, text):
        """Set the title; a time-of-day range is appended for groupings under 24 h.

        Uses the current ``number`` and ``time_grouping``, so call setNumber()
        and set_time_grouping() first.
        """
        self.title = text
        if self.time_grouping < 24:
            self.title += ", " + str((self.number - 1) * self.time_grouping) + "00 to " + str(self.number * self.time_grouping) + "00 hours"

    def setNumber(self, num):
        """Set the 1-based position of this distribution among the time groupings."""
        self.number = num

    def set_time_grouping(self, gr):
        """Set the width of the time-of-day grouping in hours."""
        self.time_grouping = gr

    def getTitle(self):
        """Return the title set by setTitle()."""
        return self.title

    def getTarget(self):
        """Return the target time as a string."""
        return str(self.target)

    def addData(self, data, TAT_index):
        """Sort data rows into the bins of ``tatList`` and refresh the averages.

        data      -- iterable of rows; row[TAT_index] is the TAT in minutes and,
                     when ``time_grouping`` is below 24, row[6] is an "HH:MM"
                     time whose hour selects the time-of-day slot.
        TAT_index -- column of each row that holds the TAT to analyze.
        """
        self.TAT_index = TAT_index
        by_hour = self.time_grouping < 24
        start = (self.number - 1) * self.time_grouping
        end = self.number * self.time_grouping
        for dataLine in data:
            if by_hour:
                hr = int(dataLine[6].split(":")[0])
                if hr < start or hr >= end:
                    continue
            tat = int(dataLine[TAT_index])
            if tat > self.maxtime:
                self.tatList[-1].append(dataLine)
            else:
                # Bin k is titled "k*binsize+1 .. (k+1)*binsize" (bin 0 also
                # holds 0), so a TAT equal to a bin's upper edge belongs to
                # that bin, not the next one.
                self.tatList[max(tat - 1, 0) // self.binsize].append(dataLine)
        self.calcAvgs()

    def setupTATList(self):
        """Create the empty, titled bins: 0..maxtime in binsize steps, then an overflow bin."""
        self.tatList = []
        newboundary = self.binsize
        oldboundary = 0
        while newboundary <= self.maxtime:
            binTitle = '%3d-%3d min' % (oldboundary, newboundary)
            self.tatList.append([binTitle])
            oldboundary = newboundary + 1
            newboundary = oldboundary + self.binsize - 1
        self.tatList.append(['   >%3d min' % (self.maxtime)])

    def calcAvgs(self):
        """Recompute counts and average TATs, within target and overall.

        Every call starts from zero, so repeated addData() calls stay correct.
        An average with no contributing rows is reported as 0.0.
        """
        sum_in_target = 0
        sum_overall = 0
        count_in_target = 0
        count_overall = 0
        for bin_rows in self.tatList:
            for row in bin_rows[1:]:          # element 0 is the bin title, not data
                try:
                    tat = int(row[self.TAT_index])
                except (ValueError, IndexError, TypeError):
                    continue                  # row without a usable TAT
                if self.target and tat <= self.target:
                    sum_in_target += tat
                    count_in_target += 1
                sum_overall += tat
                count_overall += 1

        self.num_in_target = count_in_target
        self.num_overall = count_overall
        # The two averages are computed independently so that an empty
        # in-target group cannot wipe out the overall average.
        if count_in_target:
            self.avg_in_target = sum_in_target / float(count_in_target)
        else:
            self.avg_in_target = 0.0
        if count_overall:
            self.avg_overall = sum_overall / float(count_overall)
        else:
            self.avg_overall = 0.0

    def display(self):
        """Return the text report: summary figures plus a bar graph of the bins.

        Percentages are reported as 0 when the distribution holds no rows.
        """
        total = self.num_overall
        numOut = total - self.num_in_target

        def percent_of_total(count):
            if not total:
                return 0.0
            return 100.0 * count / total

        # Longest bin without its title; bars are scaled down so that the
        # longest one stays within about 36 characters.
        maxLength = max(len(bin_rows) for bin_rows in self.tatList) - 1
        divisor = maxLength // 36 + 1

        parts = [self.title + "\n\n"]
        if self.target:
            parts.append("Target time:                %3d min\n" % (self.target))
            parts.append("Number in target:          %4d (%.1f%%)\n" % (self.num_in_target, percent_of_total(self.num_in_target)))
            parts.append("Number over target:        %4d (%.1f%%)\n\n" % (numOut, percent_of_total(numOut)))
            parts.append("Avg time for within target: %3d min\n" % (int(self.avg_in_target)))
        parts.append("Overall average time:       %3d min\n\n" % (int(self.avg_overall)))
        parts.append("TAT Distribution Graph:\n\n")

        cum_num = 0
        for i, bin_rows in enumerate(self.tatList):
            if self.target and self.binsize and i == self.target // self.binsize:
                parts.append("-------------------------------------------------Target Time\n")
            length = len(bin_rows) - 1
            cum_num += length
            # Round half up, which is what Python 2's round() did here.
            cum_percent = int(percent_of_total(cum_num) + 0.5)
            parts.append(bin_rows[0] + " " + "]" * (length // divisor)
                         + (" (%d, %d%%)" % (length, cum_percent)) + "\n")
        parts.append("\n")
        parts.append("Parentheses show number in subgroup and cumulative percent.\n")
        return "".join(parts)

    def getFollowup(self):
        """Return the rows whose TAT exceeds the target, sorted by increasing TAT.

        The result is tab-delimited text headed by the title, or None when no
        target (or no bin size) is set.
        """
        if not (self.target and self.binsize):
            return None

        def tat_of(row):
            return int(row[self.TAT_index])

        over_target = [row
                       for bin_rows in self.tatList
                       for row in bin_rows[1:]
                       if tat_of(row) > self.target]
        over_target.sort(key=tat_of)

        lines = [self.title + ", TAT over target, sorted by increasing TAT\n"]
        for row in over_target:
            lines.append("\t".join(row) + "\n")
        return "".join(lines) + "\n"

#--------------------------------------------Main Program-------------------------------------------

accession = re.compile(r'[XMTWHFS]\d+ ')
fieldSearch = re.compile((r'([XMTWHFS]\d+)\s{4}(.*?)(\d{1,4} \S{4,7}) *(\d\d/\d\d) (\d\d:\d\d)\*? *')+
                         (r'(\d\d/\d\d) (\d\d:\d\d) *(\d+) +(\d+) +(\d+) +(\d+) +(\d+) +(\d+)\*?\s+([A-Z0-9]+) +([A-Z0-9]+) +')+
                         (r'(\d{3,4} \S{4,5}) +(\d\d/\d\d) (\d\d:\d\d) +(\d\d/\d\d) (\d\d:\d\d) +(\S+)\s*'))
titlePattern = re.compile(r'\s+\d\d:\d\d\s+(TURNAROUND TIME REPORT FOR \d\d/\d\d/\d\d\d\d TO \d\d/\d\d/\d\d\d\d)')
selectedTests = re.compile(r'\s+SELECTED TESTS\s+')
text = re.compile(r'\s+\S+')

# Plain-text prompt under either interpreter generation. Python 2's input()
# evaluates the reply (and fails on an empty line), so it is never used.
try:
    read_line = raw_input
except NameError:
    read_line = input

print("This program analyzes Mysis TAT reports and prints TAT distributions and")
print("outlier lists. The TAT report should be in the same directory as this")
print("program and named TAT-INPUT.TXT. The output file will be saved in the")
print("same directory as this program and will be named TAT-ANALYSIS_DATE_TIME.XLS")
print("where date and time are the date and time the file was created. The analysis")
print("file is tab-delimited and can be opened in Excel.\n")

if read_line('Continue with analysis (y/n)? ') in ('n', 'N'):
    sys.exit()

lab = read_line('\nEnter lab or hospital code (must match code in input file): ')
if not lab:
    sys.exit()
lab = lab.upper()
print("")

try:
    with open('TAT-INPUT.TXT') as tatFile:
        fileList = tatFile.readlines()
except IOError:
    print("Can't find file in my directory named TAT-INPUT.TXT. Quitting now.")
    sys.exit()

# Report title: first line that matches the report header.
title = None
for line in fileList:
    found = titlePattern.match(line)
    if found:
        title = found.group(1)
        break
if title is None:
    title = "Report title not found"

# Test list: first non-blank, indented line after the "SELECTED TESTS" heading.
tests = None
after_heading = False
for line in fileList:
    if after_heading:
        if text.match(line):
            tests = " ".join(line.split())
            break
    elif selectedTests.match(line):
        after_heading = True
if tests is None:
    tests = read_line("\nEnter the test code being analyzed: ")
    if tests:
        tests = tests.upper()
    else:
        tests = "Tests not specified"

# A specimen spans two lines: an accession line followed by a line that starts
# with the lab code. The joined pair is split into the 21 columns named in the
# header written further down.
accessionLine = ""
tatList = []
unparsed = 0
lab_prefix = ' ' + lab + ' '
for line in fileList:
    if accessionLine and line[:len(lab_prefix)] == lab_prefix:
        fields = fieldSearch.match(accessionLine.strip() + " " + line.strip())
        if fields:
            tatList.append(fields.groups())    # the pattern has exactly 21 groups
        else:
            unparsed += 1                      # layout differs from the expected one
        accessionLine = ""
    elif accession.match(line):
        accessionLine = line
if unparsed:
    print("Warning: %d specimen record(s) could not be parsed and were skipped." % unparsed)

while 1:
    locations = read_line("Location codes (comma separated list or return for all): ")
    if not locations:
        locations = ["All in report"]
        print("All locations in report.")
        break
    locations = [code.strip() for code in locations.split(",") if code.strip()]
    locList = [line for line in tatList if line[14] in locations]   # column 14: 'Location'
    if not locList:
        print("   Location code(s) not found in file.")
        continue
    tatList = locList
    break

print("")
time_grouping = 24
if read_line("Divide TAT distributions by time of day (y/n)? ") in ("y", "Y"):
    while 1:
        try:
            time_grouping = int(read_line("   Length of division in hours:                "))
        except ValueError:
            print("Enter number of hours for groups (integers only).")
            continue
        if time_grouping < 1 or 24 % time_grouping:
            print("The number of hours for grouping must divide evenly into 24 hours.")
            continue
        break
print("")

numGroups = 24 // time_grouping

# Each entry: question to ask, title of the analysis, column of tatList analyzed.
analysisChoices = (
    ("Calculate in-lab turnaround time (y/n)? ", "In-lab turnaround time", 11),
    ("Calculate collect-to-result turnaround time (y/n)? ", "Collect-to-result time", 10),
    ("Calculate order-to-collect turnaround time (y/n)? ", "Order-to-collect time", 7),
    ("Calculate collect-to-receipt turnaround time (y/n)? ", "Collect-to-receipt time", 9),
)

analysisList = []
for question, heading, column in analysisChoices:
    if read_line(question) not in ('y', 'Y'):
        continue
    userdata = GetData()
    for number in range(1, numGroups + 1):
        distribution = TAT_distribution(userdata)
        distribution.setNumber(number)
        distribution.set_time_grouping(time_grouping)
        distribution.setTitle(heading)
        distribution.addData(tatList, column)
        analysisList.append(distribution)

if not analysisList:
    sys.exit()

print("\n")

outputText = "Analysis:       " + title + "\n"
outputText += "Test(s):        " + tests + "\n"
outputText += "Total samples:  " + str(len(tatList)) + "\n"
outputText += "Locations:      " + ", ".join(locations) + "\n\n"
outputText = outputText + ("=" * 60) + "\n"
for distribution in analysisList:
    outputText += (distribution.display() + "\n")
    outputText = outputText + ("=" * 60) + "\n\n"

print(outputText)

print("Writing output file...")

#Column names in tatList:

outputText += "\t".join(('Acc #', 'Physician', 'Col Tech', 'Order Date', 'Order Time', 'Rec Date', 'Rec Time',
                         'Ord-Col', 'Ord-Rec', 'Col-Rec', 'Col-Res', 'Rec-Res', 'Ord-Res', 'Hospital', 'Location',
                         'Result Tech', 'Col Date', 'Col Time', 'Result Date', 'Result Time', 'Order Code'))
outputText += "\n\n"

for distribution in analysisList:
    followup = distribution.getFollowup()
    if followup:                      # None when the analysis has no target time
        outputText += followup

outFileName = 'TAT-ANALYSIS_' + time.strftime('%y%m%d_%H%M%S') + '.XLS'

with open(outFileName, 'w') as outFile:
    outFile.write(outputText)

print("Analysis complete.")
read_line("Press ENTER to quit.")
