Blog

Learning Python: Data Science (Day 4)

14. April 2019 | 5 Minuten zum Lesen

Tags: tags

As expected, after the data is nicely ordered and made accessible the actual evaluation is not that much of a problem. The first thing I have started with today was to test the script against a larger log file - which, of course, revealed some problems in the parsing routine:

  • There was only a check whether the current line contains playerID. This, however, also returns lines where playerID is being attacked. Moving the if query and checking the actual position of playerID in the string fixed this.
  • To produce less output I have added a PVP flag to prevent enemy players from being parsed.
  • Yesterday I already added a check whether a target was hit by a spell. With the larger log, some targets were not even part of an event. A second if query fixed that.

The automatic sizing of the output table was also buggy for the target names, which was not apparent when there were only targets with the same name. One should always keep in mind to check parsing code against different raw data sets to be prepared against such “exceptions”.

The new part implemented today was a plot of the total damage over time against one specific target, using matplotlib.

Plot

To create these plots I had to:

  1. Gather all damage entries for every event against one (user-selected) target
  2. Sort the entries by their timestamp, for which a tuple of time and damage had to be created
  3. Replace the entries by the total sum of damage until each timestamp
  4. Convert the timestamps from their raw format (4/14 12:37:26.809) to the time delta between consecutive entries by converting the strings to seconds (see the convertToSeconds function)

TIL:

  • command line argument parsing
  • user input
  • tuple lists/zip
  • lambda functions
  • plotting with matplotlib
#!/usr/bin/python
import re
import sys
import matplotlib.pyplot as plt

# PVP filter flag: 1 by default, 0 when the first command line argument is "-pvp".
# The one-element slice is empty when no argument was given, so no length check is needed.
noPVP = 0 if sys.argv[1:2] == ["-pvp"] else 1


def main():
	"""Parse ``combat.log`` and report the configured player's spell damage.

	Steps:
	1. Read the log and collect, per event type, target ID and spell name,
	   the ``[timestamp, damage]`` entries caused by the player.
	2. Print, for every event type in ``logEvents``, a table with the average
	   damage of each spell against each target.
	3. Ask the user to pick a target and plot the cumulative damage against
	   that target over time with matplotlib.

	Reads the module-level ``noPVP`` flag: when it is 1, targets whose ID
	starts with "Player" or "Pet" are skipped.

	Returns None. When the log file cannot be opened, or no target was found,
	a message is printed and the function returns early.
	"""
	# VARIABLES
	inputFile = "combat.log"
	playerName = "Xiluthus"
	playerServer = "Antonidas"
	logEvents = ["SPELL_DAMAGE", "SPELL_PERIODIC_DAMAGE"]
	targetList = []
	targetDictionary = {}
	spellList = {e: [] for e in logEvents}
	# Pre-create one bucket per event so later lookups cannot raise KeyError
	# when an event type never shows up in the log.
	logData = {e: {} for e in logEvents}

	playerID = playerName+"-"+playerServer
	# the character name field is wrapped in double quotes in the log
	quotedPlayerID = "\""+playerID+"\""

	# Open input; without the file there is nothing to do, so stop here
	# instead of failing later on an undefined file handle.
	try:
		inFH = open(inputFile, "rt")
	except IOError:
		print ("\n\n\tERROR (input): Could not open/find",inputFile,"\n")
		return

	print("\n\nParsing file ",inputFile,"...",sep="",end="")

	# loop through file and do the parsing; "with" closes the handle even on errors
	with inFH:
		for line in inFH:
			if not re.match(r"^\d+",line):
				continue
			if not any(re.search(e,line) for e in logEvents):
				continue

			splitLine = line.split(",")
			# splitLine now contains:
			# 0: timestamp + event name
			# 2: character name
			# 5: target id
			# 6: target name
			# 10: spell name
			# 29: damage
			if len(splitLine) < 30:
				# too short to carry a damage field
				continue

			if splitLine[2] != quotedPlayerID:
				continue

			# dirty hack: split at double whitespace
			headParts = splitLine[0].split("  ")
			if len(headParts) != 2:
				continue
			timeStamp, eventName = headParts

			# the regex search above also accepts events that merely contain
			# one of the wanted names; only exact matches are evaluated
			if eventName not in logEvents:
				continue

			# not really necessary, but just for easy reading
			targetID = splitLine[5]
			targetName = splitLine[6]
			spellName = splitLine[10]
			damageValue = splitLine[29]

			# skip player targets
			if noPVP == 1 and targetID.startswith(("Player","Pet")):
				continue

			# add to dictionary: event -> target -> spell -> list of entries
			logData[eventName].setdefault(targetID, {}).setdefault(spellName, []).append([timeStamp,damageValue])

			# filter some info right here
			# get unique spells and targets (first-seen order is kept)
			if spellName not in spellList[eventName]:
				spellList[eventName].append(spellName)
			# create target ID<->name dictionary
			if targetID not in targetDictionary:
				targetDictionary[targetID] = targetName
				targetList.append(targetID)

	print("done\n")

	# 1. get average dmg for all spells
	# the header row depends only on the targets, so build it once
	targetNameLength = getTargetNameLength(targetList,targetDictionary)
	targetLine = "".join(" "+targetDictionary[t].center(targetNameLength)+" |" for t in targetList)

	for e in logEvents:
		print ("Evaluation for event "+e+":\n")

		spellNameLength = getSpellnameLength(spellList[e])
		print(" " * spellNameLength+" |"+targetLine)

		for s in spellList[e]:
			outLine = s.ljust(spellNameLength)+" |"
			for t in targetList:
				entries = logData[e].get(t, {}).get(s)
				# targets never hit by this spell are shown as 0
				cell = getAverageDmg(entries) if entries else 0
				outLine = outLine+" "+str(cell).center(targetNameLength)+" |"
			print(outLine)

		print()

	# 2. plot damage over time for one target
	if not targetList:
		# without targets no selection can ever be valid; avoid prompting forever
		print("No targets found for "+playerID+" - nothing to plot.")
		return

	print("Select target for total damage over time plot:")
	for number, t in enumerate(targetList, start=1):
		print("\t"+str(number)+".",targetDictionary[t])

	# keep asking until a number from the list above is entered
	while True:
		selection = input("Which target number?: ")
		if re.match(r"^\d+$",selection) and 0 < int(selection) <= len(targetList):
			break

	selectedTarget = targetList[int(selection)-1]

	# create list of (timestamp, damage) tuples for the selected target
	damageTuple = []
	for e in logEvents:
		spellsOnTarget = logData[e].get(selectedTarget, {})
		for s in spellList[e]:
			for entry in spellsOnTarget.get(s, []):
				damageTuple.append((entry[0],entry[1]))
	# sort list by first value of tuple
	# NOTE(review): timestamps are compared as strings, which is only
	# chronological while the date part is identical and the clock fields are
	# zero-padded - confirm that logs never span several days.
	damageTuple.sort(key=lambda pair:pair[0])

	# replace every entry by (seconds since first entry, damage summed so far)
	plotPoints = []
	totalDamage = 0
	totalTime = 0
	prevTime = None
	for currentTime, currentDamage in damageTuple:
		totalDamage += int(currentDamage)
		if prevTime is not None:
			totalTime += convertToSeconds(prevTime,currentTime)
		prevTime = currentTime
		plotPoints.append((totalTime,totalDamage))

	plt.title(targetDictionary[selectedTarget])
	plt.xlabel("Time / Seconds")
	plt.ylabel("Total Damage")
	# zip(*...) turns the list of (x, y) pairs into one x and one y sequence
	plt.plot(*zip(*plotPoints))
	plt.show()

def convertToSeconds(prev,curr):
	"""Return the seconds elapsed from timestamp ``prev`` to timestamp ``curr``.

	Both arguments are strings made of a date and a clock time separated by
	whitespace, e.g. ``"4/14 12:37:26.809"`` (month/day hours:minutes:seconds).

	When the two date parts differ, the difference in days is added, so two
	entries on either side of midnight yield a small positive value instead
	of a large negative one. The timestamps carry no year, so a span across
	New Year cannot be resolved and still comes out negative. If a date part
	cannot be read as month/day, only the clock times are compared.

	Raises ValueError when a timestamp does not split into exactly a date
	and a ``h:m:s`` clock part.
	"""
	import datetime

	def splitStamp(stamp):
		# -> (date string, seconds since midnight)
		(date, clock) = re.split(r"\s+",stamp)
		(h, m, s) = clock.split(":")
		seconds = float(s) + int(h)*3600
		seconds = seconds + int(m)*60
		return date, seconds

	def dayNumber(date):
		# -> ordinal day for a "month/day" string, or None when unparsable
		try:
			(month, day) = date.split("/")[:2]
			# 2000 is a leap year, so "2/29" is accepted as well
			return datetime.date(2000, int(month), int(day)).toordinal()
		except ValueError:
			return None

	(prevDate, prevSeconds) = splitStamp(prev)
	(currDate, currSeconds) = splitStamp(curr)
	delta = currSeconds - prevSeconds

	# same date (the common case): the clock difference is already the answer
	if prevDate != currDate:
		prevDay = dayNumber(prevDate)
		currDay = dayNumber(currDate)
		if prevDay is not None and currDay is not None:
			delta += (currDay - prevDay) * 86400

	return delta

def getTargetNameLength(data,dict):
	"""Return the length of the longest name that ``dict`` holds for the keys in ``data``.

	``data`` is a sequence of keys and ``dict`` maps each key to a string.
	Returns 0 when ``data`` is empty.
	"""
	return max((len(dict[key]) for key in data), default=0)

def getSpellnameLength(data):
	"""Return the length of the longest string in ``data`` (0 when ``data`` is empty)."""
	return max(map(len, data), default=0)


def getAverageDmg(data):
	"""Return the average damage of a list of log entries, rounded to 2 decimals.

	``data`` is a sequence of ``(timestamp, damage)`` pairs; the damage value
	is converted with ``int()``, so it may be a numeric string. Only the
	damage part is used.

	Returns 0 for an empty sequence instead of dividing by zero.
	"""
	if not data:
		return 0

	totalDamage = sum(int(damageValue) for _, damageValue in data)

	return round(totalDamage/len(data),2)

# Run the evaluation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
	main()

OUTPUT

[Yesterday's list]

Select target for total damage over time plot:
	1. "Alchemist Pitts"
	2. "Tidesage Morris"
	3. "Tidesage Morris"
	4. "Hex Priest Haraka"
	5. "Thornspine Saurolisk"
	6. "Skithis the Infused"
	7. "Croz Bloodrage"
	8. "Squirrel"
	9. "Hydrath"
	10. "Ivus the Decayed"
Which target number?:

Next steps/ideas:

  • while user wants to see plots
  • with the string->seconds function I can now work on DPS values
  • some ideas for the creating more informative plots - but adding figures to the plot is probably not possible with matplotlib