Documenting Filenames in the Source Code with Python

Python logo

Troubleshooting a web application page that is made from a crazy quilt of source code files is not always easy. Whether your application is built using ASP, PHP, ColdFusion, or something completely different, the challenge here is the same. How do you find the line of code in the source that is causing the problem if you don’t know which file to look in?

If you’re lucky, and a particular piece of error handling text or other identifying information is only found in a couple of places, using a PowerShell script like the one below will find what you’re looking for.

Get-ChildItem -Path "C:\Root folder" -Recurse | Select-String -Pattern "text to search for" | group path | select name

However, if the error text itself is not embedded in the code, but you can find the offending line of code in the rendered web page using Chrome Dev Tools or the like, you may still have a problem locating the actual line of code in source. That’s where this technique comes in. Putting the source code filenames in comments at the beginning and end of each appropriate file can make it easier to determine the origin of the problem. 

I’ve visited this topic before, and used PowerShell to do this task then. Now, I’m using Python. I was able to cobble this script together from one that traverses folders and documents their contents and one that filters the file type since, for this exercise, I only need to alter the ColdFusion (.cfm) files. 

import os
import sys

walk_dir = sys.argv[1]

print('walk_dir = ' + walk_dir)

# If your current working directory may change during script execution, it's recommended to
# immediately convert program arguments to an absolute path. Then the variable root below will
# be an absolute path as well. Example:
# walk_dir = os.path.abspath(walk_dir)
print('walk_dir (absolute) = ' + os.path.abspath(walk_dir))

for root, subdirs, files in os.walk(walk_dir):
    files = [ fi for fi in files if fi.endswith(".cfm") ]    

    for filename in files:
        file_path = os.path.join(root, filename)
        adj_file_path = file_path.replace(walk_dir,'').replace('\\','/')
        print('\t- file %s (full path: %s)' % (filename, adj_file_path))
        
        with open(file_path, 'rb') as f:
            f_content = f.read().decode("utf-8")
            f_content = '<!-- file: ' + adj_file_path + ' -->\n' + f_content + '\n<!-- end of file: ' + adj_file_path + ' -->\n'                       
            
        with open(file_path, 'wb') as f:
            f.write(f_content.encode("utf-8"))

Automate the Export of Primavera P6 Projects using Python – Part 2

Primavera P6 and Python logos

Happy New Year!

The result of Part 1 was a list of XER files, all of which were in one folder. It is not clear from where in the Enterprise Project Structure (EPS) that each file belonged.

P6 EPPM EPS example

Perhaps you would like to create a hierarchy of folders similar to the project screen in P6?

I have modified my original Python script to build out a folder structure that is similar to the EPS. I have not been able to find all levels of the hierarchy within the P6 database, but I was able to get most of the levels represented.

import cx_Oracle
import datetime
import fileinput
import os
import subprocess
import sys
from codecs import open

arguments = len(sys.argv)-1
if arguments < 6:
	print ("The script is called with %i arguments - 6 arguments are required. (DBUsername, DBpassword, DBalias, APPusername, APPpassword, OneXer)" % (arguments))
	sys.exit()
	
DBusername= sys.argv[1] 
DBpassword=sys.argv[2] 
DBalias=sys.argv[3] 
APPusername=sys.argv[4] 
APPpassword=sys.argv[5]
OneXer=sys.argv[6]

if OneXer != "0":
	OneXer = True
else:
	OneXer = False

"""
Url: https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
"""
import unicodedata
import string

valid_filename_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
char_limit = 255

def clean_filename(filename, whitelist=valid_filename_chars, replace=' '):
    # replace spaces
    for r in replace:
        filename = filename.replace(r,'_')
    
    # keep only valid ascii chars
    cleaned_filename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').decode()
    
    # keep only whitelisted chars
    cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist)
    if len(cleaned_filename)>char_limit:
        print("Warning, filename truncated because it was over {}. Filenames may no longer be unique".format(char_limit))
    return cleaned_filename[:char_limit]  
"""
End of function
"""

def secure_log():
	if os.path.exists('myLog.txt'):
		with fileinput.FileInput('myLog.txt', inplace=True) as file:
			for line in file:			
				print(line.replace(APPpassword, len(APPpassword)*'X'), end='')

ospath = os.path.dirname(os.path.abspath(__file__))
pmpath = subprocess.check_output("WHERE PM.EXE", shell=True) 


def begin_process():
	secure_log()
	numtodo = 0
	proj_name_str_first = ''
	if os.path.exists("actions.xml"):
		os.rename("actions.xml", "actions" + datetime.datetime.now().strftime("%Y%m%d%H%M%S") + ".xml")	

	con = cx_Oracle.connect("{}/{}@{}".format(DBusername, DBpassword, DBalias))
	#print (con.version)

	cursor = con.cursor()
	if OneXer:
		cursor.execute("""
			SELECT DISTINCT PROJ_SHORT_NAME FROM PROJECT 
			WHERE ORIG_PROJ_ID IS NULL			
			AND PROJ_SHORT_NAME NOT LIKE '%^%'
			AND PROJ_SHORT_NAME NOT LIKE '%&%'
			ORDER BY PROJ_SHORT_NAME
			""")
	else:
		cursor.execute("""
			SELECT PROJ_SHORT_NAME, FILEPATH FROM (
			SELECT PROJ_SHORT_NAME,
			LTRIM(MAX(SYS_CONNECT_BY_PATH(OBS_NAME,'\\'))
			KEEP (DENSE_RANK LAST ORDER BY OBS_ID),',') AS FILEPATH
			FROM
			(SELECT REPLACE(REPLACE(REPLACE(REPLACE(o.OBS_NAME,',',''),' ','_'),'&','and'),'/','-') AS OBS_NAME, p.PROJ_SHORT_NAME, o.PARENT_OBS_ID, o.OBS_ID
			FROM OBSPROJ op
			JOIN OBS o ON op.OBS_ID = o.OBS_ID
			JOIN PROJECT p ON p.PROJ_ID = op.PROJ_ID
			WHERE p.ORIG_PROJ_ID IS NULL
			AND p.RISK_LEVEL IS NULL
			AND p.PROJ_SHORT_NAME NOT LIKE '%^%'
			AND p.PROJ_SHORT_NAME NOT LIKE '%&%')
			GROUP BY PROJ_SHORT_NAME
			CONNECT BY PARENT_OBS_ID = PRIOR OBS_ID AND PROJ_SHORT_NAME = PRIOR PROJ_SHORT_NAME
			START WITH PARENT_OBS_ID IS NULL)
			ORDER BY PROJ_SHORT_NAME ASC
			""")
			
	f = open(ospath + '\\actions.xml','a')
	xml_file_begin = '<actions>\n'
	f.write(xml_file_begin)
	
	if OneXer:
		xml_file = '\t<action>\n\t\t<type>export</type>\n\t\t<Projects>'
		f.write(xml_file)
			
	for row in cursor:
		proj_name_actual = row[0]				
		filepath = (ospath + row[1])
		proj_name_str = filepath + '\\' + clean_filename(proj_name_actual)
		#print(proj_name_str)		
		#print("Project:", proj_name_actual)
		if OneXer:
			xml_file = '\n\t\t\t<ProjectID>' + proj_name_actual + '</ProjectID>'
			f.write(xml_file)
		else:
			if not os.path.exists(proj_name_str + '.xer'):
				if not os.path.exists(filepath):
					#print('mkdir ' + '\"' + filepath + '\"')
					os.system('mkdir ' + filepath)
				if proj_name_str_first == '':
					proj_name_str_first = proj_name_str
				numtodo += 1
				
				xml_file = '\t<action>\n\t\t<type>export</type>\n\t\t<Projects>\n\t\t\t<ProjectID>' + proj_name_actual + '</ProjectID>\n\t\t</Projects>\n\t\t<outputFile>' + proj_name_str +'.xer</outputFile>\n\t</action>\n'	
				f.write(xml_file)	
			else:
				print("Skipped: Project "+ proj_name_actual + " already exported or errored.")
			
	if OneXer:
		xml_file = '\n\t\t</Projects>\n\t\t<outputFile>' + ospath + '\\OneFile.xer</outputFile>\n\t</action>\n'
		f.write(xml_file)	
	
	xml_file_end = '</actions>\n'
	f.write(xml_file_end)
	f.close()
	if not OneXer:
		print("Number of projects left to process:", numtodo)		
	os.system('PM.EXE /username=' + APPusername + ' /password=' + APPpassword + ' /actionScript=\"' + ospath + '\\actions.xml\" /logfile="' + ospath + '\\_myLog.txt\"')
	con.close()
	secure_log()
	
	if numtodo > 0:
		if not os.path.exists(proj_name_str_first + '.xer'):
			print("Errored on: ", proj_name_str_first)
			g = open(proj_name_str_first + '.xer','w')
			g.write('Error!!')
			g.close()
			h = open(proj_name_str_first + '-ERRROR.txt','w')
			h.write('Error!!')
			h.close()
		begin_process()		
		
begin_process()

The result of running this script should be a folder structure with names that are similar to the headings in the EPS. The exported files will be put into these folders. Strange characters in the names may need to be escaped or replaced (as is done above in the SQL query).