compare_dirs_by_content.py
|
#!/usr/bin/env python
# Bernd Ragutt
# Purpose:
# The script compares the text files with a given extension in two directories,
# ignoring differences in white space in the following way:
# before the byte-by-byte comparison, each sequence of white space characters
# is condensed to a single space character.
#
# Usage:
# compare_dirs_by_content.py {Extension} NameOfDirectory1 NameOfDirectory2
#
# Default extension:
# .ada
#
# Note:
# The extension begins with a dot '.'
# Directory names may be relative or absolute.
# A separator at the end of a name is optional.
# Names containing empty spaces must be enclosed by quotation marks '"'.
#
# Example (Unix):
# compare_dirs_by_content.py .txt /home/ra/temp /home/ra/temp2/
#
# Examples (Windows):
# compare_dirs_by_content.py F:\Projekte\Python\differ "F:\Projekte\Python\differ temp"
# F:\Projekte\Software\Python\differ\compare_dirs_by_content.py differ "differ temp"
#
# For files that differ,
# the first differing non-white-space character is marked with a '^'. Its position
# in the files is named 'At' in the output. 'L1-L2' is the difference of
# the lengths of the two files. Both numbers refer to the internal copies of
# the files, in which every substring matching "[ \t\n\r\f\v]+" is replaced by
# just one space character.
#
# Example of output:
#
## # Comparing contents of files - ignoring white space ...
##
## > Directory 1: F:\Projekte\Python\differ\
## > Directory 2: F:\Projekte\Python\differ temp\
## > Extension : .ada
##
## - Files NOT in directory 2:
## Number Of files: 2
##
## * dummy.ada
## * temp 1.ada
##
## - Files NOT in directory 1:
## Number Of files: 2
##
## * dummy_temp.ada
## * temp.ada
##
## + Equal files - apart from white space:
## Number Of files: 1
##
## * cs1.ada
##
## ! Different files: - apart from white space
## Number Of files: 1
##
## * cs 2.ada
## F1: "True, ZaX => True, Sap => True, Tan => True,"
## F2: "True, Zan => True, Sap => True, Tan => True,"
## #
## At: 2281
## L1-L2: 9
##
##
## # Compared.
#
# Note: Names containing empty spaces are not enclosed by quotation marks.
import os;
if __name__!="__main__":
import PySide2.QtCore;
def getData(FileName):
    """Return the content of a text file with its white space condensed.

    The file is opened in text mode and read completely. Leading and
    trailing white space is removed with ``str.strip()``; afterwards every
    run of space, tab, newline, carriage return, form feed and vertical tab
    characters is replaced by exactly one space character.

    Args:
        FileName: Path of the file to read.

    Returns:
        The condensed file content as a single string.

    Raises:
        IOError: If the file cannot be opened, read or decoded. An error
            message is printed before the exception is raised.
    """
    import re  # function-scope import keeps the file's import block untouched

    try:
        # 'with' guarantees the handle is closed even when read() fails,
        # e.g. on a decoding error in a non-text file.
        with open(FileName, 'r') as fRawData:
            RawData = fRawData.read().strip()
    except Exception:
        # 'Exception' (not a bare except) lets KeyboardInterrupt/SystemExit pass.
        print('\n! File ERROR - Cannot open/read file: '+FileName)
        raise IOError('Cannot open/read file: '+FileName)
    # end try

    # One pass over the data; the character class lists exactly the six
    # characters treated as white space by this script.
    return re.sub('[ \t\n\r\f\v]+', ' ', RawData)
# end getData()
def compareFiles(NameOfFile1,NameOfFile2,MatchCase,NoSpaceAtAll):
    """Compare two text files while ignoring differences in white space.

    Both files are loaded with getData(), i.e. with runs of white space
    already condensed to one space character.

    Args:
        NameOfFile1: Path of the first file.
        NameOfFile2: Path of the second file.
        MatchCase: If false, both texts are lower-cased before comparing.
        NoSpaceAtAll: If true, all space characters are removed before
            comparing.

    Returns:
        ``[True]`` if the prepared texts are equal. Otherwise a list
        ``[False, LengthDiff, FirstDifferentCharNb, Excerpt1, Excerpt2]``:
        LengthDiff is len(text1)-len(text2); FirstDifferentCharNb is the
        1-based position of the first differing character (when one text
        is a proper prefix of the other, the position of the first surplus
        character); the excerpts are slices of the two prepared texts
        around that position.

    Raises:
        IOError: Propagated from getData() if a file cannot be read.
    """
    PurgedData1 = getData(NameOfFile1)
    PurgedData2 = getData(NameOfFile2)

    if NoSpaceAtAll:
        PurgedData1 = PurgedData1.replace(' ', '')
        PurgedData2 = PurgedData2.replace(' ', '')
    # end if

    if not MatchCase:
        PurgedData1 = PurgedData1.lower()
        PurgedData2 = PurgedData2.lower()
    # end if

    if PurgedData1 == PurgedData2:
        return [True]
    # end if

    len1 = len(PurgedData1)
    len2 = len(PurgedData2)

    # If no character differs within the common length, one text is a
    # proper prefix of the other: the first difference is then the first
    # character beyond the shorter text (previously reported as 0).
    FirstDifferentCharNb = min(len1, len2) + 1
    for Index, (Char1, Char2) in enumerate(zip(PurgedData1, PurgedData2)):
        if Char1 != Char2:
            FirstDifferentCharNb = Index + 1
            break
    # end for

    # Excerpt window: starts 9 positions before the 1-based number, ends
    # 35 after it. Slicing clamps to the text length by itself, so the
    # last character of a text is no longer cut off.
    min_nb = max(FirstDifferentCharNb - 9, 0)
    max_nb = FirstDifferentCharNb + 35

    return [False,
            len1 - len2,
            FirstDifferentCharNb,
            PurgedData1[min_nb:max_nb],
            PurgedData2[min_nb:max_nb]]
# end compareFiles()
def getListOfFiles(NameOfDir,Extension):
    """Return the sorted names of the files in a directory with an extension.

    Only regular files (as reported by ``os.path.isfile``) whose names end
    with Extension are returned; sub-directories that happen to carry the
    extension are skipped because they cannot be read as text files.

    Args:
        NameOfDir: Name of the directory; a trailing separator is optional.
        Extension: Required end of the file name, e.g. '.ada'.

    Returns:
        A sorted list of file names (without the directory part).

    Raises:
        IOError: If the directory cannot be listed. An error message is
            printed before the exception is raised.
    """
    try:
        Entries = os.listdir(NameOfDir)
    except Exception:
        # 'Exception' (not a bare except) lets KeyboardInterrupt/SystemExit pass.
        print('\n! Directory ERROR - Cannot access: '+NameOfDir)
        raise IOError('Cannot access directory: '+NameOfDir)
    # end try

    # Build the result in one pass instead of removing non-matching
    # entries from the list one by one.
    ListOfFiles = [f for f in Entries
                   if f.endswith(Extension)
                   and os.path.isfile(os.path.join(NameOfDir, f))]
    ListOfFiles.sort()
    return ListOfFiles
# end getListOfFiles()
def compareDirs(NameOfDir1,NameOfDir2,Extension,MatchCase,NoSpaceAtAll):
    """Compare the files of two directories and return a report.

    The file names of both directories are obtained with getListOfFiles().
    Files present in both directories are compared with compareFiles();
    files present in only one directory are listed separately.

    Args:
        NameOfDir1: Name of the first directory.
        NameOfDir2: Name of the second directory.
        Extension: File name extension passed to getListOfFiles().
        MatchCase: Passed to compareFiles().
        NoSpaceAtAll: Passed to compareFiles().

    Returns:
        A list of strings, the lines of the report in output order:
        files only in directory 1, files only in directory 2, equal files,
        and different files with excerpt, position and length difference.

    Raises:
        IOError: Propagated from getListOfFiles() or compareFiles().
    """
    ListOfFiles1 = getListOfFiles(NameOfDir1, Extension)
    ListOfFiles2 = getListOfFiles(NameOfDir2, Extension)

    # Sets give constant-time membership tests; the lists keep the
    # sorted order for the report.
    Names1 = set(ListOfFiles1)
    Names2 = set(ListOfFiles2)
    ListOfFiles1_NotIn2 = [f for f in ListOfFiles1 if f not in Names2]
    ListOfFiles2_NotIn1 = [f for f in ListOfFiles2 if f not in Names1]
    CommonFiles1 = [f for f in ListOfFiles1 if f in Names2]
    CommonFiles2 = [f for f in ListOfFiles2 if f in Names1]

    # Sanity check kept from the original implementation: both lists of
    # common names are expected to be identical.
    if CommonFiles1 != CommonFiles2:
        print('\n### DEVELOPMENT ERROR ###\n')

    EqualFiles = []
    DifferentFiles = []
    for f in CommonFiles1:
        # os.path.join works with and without a trailing separator on the
        # directory names, consistent with getListOfFiles().
        Result = compareFiles(os.path.join(NameOfDir1, f),
                              os.path.join(NameOfDir2, f),
                              MatchCase, NoSpaceAtAll)
        if __name__!="__main__":
            # When used as an imported module, Qt events are processed
            # after each file comparison.
            PySide2.QtCore.QCoreApplication.processEvents()
        if Result[0]:
            EqualFiles.append(f)
        else:
            DifferentFiles.append([f, Result[1], Result[2], Result[3], Result[4]])
        # end if
    # end for

    Outputs = []

    Outputs.append('\n- Files NOT in directory 2:')
    Outputs.append(' Number Of files: '+str(len(ListOfFiles1_NotIn2)))
    for f in ListOfFiles1_NotIn2:
        Outputs.append(' * '+f)

    Outputs.append('\n- Files NOT in directory 1:')
    Outputs.append(' Number Of files: '+str(len(ListOfFiles2_NotIn1)))
    for f in ListOfFiles2_NotIn1:
        Outputs.append(' * '+f)

    Outputs.append('\n+ Equal files - apart from white space:')
    Outputs.append(' Number Of files: '+str(len(EqualFiles)))
    for f in EqualFiles:
        Outputs.append(' * '+f)

    Outputs.append('\n! Different files: - apart from white space')
    Outputs.append(' Number Of files: '+str(len(DifferentFiles)))
    for Entry in DifferentFiles:
        # Entry layout: [name, length difference, position, excerpt1, excerpt2]
        FirstDifferentCharNb = Entry[2]
        # Padding in front of the '^' marker line.
        # NOTE(review): the widths of these literals decide where '^' lands
        # below the F1/F2 excerpts - confirm the alignment.
        if FirstDifferentCharNb<10:
            Blanks = ' '
            for I in range(FirstDifferentCharNb):
                Blanks += ' '
        else:
            Blanks = ' ' # Max 26 blanks
        # end if
        Outputs.append(' * '+Entry[0])
        Outputs.append(' F1: "'+Entry[3])
        Outputs.append(' F2: "'+Entry[4])
        Outputs.append(Blanks+'^')
        Outputs.append(' At: '+str(Entry[2]))
        Outputs.append(' L1-L2: '+str(Entry[1]))
    # end for

    return Outputs
# end compareDirs()
# ------------------------------------------------------------------------------#
# ------------------------------------------------------------------------------#
if __name__=="__main__":
    # Command line driver:
    #   compare_dirs_by_content.py {Extension} NameOfDirectory1 NameOfDirectory2
    # Prints the report produced by compareDirs() to standard output.
    import sys

    MatchCase = True
    NoSpaceAtAll = False
    Extension = '.ada'  # default extension, used when only 2 arguments are given

    print('\n\n# Comparing contents of files - apart from white space ...\n')
    print('# MatchCase : '+str(MatchCase))

    if len(sys.argv)==4:
        Extension = sys.argv[1]
        NameOfDir1 = sys.argv[2]
        NameOfDir2 = sys.argv[3]
    elif len(sys.argv)==3:
        NameOfDir1 = sys.argv[1]
        NameOfDir2 = sys.argv[2]
    else:
        print('\n! Input ERROR - 2 or 3 arguments are expected:')
        print('! an optional file extension (eg: .txt - default: .ada)')
        print('! and 2 directory names (eg for Unix: /home/ra/tmp1 /home/ra/tmp2 ))')
        print('! Separators al the end of the names are optional.\n')
        print('# Compared.\n')
        sys.exit()
    # end if

    # Normalise the trailing separator BEFORE testing for identical names,
    # so that e.g. 'dir' and 'dir/' are recognised as the same directory.
    if not NameOfDir1.endswith(os.sep):
        NameOfDir1 += os.sep
    if not NameOfDir2.endswith(os.sep):
        NameOfDir2 += os.sep

    if NameOfDir1==NameOfDir2:
        print('\n! Input ERROR - The names of the directories are identical!\n')
        print('# Compared.\n')
        sys.exit()
    # end if

    print('# Extension : '+Extension)
    print('\n# Directory 1: '+NameOfDir1)
    print('# Directory 2: '+NameOfDir2)

    try:
        Outputs = compareDirs(NameOfDir1,NameOfDir2,Extension,MatchCase,NoSpaceAtAll)
        for Line in Outputs:
            print(Line)
    except Exception:
        # 'Exception' (not a bare except) so that Ctrl-C / SystemExit are
        # not reported as a file error.
        print('\n\n! File ERROR - An exception occured.')
        print(' - Note: The files must be text files.')
    # end try

    print('# Compared.')
# end if main
# ------------------------------------------------------------------------------#
# ------------------------------------------------------------------------------#