compare_files_by_content.py
|
#!/usr/bin/env python
# Bernd Ragutt
# Purpose:
# The script compares 2 text files byte by byte ignoring differences in
# white space in the following way: For comparison, sequences of white space
# characters are condensed to just one empty space character.
#
# Usage:
# compare_files_by_content.py NameOfFile1 NameOfFile2
#
# Note:
# File names may be relative or absolute.
# Names containing empty spaces must be enclosed by quotation marks '"'.
#
# For different files
# the first different non white character is marked with a '^'. Its position
# in the files is named 'At' in the output. 'L1-L2' is the difference of
# lengths of files. These numbers are numbers for internal files with all
# substrings "[ \t\n\r\f\v]+" replaced by just one empty space character.
#
# Example of output:
#
## # Comparing contents of files - ignoring white space ...
##
## > File 1: F:\Projekte\Software\Python\differ\cs2.ada
## > File 2: F:\Projekte\Software\Python\differ temp\cs2.ada
##
## ! The files are different by content - apart from white space:
##
## F1: "True, ZaX => True, Sap => True, Tan => True,"
## F2: "True, Zan => True, Sap => True, Tan => True,"
## ^
## At: 2281
## L1-L2: 9
##
## # Compared.
#
# Note: Names containing empty spaces are not enclosed by quotation marks.
def getData(FileName):
    r"""Return the content of a text file with white space normalised.

    The file is opened in text mode with the default encoding, leading and
    trailing white space is stripped, and every run of the characters
    "[ \t\n\r\f\v]" is condensed to exactly one space character.

    Parameters:
        FileName: Path of the file to read (relative or absolute).

    Returns:
        The normalised file content as one string.

    Raises:
        SystemExit: If the file cannot be opened, read or decoded. An error
            message is printed before ``sys.exit()`` is called.
    """
    # Imported locally: at file level 'sys' is only imported inside the
    # '__main__' guard, so without this the error path fails with a
    # NameError whenever the module is imported instead of run as a script.
    import re
    import sys

    try:
        # The context manager closes the file even when read() fails.
        with open(FileName, mode='r') as fRawData:
            RawData = fRawData.read().strip()
    except (OSError, ValueError):
        # OSError: missing file, missing permission, ...
        # ValueError: undecodable content (UnicodeDecodeError) or an
        # invalid path string.
        print('\n! File ERROR - Cannot open/read file: ' + FileName)
        print('')
        print('# Not compared.\n')
        sys.exit()
    # end try

    # One pass instead of appending character by character.
    return re.sub(r'[ \t\n\r\f\v]+', ' ', RawData)
# end getData()
def compareFiles(NameOfFile1, NameOfFile2, MatchCase, NoSpaceAtAll):
    """Compare the contents of two text files, ignoring white space.

    Both files are loaded with getData(); the comparison runs on the
    strings it returns.

    Parameters:
        NameOfFile1: Path of the first file.
        NameOfFile2: Path of the second file.
        MatchCase: If false, both texts are lower-cased before comparing.
        NoSpaceAtAll: If true, all space characters are removed from both
            texts before comparing.

    Returns:
        [True] if the texts are equal. Otherwise a list
        [False, LengthDiff, FirstDifferentCharNb, Excerpt1, Excerpt2]:
        LengthDiff is len(text1) - len(text2), FirstDifferentCharNb is the
        1-based position of the first difference, and the excerpts are
        slices of both texts that start at most 8 characters before that
        position.
    """
    PurgedData1 = getData(NameOfFile1)
    PurgedData2 = getData(NameOfFile2)

    if NoSpaceAtAll:
        PurgedData1 = PurgedData1.replace(' ', '')
        PurgedData2 = PurgedData2.replace(' ', '')
    # end if

    if not MatchCase:
        PurgedData1 = PurgedData1.lower()
        PurgedData2 = PurgedData2.lower()
    # end if

    if PurgedData1 == PurgedData2:
        return [True]
    # end if

    len1 = len(PurgedData1)
    len2 = len(PurgedData2)

    # If one text is a prefix of the other, the loop below finds nothing;
    # the first difference is then the character right after the shorter
    # text (previously this case was reported as position 0).
    FirstDifferentCharNb = min(len1, len2) + 1
    for Index, (Char1, Char2) in enumerate(zip(PurgedData1, PurgedData2)):
        if Char1 != Char2:
            FirstDifferentCharNb = Index + 1
            break
    # end for

    # Slice ends are exclusive and slicing clamps to the string length, so
    # no manual 'len-1' clamp is needed (that clamp cut off the last
    # character and could hide the differing character itself).
    min_nb = max(FirstDifferentCharNb - 9, 0)
    max_nb = FirstDifferentCharNb + 35

    return [False,
            len1 - len2,
            FirstDifferentCharNb,
            PurgedData1[min_nb:max_nb],
            PurgedData2[min_nb:max_nb]]
# end compareFiles()
# Script entry point: compares the two files named on the command line and
# prints a report. Expects exactly two arguments (the two file names).
if __name__ == "__main__":
    import sys

    MatchCase = True
    NoSpaceAtAll = False

    print('\n\n# Comparing contents of files - apart from white space ...')
    print('# Match case:', end=' ')
    print(MatchCase)

    if len(sys.argv) == 3:
        NameOfFile1 = sys.argv[1]
        NameOfFile2 = sys.argv[2]
    else:
        print('\n! Input ERROR - 2 file names are expected as arguments\n')
        print('# Not compared.\n')
        sys.exit()
    # end if

    if NameOfFile1 == NameOfFile2:
        print('\n! Input ERROR - The names of the files are identical!\n')
        print('# Not compared.\n')
        sys.exit()
    # end if

    print('\n> File 1: ' + NameOfFile1)
    print('> File 2: ' + NameOfFile2)

    Result = compareFiles(NameOfFile1, NameOfFile2, MatchCase, NoSpaceAtAll)

    if Result[0]:
        print('\n+ The files are equal by content - apart from white space.')
        print('')
    else:
        # Different files
        print('\n! The files are different by content - apart from white space:')
        print('')

        FirstDifferentCharNb = Result[2]
        Prefix1 = ' F1: "'
        Prefix2 = ' F2: "'

        # The excerpts start at index max(FirstDifferentCharNb-9, 0), so the
        # differing character sits at offset FirstDifferentCharNb-1 for
        # positions below 10 and at offset 8 otherwise. Deriving the padding
        # from the printed prefix keeps the '^' under that character.
        CaretOffset = max(min(FirstDifferentCharNb - 1, 8), 0)
        Blanks = ' ' * (len(Prefix1) + CaretOffset)

        print(Prefix1 + str(Result[3]), end='"\n')
        print(Prefix2 + str(Result[4]), end='"\n')
        print(Blanks + '^')
        print(' At: ', end='')
        print(Result[2])
        print(' L1-L2: ', end='')
        print(Result[1])
        print('')
    # end if

    print('# Compared.')
# end if main.