compare_files_by_content.py
#!/usr/bin/env python
# Bernd Ragutt
#  Purpose:
#     The script compares 2 text files character by character, ignoring
#     differences in white space in the following way: for the comparison,
#     sequences of white space characters are condensed to just one space character.
#
#  Usage:
#     compare_files_by_content.py NameOfFile1 NameOfFile2
#
#  Note:
#      File names may be relative or absolute.
#      Names containing empty spaces must be enclosed by quotation marks '"'.
#
#  For different files
#     the first different non white character is marked with a '^'. Its position
#     in the files is named 'At' in the output. 'L1-L2' is the difference of
#     lengths of files. These numbers are numbers for internal files with all
#     substrings "[ \t\n\r\f\v]+" replaced by just one empty space character.
#
#  Example of output:
#
## # Comparing contents of files - ignoring white space ...
## 
## > File 1: F:\Projekte\Software\Python\differ\cs2.ada
## > File 2: F:\Projekte\Software\Python\differ temp\cs2.ada
## 
## ! The files are different by content - apart from white space:
## 
##              F1: "True, ZaX => True, Sap => True, Tan => True,"
##              F2: "True, Zan => True, Sap => True, Tan => True,"
##                           ^
##              At:  2281
##           L1-L2:  9
## 
## # Compared.
#
#  Note: Names containing empty spaces are not enclosed by quotation marks.

def getData(FileName):
    r"""Read a text file and return its content with white space condensed.

    The file is opened in text mode with the platform default encoding.
    Leading and trailing white space is stripped from the content, and every
    run of the characters ' ', '\t', '\n', '\r', '\f', '\v' inside the text is
    replaced by exactly one space character.

    Parameters:
        FileName: path of the file to read (relative or absolute).

    Returns:
        The condensed content as a string.

    Raises:
        SystemExit: if the file cannot be opened, read or decoded; an error
            message is printed before exiting.
    """
    # Imported locally so that the function also works when this module is
    # imported instead of being run as a script.
    import re
    import sys

    try:
        # The context manager guarantees the handle is closed, even when
        # read() fails half-way.
        with open(FileName, mode='r') as fRawData:
            RawData = fRawData.read().strip()
    except (OSError, ValueError):
        # OSError: missing file, permissions, I/O failure.
        # ValueError: undecodable content (UnicodeDecodeError) or bad path.
        print('\n! File ERROR - Cannot open/read file: ' + FileName)
        print('')
        print('# Not compared.\n')
        sys.exit()

    # One pass in C instead of quadratic character-by-character appends.
    return re.sub(r'[ \t\n\r\f\v]+', ' ', RawData)

# end getData()


def compareFiles(NameOfFile1,NameOfFile2,MatchCase,NoSpaceAtAll):
    """Compare the white-space-condensed contents of two text files.

    Parameters:
        NameOfFile1, NameOfFile2: paths of the files to compare; both are
            read through getData().
        MatchCase: if false, both texts are lower-cased before comparing.
        NoSpaceAtAll: if true, the remaining single spaces are removed as
            well, so white space is ignored completely.

    Returns:
        [True] if the processed texts are equal, otherwise the list
        [False, Length_Diff_By, FirstDifferentCharNb, Excerpt1, Excerpt2]:
          - Length_Diff_By: length of text 1 minus length of text 2,
          - FirstDifferentCharNb: 1-based position of the first difference;
            if one text is a strict prefix of the other, this is the
            position right after the end of the shorter text,
          - Excerpt1/Excerpt2: the texts around that position (up to 8
            characters before it, the character itself and up to 35 after).
        All numbers refer to the processed texts, not to the raw files.
    """
    PurgedData1 = getData(NameOfFile1)
    PurgedData2 = getData(NameOfFile2)

    if NoSpaceAtAll:
        PurgedData1 = PurgedData1.replace(' ', '')
        PurgedData2 = PurgedData2.replace(' ', '')

    if not MatchCase:
        PurgedData1 = PurgedData1.lower()
        PurgedData2 = PurgedData2.lower()

    if PurgedData1 == PurgedData2:
        return [True]

    len1 = len(PurgedData1)
    len2 = len(PurgedData2)
    common_length = min(len1, len2)

    # Index of the first mismatch inside the common part. When there is
    # none, one text is a strict prefix of the other and the difference
    # starts right behind the shorter text (previously reported as 0).
    first_diff_index = next(
        (index
         for index, (char1, char2) in enumerate(zip(PurgedData1, PurgedData2))
         if char1 != char2),
        common_length)
    FirstDifferentCharNb = first_diff_index + 1

    # Excerpt window. Slice ends are exclusive and clamped by Python itself,
    # so no "len - 1" limit is needed (it used to cut off the last character).
    window_start = max(FirstDifferentCharNb - 9, 0)
    window_end = FirstDifferentCharNb + 35

    return [False,
            len1 - len2,
            FirstDifferentCharNb,
            PurgedData1[window_start:window_end],
            PurgedData2[window_start:window_end]]

# end compareFiles()

if __name__=="__main__":
    # Script entry point: compare the two files named on the command line
    # and print a human-readable report.
    import sys  # bound as a module-level name when run as a script

    MatchCase = True
    NoSpaceAtAll = False

    print('\n\n# Comparing contents of files - apart from white space ...')
    print('# Match case:', MatchCase)

    # Exactly two file names are required.
    if len(sys.argv) != 3:
        print('\n! Input ERROR - 2 file names are expected as arguments\n')
        print('# Not compared.\n')
        sys.exit()

    NameOfFile1, NameOfFile2 = sys.argv[1:]

    # Comparing a file with itself is rejected as an input error.
    if NameOfFile1 == NameOfFile2:
        print('\n! Input ERROR - The names of the files are identical!\n')
        print('# Not compared.\n')
        sys.exit()

    print('\n> File 1: ' + NameOfFile1)
    print('> File 2: ' + NameOfFile2)

    Result = compareFiles(NameOfFile1, NameOfFile2, MatchCase, NoSpaceAtAll)

    if not Result[0]:
        # Different files: show both excerpts and mark the first difference.
        print('\n! The files are different by content - apart from white space:')
        print()

        length_diff, first_diff, excerpt1, excerpt2 = Result[1:]

        # Indentation that puts the '^' below the differing character.
        if first_diff < 10:
            caret_indent = '                 ' + ' ' * first_diff
        else:
            caret_indent = '                          '  # Max 26 blanks

        print('             F1: "' + str(excerpt1) + '"')
        print('             F2: "' + str(excerpt2) + '"')
        print(caret_indent + '^')
        print('             At:  ' + str(first_diff))
        print('          L1-L2:  ' + str(length_diff))
        print()
    else:
        print('\n+ The files are equal by content - apart from white space.')
        print()

    print('# Compared.')

# end if main.