#!/usr/bin/env perl
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                             \___|\___/|_| \_\_____|
#
# Copyright (C) Viktor Szakats
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################

# Checks every file listed by 'git ls-files' for whitespace, end-of-line and
# non-ASCII issues. One line is printed per finding, prefixed with the
# filename, and the exit status is 1 when at least one file has a finding.

use strict;
use warnings;

# Filename masks (regexes) of files that may contain tab characters.
my @tabs = (
    "^m4/zz40-xc-ovr.m4",
    "Makefile\\.(am|example)\$",
    "\\.sln\$",
    "^tests/data/data1706-stdout.txt",
    "^tests/data/test",
);

# Filename masks of files that must use CRLF; all other files must use LF.
my @need_crlf = (
    "\\.(bat|sln)\$",
);

# Filename masks of files that may contain consecutive empty lines.
my @double_empty_lines = (
    "RELEASE-NOTES",
    "^lib/.+\\.(c|h)\$",
    "^projects/OS400",
    "^projects/vms",
    "^tests/data/test",
    "\\.(m4|py)\$",
);

# Byte sequences, written as regex escapes, that are tolerated in any file
# outside tests/data.
my @non_ascii_allowed = (
    '\xC3\xB6',  # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
);

# Joined as a regex alternation so that every entry is matched as a complete
# byte sequence. Putting the entries into a bracketed character class would
# match each byte on its own, and any separator would become part of the
# class as well.
my $non_ascii_allowed = join('|', @non_ascii_allowed);

# Filename masks of files that may contain arbitrary non-ASCII bytes.
my @non_ascii = (
    ".github/scripts/pyspelling.words",
    ".mailmap",
    "RELEASE-NOTES",
    "docs/BINDINGS.md",
    "docs/THANKS",
    "docs/THANKS-filter",
);

# Returns 1 if $filename matches at least one regex of @masklist, else 0.
sub fn_match {
    my ($filename, @masklist) = @_;

    foreach my $mask (@masklist) {
        if($filename =~ $mask) {
            return 1;
        }
    }
    return 0;
}

# Classifies the line endings of $content by counting CR and LF bytes.
# Returns "cr", "lf", "crlf", "bin" (neither byte is present) or "" when the
# counts do not fit any of these, which the caller reports as mixed EOLs.
sub eol_detect {
    my ($content) = @_;

    my $cr = () = $content =~ /\r/g;
    my $lf = () = $content =~ /\n/g;

    if($cr > 0 && $lf == 0) {
        return "cr";
    }
    elsif($cr == 0 && $lf > 0) {
        return "lf";
    }
    elsif($cr == 0 && $lf == 0) {
        return "bin";
    }
    elsif($cr == $lf) {
        return "crlf";
    }

    return "";
}

# Runs all checks on the contents of one file.
#   $filename  path as listed by git, used to look up the exception lists
#   $content   complete file contents as a byte string
# Returns the list of findings as message strings, empty if there are none.
sub check_content {
    my ($filename, $content) = @_;

    my @err = ();

    if(!fn_match($filename, @tabs) &&
       $content =~ /\t/) {
        push @err, "content: has tab";
    }

    my $eol = eol_detect($content);

    if($eol eq "") {
        push @err, "content: has mixed EOL types";
    }

    if($eol ne "crlf" &&
       fn_match($filename, @need_crlf)) {
        push @err, "content: must use CRLF EOL for this file type";
    }

    if($eol ne "lf" && $content ne "" &&
       !fn_match($filename, @need_crlf)) {
        push @err, "content: must use LF EOL for this file type";
    }

    # NOTE: whitespace in front of a CRLF line ending is not detected here,
    # because both patterns look for the whitespace right before the LF.
    if($content =~ /[ \t]\n/) {
        my $line = 0;
        for my $l (split(/\n/, $content)) {
            $line++;
            if($l =~ /[ \t]$/) {
                push @err, "line $line: trailing whitespace";
            }
        }
    }

    if($content ne "" &&
       $content !~ /\n\z/) {
        push @err, "content: has no EOL at EOF";
    }

    if($content =~ /\n\n\z/ ||
       $content =~ /\r\n\r\n\z/) {
        push @err, "content: has multiple EOL at EOF";
    }

    if(!fn_match($filename, @double_empty_lines)) {
        # Cheap pre-check on the whole content before walking the lines.
        if($content =~ /\n\n\n/ ||
           $content =~ /\r\n\r\n\r\n/) {
            my $line = 0;
            my $blank = 0;
            for my $l (split(/\n/, $content)) {
                # split() already removed the LF. In CRLF files the CR is
                # still there and has to go for the empty-line test to work.
                $l =~ s/\r\z//;
                $line++;
                if($l eq "") {
                    if($blank) {
                        push @err, sprintf('duplicate empty line @ line %d', $line);
                    }
                    $blank = 1;
                }
                else {
                    $blank = 0;
                }
            }
        }
    }

    if($content =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/) {
        push @err, "content: has binary contents";
    }

    # Drop the tolerated sequences first, so that whatever non-ASCII byte is
    # left over is a finding. Removing them never changes line numbers.
    if(@non_ascii_allowed &&
       $filename !~ /tests\/data/) {  # the tests have no allowed UTF bytes
        $content =~ s/(?:$non_ascii_allowed)//g;
    }

    if(!fn_match($filename, @non_ascii) &&
       $content =~ /[\x80-\xff]/) {
        my $line = 0;
        for my $l (split(/\n/, $content)) {
            $line++;
            if($l =~ /([\x80-\xff]+)/) {
                # Report the bytes found on this very line.
                my $non = $1;
                my $hex = join(' ', map { sprintf("%02x", ord($_)) } split(//, $non));
                push @err, "line $line: has non-ASCII: '$non' ($hex)";
            }
        }
    }

    return @err;
}

# Checks every file listed by 'git ls-files' and prints the findings.
# Returns the number of files with at least one finding. Dies if git cannot
# be run or fails, or if a listed file cannot be opened.
sub main {
    my $issues = 0;

    open(my $git_ls_files, '-|', 'git ls-files') or die "Failed running git ls-files: $!";
    while(my $filename = <$git_ls_files>) {
        chomp $filename;

        open(my $fh, '<', $filename) or die "Cannot open '$filename': $!";
        my $content = do { local $/; <$fh> };
        close $fh;

        my @err = check_content($filename, $content);

        if(@err) {
            $issues++;
            foreach my $err (@err) {
                print "$filename: $err\n";
            }
        }
    }
    # Closing a pipe waits for the command and fails if it exited non-zero.
    # Without this check a failing git would look like a clean run.
    close($git_ls_files)
        or die "Failed running git ls-files: " . ($! ? $! : "exit status " . ($? >> 8));

    return $issues;
}

# Run the checks only when executed as a script, so that the subs above can
# be loaded from another Perl file without touching git.
if(!caller && main()) {
    exit 1;
}

1;