#!/usr/bin/perl
#
# Walk a directory tree and, for every regular file whose name matches a
# suffix pattern, compare the charset declared inside the file
# ("charset=...") with the encoding guessed from the file's bytes.
#
# One comma-separated line is printed per file:
#
#     path,declared-charset,guessed-charset,OK|NG
#
# Usage: perl this_script.pl [DIR [SUFFIX_REGEX]]
#
use strict;
use warnings;
use File::Find;
use Encode::Guess;

# Directory scanned when no DIR argument is given.
my $DEFAULT_DIR = '/Users/tobe/Sites';

# File-name pattern used when no SUFFIX_REGEX argument is given.
my $DEFAULT_SUFFIX = qr(\.html?$);

# State shared between main() and the File::Find callback.
my $suf = $DEFAULT_SUFFIX;
my @results;

# Run only when executed as a script, so the helper subs can be loaded
# (require/do) without triggering a directory scan.
main(@ARGV) unless caller;

# Entry point.
#   $dir     - directory to scan (falls back to $DEFAULT_DIR when false)
#   $pattern - regex source matched against each file name
#              (falls back to $DEFAULT_SUFFIX when missing or empty)
sub main {
    my ( $dir, $pattern ) = @_;

    $dir = $DEFAULT_DIR unless $dir;
    $suf = ( defined $pattern && length $pattern )
        ? qr($pattern)
        : $DEFAULT_SUFFIX;

    @results = ();
    find( \&wanted, $dir );
    show( \@results );
    return;
}

# File::Find callback: inspect the current entry and record its result.
sub wanted {
    return unless m/$suf/;

    # A directory whose name happens to match the pattern has no content
    # to inspect.
    return unless -f $_;

    # File::Find has already chdir'd into the entry's directory, so the
    # file must be opened through $_ (the bare name). $File::Find::name is
    # relative to the starting directory and would not resolve from here
    # when that directory was given as a relative path.
    my $result = get_charset($_);
    $result->{'file'} = $File::Find::name;
    push @results, $result;
    return;
}

# Get the declared and the guessed character encoding of a file.
#   $file - path of the file to read
# Returns a hash ref with the keys:
#   html  - charset declared in the file ('' when none was found)
#   src   - encoding guessed from the bytes ('' when the guess failed)
#   check - 'OK' when a charset is declared and equals the guess,
#           otherwise 'NG'
# A file that cannot be opened produces a warning on STDERR and an 'NG'
# result with empty html/src.
sub get_charset {
    my $file = shift;

    my $fh;
    if ( !open $fh, '<:raw', $file ) {
        warn "Cannot open $file: $!\n";
        return { 'html' => q(), 'src' => q(), 'check' => 'NG' };
    }
    my $data = do { local $/; <$fh> };    # slurp the whole file
    close $fh;
    $data = q() unless defined $data;

    my $charset = _get_meta_charset($data);
    my $enc     = _get_src_charset($data);

    # Two empty strings (nothing declared, nothing guessed) are not a
    # successful check, so a non-empty declaration is required for OK.
    # NOTE(review): an all-ASCII file is expected to be guessed as 'ascii'
    # and is therefore reported NG against e.g. 'utf-8' - confirm whether
    # that is the intended result.
    my $check = ( $charset ne q() && $charset eq $enc ) ? 'OK' : 'NG';

    return { 'html' => $charset, 'src' => $enc, 'check' => $check };
}

# Get the charset declared in the HTML text.
#   $data - file content
# Returns the lower-cased name following the first "charset=" in $data,
# or '' when there is none. An optional quote before the name is skipped,
# so both content="text/html; charset=utf-8" and charset="utf-8" work.
sub _get_meta_charset {
    my $data = shift;

    my $charset = q();
    if ( defined $data
        && $data =~ m/charset \s* = \s* ["']? ([\w\-]+)/msix )
    {
        $charset = lc $1;
    }
    return $charset;
}

# Guess the encoding from the content itself.
#   $data - file content as bytes
# Returns the guessed encoding name, or '' when Encode::Guess could not
# settle on a single encoding (it then returns an error string, not an
# object).
sub _get_src_charset {
    my $data = shift;

    my $enc = q();
    my $guess
        = Encode::Guess::guess_encoding( $data, qw/utf8 euc-jp shiftjis/ );
    $enc = $guess->name if ref $guess;

    # Rewrite Encode's names so they are spelled like the charset names
    # found in a meta tag.
    $enc =~ s/shiftjis/shift_jis/msix;
    $enc =~ s/utf8/utf-8/msix;
    return $enc;
}

# Print one "file,html,src,check" line per result.
#   $results - array ref of hash refs as built by wanted()
sub show {
    my $results = shift;

    foreach my $result ( @{$results} ) {
        print "$result->{'file'},$result->{'html'},$result->{'src'},$result->{'check'}\n";
    }
    return;
}