C# 实现：获取文本文件编码的类（可区分 GB2312 和 UTF-8）
以下是用于获取文件编码的类：
using System;
using System.IO;
using System.Text;
// Helpers for detecting the text encoding of a file.
namespace FileEncoding
{
    /// <summary>
    /// Detects the text encoding of a file by inspecting its raw bytes:
    /// a byte order mark (BOM) is honoured first, otherwise the content is
    /// checked for being structurally valid UTF-8.
    /// </summary>
    public class EncodingType
    {
        /// <summary>
        /// Number of bytes read from the stream per chunk while scanning.
        /// </summary>
        private const int BufferSize = 4096;

        /// <summary>
        /// Opens the file at the given path and determines its text encoding.
        /// </summary>
        /// <param name="FILE_NAME">Path of the file to inspect.</param>
        /// <returns>The detected encoding; see <see cref="GetType(FileStream)"/>.</returns>
        public static System.Text.Encoding GetType(string FILE_NAME)
        {
            // "using" guarantees the handle is released even when detection throws.
            using (FileStream fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read))
            {
                return GetType(fs);
            }
        }

        /// <summary>
        /// Determines the text encoding of the data in the given file stream,
        /// reading from the stream's current position to its end.
        /// </summary>
        /// <remarks>
        /// The stream is closed before this method returns, also when reading
        /// fails; existing callers rely on that, so it is kept.
        /// </remarks>
        /// <param name="fs">Readable file stream to inspect.</param>
        /// <returns>
        /// <see cref="Encoding.UTF8"/> for a UTF-8 BOM or BOM-less data that is
        /// valid UTF-8 (this includes pure ASCII and empty input);
        /// <see cref="Encoding.Unicode"/> or <see cref="Encoding.BigEndianUnicode"/>
        /// for a UTF-16 BOM; otherwise <see cref="Encoding.Default"/>
        /// (presumably the ANSI code page, e.g. GB2312 on a Chinese-locale
        /// system - confirm for the target runtime).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="fs"/> is not readable.</exception>
        public static System.Text.Encoding GetType(FileStream fs)
        {
            if (fs == null)
            {
                throw new ArgumentNullException("fs");
            }

            if (!fs.CanRead)
            {
                throw new ArgumentException("Stream was not readable.", "fs");
            }

            try
            {
                byte[] buffer = new byte[BufferSize];
                int count = FillBuffer(fs, buffer);

                // A BOM is decisive. Only the BOM bytes themselves are compared
                // (never the first character after them), and every index is
                // guarded so files shorter than the BOM cannot fault.
                if (count >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
                {
                    return Encoding.UTF8;
                }

                if (count >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
                {
                    return Encoding.BigEndianUnicode;
                }

                if (count >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
                {
                    return Encoding.Unicode;
                }

                // No BOM: stream through the data chunk by chunk, carrying the
                // number of outstanding continuation bytes across chunk borders.
                int pending = 0;
                while (count > 0)
                {
                    if (!ScanUtf8(buffer, count, ref pending))
                    {
                        return Encoding.Default;
                    }

                    count = FillBuffer(fs, buffer);
                }

                // Data that ends in the middle of a multi-byte sequence is not
                // UTF-8; report the fallback encoding instead of throwing.
                return pending == 0 ? Encoding.UTF8 : Encoding.Default;
            }
            finally
            {
                fs.Close();
            }
        }

        /// <summary>
        /// Reads from the stream until the buffer is full or the end of the
        /// stream is reached (a single Read call may return fewer bytes).
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="buffer">Destination buffer.</param>
        /// <returns>Number of bytes stored in the buffer; 0 at end of stream.</returns>
        private static int FillBuffer(Stream stream, byte[] buffer)
        {
            int total = 0;
            while (total < buffer.Length)
            {
                int read = stream.Read(buffer, total, buffer.Length - total);
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            return total;
        }

        /// <summary>
        /// Checks one chunk of bytes for UTF-8 structure (lead byte followed by
        /// the right number of continuation bytes).
        /// </summary>
        /// <remarks>
        /// Lead bytes are limited to the ranges allowed by RFC 3629
        /// (C2-DF, E0-EF, F0-F4). Overlong forms and surrogate code points are
        /// not rejected, so this remains a heuristic rather than a full validator.
        /// </remarks>
        /// <param name="buffer">Buffer holding the chunk.</param>
        /// <param name="count">Number of valid bytes in the buffer.</param>
        /// <param name="pending">
        /// Continuation bytes still expected for the current character; carried
        /// from one chunk to the next. Must be 0 before the first chunk.
        /// </param>
        /// <returns>False as soon as a byte violates the structure; otherwise true.</returns>
        private static bool ScanUtf8(byte[] buffer, int count, ref int pending)
        {
            for (int i = 0; i < count; i++)
            {
                byte current = buffer[i];

                if (pending > 0)
                {
                    // Inside a multi-byte character: the byte must be 10xxxxxx.
                    if ((current & 0xC0) != 0x80)
                    {
                        return false;
                    }

                    pending--;
                }
                else if (current >= 0x80)
                {
                    // Start of a multi-byte character: derive its length from the lead byte.
                    if (current >= 0xC2 && current <= 0xDF)
                    {
                        pending = 1;
                    }
                    else if (current >= 0xE0 && current <= 0xEF)
                    {
                        pending = 2;
                    }
                    else if (current >= 0xF0 && current <= 0xF4)
                    {
                        pending = 3;
                    }
                    else
                    {
                        // Stray continuation byte or a byte that never occurs in UTF-8.
                        return false;
                    }
                }
            }

            return true;
        }
    }
}
以下是使用示例:
#region Open button
/// <summary>
/// Click handler for the Open button: lets the user pick a .txt file,
/// detects its encoding and loads its text into the text box.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void txtMenuOpen_Click(object sender, EventArgs e)
{
    // A dialog shown with ShowDialog() is not disposed automatically,
    // so release its native resources explicitly.
    using (OpenFileDialog openFileDialog = new OpenFileDialog())
    {
        // NOTE(review): the original remark here was garbled; it presumably
        // warned that a literal path needs escaped backslashes ("c:\\").
        openFileDialog.InitialDirectory = "";
        openFileDialog.Filter = "文本文档|*.txt";
        openFileDialog.RestoreDirectory = true;
        openFileDialog.FilterIndex = 1;

        if (openFileDialog.ShowDialog() == DialogResult.OK)
        {
            string fName = openFileDialog.FileName;

            // Decode the file with the detected encoding rather than the default one.
            txtBox.Text = System.IO.File.ReadAllText(fName,
                FileEncoding.EncodingType.GetType(fName));
        }
    }
}
#endregion