以下代码使用CStdioFile时无法向文本文件中写入中文(用notepad.exe查看,看不到写入的中文):
// Demonstrates the problem: under the default locale the Chinese characters are lost.
CStdioFile file;
file.Open(…);
file.WriteString(_T("abc你好"));// only "abc" ends up in the file
解决办法:
使用setlocale语句设定区域
#include <locale>//头文件CStdioFile file;file.Open(…);
char* old_locale = _strdup( setlocale(LC_CTYPE,NULL) );
setlocale( LC_CTYPE, "chs" );//设定
file.WriteString(_T("abc你好"));//正常写入
setlocale( LC_CTYPE, old_locale );
free( old_locale );//还原区域设定
简化处理可以仅使用语句setlocale( LC_CTYPE, "chs" )。
setlocale:
函数原型为:char *setlocale( int category, const char *locale ); 头文件:<locale.h>。所支持的操作系统为:ANSI, Win 95, Win NT。对于简体中文可以使用如下设置:setlocale( LC_ALL, "chs" ); 为什么一定要调用setlocale呢?因为在C/C++语言标准中定义了其运行时的字符集环境为"C",也就是ASCII字符集的一个子集,那么mbstowcs在工作时会将cstr中所包含的字符串看作是ASCII编码的字符,而不认为是一个包含有chs编码的字符串,所以它会将每一个中文拆成2个ASCII编码进行转换,这样(以两个汉字为例)得到的结果就是由4个wchar_t字符组成的串。那么如何才能够让mbstowcs正常工作呢?在调用mbstowcs进行转换之前,必须明确地告诉mbstowcs目前cstr串中包含的是chs编码的字符串,这通过调用setlocale( LC_ALL, "chs" )来完成。需要注意的是,这个函数会改变整个应用程序的字符集编码方式,用完之后必须重新调用setlocale( LC_ALL, "C" )来还原。这样就可以保证mbstowcs在转换时将cstr中的串看作是中文串,并且转换成为2个wchar_t字符,而不是4个。本地化设置需要具备三个条件:
a. 语言代码 (Language Code);b. 国家代码 (Country Code);c. 编码 (Encoding)。本地名字可以用下面这些部分来构造:语言代码_国家代码.编码,比如 zh_CN.UTF-8、en_US 等。locale的别名表见 /usr/lib/X11/locale/locale.alias(以Debian GNU/Linux为例)。setlocale语言字符串参考:本文来自CSDN博客,转载请标明出处:
另外还有一种方法就是重新写CStdioFile的派生类CStdioFileEx(网上有)。
//好像C++中没有类能够读写Unicode格式的文本文件,所以我写了下面这个类。用法很简单,大家尝试几下就明白了。
#pragma once
// CStdioFile extension that can read and write both ANSI (multibyte) text
// files and Unicode text files that start with a byte-order mark.
class CStdioFileEx: public CStdioFile
{
public:
    // Creates an unopened file object; the Unicode flag starts out false.
    CStdioFileEx();

    // Opens lpszFileName immediately; nOpenFlags is passed through
    // PreprocessFlags before being handed to the CStdioFile constructor.
    CStdioFileEx( LPCTSTR lpszFileName, UINT nOpenFlags );

    // Opens lpszFileName after running nOpenFlags through PreprocessFlags.
    virtual BOOL Open( LPCTSTR lpszFileName, UINT nOpenFlags, CFileException* pError = NULL );

    // Line-oriented reads. ReadString forwards to the wide or ANSI variant
    // depending on whether _UNICODE is defined; each variant converts when the
    // file's format differs from the requested string type.
    virtual BOOL ReadString(CString& rString);
    BOOL ReadWideString(CStringW& rString);
    BOOL ReadAnsiString(CStringA& rString);

    // String writes. WriteString forwards to the wide or ANSI variant
    // depending on whether _UNICODE is defined; each variant converts when the
    // file's format differs from the supplied string type.
    virtual void WriteString(LPCTSTR lpsz);
    void WriteWideString(LPCWSTR lpsz);
    void WriteAnsiString(LPCSTR lpsz);

    // True when the file is being treated as Unicode text.
    bool IsUnicodeFormat() {return m_bIsUnicodeText;}

    // Number of characters in the file (see the implementation for details).
    unsigned long GetCharCount();

    // Additional flag to allow Unicode text format writing
    enum {modeWriteUnicode = 0x100000};

    // Checks whether the file at sFilePath starts with the Unicode byte-order mark.
    static bool IsFileUnicode(const CString& sFilePath);

protected:
    // Decides the Unicode/ANSI mode from the open flags (and, when reading,
    // from the file itself), adjusts nOpenFlags in place and returns it.
    UINT PreprocessFlags(const CString& sFilePath, UINT& nOpenFlags);

    // Whether the underlying file is handled as Unicode text.
    bool m_bIsUnicodeText;
}; // The .cpp file follows.
#include "stdafx.h"
#include "StdioFileEx.h"

// Background on the byte-order mark (BOM):
// UCS defines a character called "ZERO WIDTH NO-BREAK SPACE" whose code is
// FEFF, while FFFE is not a valid UCS character and therefore should never
// appear in real data. The UCS specification recommends sending
// "ZERO WIDTH NO-BREAK SPACE" before the byte stream: a receiver that sees
// FE FF knows the stream is big-endian, and one that sees FF FE knows it is
// little-endian. That is why this character is also called the BOM.
// UTF-8 does not need a BOM to indicate byte order, but a BOM can be used to
// mark the encoding; the UTF-8 encoding of the character is EF BB BF, so a
// stream beginning with EF BB BF can be recognised as UTF-8.
// Windows uses the BOM to mark the encoding of text files. Some old browsers
// and text editors do not support the BOM.
#define UNICODE_BOM 0xFEFF // Unicode "byte order mark" which goes at start of file

// Default constructor: no file is open yet, so treat the file as non-Unicode.
CStdioFileEx::CStdioFileEx()
    : CStdioFile()
{
    m_bIsUnicodeText = false;
}

// Opening constructor: the flags are preprocessed (which also sets
// m_bIsUnicodeText) before being handed to the CStdioFile constructor.
CStdioFileEx::CStdioFileEx(LPCTSTR lpszFileName, UINT nOpenFlags)
    : CStdioFile(lpszFileName, PreprocessFlags(lpszFileName, nOpenFlags))
{
}

// Opens the file. nOpenFlags is adjusted in place by PreprocessFlags (which
// also decides the Unicode/ANSI mode) and then passed to CStdioFile::Open.
// Returns whatever CStdioFile::Open returns.
BOOL CStdioFileEx::Open(LPCTSTR lpszFileName, UINT nOpenFlags, CFileException* pError /*=NULL*/)
{
    PreprocessFlags(lpszFileName, nOpenFlags);
    return CStdioFile::Open(lpszFileName, nOpenFlags, pError);
}

// Reads one line into rString, dispatching on the build's character type.
BOOL CStdioFileEx::ReadString(CString& rString)
{
#ifdef _UNICODE
    return ReadWideString(rString);
#else
    return ReadAnsiString(rString);
#endif
}

// Reads one line as wide characters.
// For a Unicode file the line is read directly (skipping the BOM when at the
// start of the file); otherwise the line is read as ANSI and converted.
// The trailing line terminator ("\r\n" or "\n") is removed.
// Returns FALSE only when nothing at all could be read (end of file), so an
// empty line in the middle of the file still returns TRUE.
// Throws a CFileException (genericException) on a stream read error.
BOOL CStdioFileEx::ReadWideString(CStringW& rString)
{
    _ASSERTE(m_pStream);
    rString = L""; // empty string without deallocating

    if (!m_bIsUnicodeText)
    {
        // ANSI file: read the narrow line and let the CStringW assignment
        // convert it (the original author notes that no setlocale call is
        // needed for this conversion).
        CStringA ansiString;
        BOOL bRetval = ReadAnsiString(ansiString);
        rString = ansiString;
        return bRetval;
    }

    // If at position 0, discard byte-order mark before reading
    if (GetPosition() == 0)
    {
        wchar_t bom;
        Read(&bom, sizeof(wchar_t));
    }

    const int nMaxSize = 128; // chunk size; the buffer grows by this much per pass
    LPWSTR lpsz = rString.GetBuffer(nMaxSize);
    LPWSTR lpszResult;
    int nLen = 0;
    for (;;)
    {
        lpszResult = fgetws(lpsz, nMaxSize + 1, m_pStream);
        rString.ReleaseBuffer();

        // handle error/eof case
        if (lpszResult == NULL && !feof(m_pStream))
        {
            Afx_clearerr_s(m_pStream);
            AfxThrowFileException(CFileException::genericException, _doserrno, m_strFileName);
        }

        // if string is read completely or EOF
        if (lpszResult == NULL ||
            (nLen = (int)lstrlenW(lpsz)) < nMaxSize ||
            lpsz[nLen - 1] == L'\n')
            break;

        // The chunk was filled without reaching a newline: extend the buffer
        // and continue reading right after what has been read so far.
        nLen = rString.GetLength();
        lpsz = rString.GetBuffer(nMaxSize + nLen) + nLen;
    }

    // Remove the line terminator if present. nLen keeps the length *before*
    // stripping so that a blank line is not mistaken for end of file.
    nLen = rString.GetLength();
    int nNewLen = nLen;
    if (nNewLen > 0 && rString[nNewLen - 1] == L'\n')
    {
        --nNewLen;
        if (nNewLen > 0 && rString[nNewLen - 1] == L'\r')
            --nNewLen;
    }
    if (nNewLen != nLen)
        rString.GetBufferSetLength(nNewLen);

    return nLen != 0;
}

// Reads one line as narrow (ANSI/multibyte) characters.
// For a non-Unicode file the line is read directly; otherwise the line is
// read as wide characters and converted.
// The trailing line terminator ("\r\n" or "\n") is removed.
// Returns FALSE only when nothing at all could be read (end of file), so an
// empty line in the middle of the file still returns TRUE.
// Throws a CFileException (genericException) on a stream read error.
BOOL CStdioFileEx::ReadAnsiString(CStringA& rString)
{
    _ASSERTE(m_pStream);
    rString = ""; // empty string without deallocating

    if (m_bIsUnicodeText)
    {
        // Unicode file: read the wide line and let the CStringA assignment
        // convert it (the original author notes that no setlocale call is
        // needed for this conversion).
        CStringW wideString;
        BOOL bRetval = ReadWideString(wideString);
        rString = wideString;
        return bRetval;
    }

    const int nMaxSize = 128; // chunk size; the buffer grows by this much per pass
    LPSTR lpsz = rString.GetBuffer(nMaxSize);
    LPSTR lpszResult;
    int nLen = 0;
    for (;;)
    {
        lpszResult = fgets(lpsz, nMaxSize + 1, m_pStream);
        rString.ReleaseBuffer();

        // handle error/eof case
        if (lpszResult == NULL && !feof(m_pStream))
        {
            Afx_clearerr_s(m_pStream);
            AfxThrowFileException(CFileException::genericException, _doserrno, m_strFileName);
        }

        // if string is read completely or EOF
        if (lpszResult == NULL ||
            (nLen = (int)lstrlenA(lpsz)) < nMaxSize ||
            lpsz[nLen - 1] == '\n')
            break;

        // The chunk was filled without reaching a newline: extend the buffer
        // and continue reading right after what has been read so far.
        nLen = rString.GetLength();
        lpsz = rString.GetBuffer(nMaxSize + nLen) + nLen;
    }

    // Remove the line terminator if present. nLen keeps the length *before*
    // stripping so that a blank line is not mistaken for end of file.
    nLen = rString.GetLength();
    int nNewLen = nLen;
    if (nNewLen > 0 && rString[nNewLen - 1] == '\n')
    {
        --nNewLen;
        if (nNewLen > 0 && rString[nNewLen - 1] == '\r')
            --nNewLen;
    }
    if (nNewLen != nLen)
        rString.GetBufferSetLength(nNewLen);

    return nLen != 0;
}

// Purpose: Writes string to file either in Unicode or multibyte, depending on
// whether the caller specified the CStdioFileEx::modeWriteUnicode flag.
// Override of base class function.
void CStdioFileEx::WriteString(LPCTSTR lpsz)
{
#ifdef _UNICODE
    WriteWideString(lpsz);
#else
    WriteAnsiString(lpsz);
#endif
}

// Writes a wide string. For a Unicode file the text is written as-is, preceded
// by the BOM when writing at the very start of the file; for a non-Unicode
// file the text is converted to narrow characters first.
// Throws an invalid-argument exception when lpsz is NULL and a CFileException
// (diskFull) when the stream write fails.
void CStdioFileEx::WriteWideString(LPCWSTR lpsz)
{
    ASSERT(lpsz != NULL);
    if (lpsz == NULL)
    {
        AfxThrowInvalidArgException();
    }

    if (m_bIsUnicodeText)
    {
        ASSERT(m_pStream != NULL);

        // If writing Unicode and at the start of the file, need to write byte mark
        if (GetPosition() == 0)
        {
            wchar_t cBOM = (wchar_t)UNICODE_BOM;
            // NOTE(review): this goes through CFile::Write while the text
            // below goes through the stdio stream - confirm the two stay in
            // step for every open mode in use.
            CFile::Write(&cBOM, sizeof(wchar_t));
        }

        // fputws reports failure with WEOF regardless of the _UNICODE setting.
        if (fputws(lpsz, m_pStream) == WEOF)
            AfxThrowFileException(CFileException::diskFull, _doserrno, m_strFileName);
    }
    else
    {
        // The CW2A temporary lives until the call returns.
        WriteAnsiString(CW2A(lpsz));
    }
}

// Writes a narrow string. For a non-Unicode file the text is written as-is;
// for a Unicode file it is converted to wide characters first.
// Throws an invalid-argument exception when lpsz is NULL and a CFileException
// (diskFull) when the stream write fails.
void CStdioFileEx::WriteAnsiString(LPCSTR lpsz)
{
    ASSERT(lpsz != NULL);
    if (lpsz == NULL)
    {
        AfxThrowInvalidArgException();
    }

    if (!m_bIsUnicodeText)
    {
        ASSERT(m_pStream != NULL);

        // fputs reports failure with EOF regardless of the _UNICODE setting.
        if (fputs(lpsz, m_pStream) == EOF)
            AfxThrowFileException(CFileException::diskFull, _doserrno, m_strFileName);
    }
    else
    {
        // The CA2W temporary lives until the call returns.
        WriteWideString(CA2W(lpsz));
    }
}

// Decides whether the file is to be handled as Unicode text and adjusts the
// open flags accordingly.
//   sFilePath  - path of the file about to be opened (inspected when reading).
//   nOpenFlags - open flags; modified in place.
// Returns the (possibly modified) flags. Sets m_bIsUnicodeText as a side effect.
UINT CStdioFileEx::PreprocessFlags(const CString& sFilePath, UINT& nOpenFlags)
{
    m_bIsUnicodeText = false;

    // If we have writeUnicode we must have write or writeRead as well
    if (nOpenFlags & CStdioFileEx::modeWriteUnicode)
    {
        ASSERT(nOpenFlags & CFile::modeWrite || nOpenFlags & CFile::modeReadWrite);
        m_bIsUnicodeText = true;
    }
    // If reading in text mode and not creating...
    else if (nOpenFlags & CFile::typeText &&
             !(nOpenFlags & CFile::modeCreate) &&
             !(nOpenFlags & CFile::modeWrite))
    {
        m_bIsUnicodeText = IsFileUnicode(sFilePath);
    }

    // To read or write a Unicode text file we must switch to typeBinary,
    // because the mode affects how fputws/fgetws work (see MSDN for details).
    if (m_bIsUnicodeText)
    {
        nOpenFlags &= ~(CFile::typeText);
        nOpenFlags |= CFile::typeBinary;
    }

    return nOpenFlags;
}

// Purpose: Determines whether a file is Unicode by reading the first character
// and detecting whether it's the Unicode byte marker.
// Returns false when the file cannot be opened or is shorter than one wchar_t.
bool CStdioFileEx::IsFileUnicode(const CString& sFilePath)
{
    CFile file;
    wchar_t cFirstChar = 0;
    CFileException exFile;
    bool bIsUnicode = false;

    // Open file in binary mode and read first character
    if (file.Open(sFilePath, CFile::typeBinary | CFile::modeRead, &exFile))
    {
        // Only trust the value when a complete wchar_t was read; if it is the
        // Unicode byte-order marker, let's say it's Unicode.
        if (file.Read(&cFirstChar, sizeof(wchar_t)) == sizeof(wchar_t) &&
            cFirstChar == (wchar_t)UNICODE_BOM)
        {
            bIsUnicode = true;
        }
        file.Close();
    }
    else
    {
        // Handle error here if you like
    }

    return bIsUnicode;
}

// Returns the number of characters in the file: the byte length divided by the
// character size, not counting the byte-order mark of a Unicode file.
// Returns 0 when no stream is open.
unsigned long CStdioFileEx::GetCharCount()
{
    int nCharSize;
    unsigned long nByteCount, nCharCount = 0;

    if (m_pStream)
    {
        // Get size of chars in file
        nCharSize = m_bIsUnicodeText ? sizeof(wchar_t) : sizeof(char);

        nByteCount = (unsigned long)GetLength();

        // If Unicode, remove byte order mark from count. A file shorter than
        // the mark (e.g. just created) counts as empty instead of wrapping
        // the unsigned byte count around.
        if (m_bIsUnicodeText)
        {
            if (nByteCount >= sizeof(wchar_t))
                nByteCount = nByteCount - sizeof(wchar_t);
            else
                nByteCount = 0;
        }

        // Calc chars
        nCharCount = (nByteCount / nCharSize);
    }

    return nCharCount;
}