2013-04-04 2 views
3

У меня есть огромные внешние файлы с форматами кодирования «ANSI» и «UCS-2 Little Endian».Кодирование текстовых файлов в VB 6.0

Теперь я хочу изменить формат кодировки файла в UTF-8 с помощью Visual Basic 6.0. Я не хочу изменять исходный файл; Я просто хочу изменить формат кодировки.

Я искал в Интернете; но не может понять код VB и не знаю, как это сделать.

Я также хотел бы иметь возможность читать строки по одному из кодированных файлов UTF-8.

+1

Если вы не понимаете, что кодирующая об этом читать [прекрасную статью] (http://www.joelonsoftware.com/articles/Unicode.html). Я не могу помочь вам с VB6, но вот некоторые похожие вопросы SO: [1] (http://stackoverflow.com/questions/8327984/writing-to-file-with-iso-8859-1-encoding), [2] (http://stackoverflow.com/questions/5182102/vb6-vbscript-change-file-encoding-to-ansii). – linski

ответ

10

ПРИМЕЧАНИЕ. Этот ответ был широко расширен, чтобы вписаться в отредактированный вопрос, который, в свою очередь, был вызван Visual Basic 6 and UTF-8

Следующий код завершает преобразование кодированных строк ANSI, UTF-16 и UTF-32 из файла в строки UTF-8 , в VB6. Вам нужно загрузить весь файл и вывести его. Обратите внимание, что если он был действительно общим, метод LineInputUTF8() был бы LineInput() и требовал бы кодовой страницы.

Option Explicit 

Private Declare Function MultiByteToWideChar Lib "Kernel32.dll" (_ 
    ByVal CodePage As Long, _ 
    ByVal dwFlags As Long, _ 
    ByVal lpMultiByteStr As Long, _ 
    ByVal cbMultiByte As Long, _ 
    ByVal lpWideCharStr As Long, _ 
    ByVal cchWideChar As Long _ 
) As Long 

Private Declare Function WideCharToMultiByte Lib "Kernel32.dll" (_ 
    ByVal CodePage As Long, _ 
    ByVal dwFlags As Long, _ 
    ByVal lpWideCharStr As Long, _ 
    ByVal cchWideChar As Long, _ 
    ByVal lpMultiByteStr As Long, _ 
    ByVal cbMultiByte As Long, _ 
    ByVal lpDefaultChar As Long, _ 
    ByVal lpUsedDefaultChar As Long _ 
) As Long 

Public Const CP_ACP  As Long = 0   ' Default ANSI code page. 
Public Const CP_UTF8  As Long = 65001  ' UTF8. 
Public Const CP_UTF16_LE As Long = 1200  ' UTF16 - little endian. 
Public Const CP_UTF16_BE As Long = 1201  ' UTF16 - big endian. 
Public Const CP_UTF32_LE As Long = 12000  ' UTF32 - little endian. 
Public Const CP_UTF32_BE As Long = 12001  ' UTF32 - big endian. 

' Purpose: Heuristic to determine whether bytes in a file are UTF-8. 
Private Function FileBytesAreUTF8(ByVal the_iFileNo As Integer) As Boolean 

    Const knSampleByteSize   As Long = 2048 
    Dim nLof      As Long 
    Dim nByteCount     As Long 
    Dim nByteIndex     As Long 
    Dim nCharExtraByteCount   As Long 
    Dim bytValue     As Byte 

    ' We look at the first <knSampleByteSize> bytes of the file. However, if the file is smaller, we will have to 
    ' use the smaller size. 
    nLof = LOF(the_iFileNo) 
    If nLof < knSampleByteSize Then 
     nByteCount = nLof 
    Else 
     nByteCount = knSampleByteSize 
    End If 

    ' Go to the start of the file. 
    Seek #the_iFileNo, 1 

    For nByteIndex = 1 To nByteCount 

     Get #the_iFileNo, , bytValue 

     ' If the character we are processing has bytes beyond 1, then we are onto the next character. 
     If nCharExtraByteCount = 0 Then 
      ' 
      ' The UTF-8 specification says that the first byte of a character has masking bits which indicate how many bytes follow. 
      ' 
      ' See: http://en.wikipedia.org/wiki/UTF-8#Description 
      ' 
      ' Bytes in 
      ' sequence Byte 1 Byte 2 Byte 3 Byte 4 
      ' 1   0xxxxxxx 
      ' 2   110xxxxx 10xxxxxx 
      ' 3   1110xxxx 10xxxxxx 10xxxxxx 
      ' 4   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
      ' 
      If (bytValue And &H80) = &H0 Then 
       nCharExtraByteCount = 0 
      ElseIf (bytValue And &HE0) = &HC0 Then 
       nCharExtraByteCount = 1 
      ElseIf (bytValue And &HF0) = &HE0 Then 
       nCharExtraByteCount = 2 
      ElseIf (bytValue And &HF8) = &HF0 Then 
       nCharExtraByteCount = 3 
      Else 
       ' If none of these masks were matched, then this can't be a UTF-8 character. 
       FileBytesAreUTF8 = False 
       Exit Function 
      End If 
     Else 
      ' All following bytes must be masked as in the table above. 
      If (bytValue And &HC0) = &H80 Then 
       nCharExtraByteCount = nCharExtraByteCount - 1 
       If nCharExtraByteCount = 0 Then 
        FileBytesAreUTF8 = True 
       End If 
      Else 
       ' Not a UTF8 character. 
       FileBytesAreUTF8 = False 
       Exit Function 
      End If 
     End If 

    Next nByteIndex 

End Function 

' Purpose: Take a string whose bytes are in the byte array <the_abytCPString>, with code page <the_nCodePage>, convert to a VB string. 
Private Function FromCPString(ByRef the_abytCPString() As Byte, ByVal the_nCodePage As Long) As String 

    Dim sOutput      As String 
    Dim nValueLen     As Long 
    Dim nOutputCharLen    As Long 

    ' If the code page says this is already compatible with the VB string, then just copy it into the string. No messing. 
    If the_nCodePage = CP_UTF16_LE Then 
     FromCPString = the_abytCPString() 
    Else 

     ' Cache the input length. 
     nValueLen = UBound(the_abytCPString) - LBound(the_abytCPString) + 1 

     ' See how big the output buffer will be. 
     nOutputCharLen = MultiByteToWideChar(the_nCodePage, 0&, VarPtr(the_abytCPString(LBound(the_abytCPString))), nValueLen, 0&, 0&) 

     ' Resize output byte array to the size of the UTF-8 string. 
     sOutput = Space$(nOutputCharLen) 

     ' Make this API call again, this time giving a pointer to the output byte array. 
     MultiByteToWideChar the_nCodePage, 0&, VarPtr(the_abytCPString(LBound(the_abytCPString))), nValueLen, StrPtr(sOutput), nOutputCharLen 

     ' Return the array. 
     FromCPString = sOutput 

    End If 

End Function 

Public Function GetContents(ByVal the_sTextFile As String, ByRef out_nCodePage As Long, Optional ByVal the_nDesiredCodePage As Long = -1, Optional ByRef out_bContainedBOM As Boolean) As String 

    Dim iFileNo      As Integer 
    Dim abytFileContents()   As Byte 
    Dim nDataSize     As Long 

    iFileNo = FreeFile 

    OpenForInput the_sTextFile, iFileNo, out_nCodePage, the_nDesiredCodePage, out_bContainedBOM 

    ' We want to read the entire contents of the file (not including any BOM value). 
    ' After calling OpenForInput(), the file pointer should be positioned after any BOM. 
    ' So size file contents buffer to <file size> - <current position> + 1. 
    nDataSize = LOF(iFileNo) - Seek(iFileNo) + 1 
    ReDim abytFileContents(1 To nDataSize) 
    Get #iFileNo, , abytFileContents() 

    Close iFileNo 

    ' Now we must convert this to UTF-8. But we have to first convert to the Windows NT standard UTF-16 LE. 
    GetContents = FromCPString(abytFileContents(), out_nCodePage) 

End Function 

' Purpose: Reads up to the end of the current line of the file, repositions to the beginning of the next line, if any, and 
'   outputs all characters found. 
' Inputs: the_nFileNo  The number of the file. 
' Outputs: out_sLine  The line from the current position in the file. 
' Return: True if there is more data. 
Public Function LineInputUTF8(ByVal the_nFileNo As Integer, ByRef out_sLine As String) As Boolean 

    Dim bytValue   As Byte 
    Dim abytLine()   As Byte 
    Dim nStartOfLinePos  As Long 
    Dim nEndOfLinePos  As Long 
    Dim nStartOfNextLine As Long 
    Dim nLineLen   As Long 

    ' Save the current file position as the beginning of the line, and cache this value. 
    nStartOfLinePos = Seek(the_nFileNo) 

    ' Retrieves the first byte from the current position. 
    Get #the_nFileNo, , bytValue 

    ' Loop until the end of file is encountered. 
    Do Until EOF(the_nFileNo) 

     ' Check whether this byte represents a carriage return or line feed character (indicating new line). 
     If bytValue = 13 Or bytValue = 10 Then 
      ' By this point, the current position is *after* the CR or LF character, so to get the position of the 
      ' last byte in the line, we must go back two bytes. 
      nEndOfLinePos = Seek(the_nFileNo) - 2 

      ' If this is a carriage return, then we must check the next character. 
      If bytValue = 13 Then 
       Get #the_nFileNo, , bytValue 
       ' Is this a line feed? 
       If bytValue = 10 Then 
       ' Yes. Assume that CR-LF counts as a single NewLine. So the start of the next line should skip over the line feed. 
        nStartOfNextLine = nEndOfLinePos + 3 
       Else 
       ' No. The start of the next line is the current position. 
        nStartOfNextLine = nEndOfLinePos + 2 
       End If 
      ElseIf bytValue = 10 Then 
      ' If this is a line feed, then the start of the next line is the current position. 
       nStartOfNextLine = nEndOfLinePos + 2 
      End If 

      ' Since we have processed all the bytes in the line, exit the loop. 
      Exit Do 
     End If 

     ' Get the next byte. 
     Get #the_nFileNo, , bytValue 
    Loop 

    ' Check to see if there was an end of line. 
    If nEndOfLinePos = 0 Then 
    ' No, this is the end of the file - so use all the remaining characters. 
     nLineLen = Seek(the_nFileNo) - nStartOfLinePos - 1 
    Else 
    ' Yes - so use all the characters up to the end of line position. 
     nLineLen = nEndOfLinePos - nStartOfLinePos + 1 
    End If 

    ' Is this line empty? 
    If nLineLen = 0 Then 
    ' Yes - just return an empty string. 
     out_sLine = vbNullString 
    Else 
    ' No - pull all the bytes from the beginning to the end of the line into a byte array, and then convert that from UTF-8 to a VB string. 
     ReDim abytLine(1 To nLineLen) 
     Get #the_nFileNo, nStartOfLinePos, abytLine() 
     out_sLine = FromCPString(abytLine(), CP_UTF8) 
    End If 

    ' If there is a line afterwards, then move to the beginning of the line, and return True. 
    If nStartOfNextLine > 0 Then 
     Seek #the_nFileNo, nStartOfNextLine 
     LineInputUTF8 = True 
    End If 

End Function 

' Purpose: Analogue of 'Open "fileName" For Input As #fileNo' - but also return what type of text this is via a Code Page value. 
' Inputs: the_sFileName 
'   the_iFileNo 
'   (the_nDesiredCodePage) The code page that you want to use with this file. 
'         If this value is set to the default, -1, this indicates that the code page will be ascertained from the file. 
' Outputs: out_nCodePage   There are only six valid values that are returned if <the_nDesiredCodePage> was set to -1. 
'    CP_ACP  ANSI code page 
'    CP_UTF8  UTF-8 
'    CP_UTF16LE UTF-16 Little Endian (VB and NT default string encoding) 
'    CP_UTF16BE UTF-16 Big Endian 
'    CP_UTF32LE UTF-32 Little Endian 
'    CP_UTF32BE UTF-32 Big Endian 
'   (out_bContainedBOM)  If this was set to True, then the file started with a BOM (Byte Order Marker). 
Public Sub OpenForInput(ByRef the_sFilename As String, ByVal the_iFileNo As Integer, ByRef out_nCodePage As Long, Optional ByVal the_nDesiredCodePage As Long = -1, Optional ByRef out_bContainedBOM As Boolean) 

    ' Note if we want to take account of every case, we should read in the first four bytes, and check for UTF-32 low and high endian BOMs, check 
    ' the first three bytes for the UTF-8 BOM, and finally check the first two bytes for UTF-16 low and hight endian BOMs. 
    Dim abytBOM(1 To 4)    As Byte 
    Dim nCodePage     As Long 

    ' By default, there is no BOM. 
    out_bContainedBOM = False 

    Open the_sFilename For Binary Access Read As #the_iFileNo 

    ' We are interested in -1 (ascertain code page), and then various UTF encodings. 
    Select Case the_nDesiredCodePage 
    Case -1, CP_UTF8, CP_UTF16_BE, CP_UTF16_LE, CP_UTF32_BE, CP_UTF32_LE 

     ' Default code page. 
     nCodePage = CP_ACP 

     ' Pull in the first four bytes to determine the BOM (byte order marker). 
     Get #the_iFileNo, , abytBOM() 

     ' The following are the BOMs for text files: 
     ' 
     ' FF FE   UTF-16, little endian 
     ' FE FF   UTF-16, big endian 
     ' EF BB BF  UTF-8 
     ' FF FE 00 00 UTF-32, little endian 
     ' 00 00 FE FF UTF-32, big-endian 
     ' 
     ' Work out the code page from this information. 

     Select Case abytBOM(1) 
     Case &HFF 
      If abytBOM(2) = &HFE Then 
       If abytBOM(3) = 0 And abytBOM(4) = 0 Then 
        nCodePage = CP_UTF32_LE 
       Else 
        nCodePage = CP_UTF16_LE 
       End If 
      End If 
     Case &HFE 
      If abytBOM(2) = &HFF Then 
       nCodePage = CP_UTF16_BE 
      End If 
     Case &HEF 
      If abytBOM(2) = &HBB And abytBOM(3) = &HBF Then 
       nCodePage = CP_UTF8 
      End If 
     Case &H0 
      If abytBOM(2) = &H0 And abytBOM(3) = &HFE And abytBOM(4) = &HFF Then 
       nCodePage = CP_UTF32_BE 
      End If 
     End Select 

     ' Did we match any BOMs? 
     If nCodePage = CP_ACP Then 
     ' No - we are still defaulting to the ANSI code page. 
      ' Special check for UTF-8. The BOM is not specified in the standard for UTF-8, but according to Wikipedia (which is always right :-)), 
      ' only Microsoft includes this marker at the beginning of files. 
      If FileBytesAreUTF8(the_iFileNo) Then 
       out_nCodePage = CP_UTF8 
      Else 
       out_nCodePage = CP_ACP 
      End If 
     Else 
     ' Yes - we have worked out the code page from the BOM. 
      ' If no code page was suggested, we now return the code page we found. 
      If the_nDesiredCodePage = -1 Then 
       out_nCodePage = nCodePage 
      End If 

      ' Inform the caller that a BOM was found. 
      out_bContainedBOM = True 
     End If 

     ' Reset the file pointer to the beginning of the file data. 
     If out_bContainedBOM Then 
      ' Note that if the code page found was one of the two UTF-32 values, then we are already in the correct position. 
      ' Otherwise, we have to move to just after the end of the BOM. 
      Select Case nCodePage 
      Case CP_UTF16_BE, CP_UTF16_LE 
       Seek #the_iFileNo, 3 
      Case CP_UTF8 
       Seek #the_iFileNo, 4 
      End Select 
     Else 
      ' There is no BOM, so simply go the beginning of the file. 
      Seek #the_iFileNo, 1 
     End If 

    Case Else 
     out_nCodePage = the_nDesiredCodePage 
    End Select 

End Sub 

' Purpose: Analogue of 'Open "fileName" For Append As #fileNo' 
Public Sub OpenForAppend(ByRef the_sFilename As String, ByVal the_iFileNo As Integer, Optional ByVal the_nCodePage As Long = CP_ACP, Optional ByVal the_bPrefixWithBOM As Boolean = True) 

    ' Open the file and move to the end of the file. 
    Open the_sFilename For Binary Access Write As #the_iFileNo 
    Seek the_iFileNo, LOF(the_iFileNo) + 1 

    If the_bPrefixWithBOM Then 
     WriteBOM the_iFileNo, the_nCodePage 
    End If 

End Sub 

' Purpose: Analogue of 'Open "fileName" For Output As #fileNo' 
Public Sub OpenForOutput(ByRef the_sFilename As String, ByVal the_iFileNo As Integer, Optional ByVal the_nCodePage As Long = CP_ACP, Optional ByVal the_bPrefixWithBOM As Boolean = True) 

    ' Ensure we overwrite the file by deleting it ... 
    On Error Resume Next 
    Kill the_sFilename 
    On Error GoTo 0 

    ' ... before creating it. 
    Open the_sFilename For Binary Access Write As #the_iFileNo 

    If the_bPrefixWithBOM Then 
     WriteBOM the_iFileNo, the_nCodePage 
    End If 

End Sub 

' Purpose: Analogue of the 'Print #fileNo, value' statement. But only one value allowed. 
'   Setting <the_bAppendNewLine> = False is analagous to 'Print #fileNo, value;'. 
Public Sub Print_(ByVal the_iFileNo As Integer, ByRef the_sValue As String, Optional ByVal the_nCodePage As Long = CP_ACP, Optional ByVal the_bAppendNewLine As Boolean = True) 

    Const kbytNull     As Byte = 0 
    Const kbytCarriageReturn  As Byte = 13 
    Const kbytNewLine    As Byte = 10 

    Put #the_iFileNo, , ToCPString(the_sValue, the_nCodePage) 

    If the_bAppendNewLine Then 
     Select Case the_nCodePage 
     Case CP_UTF16_BE 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytCarriageReturn 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNewLine 
     Case CP_UTF16_LE 
      Put #the_iFileNo, , kbytCarriageReturn 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNewLine 
      Put #the_iFileNo, , kbytNull 
     Case CP_UTF32_BE 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytCarriageReturn 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNewLine 
     Case CP_UTF32_LE 
      Put #the_iFileNo, , kbytCarriageReturn 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNewLine 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
      Put #the_iFileNo, , kbytNull 
     Case Else 
      Put #the_iFileNo, , kbytCarriageReturn 
      Put #the_iFileNo, , kbytNewLine 
     End Select 
    End If 

End Sub 

Public Sub PutContents(ByRef the_sFilename As String, ByRef the_sFileContents As String, Optional ByVal the_nCodePage As Long = CP_ACP, Optional the_bPrefixWithBOM As Boolean) 

    Dim iFileNo      As Integer 

    iFileNo = FreeFile 
    OpenForOutput the_sFilename, iFileNo, the_nCodePage, the_bPrefixWithBOM 
    Print_ iFileNo, the_sFileContents, the_nCodePage, False 
    Close iFileNo 

End Sub 

' Purpose: Converts a VB string (UTF-16) to UTF8 - as a binary array. 
Private Function ToCPString(ByRef the_sValue As String, ByVal the_nCodePage As Long) As Byte() 

    Dim abytOutput()    As Byte 
    Dim nValueLen     As Long 
    Dim nOutputByteLen    As Long 

    If the_nCodePage = CP_UTF16_LE Then 
     ToCPString = the_sValue 
    Else 

     ' Cache the input length. 
     nValueLen = Len(the_sValue) 

     ' See how big the output buffer will be. 
     nOutputByteLen = WideCharToMultiByte(the_nCodePage, 0&, StrPtr(the_sValue), nValueLen, 0&, 0&, 0&, 0&) 

     If nOutputByteLen > 0 Then 
      ' Resize output byte array to the size of the UTF-8 string. 
      ReDim abytOutput(1 To nOutputByteLen) 

      ' Make this API call again, this time giving a pointer to the output byte array. 
      WideCharToMultiByte the_nCodePage, 0&, StrPtr(the_sValue), nValueLen, VarPtr(abytOutput(1)), nOutputByteLen, 0&, 0& 
     End If 

     ' Return the array. 
     ToCPString = abytOutput() 

    End If 

End Function 

Private Sub WriteBOM(ByVal the_iFileNo As Integer, ByVal the_nCodePage As Long) 

    ' FF FE   UTF-16, little endian 
    ' FE FF   UTF-16, big endian 
    ' EF BB BF  UTF-8 
    ' FF FE 00 00 UTF-32, little endian 
    ' 00 00 FE FF UTF-32, big-endian 

    Select Case the_nCodePage 
    Case CP_UTF8 
     Put #the_iFileNo, , CByte(&HEF) 
     Put #the_iFileNo, , CByte(&HBB) 
     Put #the_iFileNo, , CByte(&HBF) 
    Case CP_UTF16_LE 
     Put #the_iFileNo, , CByte(&HFF) 
     Put #the_iFileNo, , CByte(&HFE) 
    Case CP_UTF16_BE 
     Put #the_iFileNo, , CByte(&HFE) 
     Put #the_iFileNo, , CByte(&HFF) 
    Case CP_UTF32_LE 
     Put #the_iFileNo, , CByte(&HFF) 
     Put #the_iFileNo, , CByte(&HFE) 
     Put #the_iFileNo, , CByte(&H0) 
     Put #the_iFileNo, , CByte(&H0) 
    Case CP_UTF32_BE 
     Put #the_iFileNo, , CByte(&H0) 
     Put #the_iFileNo, , CByte(&H0) 
     Put #the_iFileNo, , CByte(&HFE) 
     Put #the_iFileNo, , CByte(&HFF) 
    End Select 

End Sub 

Следующий код был добавлен в форму, которая имела контроль VSFlexGrid с Lucida Console шрифтом - чисто, чтобы обеспечить способ отображения столько символов, сколько возможно:

Option Explicit 

Private Sub Command_Click() 
    Example_ConvertFileToUTF8 
End Sub 

Private Sub Command2_Click() 
    Example_IterateUTF8Lines 
End Sub 

Private Sub Command3_Click() 
    Example_ReadWriteUTF8Lines 
End Sub 

Private Sub Form_Load() 
    VSFlexGrid.ColWidth(0) = 7000! 
End Sub 

' Purpose: Converts *any* pure text file (UTF16, ASCII, ANSI) to UTF8. 
Private Sub Example_ConvertFileToUTF8() 

    Dim nCodePage     As Long 
    Dim bContainedBOM    As Boolean 
    Dim sFileContents    As String 

    ' Read in contents. 
    sFileContents = TextFile.GetContents("C:\MysteryEncoding.txt", nCodePage, , bContainedBOM) 

    ' And then convert to UTF8. 
    TextFile.PutContents "C:\output.txt", sFileContents, CP_UTF8, bContainedBOM 

End Sub 

' Purpose: Iterates through each line of a UTF-8 text file, and adds it to a control which can display VB strings containing non-ANSI characters. 
'   In this case, I am adding items to a FlexGrid with Font = "Lucida Console". 
Private Sub Example_IterateUTF8Lines() 

    Dim iFileNo    As Integer 
    Dim lCodePage   As Long 
    Dim sLine    As String 

    iFileNo = FreeFile 

    TextFile.OpenForInput "C:\UTF8.txt", iFileNo, lCodePage 

    If lCodePage = CP_UTF8 Then 
     Do While TextFile.LineInputUTF8(iFileNo, sLine) 
      VSFlexGrid.AddItem sLine 
     Loop 
     VSFlexGrid.AddItem sLine 
    Else 
     MsgBox "This is not a UTF8 file." 
    End If 

    Close #iFileNo 

End Sub 

Private Sub Example_ReadWriteUTF8Lines() 

    Dim iFileNoInput  As Integer 
    Dim iFileNoOutput  As Integer 
    Dim lCodePage   As Long 
    Dim bBOM    As Boolean 
    Dim sLine    As String 

    iFileNoInput = FreeFile 
    TextFile.OpenForInput "C:\UTF8.txt", iFileNoInput, lCodePage, , bBOM 

    If lCodePage = CP_UTF8 Then 

     iFileNoOutput = FreeFile 
     TextFile.OpenForOutput "C:\output.txt", iFileNoOutput, lCodePage, bBOM 

     Do While TextFile.LineInputUTF8(iFileNoInput, sLine) 
      TextFile.Print_ iFileNoOutput, sLine, lCodePage 
     Loop 
     TextFile.Print_ iFileNoOutput, sLine, lCodePage, False 

     Close #iFileNoOutput 

    Else 
     MsgBox "This is not a UTF8 file." 
    End If 

    Close #iFileNoInput 

End Sub 
+0

Если вы хотите преобразовать строки ANSI (по которым я предполагаю, что вы будете хранить в байтовом массиве) в строки UTF-8, сначала конвертируйте строку ANSI в строку VB с помощью StrConv (abytANSI, vbUnicode), а затем используйте ToUTF8() для преобразования в массив байтов UTF-8. –

+0

Файл ввода-вывода VB6 по умолчанию использует файлы ANSI. Если Ramesh использует стандартные операторы ввода-вывода файлов VB6, то даже если файлы являются ANSI-файлами, внутренние строки будут нормальными строками VB6 (Unicode UTF-16), а не строками ANSI. – MarkJ

+0

У меня есть два внешних файла, тип которых - «UCS-2 Little Endian» и «ANSI». Теперь я хочу изменить эти форматы файлов на «UTF-8». Над кодом открывается и записывается файл с некоторыми строковыми значениями. Мне не нужно добавлять какие-либо значения, просто я хочу изменить кодировку этих файлов ... – Ramesh

0

у меня есть какой-то VB6 код для изменения кодировки файлов ANSI для кодирования UTF-8.

'--- start function for convert to UTF-8 

Private Function OpenAppendUTF8(ByVal FileName As String) As Integer 
    OpenAppendUTF8 = FreeFile(0) 
    Open FileName For Binary Access Write As #OpenAppendUTF8 
    Seek #OpenAppendUTF8, LOF(OpenAppendUTF8) + 1 
End Function 

Sub DeleteFile(ByVal FileToDelete As String) 
    If Dir$(FileToDelete) = "" Then 'See above 
    Else 
     SetAttr FileToDelete, vbNormal 
     Kill FileToDelete 
    End If 
End Sub 
'- 

Private Sub WriteUTF8(_ 
    ByVal FNum As Integer, _ 
    ByVal Text As String, _ 
    Optional ByVal NL As Boolean) 

    Dim lngResult As Long 
    Dim UTF8() As Byte 

    If NL Then Text = Text & vbNewLine 
    lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(Text), Len(Text), _ 
            0, 0, 0, 0) 
    If lngResult > 0 Then 
     ReDim UTF8(lngResult - 1) 
     WideCharToMultiByte CP_UTF8, 0, StrPtr(Text), Len(Text), _ 
          VarPtr(UTF8(0)), lngResult, 0, 0 
     Put #FNum, , UTF8 
    End If 
End Sub 
'------- end function for convert to UTF-8 
'>>> coding 
      Dim ReadString As String 
      Dim writeString As String 
      Dim txtNotepad As String 
      Dim FileNumber As Integer 
      Dim UTF8 As String 
      Dim strsql As String 
      Dim iloop As Integer 
      FileNumber = FreeFile(1)     
       '--- if you need to delete file first. 
       DeleteFile App.Path & "\PRB\fieldRules.txt" 
       '--- 
       FileNumber = OpenAppendUTF8(App.Path & "\PRB\fieldRules.txt") 
       iloop = 1 
       Do While iloop > 0 
        iloop = InStr(1, txtNotepad, "\n") 
        ' "\n" for end of line 

        If iloop <= 5 Then 
         iloop = 0 
        Else 
         'writeString = Mid(txtNotepad, 1, iloop - 1) 
         'Print #FileNumber, writeString 
         WriteUTF8 FileNumber, Mid(txtNotepad, 1, iloop - 1), True 
         txtNotepad = Mid(txtNotepad, iloop + 3, 9999) 
        End If 
       Loop 
       Close #FileNumber 
<<<< end code 
Смежные вопросы