损坏的文本示例:
Prokofiev,Sergey,1891-1953.︠a︡-kont︠s︡ert
至
Prokofiev,Sergey,1891-1953.- Simfonii?a?-kont?s?ert
因此,我使用一个vba宏通过excel将原来的google电子表格传输到xml。虽然我试着告诉excel在高级选项中保存utf-8,尽管xml是在utf 8中打印的,但是显然有些地方是不正确的。这就是我几个月前从堆栈溢出中得到的vba:
Sub MakeXML(iCaptionRow As Integer, iDataStartRow As Integer, sOutputFileName As String)
Dim Q As String
Q = Chr$(34)
Dim sXML As String
sXML = "<?xml version=" & Q & "1.0" & Q & " encoding=" & Q & "UTF-8" & Q & "?>"
sXML = sXML & "<rows>"
''--determine count of columns
Dim iColCount As Integer
iColCount = 1
While Trim$(Cells(iCaptionRow, iColCount)) > ""
iColCount = iColCount + 1
Wend
Dim iRow As Integer
iRow = iDataStartRow
While Cells(iRow, 1) > ""
sXML = sXML & "<row id=" & Q & iRow & Q & ">"
For icol = 1 To iColCount - 1
sXML = sXML & "<" & Trim$(Cells(iCaptionRow, icol)) & ">"
sXML = sXML & Trim$(Cells(iRow, icol))
sXML = sXML & "</" & Trim$(Cells(iCaptionRow, icol)) & ">"
Next
sXML = sXML & "</row>"
iRow = iRow + 1
Wend
sXML = sXML & "</rows>"
Dim nDestFile As Integer, sText As String
''Close any open text files
Close
''Get the number of the next free text file
nDestFile = FreeFile
''Write the entire file to sText
Open sOutputFileName For Output As #nDestFile
Print #nDestFile, sXML
Close
End Sub
Sub test()
MakeXML 1, 2, "C:\Users\Adam Horvath\Documents\~CODE\prokooutputtitleUTF8.xml"
End Sub我认为解决方案涉及以不同的方式写入xml文件,但这种方式并不确定。
发布于 2016-07-16 21:43:25
考虑使用VBA的MSXML对象来构建XML文档,而不是连接字符串值来构建XML,甚至避免文本文件转储。在这种方法中,您有createElement()、creatAttribute()、appendChild()和xmldoc.Save()的过程。特别是针对您的需要,允许您指定编码的createProcessingInstruction()。请注意,尽管标准始终是1.0版本,编码为UTF-8。所以这里的处理指令可能是多余的。
现在,您的文本文件转储使用指定的UTF-8,但可能不是完整的XML格式编码,而是默认的ANSI文本格式。实际上,您可以指定任何内容,文本转储也会遵从,但是使用XML对象会失败。
在结束之前,添加一个XSLT标识转换,以漂亮地打印带有换行和缩进的输出,以避免XML的一行原始输出。您将注意到这样的XSLT是一个字符串表示,但是加载到一个适当的XML文档中。否则,您可以将XSLT外部加载为.xsl文件(顺便说一句,它是一个格式良好的.xml)。
Sub MakeXML(iCaptionRow As Integer, iDataStartRow As Integer, sOutputFileName As String)
' REFERENCE: Microsoft XML V6.0
Dim doc As New MSXML2.DOMDocument60, xslDoc As New MSXML2.DOMDocument60, newDoc As New MSXML2.DOMDocument60
Dim pi As MSXML2.IXMLDOMProcessingInstruction
Dim root As IXMLDOMElement, rowNode As IXMLDOMElement, loopNode As IXMLDOMElement
Dim idAttrib As IXMLDOMAttribute
' PROCESSING INSTRUCTION
Set pi = doc.createProcessingInstruction("xml", " version=""1.0"" encoding=""UTF-8""")
doc.appendChild pi
' DECLARE XML DOC OBJECT
Set root = doc.createElement("rows")
doc.appendChild root
''--determine count of columns
Dim iColCount As Integer
iColCount = Cells(7, Columns.Count).End(xlToLeft).Column
Dim iRow As Integer
iRow = iDataStartRow
Dim icol As Integer
While Cells(iRow, 1) > ""
' ROW NODE
Set rowNode = doc.createElement("row")
root.appendChild rowNode
' ID ATTRIBUTE
Set idAttrib = doc.createAttribute("id")
idAttrib.Value = iRow
rowNode.setAttributeNode idAttrib
' LOOP NODE
For icol = 1 To iColCount - 1
Set loopNode = doc.createElement(Trim$(Cells(iCaptionRow, icol)))
loopNode.Text = Trim$(Cells(iRow, icol))
rowNode.appendChild loopNode
Next icol
iRow = iRow + 1
Wend
' PRETTY PRINT RAW OUTPUT
xslDoc.LoadXML "<?xml version=" & Chr(34) & "1.0" & Chr(34) & "?>" _
& "<xsl:stylesheet version=" & Chr(34) & "1.0" & Chr(34) _
& " xmlns:xsl=" & Chr(34) & "http://www.w3.org/1999/XSL/Transform" & Chr(34) & ">" _
& "<xsl:strip-space elements=" & Chr(34) & "*" & Chr(34) & " />" _
& "<xsl:output method=" & Chr(34) & "xml" & Chr(34) & " indent=" & Chr(34) & "yes" & Chr(34) & "" _
& " encoding=" & Chr(34) & "UTF-8" & Chr(34) & "/>" _
& " <xsl:template match=" & Chr(34) & "node() | @*" & Chr(34) & ">" _
& " <xsl:copy>" _
& " <xsl:apply-templates select=" & Chr(34) & "node() | @*" & Chr(34) & " />" _
& " </xsl:copy>" _
& " </xsl:template>" _
& "</xsl:stylesheet>"
xslDoc.async = False
doc.transformNodeToObject xslDoc, newDoc
newDoc.Save sOutputFileName
End Subhttps://stackoverflow.com/questions/38397358
复制相似问题