Unicode-aware String Decorator [new version]
This is the new version of the macro.
It is smarter than the first version: it skips strings in comments,
strings that are already decorated, the #include "MyHeader.h"
lines, etc.
I've tried to develop the code in a modular way, with classes, so that some
parts can be reused in other projects/macros.
It seems to work on VS.NET2003; I don't know if it works also in
VS2005 (I don't have it).
I don't know if VS automation exposes the list of all strings in the
document; if such a string collection were exposed, a lot of the code I have
developed would be useless :) because there's a lot of code here for the
string parsing (to exclude strings in comments, etc.)
I tested it, but maybe more testing would be a good thing.
I don't have a site/blog to post it; I'm going to develop a web "home"
in the future, so I'm posting here (with also a sample file before and
after decoration).
BTW: VisualBasic is not my strongest language :) so maybe there could
be some not-smart thing in the code... but it seems OK.
Hope it helps someone for Unicode-awareness decoration of strings in C/C++.
MrAsm
<CODE Language="VisualBasic">
'******************************************************************************
' UnicodeStringDecorator
'
' by MrAsm <mrasm AT usa DOT com>
'
'
' Module with macros for Visual Studio .NET 2003 (and 2005 ?)
' to decorate strings in C/C++ source files, to make them Unicode-aware.
'
' The applied decoration is _T().
' Strings already decorated with _T() or L are skipped.
' Strings in comments are ignored, too.
' It is possible to UnDo the decoration.
'
'
' 2007, June 11th
'
'
' USAGE:
' ======
'
' There are two public macros.
' They can be called to do the decoration job:
'
' - DecorateStringsInSelection
' - DecorateStringsInActiveDocument
'
' These macros call the internal engine, which is structured into classes.
'
' The StringElement class stores information about a found string, like its
' position in the document.
'
' The StringParser class does the parsing job: it searches the document (or
' selection) for strings, excluding the strings in comments, etc.
'
' The StringUnicodeDecorator class uses the StringParser to extract the
' strings, then it filters the collected strings once more, to exclude strings
' that are already decorated.
' Then, the decoration is performed on the final string list.
'
' Note that the StringParser class could be reused also when there is a
' general need of extracting strings from a C/C++ document.
'
'
' TODOs:
' ======
' - More tests (...but seems quite good now)
' - Maybe develop an add-in
'
'
'******************************************************************************
Imports EnvDTE
Imports System
Imports System.Diagnostics
Imports System.IO
Imports System.Collections
Public Module UnicodeStringDecorator
'==========================================================================
' Name: StringElement
' Desc: Data associated to parsed strings.
' It stores string content, string line and delimiting columns.
'==========================================================================
Class StringElement
    ' Value object describing one string literal found in a document:
    ' its text (quotes included), its 1-based line number and the 1-based
    ' columns of its opening and closing quote characters.

    ' The string (including the delimiting quotes "")
    Public Property Str() As String
        Get
            Return mStr
        End Get
        Set(ByVal Value As String)
            mStr = Value
        End Set
    End Property

    ' Line number (1-based)
    Public Property Line() As Integer
        Get
            Return mLine
        End Get
        Set(ByVal Value As Integer)
            mLine = Value
        End Set
    End Property

    ' Column of the opening string quote (1-based)
    Public Property ColumnStart() As Integer
        Get
            Return mColumnStart
        End Get
        Set(ByVal Value As Integer)
            mColumnStart = Value
        End Set
    End Property

    ' Column of the closing string quote (1-based)
    Public Property ColumnEnd() As Integer
        Get
            Return mColumnEnd
        End Get
        Set(ByVal Value As Integer)
            mColumnEnd = Value
        End Set
    End Property

    ' Human-readable description: "[Line: n; Cols: a-b] <string>".
    ' Declared Overrides (not Overridable) so that it really replaces
    ' Object.ToString() instead of shadowing it; otherwise a call made
    ' through an Object reference would return the type name.
    Public Overrides Function ToString() As String
        Return "[Line: " & mLine & "; Cols: " & _
               mColumnStart & "-" & mColumnEnd & "] " & mStr
    End Function

    ' Build an empty element (empty string, all positions 0)
    Public Sub New()
        Clear()
    End Sub

    ' Build an element with only the string text set
    Public Sub New(ByVal str As String)
        Me.New(str, 0, 0, 0)
    End Sub

    ' Build an element with string text and line number set
    Public Sub New(ByVal str As String, ByVal line As Integer)
        Me.New(str, line, 0, 0)
    End Sub

    ' Build a fully specified element
    Public Sub New(ByVal str As String, ByVal line As Integer, _
                   ByVal colStart As Integer, ByVal colEnd As Integer)
        mStr = str
        mLine = line
        mColumnStart = colStart
        mColumnEnd = colEnd
    End Sub

    ' Reset all data members to their null values
    Public Sub Clear()
        mStr = ""
        mLine = 0
        mColumnStart = 0
        mColumnEnd = 0
    End Sub

    Private mStr As String              ' The string content
    Private mLine As Integer            ' Line number (starts at 1)
    Private mColumnStart As Integer     ' Column of opening " (1 based)
    Private mColumnEnd As Integer       ' Column of closing " (1 based)
End Class
'==========================================================================
' Name: StringParser
' Desc: Class to parse strings, to extract strings from the selection,
' excluding strings in comments
'
' Classes used:
' StringElement
'==========================================================================
Class StringParser
' Return the count of strings parsed
Public ReadOnly Property Count() As Integer
    Get
        ' Number of StringElement entries collected so far
        Dim total As Integer = Strings.Count
        Return total
    End Get
End Property
' No string parsed?
Public ReadOnly Property IsEmpty() As Boolean
    Get
        ' True exactly when the parsed-string list holds no entries
        Return (Strings.Count = 0)
    End Get
End Property
' Access the i-th string element
' (index is 0-based)
Public ReadOnly Property ElementAt(ByVal index As Integer) As StringElement
    Get
        ' The list stores Object references; convert back to StringElement
        Dim element As StringElement = CType(Strings(index), StringElement)
        Return element
    End Get
End Property
' Access the i-th string.
' (index is 0-based)
Public ReadOnly Property StringAt(ByVal index As Integer) As String
    Get
        ' Fetch the element, then hand back only its text (quotes included)
        Dim element As StringElement = CType(Strings(index), StringElement)
        Return element.Str
    End Get
End Property
Public Sub New()
    ' A fresh parser starts in exactly the state InitParsing() establishes:
    ' empty string list, not in a string, not in a comment, line 0.
    InitParsing()
End Sub
' Array of parsed strings
Private Strings As ArrayList
' Flag: is in string mode?
Private InString As Boolean
' Flag: is in comment mode?
Private InComment As Boolean
' Position of current line (1-based index)
Private CurrentLine As Integer
' The quote " character
Private Const QuoteChar As Char = Chr(34)
' Init parsing process
Private Sub InitParsing()
    ' Drop any previous results and start from a clean scanner state
    Strings = New ArrayList
    CurrentLine = 0
    InComment = False
    InString = False
End Sub
' Exclude a line of type #include "MyFile.h"
Private Function IsSpuriousIncludeString(ByVal line As String) As Boolean
    ' Tell whether 'line' is an #include directive, whose quoted file name
    ' must not be treated as a string literal.
    '
    ' line    - the source line to test (Nothing is treated as "no")
    ' Returns - True if, after leading white space, the line starts with
    '           '#', optional white space, and then "include".
    If line Is Nothing Then
        Return False
    End If

    ' Remove leading white space
    Dim trimmed As String = line.TrimStart(Nothing)
    If (trimmed.Length = 0) OrElse (trimmed.Chars(0) <> "#"c) Then
        Return False
    End If

    ' The preprocessor allows blanks between '#' and the directive name
    ' (e.g. "#  include"), so skip them before comparing.
    Const directive As String = "include"
    Dim afterHash As String = trimmed.Substring(1).TrimStart(Nothing)
    If afterHash.Length < directive.Length Then
        Return False
    End If

    Return String.CompareOrdinal(afterHash, 0, directive, 0, directive.Length) = 0
End Function
' Process a single line of code
Private Sub ProcessLine(ByVal line As String, ByVal lineNumber As Integer)
    ' Scan one line of C/C++ source and append every string literal found
    ' outside comments to the Strings list.
    '
    ' line       - text of the line to scan
    ' lineNumber - 1-based line number stored in each StringElement
    '
    ' State handling:
    '  - InComment (inside /* ... */) carries over from line to line.
    '  - InString never carries over: a literal that is not closed on its
    '    own line is dropped, so a stale start column can never be used.
    '
    ' Inside a string a backslash always escapes the next character, so
    ' both \" (escaped quote) and \\ followed by the closing quote are
    ' handled. Character literals such as '"' and '\"' are skipped as a
    ' whole when met outside strings. All index checks use the
    ' short-circuit AndAlso, so a quote in the first or last column cannot
    ' cause an out-of-range access.
    Debug.WriteLine("Processing line: ")
    Debug.WriteLine(line)

    InString = False

    ' Special case: #include "MyFile.h" only looks like a string.
    ' Inside a C-style comment the line is scanned normally, so that a
    ' closing */ on it is still detected (quotes are ignored there anyway).
    If (Not InComment) AndAlso IsSpuriousIncludeString(line) Then
        Exit Sub
    End If

    Dim lastIndex As Integer = line.Length - 1
    Dim startCol As Integer = -1    ' 0-based column of the opening quote
    Dim i As Integer = 0            ' 0-based index of the current character

    While i <= lastIndex
        Dim ch As Char = line.Chars(i)

        If InComment Then
            ' Only the closing */ matters inside a C-style comment
            If (ch = "*"c) AndAlso (i < lastIndex) AndAlso _
               (line.Chars(i + 1) = "/"c) Then
                InComment = False
                i = i + 1
            End If

        ElseIf InString Then
            If ch = "\"c Then
                ' Escape sequence: skip the escaped character too
                i = i + 1
            ElseIf ch = QuoteChar Then
                ' Closing quote: record the literal.
                ' StringElement columns are 1-based, hence the +1.
                InString = False
                Dim literal As String = line.Substring(startCol, i - startCol + 1)
                Strings.Add(New StringElement(literal, lineNumber, startCol + 1, i + 1))
                Debug.WriteLine("Found string: " & _
                    literal & " (at line " & lineNumber & ")")
            End If

        ElseIf ch = QuoteChar Then
            ' Opening quote
            InString = True
            startCol = i

        ElseIf ch = "'"c Then
            ' Jump over a character literal, if one starts here
            i = FindCharLiteralEnd(line, i)

        ElseIf (ch = "/"c) AndAlso (i < lastIndex) Then
            Dim nextCh As Char = line.Chars(i + 1)
            If nextCh = "*"c Then
                ' /* --> enter C-style comment
                InComment = True
                i = i + 1
            ElseIf nextCh = "/"c Then
                ' // --> C++ comment: the rest of the line is ignored
                Exit While
            End If
        End If

        i = i + 1
    End While

    ' An unterminated literal is abandoned at end of line
    InString = False
End Sub

' Locate the end of the character literal that opens at 'openIndex'.
' Returns the index of its closing apostrophe, or 'openIndex' itself when
' no complete 'x' or '\...' literal starts there (a lone apostrophe).
Private Function FindCharLiteralEnd(ByVal line As String, ByVal openIndex As Integer) As Integer
    Dim k As Integer = openIndex + 1
    If k >= line.Length Then
        Return openIndex
    End If

    If line.Chars(k) = "\"c Then
        ' Escaped literal ('\n', '\'', '\"', '\x41', ...): skip the
        ' backslash and the escaped character, then find the closing '
        k = k + 2
        While k < line.Length
            If line.Chars(k) = "'"c Then
                Return k
            End If
            k = k + 1
        End While
        Return openIndex
    End If

    ' Plain one-character literal 'x'
    If (k + 1 < line.Length) AndAlso (line.Chars(k + 1) = "'"c) Then
        Return k + 1
    End If

    Return openIndex
End Function
'----------------------------------------------------------------------
' Parse the strings in specified selection.
' The "Strings" public field is filled with the string list
'----------------------------------------------------------------------
Public Sub ParseSelection(ByVal selection As EnvDTE.TextSelection)
    ' Parse every line touched by 'selection' and fill the string list.
    '
    ' selection - the text selection to scan; lines are processed from the
    '             line of its top point down to its bottom point.
    '
    ' Previous results are discarded first. Scanning always begins at the
    ' start of the first line, even if the selection begins mid-line, so
    ' the text handed to ProcessLine is a whole line and the recorded
    ' columns are real document columns.
    InitParsing()

    Dim endPt As TextPoint = selection.BottomPoint
    Dim cursor As EditPoint = selection.TopPoint.CreateEditPoint()
    cursor.StartOfLine()

    Do While cursor.LessThan(endPt)
        ' Process the current line
        Dim lineNumber As Integer = cursor.Line
        Dim lineStr As String = cursor.GetText(cursor.LineLength)
        ProcessLine(lineStr, lineNumber)

        ' Stop once the line holding the bottom point has been handled.
        ' This avoids depending on what LineDown does on the last line of
        ' the document (NOTE(review): if it does not advance, the original
        ' loop would never terminate - confirm against the EnvDTE docs).
        If lineNumber >= endPt.Line Then
            Exit Do
        End If

        ' Move to the start of the next line
        cursor.LineDown()
        cursor.StartOfLine()
    Loop
End Sub
'----------------------------------------------------------------------
' Reset the parser
'----------------------------------------------------------------------
Public Sub Clear()
    ' Forget all parsed strings and return to the initial scanner state
    Me.InitParsing()
End Sub
End Class
'==========================================================================
' Name: StringUnicodeDecorator
' Desc: Helper class to do the Unicode decoration job.
'
' This class implements also the logic to avoid decoration of already
' decorated strings (both with _T() and with L).
'
' Classes used:
' StringParser, StringElement
'==========================================================================
Class StringUnicodeDecorator
' The string parser object
Private stringParser As StringParser
' Selection to operate on
Private selection As EnvDTE.TextSelection
' String for message box titles
Private Const msgTitle As String = "MrAsm's Unicode String Decorator"
'----------------------------------------------------------------------
' Check if the given character is a "separator" character, i.e. is
' a character different from 0-9,a-Z,_ .
'
' (This function is used to check if a string is already decorated.)
'----------------------------------------------------------------------
Private Function IsSeparatorCharacter(ByVal ch As Char) As Boolean
    ' Tell whether 'ch' separates tokens, i.e. is NOT one of the characters
    ' that can appear inside a C/C++ identifier (a-z, A-Z, 0-9, _).
    '
    ' Digits are identifier characters too: without them a name such as
    ' x1L or Foo2_T would be mistaken for a stand-alone L / _T prefix.
    Select Case ch
        Case "a"c To "z"c, "A"c To "Z"c, "0"c To "9"c, "_"c
            Return False
        Case Else
            Return True
    End Select
End Function
' Check if current string has the L decoration
Private Function IsDecoratedL(ByVal stringElem As StringElement) As Boolean
    ' Tell whether the string is written as L"..." (wide literal prefix).
    ' The L must sit directly before the opening quote and must itself be
    ' at the start of the line or preceded by a separator character.
    ' Side effect: moves (and possibly extends) the 'selection' member.

    ' Go to the opening quote of the string
    selection.MoveToLineAndOffset(stringElem.Line, stringElem.ColumnStart)

    Dim quoteColumn As Integer = stringElem.ColumnStart     ' 1-based
    If quoteColumn = 1 Then
        ' Nothing to the left of the quote --> no prefix possible
        Return False
    End If

    ' Read the whole line
    selection.SelectLine()
    Dim lineText As String = selection.Text()

    ' 0-based index of the character just before the opening quote
    Dim prefixIndex As Integer = quoteColumn - 2
    If lineText.Chars(prefixIndex) <> "L"c Then
        Return False
    End If

    If quoteColumn = 2 Then
        ' The L is the very first character of the line
        Return True
    End If

    ' Decorated only if the L is a token of its own
    Return IsSeparatorCharacter(lineText.Chars(prefixIndex - 1))
End Function
' Check if current string has the _T() decoration
Private Function IsDecoratedT(ByVal stringElem As StringElement) As Boolean
    ' Tell whether the string is already wrapped as _T("..."), allowing
    ' blanks/tabs between the parentheses and the quotes: _T( "..." ).
    '
    ' stringElem - the string to test (1-based line and quote columns)
    ' Returns    - True if the first non-blank character after the closing
    '              quote is ')' and the text before the opening quote ends
    '              with the token "_T(".
    ' Side effect: moves (and extends) the 'selection' member.

    ' Read the whole line holding the string
    selection.MoveToLineAndOffset(stringElem.Line, stringElem.ColumnStart)
    selection.SelectLine()
    Dim line As String = selection.Text()

    Const tab As Char = Chr(9)      ' \t
    Const startDecoration As String = "_T("

    ' --- Right side: skip blanks after the closing quote ---
    ' ColumnEnd is 1-based, so used as a 0-based index it already points
    ' to the character following the closing quote.
    Dim charIndex As Integer = stringElem.ColumnEnd
    While (charIndex < line.Length) AndAlso _
          ((line.Chars(charIndex) = " "c) OrElse (line.Chars(charIndex) = tab))
        charIndex = charIndex + 1
    End While

    ' The end-of-text test must come before any character access: the
    ' selected text need not end with a line break.
    If charIndex >= line.Length Then
        Return False
    End If

    ' Anything but ')' (including CR / LF) means "not decorated"
    If line.Chars(charIndex) <> ")"c Then
        Return False
    End If

    ' --- Left side: skip blanks before the opening quote ---
    charIndex = stringElem.ColumnStart - 2
    While (charIndex >= 0) AndAlso _
          ((line.Chars(charIndex) = " "c) OrElse (line.Chars(charIndex) = tab))
        charIndex = charIndex - 1
    End While

    ' charIndex should now be on the '(' of "_T(". If fewer characters
    ' remain than the decoration needs, it cannot be there.
    If charIndex < (startDecoration.Length - 1) Then
        Return False
    End If

    Dim decorationStart As Integer = charIndex - startDecoration.Length + 1
    If line.Substring(decorationStart, startDecoration.Length) <> startDecoration Then
        Return False
    End If

    ' Make sure it is the token _T( and not the tail of e.g. Some_T(
    If decorationStart = 0 Then
        Return True
    End If
    Return IsSeparatorCharacter(line.Chars(decorationStart - 1))
End Function
' Check if current string is decorated
Private Function IsDecorated(ByVal stringElem As StringElement) As Boolean
    ' A string counts as decorated if it carries either the L prefix or
    ' the _T() wrapper. 'Or' (not 'OrElse') is kept on purpose so that both
    ' checks are always evaluated.
    Return IsDecoratedL(stringElem) Or IsDecoratedT(stringElem)
End Function
'----------------------------------------------------------------------
' Decorate all strings found in current selection
' (exclude strings in comments, and already decorated strings).
' The 'selection' data member must be set.
' Display an error message on error (exception)
'----------------------------------------------------------------------
Private Sub DecorateStrings()
    ' Wrap every undecorated string literal of the 'selection' member in
    ' _T( ... ). Steps:
    '   1. parse the selection for strings (comments are excluded),
    '   2. drop the strings that already carry L or _T(),
    '   3. insert the decoration, inside one undo context.
    ' Any exception is reported to the user in a message box. The undo
    ' context is closed in a Finally block, so it is never left open when
    ' an insertion fails half-way.

    ' True only if the undo context was opened by this method
    Dim undoOpenedHere As Boolean = False

    Try
        ' Extract all strings from the selection
        stringParser = New StringParser
        stringParser.ParseSelection(selection)
        If stringParser.IsEmpty Then
            MsgBox("No string found for decoration.", _
                MsgBoxStyle.OKOnly + MsgBoxStyle.Information, msgTitle)
            Exit Sub
        End If

        ' Keep only the strings that still need decoration
        Dim stringsToDecorate As New ArrayList
        Dim stringElem As StringElement
        Dim i As Integer
        For i = 0 To (stringParser.Count - 1)
            stringElem = stringParser.ElementAt(i)
            If IsDecorated(stringElem) Then
                Debug.WriteLine("String already decorated: " & stringElem.Str)
            Else
                stringsToDecorate.Add(stringElem)
            End If
        Next

        If stringsToDecorate.Count = 0 Then
            MsgBox("No string found to decorate.", _
                MsgBoxStyle.OKOnly + MsgBoxStyle.Information, msgTitle)
            Exit Sub
        End If

        ' Support for Undo: open a context unless one is already open
        If Not DTE.UndoContext.IsOpen Then
            DTE.UndoContext.Open("Unicode String Decoration", False)
            undoOpenedHere = True
        End If

        ' Go from the last string to the first one, so that an insertion
        ' never shifts the columns of a string still to be processed.
        For i = stringsToDecorate.Count - 1 To 0 Step -1
            stringElem = CType(stringsToDecorate(i), StringElement)
            Debug.WriteLine("Processing string: " & stringElem.Str & _
                " (at line: " & stringElem.Line & ")")

            ' Closing ")" first (after the ending quote), then the opening
            ' "_T(" - again so that the first insertion does not move the
            ' position of the second.
            selection.MoveToLineAndOffset(stringElem.Line, stringElem.ColumnEnd + 1)
            selection.Insert(")")
            selection.MoveToLineAndOffset(stringElem.Line, stringElem.ColumnStart)
            selection.Insert("_T(")
        Next

    Catch ex As Exception
        MsgBox(ex.Message, _
            MsgBoxStyle.OKOnly + MsgBoxStyle.Exclamation, msgTitle)

    Finally
        ' Close the undo context only if it has been opened by us here
        If undoOpenedHere Then
            Try
                DTE.UndoContext.Close()
            Catch closeEx As Exception
                MsgBox(closeEx.Message, _
                    MsgBoxStyle.OKOnly + MsgBoxStyle.Exclamation, msgTitle)
            End Try
        End If
    End Try
End Sub
'----------------------------------------------------------------------
' Decorate strings in current selection
'----------------------------------------------------------------------
Public Sub DecorateStringsInSelection()
    ' Decorate the strings inside the current selection of the active
    ' document. Shows a message and does nothing when there is no active
    ' document or no text selection to work on.

    Dim activeDoc As EnvDTE.Document = DTE.ActiveDocument
    If activeDoc Is Nothing Then
        MsgBox("There is no active document to process.", _
            MsgBoxStyle.Exclamation + MsgBoxStyle.OKOnly, msgTitle)
        Exit Sub
    End If

    ' Document.Selection is typed as Object; accept it only if it really
    ' is a text selection (TypeOf ... Is yields False for Nothing).
    Dim currentSelection As Object = activeDoc.Selection
    If Not (TypeOf currentSelection Is EnvDTE.TextSelection) Then
        MsgBox("No selection found.", _
            MsgBoxStyle.OKOnly + MsgBoxStyle.Information, msgTitle)
        Exit Sub
    End If
    selection = CType(currentSelection, EnvDTE.TextSelection)

    ' Start the decoration process
    DecorateStrings()
End Sub
'----------------------------------------------------------------------
' Decorate strings in active document
'----------------------------------------------------------------------
Public Sub DecorateStringsInActiveDocument()
    ' Decorate the strings of the whole active document. The document must
    ' exist, be a C/C++ file and be writable; otherwise a message is shown
    ' and nothing is changed.

    Dim activeDoc As EnvDTE.Document = DTE.ActiveDocument

    ' Guard: an active document is required
    If activeDoc Is Nothing Then
        MsgBox("There is no active document to process.", _
            MsgBoxStyle.Exclamation + MsgBoxStyle.OKOnly, msgTitle)
        Exit Sub
    End If

    ' Guard: only C/C++ files are handled
    If activeDoc.Language <> "C/C++" Then
        MsgBox("It's not a C/C++ file.", _
            MsgBoxStyle.Exclamation + MsgBoxStyle.OKOnly, msgTitle)
        Exit Sub
    End If

    ' Guard: the document must be editable
    If activeDoc.ReadOnly Then
        MsgBox("Cannot process a read-only document", _
            MsgBoxStyle.Exclamation + MsgBoxStyle.OKOnly, msgTitle)
        Exit Sub
    End If

    ' Work on the entire document text
    selection = activeDoc.Selection
    selection.SelectAll()

    DecorateStrings()
End Sub
End Class
'--------------------------------------------------------------------------
' Decorate all strings found in current selection
' (exclude strings in comments, and already decorated strings)
'--------------------------------------------------------------------------
Public Sub DecorateStringsInSelection()
    ' Macro entry point: delegate to a fresh decorator object, which works
    ' on the current selection of the active document.
    Dim decorator As StringUnicodeDecorator
    decorator = New StringUnicodeDecorator
    decorator.DecorateStringsInSelection()
End Sub
'--------------------------------------------------------------------------
' Decorate all strings found in the active document
' (exclude strings in comments, and already decorated strings)
'--------------------------------------------------------------------------
Public Sub DecorateStringsInActiveDocument()
    ' Macro entry point: delegate to a fresh decorator object, which works
    ' on the whole active document.
    Dim decorator As StringUnicodeDecorator
    decorator = New StringUnicodeDecorator
    decorator.DecorateStringsInActiveDocument()
End Sub
End Module
</CODE>
This is the file that needs decoration:
<CODE FileName="Dummy.cpp">
//////////////////////////////////////////////////////////////////////////
// A Test File for String Decorator
// by MrAsm
//////////////////////////////////////////////////////////////////////////
// Ignore "...."
//
/* Ignore "blah blah blah" */
#include <memory.h>
#include "DontTouchMe.h" // Don't decorate this
#define TEST_STR _T("....")
#define TEST_STR2 L"Ignore me"
#define A_NAME "DON'T Ignore Me, please!"
static const WCHAR * gString =
L"Don't touch";
static TCHAR sz[] = _T("Hello");
static TCHAR * pleaseDecorateThanks = "Test Me Too!";
/* "Ignore me" */
class X
{
public:
virtual ~X() {}
// Don't decorate "me" !
virtual void func1(int) = 0;
};
class Y : public X
{
public:
virtual ~Y() {}
virtual void func2(int) = 0;
};
class XImpl : public X
{
public:
void func1(int i)
{
cout << "XImpl::func1 - i = " << i << endl; // "Not me"
}
void Dummy()
{
CString s( "I want decoration" );
CString s1;
s1 = _T( "Don't decorate me" ) ;
if ( CString( "A string" ) == CString( "Another" ) )
{
// ...
}
}
};
class YImpl : public XImpl, public Y
{
public:
void func1(int i)
{
XImpl::func1(i);
}
void func2(int i)
{
cout << "YImpl::func2 - i = " << i << endl; /* "Not me" */
}
};
// END "END" END "END" :) <--- Don't touch that, dear decorator
</CODE>
And this is the file after decoration:
<CODE FileName="DummyDecorated.cpp">
//////////////////////////////////////////////////////////////////////////
// A Test File for String Decorator
// by MrAsm
//////////////////////////////////////////////////////////////////////////
// Ignore "...."
//
/* Ignore "blah blah blah" */
#include <memory.h>
#include "DontTouchMe.h" // Don't decorate this
#define TEST_STR _T("....")
#define TEST_STR2 L"Ignore me"
#define A_NAME _T("DON'T Ignore Me, please!")
static const WCHAR * gString =
L"Don't touch";
static TCHAR sz[] = _T("Hello");
static TCHAR * pleaseDecorateThanks = _T("Test Me Too!");
/* "Ignore me" */
class X
{
public:
virtual ~X() {}
// Don't decorate "me" !
virtual void func1(int) = 0;
};
class Y : public X
{
public:
virtual ~Y() {}
virtual void func2(int) = 0;
};
class XImpl : public X
{
public:
void func1(int i)
{
cout << _T("XImpl::func1 - i = ") << i << endl; // "Not me"
}
void Dummy()
{
CString s( _T("I want decoration") );
CString s1;
s1 = _T( "Don't decorate me" ) ;
if ( CString( _T("A string") ) == CString( _T("Another") ) )
{
// ...
}
}
};
class YImpl : public XImpl, public Y
{
public:
void func1(int i)
{
XImpl::func1(i);
}
void func2(int i)
{
cout << _T("YImpl::func2 - i = ") << i << endl; /* "Not me" */
}
};
// END "END" END "END" :) <--- Don't touch that, dear decorator
</CODE>