Re: counting instances of a string.
On Mon, 07 Jul 2008 22:09:34 -0700, Patricia Shanahan <pats@acm.org>
wrote, quoted or indirectly quoted someone who said :
Matcher m = Pattern.compile(Pattern.quote(lookFor)).matcher(page);
I thought this might be a good time to get an idea of how much of a
speed penalty you pay for using a regex. It is less than I thought.
In this test the regex version takes roughly 3 times as long as the
indexOf version. The relative penalty would be worse for a smaller search.
regex finds 72 time: 12400400
indexOf finds 72 time: 4004280
ratio: 3.0967864385107933
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.mindprod.common11.StringTools;
public class RegVIndex
{
/**
 * Regex version: tally how often a literal String shows up on a page.
 *
 * @param page big String to search.
 * @param lookFor small String whose instances are counted. It is quoted
 *                before compiling, so regex metacharacters match literally.
 * @return number of non-overlapping occurrences found scanning left to right.
 */
private static int countInstances(String page, String lookFor)
{
    final String literal = Pattern.quote(lookFor);
    final Matcher matcher = Pattern.compile(literal).matcher(page);
    int hits = 0;
    // Each successful find() resumes after the end of the previous match.
    for ( boolean found = matcher.find(); found; found = matcher.find() )
    {
        hits++;
    }
    return hits;
}
/**
 * Benchmark harness: counts "the" in PAGE once with the regex-based
 * countInstances and once with StringTools.countInstances (labelled
 * "indexOf" in the output), then prints both counts, both elapsed
 * System.nanoTime() intervals and their ratio.
 * Each approach is timed over a single call; there is no warm-up pass.
 *
 * @param args not used
 */
public static void main ( String[] args )
{
    final String needle = "the";

    // Three timestamps bracket the two calls; the middle one ends the
    // regex interval and starts the indexOf interval.
    final long regexStart = System.nanoTime();
    final int regexCount = countInstances( PAGE, needle );
    final long regexEnd = System.nanoTime();
    final int indexOfCount = StringTools.countInstances( PAGE, needle );
    final long indexOfEnd = System.nanoTime();

    final long regexNanos = regexEnd - regexStart;
    final long indexOfNanos = indexOfEnd - regexEnd;
    final double ratio = (double) regexNanos / (double) indexOfNanos;

    System.out.println( "regex finds " + regexCount + " time: " + regexNanos );
    System.out.println( "indexOf finds " + indexOfCount + " time: " + indexOfNanos );
    System.out.println( " ratio: " + ratio );
}
final static String PAGE =
"<!-- macro JglossHead \"String\" --><!-- generated --><!doctype
html public \"-//W3C//DTD HTML 4.01 Transitional//EN\"
\"http://www.w3.org/TR/html4/loose.dtd\">\n"
+ "<html>\n"
+ "<head>\n"
+ "<title>String : Java Glossary</title>\n"
+ "<meta http-equiv=\"lang\" content=\"en\">\n"
+ "<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n"
+ "<meta http-equiv=\"Content-Type\" content=\"text/html;
charset=iso-8859-1\">\n"
+ "<meta name=\"Author\" content=\"Roedy Green (250) 361-9093 of
Canadian Mind Products. For email see contact page.\">\n"
+ "<meta name=\"Copyright\" content=\"Canadian Mind Products
1996-2008\">\n"
+ "<meta name=\"Description\" content=\"Canadian Mind Products Java
& Internet Glossary : String\">\n"
+ "<meta name=\"Generator\" content=\"CMP Static HTML Macros\">\n"
+ "<meta name=\"Keywords\" content=\"Java, terminology, glossary,
Roedy Green, primer, overview, introduction, precis, definition,
String\">\n"
+ "<link href=\"../mindprod.css\" type=\"text/css\"
rel=\"stylesheet\" media=\"screen\">\n"
+ "<link href=\"../jdisplay.css\" type=\"text/css\"
rel=\"stylesheet\" media=\"screen\">\n"
+ "<link rel=\"meta\" href=\"../labels.rdf\"
type=\"application/rdf+xml\" title=\"ICRA labels\">\n"
+ "<link rel=\"home\" href=\"../index.html\">\n"
+ "<link rel=\"icon\" href=\"../image/icon16/jgloss.png\">\n"
+ "<link rel=\"prev\" href=\"s.html\">\n"
+ "</head>\n"
+ "<body>\n"
+ "<a name=\"TOP\"></a>\n"
+ "<!--#CONFIG TIMEFMT=\"%Y-%m-%d\"--> <span class=\"ad\">\n"
+ "<script type=\"text/javascript\">\n"
+ "<!--\n"
+ "google_ad_client=\"pub-3625079171090429\";\n"
+ "google_ad_width=180;\n"
+ "google_ad_height=150;\n"
+ "google_ad_format=\"180x150_as\";\n"
+ "google_ad_type = \"text_image\";\n"
+ "google_ad_channel=\"\";\n"
+ "google_color_border = \"336699\";\n"
+ "google_color_bg = \"FFFFFF\";\n"
+ "google_color_link = \"0000FF\";\n"
+ "google_color_url = \"008000\";\n"
+ "google_color_text = \"000000\";\n"
+ "//-->\n"
+ "</script>\n"
+ "<script type=\"text/javascript\"\n"
+
"src=\"http://pagead2.googlesyndication.com/pagead/show_ads.js\">\n"
+ "</script>\n"
+ "</span>\n"
+ "<div class=\"titlejgloss\">\n"
+ "<a name=\"TITLE\"></a>String : Java Glossary\n"
+ "</div>\n"
+ "<a class=\"plain\" href=\"../index.html#TITLE\"
target=\"_top\"><img src=\"../image/stylesheet/home.png\" width=\"26\"
height=\"26\" alt=\"home\" align=\"middle\" border=\"0\"></a>\n"
+ "<a class=\"plain\" href=\"s.html\" target=\"_top\"><img
src=\"../image/stylesheet/back.png\" width=\"26\" height=\"26\"
alt=\"S words\" align=\"middle\" border=\"0\"></a>\n"
+ "<a class=\"plain\" href=\"jagg.html\" target=\"_top\"><img
src=\"../image/stylesheet/localfind.png\" width=\"26\" height=\"26\"
alt=\"local find\" align=\"middle\" border=\"0\"></a>\n"
+ "<a class=\"plain\" href=\"string.html\" target=\"_top\"><img
src=\"../image/stylesheet/fullscreen.png\" width=\"26\" height=\"26\"
alt=\"no local find frame, full screen\" align=\"middle\"
border=\"0\"></a>\n"
+ "<a class=\"plain\"
href=\"http://www.google.com/search?q=String+Java\"><img
src=\"../image/stylesheet/search.png\" width=\"22\" height=\"22\"
alt=\"Google search web for topic\" align=\"middle\"
border=\"0\"></a>\n"
+ "<a class=\"plain\" href=\"#BOTTOM\"><img
src=\"../image/stylesheet/tobottom.png\" width=\"16\" height=\"16\"
alt=\"jump to footer\" align=\"middle\" border=\"0\"></a>\n"
+ "<a class=\"plain\"
href=\"http://www.altavista.com/babelfish/tr?url=http://mindprod.com/jgloss/string.html\">\n"
+ "<img src=\"../image/stylesheet/babelfish.png\" width=\"32\"
height=\"32\" alt=\"translate with Babelfish\" align=\"middle\"
border=\"0\"></a>\n"
+ "by Roedy Green ©<span class=\"date\">1996-2008</span>
Canadian Mind Products\n"
+ "<br>\n"
+ "Go to : <a href=\"punct.html\">punctuation</a> <a
href=\"0-9.html\">0-9</a> <a href=\"a.html\">A</a>\n"
+ "<a href=\"b.html\">B</a> <a href=\"c.html\">C</a> <a
href=\"d.html\">D</a> <a href=\"e.html\">E</a>\n"
+ "<a href=\"f.html\">F</a> <a href=\"g.html\">G</a> <a
href=\"h.html\">H</a> <a href=\"i.html\">I</a>\n"
+ "<a href=\"j.html\">J</a> <a href=\"k.html\">K</a> <a
href=\"l.html\">L</a> <a href=\"m.html\">M</a>\n"
+ "<a href=\"n.html\">N</a> <a href=\"o.html\">O</a> <a
href=\"p.html\">P</a> <a href=\"q.html\">Q</a>\n"
+ "<a href=\"r.html\">R</a> <a href=\"s.html\">S</a> <a
href=\"t.html\">T</a> <a href=\"u.html\">U</a>\n"
+ "<a href=\"v.html\">V</a> <a href=\"w.html\">W</a> <a
href=\"x.html\">X</a> <a href=\"y.html\">Y</a>\n"
+ "<a href=\"z.html\">Z</a> (<a
href=\"masterindex.html\">all</a>)\n"
+ "<br>\n"
+ "<ul class=\"breadcrumb\">\n"
+ "<li class=\"firstbreadcrumb\">\n"
+ "You are here :\n"
+ "</li>\n"
+ "<li>\n"
+ "<a class=\"plain\" href=\"../index.html#TITLE\"
target=\"_top\">home</a>\n"
+ "</li>\n"
+ "<li>\n"
+ "<a class=\"plain\" href=\"jgloss.html\" target=\"_top\">Java
Glossary</a>\n"
+ "</li>\n"
+ "<li>\n"
+ "<a class=\"plain\" href=\"s.html\" target=\"_top\">S
words</a>\n"
+ "</li>\n"
+ "<li>\n"
+ "<span class=\"term\">String</span>\n"
+ "</li>\n"
+ "<li class=\"lastbreadcrumb\">\n"
+ " \n"
+ "</li>\n"
+ "</ul>\n"
+ "<!-- /generated by JglossHead -->\n"
+ "<dl>\n"
+ "\n"
+ "<dt><a name=\"STRING\"></a>String</dt>\n"
+ "\n"
+ "<dd>Strings are quite different from C++. They are <span
class=\"term\">immutable</span>\n"
+ ", i.e. You can’t change the characters in a string. To
look at individual\n"
+ "characters, you need to use <span
class=\"jmethod\">charAt()</span> . Strings in\n"
+ "Java are 16-bit Unicode. To edit strings, you need to use a
<span class=\"jclass\">StringBuffer</span>\n"
+ "object or a <span class=\"keyword\">char[]</span>. In JDK 1.5+
you use <span class=\"jclass\">StringBuilder</span>,\n"
+ "which works exactly like <span
class=\"jclass\">StringBuffer</span>, but it is\n"
+ "faster and not thread-safe.\n"
+ "<p>\n"
+ "You get the size of a <span class=\"jclass\">String</span>
(length in chars) with <span class=\"jclass\">String</span>.\n"
+ "<span class=\"jmethod\">length</span>(), not .<span
class=\"jvar\">length</span> or .\n"
+ "<span class=\"jmethod\">size</span>() used in other classes.\n"
+ "<p>\n"
+ "For manipulating 8-bit characters, you want an array of bytes
— <span class=\"keyword\">byte[]</span>.\n"
+ "<!-- macro Quilt 2 asis gridmenu\n"
+ "{<a href=\"#EMPTY\">Empty Strings</a>}\n"
+ "{<a href=\"#COMPARISON\">Comparison</a>}\n"
+ "{<a href=\"#CASESENSITIVITY\">Case-Sensitivity</a>}\n"
+ "{<a href=\"#CREATING\">Creating Strings</a>}\n"
+ "{<a class=\"redirect\"
href=\"literal.html#STRING\">Literals</a>}\n"
+ "{<a href=\"#TOSTRING\">toString</a>}\n"
+ "{<a href=\"#REPLACE\">Replace</a>}\n"
+ "{<a class=\"redirect\" href=\"trimming.html\">Trimming</a>}\n"
+ "{<a href=\"#VALIDATING\">Validating</a>}\n"
+ "{<a href=\"#REGEX\">Regex</a>}\n"
+ "{<a href=\"#GOTCHAS\">Gotchas</a>}\n"
+ "{<a href=\"#LEARNINGMORE\">Learning More</a>}\n"
+ "{<a class=\"tosee\" href=\"#SEE\">Links</a>} --><!-- generated
-->\n"
+ "<table class=\"gridmenu\" summary=\"menu\">\n"
+ "<colgroup>\n"
+ "<col span=\"2\" width=\"50%\">\n"
+ "</colgroup>\n"
+ "<tbody>\n"
+ "<tr>\n"
+ "<td><a href=\"#EMPTY\">Empty Strings</a></td>\n"
+ "<td><a class=\"redirect\"
href=\"trimming.html\">Trimming</a></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><a href=\"#COMPARISON\">Comparison</a></td>\n"
+ "<td><a href=\"#VALIDATING\">Validating</a></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><a href=\"#CASESENSITIVITY\">Case-Sensitivity</a></td>\n"
+ "<td><a href=\"#REGEX\">Regex</a></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><a href=\"#CREATING\">Creating Strings</a></td>\n"
+ "<td><a href=\"#GOTCHAS\">Gotchas</a></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><a class=\"redirect\"
href=\"literal.html#STRING\">Literals</a></td>\n"
+ "<td><a href=\"#LEARNINGMORE\">Learning More</a></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><a href=\"#TOSTRING\">toString</a></td>\n"
+ "<td><a class=\"tosee\" href=\"#SEE\">Links</a></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><a href=\"#REPLACE\">Replace</a></td>\n"
+ "<td></td>\n"
+ "</tr>\n"
+ "</tbody>\n"
+ "</table>\n"
+ "<!-- /generated by Quilt -->\n"
+ "\n"
+ "<h2><a name=\"EMPTY\"></a>Empty Strings</h2>\n"
+ "\n"
+ "There are three types of empty string, null, "" and
" ".\n"
+ "Here is how to check for each flavour:\n"
+ "<!-- macro JDisplay string.example701.javafrag --><!-- generated
-->\n"
+ "<pre class=\"java\"><span class=\"keyword\">if </span><span
class=\"fence1\">(</span> <span class=\"jvar\">\n"
+ "s </span><span class=\"operator\">== </span><span
class=\"keyword\">null </span><span class=\"fence1\">\n"
+ ")</span> <span class=\"jmethod\">echo </span><span
class=\"fence1\">(</span> <span class=\"string\">"\n"
+ "was null"</span> <span class=\"fence1\">)</span><span
class=\"semicolon\">;</span>\n"
+ "<span class=\"keyword\">else if </span><span
class=\"fence1\">(</span> <span class=\"jvar\">\n"
+ "s</span><span class=\"operator\">.</span><span
class=\"jmethod\">length</span><span class=\"fence2\">\n"
+ "(</span><span class=\"fence2\">)</span> <span
class=\"operator\">== </span><span class=\"decimallow\">\n"
+ "0 </span><span class=\"fence1\">)</span> <span
class=\"jmethod\">echo </span><span class=\"fence1\">\n"
+ "(</span> <span class=\"string\">"was empty"</span>
<span class=\"fence1\">\n"
+ ")</span><span class=\"semicolon\">;</span>\n"
+ "<span class=\"keyword\">else if </span><span
class=\"fence1\">(</span> <span class=\"jvar\">\n"
+ "s</span><span class=\"operator\">.</span><span
class=\"jmethod\">trim</span><span class=\"fence2\">\n"
+ "(</span><span class=\"fence2\">)</span><span
class=\"operator\">.</span><span class=\"jmethod\">\n"
+ "length </span><span class=\"fence2\">(</span><span
class=\"fence2\">)</span> <span class=\"operator\">\n"
+ "== </span><span class=\"decimallow\">0 </span><span
class=\"fence1\">)</span> <span class=\"jmethod\">\n"
+ "echo </span><span class=\"fence1\">(</span> <span
class=\"string\">"was blank\n"
+ "or other whitespace"</span> <span
class=\"fence1\">)</span><span class=\"semicolon\">;</span></pre><!--
/generated by JDisplay -->\n"
+ "\n"
+ "<h2 class=\"gotcha\"><a name=\"COMPARISON\"></a>String
Comparison</h2>\n"
+ "\n"
+ "The following form:\n"
+ "<!-- macro JDisplay string.example702.javafrag --><!-- generated
-->\n"
+ "<pre class=\"java\"><span class=\"keyword\">if </span><span
class=\"fence1\">(</span> <span class=\"string\">"\n"
+ "abc"</span><span class=\"operator\">.</span><span
class=\"jmethod\">equals </span><span class=\"fence2\">\n"
+ "(</span><span class=\"jvar\">s</span><span
class=\"fence2\">)</span> <span class=\"fence1\">\n"
+ ")</span> <span class=\"jmethod\">echo </span><span
class=\"fence1\">(</span> <span class=\"string\">"\n"
+ "matched"</span> <span class=\"fence1\">)</span><span
class=\"semicolon\">;</span></pre><!-- /generated by JDisplay -->\n"
+ "is preferable to:\n"
+ "<!-- macro JDisplay string.example801.javafrag --><!-- generated
-->\n"
+ "<pre class=\"java\"><span class=\"keyword\">if </span><span
class=\"fence1\">(</span> <span class=\"jvar\">\n"
+ "s</span><span class=\"operator\">.</span><span
class=\"jmethod\">equals </span><span class=\"fence2\">\n"
+ "(</span> <span class=\"string\">"abc"</span> <span
class=\"fence2\">)</span>\n"
+ "<span class=\"fence1\">)</span> <span class=\"jmethod\">echo
</span><span class=\"fence1\">\n"
+ "(</span> <span class=\"string\">"matched"</span> <span
class=\"fence1\">)</span><span class=\"semicolon\">;</span></pre><!--
/generated by JDisplay -->\n"
+ "because the first form won’t raise an exception if <span
class=\"jvar\">s</span>\n"
+ "is null. It will treat the strings as not equal.\n"
+ "<p class=\"important\">\n"
+ "Unless Strings have been interned, with <span
class=\"jmethod\">String.intern()</span>,\n"
+ "you cannot compare them for equality with <span
class=\"keyword\">==</span>. You\n"
+ "have to use <span class=\"jmethod\">equals()</span> instead.\n"
+ "<p>\n"
+ "The compiler will not warn you if you inadvertently use <span
class=\"keyword\">==</span>.\n"
+ "Unfortunately, the bug may take a long time to surface if your
compiler or\n"
+ "virtual machine is doing transparent interning. Interning gets
you a reference\n"
+ "to the master copy of a <span class=\"jclass\">String</span>.
This allows the\n"
+ "duplicates to be garbage collected sooner. However, there are
three\n"
+ "disadvantages to interning:\n"
+ "<ol>\n"
+ "<li>\n"
+ "It takes extra time to look up the master string in a <span
class=\"jclass\">Hashtable</span>.\n"
+ "</li>\n"
+ "<li>\n"
+ "In some implementations, you can have a maximum of 64K interned
<span class=\"jclass\">String</span>s.\n"
+ "</li>\n"
+ "<li>\n"
+ "In some implementation, interned <span
class=\"jclass\">String</span>s are never\n"
+ "garbage collected, even when they are no longer used. The
interning process\n"
+ "itself acts as a packratter. The answer is to implement them
with weak\n"
+ "references.\n"
+ "</li>\n"
+ "</ol>\n"
+ "If you want to compare for < or > you cannot use the usual
comparison\n"
+ "operators, you have to use <span
class=\"jmethod\">compareTo()</span> or <span
class=\"jmethod\">compareToIgnoreCase()</span>\n"
+ "instead.\n"
+ "<!-- macro JDisplay gotchas.example14.javafrag --><!-- generated
-->\n"
+ "<pre class=\"java\"><span class=\"jclass\">String </span><span
class=\"jvardef\">s </span><span class=\"operator\">\n"
+ "= </span><span class=\"string\">"apple"</span><span
class=\"semicolon\">;</span>\n"
+ "<span class=\"jclass\">String </span><span class=\"jvardef\">t
</span><span class=\"operator\">\n"
+ "= </span><span class=\"string\">"orange"</span><span
class=\"semicolon\">;</span>\n"
+ "<span class=\"keyword\">if </span><span
class=\"fence1\">(</span> <span class=\"jvar\">\n"
+ "s</span><span class=\"operator\">.</span><span
class=\"jmethod\">compareTo</span><span class=\"fence2\">\n"
+ "(</span><span class=\"jvar\">t</span><span
class=\"fence2\">)</span> <span class=\"operator\"><\n"
+ "</span><span class=\"decimallow\">0 </span><span
class=\"fence1\">)</span>\n"
+ "<span class=\"fence1\">{</span>\n"
+ "<span class=\"jclass\">System</span><span
class=\"operator\">.</span><span class=\"jvar\">\n"
+ "out</span><span class=\"operator\">.</span><span
class=\"jmethod\">println</span><span class=\"fence1\">\n"
+ "(</span> <span class=\"string\">"s < t"</span>
<span class=\"fence1\">)</span><span class=\"semicolon\">;</span>\n"
+ "<span class=\"fence1\">}</span></pre><!-- /generated by JDisplay
-->\n"
+ "<span class=\"jmethod\">compareTo</span> will return:\n"
+ "<ul>\n"
+ "<li>\n"
+ "some positive number if string s lexically comes after t.\n"
+ "</li>\n"
+ "<li>\n"
+ "0 if s is the same as t.\n"
+ "</li>\n"
+ "<li>\n"
+ "some negative number if s sorts earlier than t.\n"
+ "</li>\n"
+ "</ul>\n"
+ "You can think of it roughly like treating the Strings as numbers
and returning s-t.\n"
+ "<p>\n"
+ "Novices might be astonished by the following results:\n"
+ "<ul>\n"
+ "<li>\n"
+ "<span class=\"string\">"abc"</span><span
class=\"jvar\">.compareTo</span><span class=\"fence1\">(</span>\n"
+ "<span class=\"string\">"ABC" </span><span
class=\"fence1\">)</span> <span class=\"keyword\">returns</span>\n"
+ "<span class=\"string\">"abc"</span> > <span
class=\"string\">"ABC"</span>.\n"
+ "<span class=\"jmethod\">compareTo</span> is case sensitive.\n"
+ "</li>\n"
+ "<li>\n"
+ "<span class=\"string\">"abc "</span><span
class=\"jvar\">.compareTo</span>\n"
+ "<span class=\"fence1\">(</span> <span
class=\"string\">"abc" </span><span
class=\"fence1\">(</span>\n"
+ "returns <span class=\"string\">"abc "</span> >
<span class=\"string\">"abc"</span>.\n"
+ "Blanks are treated like any other character.\n"
+ "</li>\n"
+ "<li>\n"
+ "<span class=\"string\">""</span><span
class=\"jvar\">.compareTo</span><span class=\"fence1\">(</span>\n"
+ "<span class=\"keyword\">null </span><span
class=\"fence1\">)</span> raises a <span
class=\"jclass\">java.lang.NullPointerException</span>.\n"
+ "</li>\n"
+ "<li>\n"
+ "<span class=\"string\">""</span> is not the same thing
as null. Most\n"
+ "String functions will be happy to handle <span
class=\"string\">""</span>,\n"
+ "but very few will accept null.\n"
+ "</li>\n"
+ "<li>\n"
+ "The comparison is done by straightforward Unicode numeric
character by character\n"
+ "comparison. There is no adjustment for locale collating
sequence.\n"
+ "</li>\n"
+ "</ul>\n"
+ "When you write your own classes, the default <span
class=\"jmethod\">Object.equals</span>\n"
+ "does <b>not</b> do a field by field comparison. You have to
write your own\n"
+ "version of <span class=\"jmethod\">equals</span> to get that
effect. The default\n"
+ "version simply tests the equality of the two references —
that they both\n"
+ "point to the same object.\n"
+ "\n"
+ "<h2><a name=\"CASESENSITIVITY\"></a>Case-Sensitive and
Case-Insensitive Comparison</h2>\n"
+ "\n"
+ "<!-- macro JDisplay string.comparecasesensitivity.javafrag
--><!-- generated -->\n"
+ "<div class=\"jdisplay\">\n"
+ "<iframe
src=\"snippet/iframe/string.comparecasesensitivity.javafrag.html\"
width=\"800\" height=\"580\" scrolling=\"yes\" frameborder=\"0\"
marginheight=\"0\" marginwidth=\"0\"></iframe>\n"
+ "</div><!-- /generated by JDisplay -->\n"
+ "\n"
+ "<h2><a name=\"CREATING\"></a>Creating Strings</h2>\n"
+ "\n"
+ "Strings are immutable. Therefore they can be reused
indefinitely, and they can\n"
+ "be shared for many purposes. When you assign one String variable
to another, no\n"
+ "copy is made. Even when you take a substring there is no new
String created,\n"
+ "though a new String descriptor is. New Strings are created
when:\n"
+ "<ul>\n"
+ "<li>\n"
+ "you concatenate.\n"
+ "</li>\n"
+ "<li>\n"
+ "you read Strings from files.\n"
+ "</li>\n"
+ "<li>\n"
+ "you <a href=\"newbie.html#STRINGREDUNDANCY\">foolishly</a> use
<span class=\"java\"><span class=\"keyword\">new</span>\n"
+ "String(String);</span>. There is one situation where its use is
legit. See <a href=\"substring.html\">substring</a>\n"
+ "for the explanation.\n"
+ "</li>\n"
+ "<li>\n"
+ "you use <span class=\"java\"><span class=\"keyword\">new</span>
<span class=\"jclass\">String</span>(\n"
+ "<span class=\"jvar\">somethingElse</span> )</span> ; for <a
href=\"conversion.html\">conversion</a>.\n"
+ "</li>\n"
+ "<li>\n"
+ "You use <span class=\"jclass\">StringBuffer/StringBuilder</span>
<span class=\"jmethod\">toString/substring</span>.\n"
+ "</li>\n"
+ "</ul>\n"
+ "\n"
+ "<h2><a name=\"TOSTRING\"></a>toString</h2>\n"
+ "\n"
+ "Every <span class=\"jclass\">Object</span> has a method called
<span class=\"jmethod\">toString</span>\n"
+ "that makes some sort of attempt to convert the contents of the
<span class=\"jclass\">Object</span>\n"
+ "into human-readable form as a Unicode <span
class=\"jclass\">String</span> for\n"
+ "display. Normally, when you write a new class, you write you own
corresponding <span class=\"jmethod\">toString</span>\n"
+ "method for it, even if just for debugging.\n"
+ "<p>\n"
+ "You use it like this: <span class=\"jclass\">String</span> <span
class=\"jvar\">toShow</span>\n"
+ "= <span class=\"jvar\">myThing</span>.<span
class=\"jmethod\">toString</span>();\n"
+ "<p>\n"
+ "The default <span class=\"jclass\">Object</span>.<span
class=\"jmethod\">toString</span>\n"
+ "method is not very clever. It does <b>not</b> display all the
primitives in your\n"
+ "class with field names as you might expect. If you want that,
you must code it\n"
+ "yourself. A default <span class=\"jmethod\">toString</span> will
typically,\n"
+ "instead, do something lame like dump the <span
class=\"jmethod\">hashCode</span>\n"
+ "or the <span class=\"jclass\">Object</span>’s address
— only mildly\n"
+ "interesting.\n"
+ "<p>\n"
+ "<span class=\"jmethod\">toString</span> has a magical property.
It appears to get\n"
+ "invoked automatically to convert to <span
class=\"jclass\">String</span> without\n"
+ "you having to mention <span
class=\"jmethod\">toString</span>.\n"
+ "<ol>\n"
+ "<li>\n"
+ "In one case, <span class=\"jclass\">System</span>.<span
class=\"jvar\">out</span>.<span class=\"jmethod\">println</span>\n"
+ "(and brothers), it is not really magic. <span
class=\"jmethod\">println</span>\n"
+ "pulls it off with a plethora of <a
href=\"overload.html\">overloaded</a> methods. <span
class=\"jmethod\">println</span>\n"
+ "has many overloaded methods, one for each of the <a
href=\"primitive.html\">primitive</a>\n"
+ "types, and then each overloaded method converts its primitive
parameter to a <span class=\"jclass\">String</span>\n"
+ "for you, and passes that on to the variant of <span
class=\"jmethod\">println</span>\n"
+ "that can only handle <span class=\"jclass\">Strings</span>. But,
you say, (glad to\n"
+ "see you are so attentive), primitives don’t have a <span
class=\"jmethod\">toString</span>\n"
+ "method! That is true, but there are <span
class=\"keyword\">static</span> <a
href=\"conversion.html\">conversion</a>\n"
+ "methods to get that effect, such as <span
class=\"jclass\">String</span>.<span
class=\"jmethod\">valueOf</span>(\n"
+ "<span class=\"keyword\">double</span> ). For any <span
class=\"jclass\">Object</span>\n"
+ "other than a <span class=\"jclass\">String</span>, <span
class=\"jmethod\">println</span>\n"
+ "invokes the Object’s usually-overridden custom <span
class=\"jmethod\">toString</span>\n"
+ "method and passes the result on to the <span
class=\"jclass\">String</span>-eating\n"
+ "version of <span class=\"jmethod\">println</span>.\n"
+ "</li>\n"
+ "<li>\n"
+ "When you use concatenation, <span
class=\"jmethod\">toString</span> truly does get\n"
+ "called for you magically, sometimes. If ever you try to add two
<span class=\"jclass\">Object</span>s,\n"
+ "Java presumes you are really trying to concatenate them and
transparently calls\n"
+ "each of their <span class=\"jmethod\">toString</span> methods
and concatenates the\n"
+ "results giving a <span class=\"jclass\">String</span>. It even
works when you try\n"
+ "to add a <span class=\"jclass\">String</span> and a primitive.
Concatenation will\n"
+ "convert the primitive to a <span class=\"jclass\">String</span>
for you and\n"
+ "concatenate the results, transparently. This can lead to <a
href=\"gotchas.html#CONCATENATION\">surprising\n"
+ "results</a>.\n"
+ "</li>\n"
+ "</ol>\n"
+ "\n"
+ "<h2><a name=\"REPLACE\"></a>Replace</h2>\n"
+ "\n"
+ "<span class=\"jclass\">String</span>.<span
class=\"jmethod\">replace</span>( <span
class=\"keyword\">char</span>\n"
+ "<span class=\"jvar\">target</span>, <span
class=\"keyword\">char</span> <span
class=\"jvar\">replacement</span>\n"
+ ") is considerably faster than <span
class=\"jclass\">String</span>.<span
class=\"jmethod\">replace</span>(\n"
+ "<span class=\"jclass\">String</span> <span
class=\"jvar\">target</span>, <span class=\"jclass\">String</span>\n"
+ "<span class=\"jvar\">replacement</span> ). Both replace
<b>all</b> occurrences. So\n"
+ "if you are replacing just a <span class=\"keyword\">char</span>,
use single quotes.\n"
+ "Unforunately, <span class=\"jclass\">String</span>.<span
class=\"jmethod\">replace</span>(\n"
+ "<span class=\"jclass\">String</span> <span
class=\"jvar\">target</span>, <span class=\"jclass\">String</span>\n"
+ "<span class=\"jvar\">replacement</span> ) is only available in
JDK 1.5+.\n"
+ "<p>\n"
+ "<span class=\"jmethod\">replaceAll</span>( <span
class=\"jclass\">String</span> <span class=\"jvar\">regex</span>,\n"
+ "<span class=\"jclass\">String</span> <span
class=\"jvar\">replacement</span> ) also\n"
+ "replaces all instances. The difference is, <span
class=\"jmethod\">replaceAll</span>\n"
+ "looks for a regex <span class=\"jclass\">String</span> not a
simple <span class=\"jclass\">String</span>.\n"
+ "Beware of using <span class=\"jmethod\">replaceAll</span>( <span
class=\"jclass\">String</span>\n"
+ "<span class=\"jvar\">regex</span>, <span
class=\"jclass\">String</span> <span
class=\"jvar\">replacement</span>)\n"
+ "when you meant <span class=\"jmethod\">replace</span>( <span
class=\"jclass\">String</span>\n"
+ "<span class=\"jvar\">target</span>, <span
class=\"jclass\">String</span> <span
class=\"jvar\">replacement</span>\n"
+ "). The second parameter is not just a simple <span
class=\"jclass\">String</span>. <span
class=\"jclass\">String</span>.\n"
+ "<span class=\"jmethod\">replaceAll</span> behaves like <span
class=\"jclass\">Matcher</span>.\n"
+ "<span class=\"jmethod\">replaceAll</span>. <span
class=\"literal\">$</span> is a\n"
+ "reference to a captured String in the search pattern and <span
class=\"literal\">\\</span>\n"
+ "is the regex quote character, meaning literal <span
class=\"literal\">\\</span>\n"
+ "must be coded as <span class=\"literal\">\\\\\\\\</span> and
literal <span class=\"literal\">$</span>\n"
+ "as <span class=\"literal\">\\\\$</span>.\n"
+ "<p>\n"
+ "<span class=\"jmethod\">replaceFirst</span>( <span
class=\"jclass\">String</span> <span class=\"jvar\">regex</span>,\n"
+ "<span class=\"jclass\">String</span> <span
class=\"jvar\">replacement</span> ) also\n"
+ "takes a regex. There is no <span
class=\"jmethod\">replaceFirst</span> that takes\n"
+ "only a simple <span class=\"jclass\">String</span>.\n"
+ "<p>\n"
+ "<span class=\"jclass\">String</span>.<span
class=\"jmethod\">replace</span> in the\n"
+ "Javadoc is shown with <span
class=\"jinterface\">CharSequence</span> parameters.\n"
+ "Don’t let this frighten you. <span
class=\"jclass\">String</span> implements <span
class=\"jinterface\">CharSequence</span>,\n"
+ "so <span class=\"jmethod\">replace</span> works fine on <span
class=\"jclass\">String</span>s.\n"
+ "<span class=\"jmethod\">replace</span> works on some other
things as well such as <span
class=\"jclass\">StringBuilder</span>s.\n"
+ "\n"
+ "<h2><a name=\"VALIDATING\"></a>Validating</h2>\n"
+ "\n"
+ "You can use <span
class=\"jpackage\">com.mindprod.common11</span>.<span
class=\"jclass\">StringTools</span>.\n"
+ "<span class=\"jmethod\">isLegal</span> to ensure a <span
class=\"jclass\">String</span>\n"
+ "contains only the characters you consider legal. You can <a
href=\"../products1.html#COMMON11\">download</a>\n"
+ "it. It is pretty simple, using <span
class=\"jmethod\">indexOf</span> on the legal <span
class=\"jclass\">String</span>.\n"
+ "<p>\n"
+ "You can also use <span class=\"jmethod\">charAt</span> to
extract the characters\n"
+ "one by one, then categorise them with the <span
class=\"jclass\">Character</span>\n"
+ "methods such as <span class=\"jmethod\">isDigit</span>.\n"
+ "\n"
+ "<h2><a name=\"REGEX\"></a>Regex</h2>\n"
+ "\n"
+ "<span class=\"jclass\">String</span> borrows some convenience <a
href=\"regex.html\">regex</a>\n"
+ "methods, such as <span class=\"jmethod\">split</span>, <span
class=\"jmethod\">matches</span>,\n"
+ "<span class=\"jmethod\">replaceAll</span> and <span
class=\"jmethod\">replaceFirst</span>.\n"
+ "Normally you would use the more efficient <span
class=\"jpackage\">java.util.regex</span>\n"
+ "methods where you precompile your <span
class=\"jclass\">Pattern</span> and reuse\n"
+ "it. The <span class=\"jclass\">String</span> versions are for
one-shot use where\n"
+ "efficiency is not a concern.\n"
+ "<p>\n"
+ "Not only <span class=\"jmethod\">replaceAll</span> but <span
class=\"jmethod\">replace</span>\n"
+ "is implemented in an inefficient way, compiling a regex pattern
every time it is\n"
+ "invoked:\n"
+ "<!-- macro JDisplay replace.implementation.javafrag --><!--
generated -->\n"
+ "<div class=\"jdisplay\">\n"
+ "<iframe
src=\"snippet/iframe/replace.implementation.javafrag.html\"
width=\"800\" height=\"156\" scrolling=\"yes\" frameborder=\"0\"
marginheight=\"0\" marginwidth=\"0\"></iframe>\n"
+ "</div><!-- /generated by JDisplay -->\n"
+ "So, if you are going to use <span
class=\"jmethod\">replace</span> or <span
class=\"jmethod\">replaceAll</span>\n"
+ "more than once, you should use a separate regex compile done
only once.\n"
+ "<p>\n"
+ "\n"
+ "<h2 class=\"gotcha\"><a name=\"GOTCHAS\"></a>Gotchas</h2>\n"
+ "\n"
+ "<ul class=\"gotcha\">\n"
+ "<li>\n"
+ "<span class=\"jclass\">String</span>.<span
class=\"jmethod\">replaceAll</span>( a, b\n"
+ ") is <b>not</b> the method to use to replace all instances of b
in a. Instead\n"
+ "you use <span class=\"jclass\">String</span>. <span
class=\"jmethod\">replace</span>\n"
+ "( a, b ). <span class=\"jmethod\">replaceAll</span> is a
convenience <a href=\"regex.html\">regex</a>\n"
+ "method.\n"
+ "</li>\n"
+ "<li>\n"
+ "<span class=\"jclass\">String</span>.<span
class=\"jmethod\">replace</span> ( a, b )\n"
+ "does not modify a. It creates a new modified <span
class=\"jclass\">String</span>.\n"
+ "This is true of all <span class=\"jclass\">String</span>
methods. <span class=\"jclass\">String</span>s\n"
+ "are immutable. No method can modify the original <span
class=\"jclass\">String</span>.\n"
+ "</li>\n"
+ "</ul>\n"
+ "\n"
+ "<h2><a name=\"LEARNINGMORE\"></a>Learning More</h2>\n"
+ "\n"
+ "<!-- macro Sun \"the String class\" api/java/lang/String.html
--><!-- generated -->\n"
+ "<div class=\"sun\">\n"
+ "Sun’s Javadoc on the <span
class=\"jclassdef\">String</span> class :\n"
+ "available:\n"
+ "<ul>\n"
+ "<li>\n"
+ "<a class=\"offsite\"
href=\"http://java.sun.com/javase/6/docs/api/java/lang/String.html\">on\n"
+ "the web at java.Sun.com</a>\n"
+ "</li>\n"
+ "<li>\n"
+ "<a
href=\"file://localhost/J:/Program%20Files/java/jdk1.6.0_06/docs/api/java/lang/String.html\">in\n"
+ "the JDK 1.6.0_06</a> or in <a
href=\"file://localhost/J:/Program%20Files/java/jdk1.5.0_15/docs/api/java/lang/String.html\">JDK\n"
+ "1.5.0_15</a> on your local <a class=\"plain\"
href=\"jdrive.html\"><span class=\"drive\">J:</span>\n"
+ "drive</a>.\n"
+ "</li>\n"
+ "</ul>\n"
+ "</div>\n"
+ "<!-- /generated by Sun --><!-- macro Sun \"StringBuffer\"
api/java/lang/StringBuffer.html --><!-- generated -->\n"
+ "<div class=\"sun\">\n"
+ "Sun’s Javadoc on the <span
class=\"jclassdef\">StringBuffer</span> class :\n"
+ "available:\n"
+ "<ul>\n"
+ "<li>\n"
+ "<a class=\"offsite\"
href=\"http://java.sun.com/javase/6/docs/api/java/lang/StringBuffer.html\">on\n"
+ "the web at java.Sun.com</a>\n"
+ "</li>\n"
+ "<li>\n"
+ "<a
href=\"file://localhost/J:/Program%20Files/java/jdk1.6.0_06/docs/api/java/lang/StringBuffer.html\">in\n"
+ "the JDK 1.6.0_06</a> or in <a
href=\"file://localhost/J:/Program%20Files/java/jdk1.5.0_15/docs/api/java/lang/StringBuffer.html\">JDK\n"
+ "1.5.0_15</a> on your local <a class=\"plain\"
href=\"jdrive.html\"><span class=\"drive\">J:</span>\n"
+ "drive</a>.\n"
+ "</li>\n"
+ "</ul>\n"
+ "</div>\n"
+ "<!-- /generated by Sun --><!-- macro Sun \"StringBuilder\"
api/java/lang/StringBuilder.html --><!-- generated -->\n"
+ "<div class=\"sun\">\n"
+ "Sun’s Javadoc on the <span
class=\"jclassdef\">StringBuilder</span> class :\n"
+ "available:\n"
+ "<ul>\n"
+ "<li>\n"
+ "<a class=\"offsite\"
href=\"http://java.sun.com/javase/6/docs/api/java/lang/StringBuilder.html\">on\n"
+ "the web at java.Sun.com</a>\n"
+ "</li>\n"
+ "<li>\n"
+ "<a
href=\"file://localhost/J:/Program%20Files/java/jdk1.6.0_06/docs/api/java/lang/StringBuilder.html\">in\n"
+ "the JDK 1.6.0_06</a> or in <a
href=\"file://localhost/J:/Program%20Files/java/jdk1.5.0_15/docs/api/java/lang/StringBuilder.html\">JDK\n"
+ "1.5.0_15</a> on your local <a class=\"plain\"
href=\"jdrive.html\"><span class=\"drive\">J:</span>\n"
+ "drive</a>.\n"
+ "</li>\n"
+ "</ul>\n"
+ "</div>\n"
+ "<!-- /generated by Sun --><a name=\"SEE\"></a>\n"
+ "<div class=\"see\">\n"
+ "<a href=\"casesensitive.html\">case sensitive</a>\n"
+ "<br>\n"
+ "<a href=\"charsequence.html\">CharSequence</a>\n"
+ "<br>\n"
+ "<a href=\"indexof.html\">indexOf</a>\n"
+ "<br>\n"
+ "<a href=\"interned.html\">interned Strings</a>\n"
+ "<br>\n"
+ "<a href=\"overload.html\">overload</a>\n"
+ "<br>\n"
+ "<a href=\"regex.html\">regex</a>\n"
+ "<br>\n"
+ "<a href=\"literal.html#STRING\">String literals</a>\n"
+ "<br>\n"
+ "<a href=\"newbie.html#STRINGREDUNDANCY\">String
redundancy</a>\n"
+ "<br>\n"
+ "<a href=\"stringbuffer.html\">StringBuffer</a>\n"
+ "<br>\n"
+ "<a href=\"stringbuilder.html\">StringBuilder</a>\n"
+ "<br>\n"
+ "<a href=\"stringwidth.html\">stringWidth</a>\n"
+ "<br>\n"
+ "<a href=\"substring.html\">substring</a>\n"
+ "</div>\n"
+ "</dd>\n"
+ "</dl>\n"
+ "<!-- macro Foot nonmil --><!-- generated --><a
name=\"BOTTOM\"></a>\n"
+ "<hr class=\"foot\">\n"
+ "<table class=\"borderless\" summary=\"standard footer\">\n"
+ "<tbody>\n"
+ "<tr>\n"
+ "<td align=\"center\" valign=\"middle\" rowspan=\"7\"><a
class=\"plain\" href=\"../index.html#TITLE\" target=\"_top\">\n"
+ "<img src=\"../image/stylesheet/home.png\" width=\"26\"
height=\"26\" align=\"middle\" alt=\"CMP_home\" border=\"0\"></a>\n"
+ "<a class=\"plain\" href=\"#TOP\"><img
src=\"../image/stylesheet/totop.png\" width=\"16\" height=\"16\"
alt=\"jump to top\" align=\"middle\" border=\"0\"></a>\n"
+ "<br>\n"
+ "<a class=\"plain\" href=\"../index.html#TITLE\"
target=\"_top\"><img src=\"../image/cmpmartybutton.png\" width=\"133\"
height=\"168\" alt=\"CMP logo\" border=\"0\"></a></td>\n"
+ "<td colspan=\"3\"><span class=\"roedy\"><img
src=\"../image/stylesheet/feedback.png\" width=\"22\" height=\"18\"
alt=\"feedback\" align=\"middle\" border=\"0\">Please\n"
+ "email your <a class=\"plain\"
href=\"../feedback/feedback.html\">feedback for\n"
+ "publication</a>, errors, omissions, broken/redirected link
reports\n"
+ "<br>\n"
+ "and suggestions to improve this page to <a class=\"plain\"
href=\"../contact/contact.html\">Roedy\n"
+ "Green</a> : <img src=\"../image/mailto/feedback.png\"
width=\"209\" height=\"22\" alt=\"feedback email\" class=\"mailto\"
border=\"0\"></span></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><span class=\"cmplogo\"><span
class=\"cmplogocaps\">C</span>anadian <span
class=\"cmplogocaps\">M</span>ind\n"
+ "<span class=\"cmplogocaps\">P</span>roducts</span></td>\n"
+ "<td valign=\"top\" rowspan=\"2\"><a class=\"plain\"
href=\"css.html\"><img src=\"../image/seal/madewithcss.png\"
width=\"87\" height=\"29\" alt=\"made with CSS\" border=\"0\"></a>\n"
+ "<br>\n"
+ "<a class=\"plain\" href=\"htmlvalidator.html\"><img
src=\"../image/seal/htmlvalidated.png\" width=\"88\" height=\"31\"
alt=\"HTML Checked!\" border=\"0\"></a>\n"
+ "<br>\n"
+ "<a class=\"plain\" href=\"icra.html\"><img
src=\"../image/seal/icra.png\" width=\"88\" height=\"31\" alt=\"ICRA
ratings logo\" border=\"0\"></a></td>\n"
+ "<td valign=\"top\" rowspan=\"2\"><span class=\"ad\">\n"
+ "<script type=\"text/javascript\">\n"
+ "<!--\n"
+ "google_ad_client=\"pub-3625079171090429\";\n"
+ "google_ad_width=468;\n"
+ "google_ad_height=60;\n"
+ "google_ad_format=\"468x60_as\";\n"
+ "google_ad_type = \"text_image\";\n"
+ "google_ad_channel=\"\";\n"
+ "google_color_border = \"336699\";\n"
+ "google_color_bg = \"FFFFFF\";\n"
+ "google_color_link = \"0000FF\";\n"
+ "google_color_url = \"008000\";\n"
+ "google_color_text = \"000000\";\n"
+ "//-->\n"
+ "</script>\n"
+ "<script type=\"text/javascript\"\n"
+
"src=\"http://pagead2.googlesyndication.com/pagead/show_ads.js\">\n"
+ "</script>\n"
+ "</span></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><span class=\"domain\">mindprod.com</span> IP:[<span
class=\"ip\">65.110.21.43</span>]</td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td>Your face IP:[<span class=\"ip\"><!--#echo
var=\"REMOTE_ADDR\" --></span>]</td>\n"
+ "<td colspan=\"2\">The information on this page is for
non-military use only.</td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><span class=\"unobtrusive\">You are visitor number</span>
<span class=\"hitcount\"><!--#exec
cgi=\"/perl/count.pl\"--></span>.</td>\n"
+ "<td colspan=\"2\">Military use includes use by defence
contractors.</td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><span class=\"unobtrusive\">You can get a fresh copy of this
page from:</span></td>\n"
+ "<td colspan=\"2\"><span class=\"unobtrusive\">or possibly from
your local <a class=\"plain\" href=\"jdrive.html\">\n"
+ "<span class=\"drive\">J:</span> drive</a> (Java virtual
drive/Mindprod website\n"
+ "mirror)</span></td>\n"
+ "</tr>\n"
+ "<tr>\n"
+ "<td><a class=\"plain\"
href=\"http://mindprod.com/jgloss/string.html\">http://mindprod.com/jgloss/string.html</a></td>\n"
+ "<td colspan=\"2\"><a class=\"plain\"
href=\"file://localhost/J:/mindprod/jgloss/string.html\">J:\\mindprod\\jgloss\\string.html</a></td>\n"
+ "</tr>\n"
+ "</tbody>\n"
+ "</table>\n"
+ "</body>\n"
+ "</html>\n"
+ "<!-- /generated by Foot -->\n";
}
--
Roedy Green Canadian Mind Products
The Java Glossary
http://mindprod.com