Re: indexOf
On Thu, 21 Feb 2008 06:17:28 -0500, "Jean Pierre Daviau"
<Once@WasEno.ugh> wrote, quoted or indirectly quoted someone who said
:
How comes that the checkExtensions method does not work and the
tedious if( temp.indexOf("mov") != -1 ) do?
Here is the way I handled the problem of good, bad and iffy
extensions:
/*
copyright (c) 2002-2008 Roedy Green, Canadian Mind Products
#101 - 2536 Wark Street
Victoria, BC Canada V8T 4G8
tel: (250) 361-9093
http://mindprod.com
Source and executables may be freely used for any purpose except
military.
version history
version 1.0 initial
1.1 allow multiple files on the command line.
trim leading and trailing blank lines.
ensures consistent use of \r\n on Windows, or equivalent for
platform.
ensures file ends with exactly one \r\n
1.2 2005-07-27 add more bad extensions.
1.3 2005-07-16 add Javadoc
1.4 2006-03-05 reformat with IntelliJ, add Javadoc.
1.5 2007-06-07 add pad, icon.
*/
package com.mindprod.dedup;
import com.mindprod.common11.StringTools;
import java.awt.*;
import java.io.*;
/**
* <pre>
* Removes adjacent duplicate lines from a text file.
* Trims trailing blanks on each line.
* Trims leading and trailing blank lines.
* If nothing changed, file date will not be disturbed.
* Case sensitive compare, Only compares adjacent lines. Does not sort
the file
* first.
* converts all Unix, DOS, or Mac line terminators to the platform
style.
* <p/>
* usage: java com.mindprod.dedup.DeDup MySource.txt another.txt
* or with JET:
* dedup.exe MySource.txt another.txt
* </pre>
*
* @author Roedy Green, Canadian Mind Products
* @version 1.5, 2007-06-24
*/
public final class DeDup
{
// ------------------------------ FIELDS
------------------------------
/**
* which line end convention do we use
*/
static boolean unix = false;
/**
* input "before" file name
*/
static String inFilename;
/**
* output "after" file name, the temporary, later renamed to match
the input
*/
static String outFilename;
private static final String RELEASEDATE = "2007-06-24";
private static String TITLESTRING = "DeDup";
private static final String VERSIONSTRING = "1.5";
/**
* input "before" reader
*/
static BufferedReader inReader;
/**
* input "before" file
*/
static File inFile;
/**
* output "after" file
*/
static File outFile;
/**
* output "after" file writer
*/
static PrintWriter outWriter;
/**
* don't need undisplayed copyright notice, since have banner.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
* <p/>
* extensions known unsafe to run DeDup on.
*/
/**
* extensions known unsafe to run DeDup on.
*/
static final String[] badExtensions = {
"ans",
"asm",
"bat",
"batfrag",
"blk",
"bmp",
"bod",
"btm",
"btmfrag",
"c",
"cfrag",
"class",
"cmd",
"com",
"cpp",
"cppfrag",
"css",
"cssfrag",
"csv",
"csvfrag",
"dat",
"dll",
"doc",
"e",
"exe",
"gif",
"h",
"hfrag",
"hpp",
"hppfrag",
"htm",
"html",
"htmlfrag",
"ico",
"ih",
"ini",
"jar",
"java",
"javafrag",
"jnlp",
"jnlpfrag",
"jpg",
"jsp",
"jspfrag",
"mac",
"mbx",
"mft",
"obj",
"p7b",
"pas",
"png",
"policy",
"prn",
"properties",
"ps",
"rh",
"seq",
"ser",
"sh",
"site",
"so",
"sql",
"sqlfrag",
"sym",
"tab",
"toc",
"use",
"usg",
"wiki",
"xml",
"xmlfrag",
"zip", };
/**
* extensions known safe to run DeDup on.
*/
static final String[] goodExtensions =
{ "ctl", "list", "log", "lst", "txt", };
// -------------------------- STATIC METHODS
--------------------------
/**
* display a banner about the author
*/
static void banner()
{
/* Usually not displayed, just embedded. */
System.out
.println( TITLESTRING
+ " "
+ VERSIONSTRING
+ "\n"
+ "\nFreeware to remove adjacent duplicate
lines."
+ "\ncopyright (c) 2002-2008 Roedy Green,
Canadian Mind Products"
+ "\n#101 - 2536 Wark Street, Victoria, BC
Canada V8T 4G8"
+ "\nTelephone: (250) 361-9093
Internet:roedyg@mindprod.com"
+ "\nMay be used freely for non-military use
only\n"
+ "released: "
+ RELEASEDATE
+ "\n\n" );
}// end banner
/**
* Ask user to confirm that some action is ok.
*
* @param prompt Question to ask the user.
*
* @return true if the user answers, yes it is ok to proceed.
Should redo this with a modal dialog so don't have to
* hit Y enter.
*/
static boolean confirm( String prompt )
{
/* just give a warning */
System.out.print( prompt );
System.out.print( " (Y)es (N)o " );
while ( true )
{/* loop forever till user enters Y or N */
honk();
int response = '\033';// default esc
try
{
// read single keystroke, even though user has to hit
enter.
response = System.in.read();// the console is a
// fileInputReader
}
catch ( IOException e )
{
}
response = Character.toUpperCase( (char) response );
switch ( response )
{
case 'Y':
System.out.println( " Yes" );
return true;
case 'N':
System.out.println( " No" );
return false;
/* others, keep looping */
}// end switch
}// end while
}// end confirm
/**
* Guts of the class. This is the dedup logic. copy inReader to
outWriter, processing tabs and line ends Presume
* files already open. Does not close them.
*
* @throws IOException
*/
static void deDupFile() throws IOException
{
String prevLine = null;
String thisLine;
boolean inLeading = true;
boolean pendingBlankLine = false;
while ( ( thisLine = inReader.readLine() ) != null )
{
thisLine = StringTools.trimTrailing( thisLine );
if ( thisLine.length() == 0 )
{
pendingBlankLine = true;
}
else if ( !thisLine.equals( prevLine ) )
{
// deal first with and pending blank lines
if ( inLeading )
{
// ignore leading blank lines.
inLeading = false;
pendingBlankLine = false;
}
else
{
if ( pendingBlankLine )
{
// emit just one embedded blank line, collapse
dup blank
// lines.
outWriter.println();
pendingBlankLine = false;
}
}
// deal with the unique line
outWriter.println( thisLine );
prevLine = thisLine;
}
}/* end while */
// fall out the end with pendingBlankLine we just totally
ignore.
// that is how we trim trailing blanks.
}// end deDupFile
/**
* abort the run, clean up as best as possible.
*/
static void die()
{
honk();
try
{
if ( inReader != null )
{
inReader.close();
}
if ( outWriter != null )
{
outWriter.close();
}
}
catch ( IOException e )
{
}
System.exit( 1 );/* exit with errorlevel = 1 */
}// end die
/**
* make sure the filename we are about to process has a safe
extension.
*/
static void ensureSafeFilename()
{
/*
* Ensure appropriate file name extensions. good =.txt etc -
done
* without prompt bad =.exe etc. - abort warning =.doc &
others - ask
*/
String extension = "";
int whereDot = inFilename.lastIndexOf( '.' );
if ( whereDot >= 0 && whereDot <= inFilename.length() - 2 )
{
extension = inFilename.substring( whereDot + 1 );
}
for ( int i = 0; i < goodExtensions.length; i++ )
{
if ( extension.equalsIgnoreCase( goodExtensions[ i ] ) )
{/* match, it is Good */
return;
}
}
for ( int i = 0; i < badExtensions.length; i++ )
{
if ( extension.equalsIgnoreCase( badExtensions[ i ] ) )
{/* match, it is bad */
inFile = null;
return;
}
}
/* just give a warning */
if ( !confirm( "\n Warning!\n"
+ " DeDup is not usually used on files such
as "
+ inFilename
+ ".\n"
+ " Do you want to dedup anyway?" ) )
{
inFile = null;
}
}// end ensureSafeFilename
/**
* make a noise
*/
static void honk()
{
Toolkit.getDefaultToolkit().beep();
}// end honk
/**
* open the input "before" file
*/
static void openInReader()
{
try
{
inFile = new File( inFilename );
if ( !inFile.exists() )
{
banner();
System.out.print( "Oops! Cannot find file " );
System.out.println( inFilename );
die();
}
// ignore directories, usually put there by wildcard
expansion.
if ( inFile.isDirectory() )
{
inFile = null;
// keep going
return;
}
if ( !inFile.canRead() )
{
banner();
System.out
.print( "Oops! no permission to read (i.e.
examine) the file " );
System.out.println( inFilename );
die();
}
if ( !inFile.canWrite() )
{
banner();
System.out
.print( "Oops! no permission to write (i.e.
change) the file " );
System.out.println( inFilename );
die();
}
inReader = new BufferedReader( new FileReader( inFile ),
4096
/* buffsize */ );
}
catch ( FileNotFoundException e )
{
banner();
System.out.print( "Oops! Cannot open file " );
System.out.println( inFilename );
die();
}
}// end openInReader
/**
* open the output "after" file
*/
static void openOutWriter()
{
try
{
// get a temporary file in the same directory as inFile.
// outFile = getTempFile("DeDup", inFile);
outFile = File.createTempFile( "dedup", "tmp", inFile
.getParentFile() );
outWriter =
new PrintWriter( new BufferedWriter( new
FileWriter( outFile ),
64 * 1024
/* buffsize
*/ ), false
/* auto flush */ );
}
catch ( IOException e )
{
System.out
.println( "Oops! Cannot create the temporary work
file\n" );
die();
}
}// end OpenOutWriter
// --------------------------- main() method
---------------------------
/**
* Command line utility to remove adjacent duplicate lines.
*
* @param args list of filenames to dedup.
*/
public static void main( String[] args )
{
try
{
// process each file on command line, or expanded wild
card.
for ( int i = 0; i < args.length; i++ )
{
inFilename = args[ i ];
openInReader();/* Open input "before" file. */
/* Make sure file exists before */
/* song and dance about extension. */
if ( inFile == null )
{
/* ignore */
System.out
.println( "- "
+ inFilename
+ " : could not open. Directory
or unreadable file" );
continue;
}
ensureSafeFilename();/* make sure filename has sane
extension */
if ( inFile == null )
{
/* ignore */
System.out
.println( "- "
+ inFilename
+ " : bypassed based on
extension" );
continue;
}
openOutWriter();/* open output "after" file */
/*
* copy inReader to outWriter removing duplicate
lines, trailing
* spaces, and lead/trailing blank lines
*/
deDupFile();
/*
* if we trimmed, changed line ends, removed dups,
file size
* should change. In a pathological case it would not,
but then
* we do no damage.
*/
inReader.close();
outWriter.close();
if ( inFile.length() == outFile.length() )
{
// nothing changed
System.out
.println( "- "
+ inFilename
+ " : contained no duplicate
lines. Left as is." );
}
else
{
// file really did change.
System.out.println( "* " + inFilename + " :
changed!" );
/* Rename output to input */
inFile.delete();
outFile.renameTo( inFile );
// don't delete outFile, it has been renamed to a
real file
}
}// end for
}
catch ( IOException e )
{
System.out.print( "Oops! IO failure. e.g. cannot find
file.\n" );
die();
}
}// end main
}
--
Roedy Green Canadian Mind Products
The Java Glossary
http://mindprod.com