17.1 文字处理
17.1 文字处理
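如果您有C或C++的经验,那么最开始可能会对Java控制文本的能力感到怀疑。事实上,它的一个缺点就是执行速度比较慢,这可能妨碍我们的某些工作。然而,Java提供的工具(特别是String类)具有很强的功能,就象本节的例子展示的那样。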
正如大家即将看到的那样,建立这些例子的目的都是为了解决本书编制过程中遇到的一些问题。但是,它们的能力并非仅止于此。通过简单的改造,即可让它们在其他场合大显身手。除此以外,它们还揭示出了本书以前没有强调过的一项Java特性。
对于本书每一个完整的代码列表(不是代码段),大家无疑会注意到它们都用特殊的注释记号起始与结束('//:'和'///:~')。之所以要包括这种标志信息,是为了能将代码从本书自动提取到可编译的源码文件中。
我首先将整本书都以
但这样还不够。程序还要对包(package)名进行跟踪,从而监视章内发生的变化。由于每一章使用的所有包都以
但为什么还要如此麻烦地使用打包文件呢?这是由于不同的计算机平台用不同的方式在文件里保存文本信息。其中最大的问题是换行字符的表示方法;当然,还有可能存在另一些问题。然而,
下面是完整的代码,后面会对它进行详细的说明:
//: CodePackager.java
// "Packs" and "unpacks" the code in "Thinking
// in Java" for cross-platform distribution.
/* Commented so CodePackager sees it and starts
a new chapter directory, but so you don't
have to worry about the directory where this
program lives:
package c17;
*/
import java.util.*;
import java.io.*;
// Minimal fatal-error reporter.
class Pr {
  // Prints the message, prefixed with "ERROR: ", on the
  // standard error stream and then terminates the JVM
  // with exit status 1, so this method never returns.
  static void error(String e) {
    String report = "ERROR: " + e;
    System.err.println(report);
    System.exit(1);
  }
}
// Helpers that open and close file streams. Every
// IOException is handed to Pr.error() with a short
// message, so callers need no try/catch of their own.
class IO {
  // Opens the file for buffered character input.
  static BufferedReader disOpen(File f) {
    try {
      return new BufferedReader(new FileReader(f));
    } catch(IOException e) {
      Pr.error("could not open " + f);
      return null;
    }
  }
  // Same as above, taking a file name.
  static BufferedReader disOpen(String fname) {
    File target = new File(fname);
    return disOpen(target);
  }
  // Opens the file for buffered binary output wrapped
  // in a DataOutputStream.
  static DataOutputStream dosOpen(File f) {
    try {
      FileOutputStream raw = new FileOutputStream(f);
      return new DataOutputStream(
        new BufferedOutputStream(raw));
    } catch(IOException e) {
      Pr.error("could not open " + f);
      return null;
    }
  }
  // Same as above, taking a file name.
  static DataOutputStream dosOpen(String fname) {
    File target = new File(fname);
    return dosOpen(target);
  }
  // Opens the file for buffered character output wrapped
  // in a PrintWriter.
  static PrintWriter psOpen(File f) {
    try {
      FileWriter raw = new FileWriter(f);
      return new PrintWriter(new BufferedWriter(raw));
    } catch(IOException e) {
      Pr.error("could not open " + f);
      return null;
    }
  }
  // Same as above, taking a file name.
  static PrintWriter psOpen(String fname) {
    File target = new File(fname);
    return psOpen(target);
  }
  // Closes a Writer, reporting any failure.
  static void close(Writer os) {
    try {
      os.close();
    } catch(IOException e) {
      Pr.error("closing " + os);
    }
  }
  // Closes a DataOutputStream, reporting any failure.
  static void close(DataOutputStream os) {
    try {
      os.close();
    } catch(IOException e) {
      Pr.error("closing " + os);
    }
  }
  // Closes a Reader, reporting any failure.
  static void close(Reader os) {
    try {
      os.close();
    } catch(IOException e) {
      Pr.error("closing " + os);
    }
  }
}
class SourceCodeFile {
public static final String
startMarker = "//:", // Start of source file
endMarker = "} ///:~", // End of source
endMarker2 = "}; ///:~", // C++ file end
beginContinue = "} ///:Continued",
endContinue = "///:Continuing",
packMarker = "###", // Packed file header tag
eol = // Line separator on current system
System.getProperty("line.separator"),
filesep = // System's file path separator
System.getProperty("file.separator");
public static String copyright = "";
static {
try {
BufferedReader cr =
new BufferedReader(
new FileReader("Copyright.txt"));
String crin;
while((crin = cr.readLine()) != null)
copyright += crin + "\n";
cr.close();
} catch(Exception e) {
copyright = "";
}
}
private String filename, dirname,
contents = new String();
private static String chapter = "c02";
// The file name separator from the old system:
public static String oldsep;
public String toString() {
return dirname + filesep + filename;
}
// Constructor for parsing from document file:
public SourceCodeFile(String firstLine,
BufferedReader in) {
dirname = chapter;
// Skip past marker:
filename = firstLine.substring(
startMarker.length()).trim();
// Find space that terminates file name:
if(filename.indexOf(' ') != -1)
filename = filename.substring(
0, filename.indexOf(' '));
System.out.println("found: " + filename);
contents = firstLine + eol;
if(copyright.length() != 0)
contents += copyright + eol;
String s;
boolean foundEndMarker = false;
try {
while((s = in.readLine()) != null) {
if(s.startsWith(startMarker))
Pr.error("No end of file marker for " +
filename);
// For this program, no spaces before
// the "package" keyword are allowed
// in the input source code:
else if(s.startsWith("package")) {
// Extract package name:
String pdir = s.substring(
s.indexOf(' ')).trim();
pdir = pdir.substring(
0, pdir.indexOf(';')).trim();
// Capture the chapter from the package
// ignoring the 'com' subdirectories:
if(!pdir.startsWith("com")) {
int firstDot = pdir.indexOf('.');
if(firstDot != -1)
chapter =
pdir.substring(0,firstDot);
else
chapter = pdir;
}
// Convert package name to path name:
pdir = pdir.replace(
'.', filesep.charAt(0));
System.out.println("package " + pdir);
dirname = pdir;
}
contents += s + eol;
// Move past continuations:
if(s.startsWith(beginContinue))
while((s = in.readLine()) != null)
if(s.startsWith(endContinue)) {
contents += s + eol;
break;
}
// Watch for end of code listing:
if(s.startsWith(endMarker) ||
s.startsWith(endMarker2)) {
foundEndMarker = true;
break;
}
}
if(!foundEndMarker)
Pr.error(
"End marker not found before EOF");
System.out.println("Chapter: " + chapter);
} catch(IOException e) {
Pr.error("Error reading line");
}
}
// For recovering from a packed file:
public SourceCodeFile(BufferedReader pFile) {
try {
String s = pFile.readLine();
if(s == null) return;
if(!s.startsWith(packMarker))
Pr.error("Can't find " + packMarker
+ " in " + s);
s = s.substring(
packMarker.length()).trim();
dirname = s.substring(0, s.indexOf('#'));
filename = s.substring(s.indexOf('#') + 1);
dirname = dirname.replace(
oldsep.charAt(0), filesep.charAt(0));
filename = filename.replace(
oldsep.charAt(0), filesep.charAt(0));
System.out.println("listing: " + dirname
+ filesep + filename);
while((s = pFile.readLine()) != null) {
// Watch for end of code listing:
if(s.startsWith(endMarker) ||
s.startsWith(endMarker2)) {
contents += s;
break;
}
contents += s + eol;
}
} catch(IOException e) {
System.err.println("Error reading line");
}
}
public boolean hasFile() {
return filename != null;
}
public String directory() { return dirname; }
public String filename() { return filename; }
public String contents() { return contents; }
// To write to a packed file:
public void writePacked(DataOutputStream out) {
try {
out.writeBytes(
packMarker + dirname + "#"
+ filename + eol);
out.writeBytes(contents);
} catch(IOException e) {
Pr.error("writing " + dirname +
filesep + filename);
}
}
// To generate the actual file:
public void writeFile(String rootpath) {
File path = new File(rootpath, dirname);
path.mkdirs();
PrintWriter p =
IO.psOpen(new File(path, filename));
p.print(contents);
IO.close(p);
}
}
// Groups SourceCodeFile objects by their directory so a
// whole set of listings can be written out at once,
// either as one packed file or as individual files.
class DirMap {
  // directory name -> Vector of SourceCodeFile
  private Hashtable byDirectory = new Hashtable();
  private String rootpath;
  // Files are written below the current directory.
  DirMap() {
    this(System.getProperty("user.dir"));
  }
  // Files are written below alternateDir.
  DirMap(String alternateDir) {
    rootpath = alternateDir;
  }
  // Files the listing under its own directory name.
  public void add(SourceCodeFile f){
    String path = f.directory();
    Vector listings = (Vector)byDirectory.get(path);
    if(listings == null) {
      listings = new Vector();
      byDirectory.put(path, listings);
    }
    listings.addElement(f);
  }
  // Writes a header naming this system's file separator,
  // then every stored listing, into the file fname.
  public void writePackedFile(String fname) {
    DataOutputStream packed = IO.dosOpen(fname);
    try {
      packed.writeBytes("###Old Separator:" +
        SourceCodeFile.filesep + "###\n");
    } catch(IOException e) {
      Pr.error("Writing separator to " + fname);
    }
    for(Enumeration dirs = byDirectory.keys();
        dirs.hasMoreElements(); ) {
      String dir = (String)dirs.nextElement();
      System.out.println(
        "Writing directory " + dir);
      Enumeration files =
        ((Vector)byDirectory.get(dir)).elements();
      while(files.hasMoreElements()) {
        SourceCodeFile f =
          (SourceCodeFile)files.nextElement();
        f.writePacked(packed);
      }
    }
    IO.close(packed);
  }
  // Write all the files in their directories:
  public void write() {
    for(Enumeration dirs = byDirectory.keys();
        dirs.hasMoreElements(); ) {
      String dir = (String)dirs.nextElement();
      Vector listings = (Vector)byDirectory.get(dir);
      Enumeration files = listings.elements();
      while(files.hasMoreElements()) {
        SourceCodeFile f =
          (SourceCodeFile)files.nextElement();
        f.writeFile(rootpath);
      }
      // Add an empty file whose name records how many
      // files were written to this directory, as a
      // check:
      File target = new File(rootpath, dir);
      String countName =
        Integer.toString(listings.size()) + ".files";
      IO.close(IO.dosOpen(new File(target, countName)));
    }
  }
}
public class CodePackager {
private static final String usageString =
"usage: java CodePackager packedFileName" +
"\nExtracts source code files from packed \n" +
"version of Tjava.doc sources into " +
"directories off current directory\n" +
"java CodePackager packedFileName newDir\n" +
"Extracts into directories off newDir\n" +
"java CodePackager -p source.txt packedFile" +
"\nCreates packed version of source files" +
"\nfrom text version of Tjava.doc";
private static void usage() {
System.err.println(usageString);
System.exit(1);
}
public static void main(String[] args) {
if(args.length == 0) usage();
if(args[0].equals("-p")) {
if(args.length != 3)
usage();
createPackedFile(args);
}
else {
if(args.length > 2)
usage();
extractPackedFile(args);
}
}
private static String currentLine;
private static BufferedReader in;
private static DirMap dm;
private static void
createPackedFile(String[] args) {
dm = new DirMap();
in = IO.disOpen(args[1]);
try {
while((currentLine = in.readLine())
!= null) {
if(currentLine.startsWith(
SourceCodeFile.startMarker)) {
dm.add(new SourceCodeFile(
currentLine, in));
}
else if(currentLine.startsWith(
SourceCodeFile.endMarker))
Pr.error("file has no start marker");
// Else ignore the input line
}
} catch(IOException e) {
Pr.error("Error reading " + args[1]);
}
IO.close(in);
dm.writePackedFile(args[2]);
}
private static void
extractPackedFile(String[] args) {
if(args.length == 2) // Alternate directory
dm = new DirMap(args[1]);
else // Current directory
dm = new DirMap();
in = IO.disOpen(args[0]);
String s = null;
try {
s = in.readLine();
} catch(IOException e) {
Pr.error("Cannot read from " + in);
}
// Capture the separator used in the system
// that packed the file:
if(s.indexOf("###Old Separator:") != -1 ) {
String oldsep = s.substring(
"###Old Separator:".length());
oldsep = oldsep.substring(
0, oldsep. indexOf('#'));
SourceCodeFile.oldsep = oldsep;
}
SourceCodeFile sf = new SourceCodeFile(in);
while(sf.hasFile()) {
dm.add(sf);
sf = new SourceCodeFile(in);
}
dm.write();
}
} ///:~
我们注意到
头两个类是“支持/工具”类,作用是使程序剩余的部分在编写时更加连贯,也更便于阅读。第一个是
帮助解决问题的第一个类是
//////////////////////////////////////////////////
// Copyright (c) Bruce Eckel, 1998
// Source code file from the book "Thinking in Java"
// All rights reserved EXCEPT as allowed by the
// following statements: You may freely use this file
// for your own work (personal or commercial),
// including modifications and distribution in
// executable form only. Permission is granted to use
// this file in classroom situations, including its
// use in presentation materials, as long as the book
// "Thinking in Java" is cited as the source.
// Except in classroom situations, you may not copy
// and distribute this code; instead, the sole
// distribution point is http://www.BruceEckel.com
// (and official mirror sites) where it is
// freely available. You may not remove this
// copyright and notice. You may not distribute
// modified versions of the source code in this
// package. You may not use this file in printed
// media without the express permission of the
// author. Bruce Eckel makes no representation about
// the suitability of this software for any purpose.
// It is provided "as is" without express or implied
// warranty of any kind, including any implied
// warranty of merchantability, fitness for a
// particular purpose or non-infringement. The entire
// risk as to the quality and performance of the
// software is with you. Bruce Eckel and the
// publisher shall not be liable for any damages
// suffered by you or any third party as a result of
// using or distributing software. In no event will
// Bruce Eckel or the publisher be liable for any
// lost revenue, profit, or data, or for direct,
// indirect, special, consequential, incidental, or
// punitive damages, however caused and regardless of
// the theory of liability, arising out of the use of
// or inability to use software, even if Bruce Eckel
// and the publisher have been advised of the
// possibility of such damages. Should the software
// prove defective, you assume the cost of all
// necessary servicing, repair, or correction. If you
// think you've found an error, please email all
// modified files with clearly commented changes to:
// Bruce@EckelObjects.com. (please use the same
// address for non-code errors found in the book).
//////////////////////////////////////////////////
从一个打包文件中提取文件时,当初所用系统的文件分隔符也会标注出来,以便用本地系统适用的符号替换它。
当前章的子目录保存在
- 构建一个打包文件
第一个构造器用于从本书的
从这时起,大家会发现
解析出并保存好文件名后,第一行会被置入字串
另一种特殊情况与
- 从打包文件中提取
第二个构造器用于将源码文件从打包文件中恢复(提取)出来。在这儿,作为调用者的方法不必担心会跳过一些中间文本。打包文件包含了所有源码文件,它们相互间紧密地靠在一起。需要传递给该构造器的仅仅是一个
一旦发现
构造器剩下的部分就非常简单了。它读入每一行,把它合并到
- 程序列表的存取
接下来的一系列方法是简单的访问器:directory()、filename()(注意方法可能与字段有相同的拼写和大小写形式)和contents();hasFile()则用于指出这个对象是否包含了一个文件。
最后三个方法致力于将这个代码列表写进一个文件——要么通过
准备写
- 整套列表的包容
以子目录的形式组织代码列表是非常方便的,尽管这要求先在内存中建好整套列表。之所以要这样做,还有另一个很有说服力的原因:为了构建更“健康”的系统。也就是说,在创建代码列表的每个子目录时,都会加入一个额外的文件,它的名字包含了那个目录内应有的文件数目。
可通过两种方式建立一个
写一个打包文件时,需打开一个准备写入的文件(当作
用
- 主程序
前面介绍的那些类都要在
创建一个打包文件时,它默认位于当前目录,所以我们用默认构造器创建
提取/释放一个打包文件时,提取出来的内容可进入当前目录,亦可进入另一个备用目录。所以需要相应地创建
尽管对涉及文字处理的一些项目来说,前例显得比较方便,但下面要介绍的项目却能立即发挥作用,因为它执行的是一个样式检查,以确保我们的大小写形式符合“事实上”的
为了让这个程序正确运行,首先必须构建一个类名,将它作为一个“仓库”,负责容纳标准
为了用程序检查自己的代码,需要运行它,并向它传递要使用的仓库文件的路径与名字。它会检查当前目录中的所有类和标识符,并告诉我们哪些没有遵守典型的
要注意这个程序并不是十全十美的。有些时候,它可能报告自己查到一个问题。但当我们仔细检查代码的时候,却发现没有什么需要更改的。尽管这有点儿烦人,但仍比自己动手检查代码中的所有错误强得多。
下面列出源代码,后面有详细的解释:
//: ClassScanner.java
// Scans all files in directory for classes
// and identifiers, to check capitalization.
// Assumes properly compiling code listings.
// Doesn't do everything right, but is a very
// useful aid.
import java.io.*;
import java.util.*;
// A Hashtable in which every key owns a Vector of
// String values, so one key can map to many strings.
class MultiStringMap extends Hashtable {
  // Appends value to the key's Vector, creating the
  // Vector the first time the key is used.
  public void add(String key, String value) {
    Vector bucket = (Vector)get(key);
    if(bucket == null) {
      bucket = new Vector();
      put(key, bucket);
    }
    bucket.addElement(value);
  }
  // Returns the Vector stored for key. An unknown key
  // is fatal: a message is printed on System.err and
  // the JVM exits with status 1.
  public Vector getVector(String key) {
    Object stored = get(key);
    if(stored == null) {
      System.err.println(
        "ERROR: can't find key: " + key);
      System.exit(1);
    }
    return (Vector)stored;
  }
  // Prints every stored value on its own line, key by
  // key in the table's enumeration order.
  public void printValues(PrintStream p) {
    for(Enumeration k = keys();
        k.hasMoreElements(); ) {
      String oneKey = (String)k.nextElement();
      Enumeration values =
        getVector(oneKey).elements();
      while(values.hasMoreElements())
        p.println((String)values.nextElement());
    }
  }
}
// Tokenizes every .java file in the current directory,
// recording the class/interface names and the
// identifiers found in each file, then either checks
// their capitalization or stores the class names in a
// repository file.
public class ClassScanner {
  private File path;
  private String[] fileList;
  // Known class names, stored as name -> name:
  private Properties classes = new Properties();
  // File name -> class names / identifiers seen in it:
  private MultiStringMap
    classMap = new MultiStringMap(),
    identMap = new MultiStringMap();
  private StreamTokenizer in;
  // Scans all .java files in the current directory.
  public ClassScanner() {
    path = new File(".");
    fileList = path.list(new JavaFilter());
    // list() returns null when the directory cannot
    // be read; treat that as "nothing to scan".
    if(fileList == null)
      fileList = new String[0];
    for(int i = 0; i < fileList.length; i++) {
      System.out.println(fileList[i]);
      scanListing(fileList[i]);
    }
  }
  // Tokenizes one file, filling classes, classMap and
  // identMap. The file is closed when scanning ends,
  // whether normally or through an exception.
  void scanListing(String fname) {
    Reader source = null;
    try {
      source = new BufferedReader(
        new FileReader(fname));
      in = new StreamTokenizer(source);
      // Doesn't seem to work:
      // in.slashStarComments(true);
      // in.slashSlashComments(true);
      in.ordinaryChar('/');
      in.ordinaryChar('.');
      in.wordChars('_', '_');
      in.eolIsSignificant(true);
      while(in.nextToken() !=
            StreamTokenizer.TT_EOF) {
        if(in.ttype == '/')
          eatComments();
        else if(in.ttype ==
                StreamTokenizer.TT_WORD) {
          if(in.sval.equals("class") ||
             in.sval.equals("interface")) {
            // Get class name:
            while(in.nextToken() !=
                  StreamTokenizer.TT_EOF
                  && in.ttype !=
                  StreamTokenizer.TT_WORD)
              ;
            // The keyword was the last word in the
            // file: there is no name to record and
            // sval is null, so stop scanning.
            if(in.ttype == StreamTokenizer.TT_EOF)
              break;
            classes.put(in.sval, in.sval);
            classMap.add(fname, in.sval);
          }
          if(in.sval.equals("import") ||
             in.sval.equals("package"))
            discardLine();
          else // It's an identifier or keyword
            identMap.add(fname, in.sval);
        }
      }
    } catch(IOException e) {
      e.printStackTrace();
    } finally {
      if(source != null) {
        try {
          source.close();
        } catch(IOException e) {
          e.printStackTrace();
        }
      }
    }
  }
  // Consumes tokens up to and including the next
  // end-of-line (or end of file).
  void discardLine() {
    try {
      while(in.nextToken() !=
            StreamTokenizer.TT_EOF
            && in.ttype !=
            StreamTokenizer.TT_EOL)
        ; // Throw away tokens to end of line
    } catch(IOException e) {
      e.printStackTrace();
    }
  }
  // StreamTokenizer's comment removal seemed
  // to be broken. This extracts them. Called after a
  // '/' token: a second '/' discards the rest of the
  // line, a '*' skips to the closing "*/", and anything
  // else is pushed back for the main loop.
  void eatComments() {
    try {
      if(in.nextToken() !=
         StreamTokenizer.TT_EOF) {
        if(in.ttype == '/')
          discardLine();
        else if(in.ttype != '*')
          in.pushBack();
        else
          while(true) {
            if(in.nextToken() ==
               StreamTokenizer.TT_EOF)
              break;
            if(in.ttype == '*') {
              if(in.nextToken() ==
                 StreamTokenizer.TT_EOF)
                break;
              if(in.ttype == '/')
                break;
              // Not the end of the comment. Re-examine
              // this token, since it may itself be the
              // '*' of a closing "*/" (as in "**/").
              in.pushBack();
            }
          }
      }
    } catch(IOException e) {
      e.printStackTrace();
    }
  }
  // Returns all known class names.
  public String[] classNames() {
    String[] result = new String[classes.size()];
    Enumeration e = classes.keys();
    int i = 0;
    while(e.hasMoreElements())
      result[i++] = (String)e.nextElement();
    return result;
  }
  // Reports every scanned class name that starts with
  // a lowercase letter.
  public void checkClassNames() {
    Enumeration files = classMap.keys();
    while(files.hasMoreElements()) {
      String file = (String)files.nextElement();
      Vector cls = classMap.getVector(file);
      for(int i = 0; i < cls.size(); i++) {
        String className =
          (String)cls.elementAt(i);
        if(Character.isLowerCase(
            className.charAt(0)))
          System.out.println(
            "class capitalization error, file: "
            + file + ", class: "
            + className);
      }
    }
  }
  // Reports, once per file, every identifier that
  // starts with an uppercase letter but is not a known
  // class name.
  public void checkIdentNames() {
    Enumeration files = identMap.keys();
    Vector reportSet = new Vector();
    while(files.hasMoreElements()) {
      String file = (String)files.nextElement();
      Vector ids = identMap.getVector(file);
      for(int i = 0; i < ids.size(); i++) {
        String id =
          (String)ids.elementAt(i);
        // contains() searches the values; entries added
        // by scanListing() map each name to itself.
        if(!classes.contains(id)) {
          // Ignore identifiers of length 3 or
          // longer that are all uppercase
          // (probably static final values):
          if(id.length() >= 3 &&
             id.equals(
               id.toUpperCase()))
            continue;
          // Check to see if first char is upper:
          if(Character.isUpperCase(id.charAt(0))){
            if(reportSet.indexOf(file + id)
                == -1){ // Not reported yet
              reportSet.addElement(file + id);
              System.out.println(
                "Ident capitalization error in:"
                + file + ", ident: " + id);
            }
          }
        }
      }
    }
  }
  static final String usage =
    "Usage: \n" +
    "ClassScanner classnames -a\n" +
    "\tAdds all the class names in this \n" +
    "\tdirectory to the repository file \n" +
    "\tcalled 'classnames'\n" +
    "ClassScanner classnames\n" +
    "\tChecks all the java files in this \n" +
    "\tdirectory for capitalization errors, \n" +
    "\tusing the repository file 'classnames'";
  private static void usage() {
    System.err.println(usage);
    System.exit(1);
  }
  // args[0] names the repository file; an optional
  // "-a" as args[1] writes the class names to it
  // instead of running the checks.
  public static void main(String[] args) {
    if(args.length < 1 || args.length > 2)
      usage();
    ClassScanner c = new ClassScanner();
    File old = new File(args[0]);
    if(old.exists()) {
      try {
        // Try to open an existing
        // properties file:
        InputStream oldlist =
          new BufferedInputStream(
            new FileInputStream(old));
        c.classes.load(oldlist);
        oldlist.close();
      } catch(IOException e) {
        System.err.println("Could not open "
          + old + " for reading");
        System.exit(1);
      }
    }
    if(args.length == 1) {
      c.checkClassNames();
      c.checkIdentNames();
    }
    // Write the class names to a repository:
    if(args.length == 2) {
      if(!args[1].equals("-a"))
        usage();
      try {
        BufferedOutputStream out =
          new BufferedOutputStream(
            new FileOutputStream(args[0]));
        // store() reports write failures as an
        // IOException; the deprecated save() hid them.
        c.classes.store(out,
          "Classes found by ClassScanner.java");
        out.close();
      } catch(IOException e) {
        System.err.println(
          "Could not write " + args[0]);
        System.exit(1);
      }
    }
  }
}
// FilenameFilter that accepts only names ending in
// ".java".
class JavaFilter implements FilenameFilter {
  // The dir argument is not consulted. Only the last
  // path component of name is tested, with surrounding
  // whitespace ignored.
  public boolean accept(File dir, String name) {
    File asFile = new File(name);
    String bare = asFile.getName().trim();
    return bare.endsWith(".java");
  }
} ///:~
为简化程序,来自标准
针对特定目录中的文件,为找出相应的类与标识符,我们使用了两个
在
若记号是个’/’,意味着它可能是个注释,所以就调用
如果单词是
只要在主解析循环中碰到一个正斜杠,就会调用
为方便起见,
接下来的两个方法是实际进行检查的地方。在
在
这个方法并不是报告每一个以大写字符开头的标识符,而是跟踪那些已在一个名为
程序列表剩下的部分由
无论准备构建一个“仓库”,还是准备使用一个现成的,都必须尝试打开现有仓库。通过创建一个