一道面试题(很有意思的)
我有一个文本文件(例如:附件中的chars.txt),
我想写个Java程序,读一遍这个
文件,然后打印出来文件中英文字母(a 到 z)出现的次数。不区分大小写。
你能不能写个完整的Java程序,尽快Email给我?
写的过程中,请注意:代码规范,程序运行效率。
一个创业公司出的题,哈哈?
我第一个想到的解决办法就是用map来做统计。
代码如下:
package cn.com.linjiawang.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
public class CharTools {
/**
* 对Map按照键(key)来排序
* @param map 表示要排序的map
* @return Map.Entry[]
*/
@SuppressWarnings( { "unused", "unchecked" })
private static Map.Entry[] getSortedHashtableByKey(
final Map<Object, Object> map) {
Set<Entry<Object, Object>> set = map.entrySet();
Map.Entry[] entries = (Map.Entry[]) set.toArray(new Map.Entry[set
.size()]);
Arrays.sort(entries, new Comparator() {
public int compare(Object arg0, Object arg1) {
Object key1 = ((Map.Entry) arg0).getKey();
Object key2 = ((Map.Entry) arg1).getKey();
return ((Comparable) key1).compareTo(key2);
}
});
return entries;
}
/**
* 对Map按照值(value)来排序
* @param map 表示要排序的map
* @return Map.Entry[]
*/
@SuppressWarnings( { "unused", "unchecked" })
public static Map.Entry[] getSortedHashtableByValue(Hashtable h) {
Set set = h.entrySet();
Map.Entry[] entries = (Map.Entry[]) set.toArray(new Map.Entry[set
.size()]);
Arrays.sort(entries, new Comparator() {
public int compare(Object arg0, Object arg1) {
int key1 = Integer.parseInt(((Map.Entry) arg0).getValue()
.toString());
int key2 = Integer.parseInt(((Map.Entry) arg1).getValue()
.toString());
return ((Comparable) key1).compareTo(key2);
}
});
return entries;
}
/**
* 统计字符中字母出现的频率
* @param wordMap
* @param str 表示要统计的字符串
*/
private static void wordCount(Map<Character, Integer> wordMap, String str) {
str = str.toLowerCase();
char[] chars = str.toCharArray();
for (char c : chars) {
if (c >= 'a' && c <= 'z') {
if (wordMap.get(c) == null) {
wordMap.put(c, 1);
} else {
int count = Integer.valueOf(wordMap.get(c));
wordMap.remove(c);
wordMap.put(c, count + 1);
}
} else {
continue;
}
}
}
/**
* 读取文件并且统计文件中的词频
* @param filePath
* @return
*/
@SuppressWarnings("finally")
public static Map<Character, Integer> loadFile(String filePath) {
Map<Character, Integer> wordMap = new HashMap<Character, Integer>();
File file = null;
FileReader fr = null;
BufferedReader br = null;
String str = null;
try {
file = new File(filePath);
if (file.exists()) {
fr = new FileReader(file);
br = new BufferedReader(fr);
while ((str = br.readLine()) != null) {
wordCount(wordMap, str);
}
} else {
System.out.println("该文件不存在");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null) {
br.close();
br = null;
}
if (fr != null) {
fr.close();
fr = null;
}
if(file !=null){
file = null;
}
} catch (final IOException e) {
e.printStackTrace();
}
}
return wordMap;
}
@SuppressWarnings("unchecked")
public static void main(String[] args) {
long begin = System.currentTimeMillis();
Map map = loadFile("d://chars.txt");
Map.Entry[] entrys = getSortedHashtableByKey(map);
for (Entry entry : entrys) {
char word = (Character) entry.getKey();
int wordCount = (Integer) entry.getValue();
System.out.println("字母" + word + "出现" + wordCount);
}
System.out.println("执行时间" + (System.currentTimeMillis() - begin) + "ms");
}
}
接着对mail给我说,map统计是可以的,问我可以不可以用int[] 来做:
费了40分钟写了一个用int统计的代码如下:
package cn.com.linjiawang.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
public class CharTools {
/**
* 统计字符中字母出现的频率
* @param wordCounts 统计数组
* @param str 表示要统计的字符串
*/
private static void wordCount(int[] wordCounts, String str) {
str = str.toLowerCase();
char[] chars = str.toCharArray();
int tempCount = 0;
for (char c : chars) {
if (c >= 'a' && c <= 'z') {
tempCount = 25 - ('z' - c);
wordCounts[tempCount]++;
} else {
continue;
}
}
}
/**
* 读取文件并且统计文件中的词频
* @param filePath
* @return
*/
@SuppressWarnings("finally")
public static int[] loadFile(String filePath) {
int[] wordCounts = new int[26];
File file = null;
FileReader fr = null;
BufferedReader br = null;
String str = null;
try {
file = new File(filePath);
if (file.exists()) {
fr = new FileReader(file);
br = new BufferedReader(fr);
while ((str = br.readLine()) != null) {
wordCount(wordCounts, str);
}
} else {
System.out.println("该文件不存在");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null) {
br.close();
br = null;
}
if (fr != null) {
fr.close();
fr = null;
}
if (file != null) {
file = null;
}
} catch (final IOException e) {
e.printStackTrace();
}
}
return wordCounts;
}
@SuppressWarnings("unchecked")
public static void main(String[] args) {
long begin = System.currentTimeMillis();
int []wordCounts = loadFile("d://chars.txt");
char c = 'a';
for(int wordCount:wordCounts) {
if(wordCount>0){
System.out.print(c+"->");
System.out.println(wordCount);
}
c++;
}
System.out.println("执行时间" + (System.currentTimeMillis() - begin) + "ms");
}
}
两段代码改动不大,但是效率却差了好多,
同样一个 19.8mb大小的字典文件,用map来做数字统计费时 3904ms.
用 int[]来统计费时是1141ms
这就是效率呀
文件,然后打印出来文件中英文字母(a 到 z)出现的次数。不区分大小写。
你能不能写个完整的Java程序,尽快Email给我?
写的过程中,请注意:代码规范,程序运行效率。
一个创业公司出的题,哈哈?
我第一个想到的解决办法就是用map来做统计。
代码如下:
package cn.com.linjiawang.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
public class CharTools {
/**
* 对Map按照键(key)来排序
* @param map 表示要排序的map
* @return Map.Entry[]
*/
@SuppressWarnings( { "unused", "unchecked" })
private static Map.Entry[] getSortedHashtableByKey(
final Map<Object, Object> map) {
Set<Entry<Object, Object>> set = map.entrySet();
Map.Entry[] entries = (Map.Entry[]) set.toArray(new Map.Entry[set
.size()]);
Arrays.sort(entries, new Comparator() {
public int compare(Object arg0, Object arg1) {
Object key1 = ((Map.Entry) arg0).getKey();
Object key2 = ((Map.Entry) arg1).getKey();
return ((Comparable) key1).compareTo(key2);
}
});
return entries;
}
/**
* 对Map按照值(value)来排序
* @param map 表示要排序的map
* @return Map.Entry[]
*/
@SuppressWarnings( { "unused", "unchecked" })
public static Map.Entry[] getSortedHashtableByValue(Hashtable h) {
Set set = h.entrySet();
Map.Entry[] entries = (Map.Entry[]) set.toArray(new Map.Entry[set
.size()]);
Arrays.sort(entries, new Comparator() {
public int compare(Object arg0, Object arg1) {
int key1 = Integer.parseInt(((Map.Entry) arg0).getValue()
.toString());
int key2 = Integer.parseInt(((Map.Entry) arg1).getValue()
.toString());
return ((Comparable) key1).compareTo(key2);
}
});
return entries;
}
/**
* 统计字符中字母出现的频率
* @param wordMap
* @param str 表示要统计的字符串
*/
private static void wordCount(Map<Character, Integer> wordMap, String str) {
str = str.toLowerCase();
char[] chars = str.toCharArray();
for (char c : chars) {
if (c >= 'a' && c <= 'z') {
if (wordMap.get(c) == null) {
wordMap.put(c, 1);
} else {
int count = Integer.valueOf(wordMap.get(c));
wordMap.remove(c);
wordMap.put(c, count + 1);
}
} else {
continue;
}
}
}
/**
* 读取文件并且统计文件中的词频
* @param filePath
* @return
*/
@SuppressWarnings("finally")
public static Map<Character, Integer> loadFile(String filePath) {
Map<Character, Integer> wordMap = new HashMap<Character, Integer>();
File file = null;
FileReader fr = null;
BufferedReader br = null;
String str = null;
try {
file = new File(filePath);
if (file.exists()) {
fr = new FileReader(file);
br = new BufferedReader(fr);
while ((str = br.readLine()) != null) {
wordCount(wordMap, str);
}
} else {
System.out.println("该文件不存在");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null) {
br.close();
br = null;
}
if (fr != null) {
fr.close();
fr = null;
}
if(file !=null){
file = null;
}
} catch (final IOException e) {
e.printStackTrace();
}
}
return wordMap;
}
@SuppressWarnings("unchecked")
public static void main(String[] args) {
long begin = System.currentTimeMillis();
Map map = loadFile("d://chars.txt");
Map.Entry[] entrys = getSortedHashtableByKey(map);
for (Entry entry : entrys) {
char word = (Character) entry.getKey();
int wordCount = (Integer) entry.getValue();
System.out.println("字母" + word + "出现" + wordCount);
}
System.out.println("执行时间" + (System.currentTimeMillis() - begin) + "ms");
}
}
接着对mail给我说,map统计是可以的,问我可以不可以用int[] 来做:
费了40分钟写了一个用int统计的代码如下:
package cn.com.linjiawang.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
public class CharTools {
/**
* 统计字符中字母出现的频率
* @param wordCounts 统计数组
* @param str 表示要统计的字符串
*/
private static void wordCount(int[] wordCounts, String str) {
str = str.toLowerCase();
char[] chars = str.toCharArray();
int tempCount = 0;
for (char c : chars) {
if (c >= 'a' && c <= 'z') {
tempCount = 25 - ('z' - c);
wordCounts[tempCount]++;
} else {
continue;
}
}
}
/**
* 读取文件并且统计文件中的词频
* @param filePath
* @return
*/
@SuppressWarnings("finally")
public static int[] loadFile(String filePath) {
int[] wordCounts = new int[26];
File file = null;
FileReader fr = null;
BufferedReader br = null;
String str = null;
try {
file = new File(filePath);
if (file.exists()) {
fr = new FileReader(file);
br = new BufferedReader(fr);
while ((str = br.readLine()) != null) {
wordCount(wordCounts, str);
}
} else {
System.out.println("该文件不存在");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null) {
br.close();
br = null;
}
if (fr != null) {
fr.close();
fr = null;
}
if (file != null) {
file = null;
}
} catch (final IOException e) {
e.printStackTrace();
}
}
return wordCounts;
}
@SuppressWarnings("unchecked")
public static void main(String[] args) {
long begin = System.currentTimeMillis();
int []wordCounts = loadFile("d://chars.txt");
char c = 'a';
for(int wordCount:wordCounts) {
if(wordCount>0){
System.out.print(c+"->");
System.out.println(wordCount);
}
c++;
}
System.out.println("执行时间" + (System.currentTimeMillis() - begin) + "ms");
}
}
两段代码改动不大,但是效率却差了好多,
同样一个 19.8mb大小的字典文件,用map来做数字统计费时 3904ms.
用 int[]来统计费时是1141ms
这就是效率呀
haohao
2008-08-04 17:06:47
评论:3
阅读:148
引用:0
无题
@2008-08-05 02:50:50 hofman
还能再快3倍,如果去掉readline()的瓶颈的话。
无题
@2008-08-05 00:03:41 hofman
测试之后,发现我的看法是错误的,比较奇怪。但else continue 好象没有意义。
无题
@2008-08-04 23:26:58 hofman
wordCount调用次数过多,可能接近100万,觉得直接放在main里更合适。
