// FileInput.java
/*
* Copyright 2013 University of Glasgow.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package broadwick.io;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
/**
* Simple interface for reading from a file. It simply wraps the java io classes to simplify the input of data.
*/
@Slf4j
public class FileInput implements AutoCloseable {
/**
* Open a handle to a file with the given name.
* @param dataFileName the name of the file.
* @throws IOException if the resource can't be found.
*/
public FileInput(final String dataFileName) throws IOException {
this(dataFileName, DEFAULT_SEP, DEFAULT_ENCODING);
}
/**
* Open a handle to a file with the given name.
* @param dataFileName the name of the file.
* @param sep The single char for the separator (not a list of separator characters).
* @throws IOException if the resource can't be found.
*/
public FileInput(final String dataFileName, final String sep) throws IOException {
this(dataFileName, sep, DEFAULT_ENCODING);
}
/**
* Open a handle to a file with the given name.
* @param dataFileName the name of the file.
* @param sep The single char for the separator (not a list of separator characters).
* @param encoding the encoding used in the file e.g. UTF-8.
* @throws IOException if the resource can't be found.
*/
public FileInput(final String dataFileName, final String sep, final Charset encoding) throws IOException {
fieldSep = Pattern.compile(sep);
fileEncoding = encoding;
path = Paths.get(dataFileName.replace(" ", "\\ "));
reader = Files.newBufferedReader(path, fileEncoding);
}
/**
* Read a line from the input file, split it according to the seperator specified in the constructor and get a list
* of tokens from the line. Comment characters (#) are supported, where any character after the comment character is
* not added to the collection of tokens.
* @return a list of entries in the line.
* @throws IOException if a line cannot be read, e.g if the object was closed.
*/
public final List<String> readLine() throws IOException {
List<String> tokens = new LinkedList<>();
try {
String line = reader.readLine();
// we've not reached the end of the file,
while (line != null && (line.isEmpty() || line.charAt(0) == '#')) {
// read until the next non-empty line
line = reader.readLine();
if (line == null) {
return tokens;
}
}
if (line != null && !line.isEmpty()) {
tokens = tokeniseLine(line);
}
} catch (IOException e) {
final StringBuilder sb = new StringBuilder("Unable to read from ");
sb.append(path.getFileName()).append(", Reason : ").append(e.getLocalizedMessage());
log.error(sb.toString());
sb.append("\n").append(Throwables.getStackTraceAsString(e));
throw new IOException(sb.toString());
}
return tokens;
}
/**
* Get the next (non-comment) line from the file.
* @return a string of the next non-comment line in the file.
* @throws IOException if a line cannot be read, e.g if the object was closed.
*/
public final String readNextLine() throws IOException {
String line = reader.readLine();
try {
// we've not reached the end of the file,
while (line != null && (line.isEmpty() || line.charAt(0) == '#')) {
line = reader.readLine();
}
} catch (IOException e) {
final StringBuilder sb = new StringBuilder("Unable to read from ");
sb.append(path.getFileName()).append(", Reason : ").append(e.getLocalizedMessage());
log.error(sb.toString());
sb.append("\n").append(Throwables.getStackTraceAsString(e));
throw new IOException(sb.toString());
}
if (line != null) {
line = line.trim();
}
return line;
}
/**
* Read the contents of the file into a single string object.
* @return the contents of the file.
* @throws IOException if a line cannot be read, e.g if the object was closed.
*/
public final String read() throws IOException {
final StringBuilder sb = new StringBuilder();
String line = reader.readLine();
while (line != null) {
sb.append(line).append("\n");
line = reader.readLine();
}
return sb.toString();
}
/**
* Split a string (a line read from a file into tokens.
* @param line the line that is to be tokenised.
* @return a list of [string] tokens.
*/
private List<String> tokeniseLine(final String line) {
final List<String> tokens = new LinkedList<>();
if (line != null && !line.isEmpty()) {
String trimmedLine = line.trim();
final int indexOfCommentchar = trimmedLine.indexOf(COMMENT_CHAR);
if (indexOfCommentchar > 0) {
trimmedLine = trimmedLine.substring(0, indexOfCommentchar);
}
for (String token : Splitter.on(fieldSep).trimResults().split(trimmedLine)) {
tokens.add(token.trim());
}
}
return tokens;
}
@Override
public final void close() {
try {
reader.close();
} catch (IOException ex) {
log.error("Failed to close file {}.", path.getFileName());
}
}
/**
* Obtain an iterator for this input file.
* @return the FileInputIterator for this file.
*/
public final FileInputIterator iterator() {
// the reader object will be instantiated by now so we should not have a NullPointerException.
return new FileInputIterator(this);
}
private Path path;
protected BufferedReader reader;
private Pattern fieldSep;
private Charset fileEncoding;
private static final String DEFAULT_SEP = "[\\s,]";
private static final Charset DEFAULT_ENCODING = StandardCharsets.UTF_8;
private static final char COMMENT_CHAR = '#';
}