/*
* Created on Apr 29, 2005
*/
/*
* Copyright (c) 2002-5 Gregor Heinrich. All rights reserved. Redistribution and
* use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met: 1. Redistributions of source
* code must retain the above copyright notice, this list of conditions and the
* following disclaimer. 2. Redistributions in binary form must reproduce the
* above copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the distribution.
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESSED OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.knowceans.freshmind.cluster;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashSet;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * BibFileMod takes a LaTeX document and a list of BibTeX database files and
 * creates a new BibTeX database that contains the subset of the union of
 * bibliographic entries referenced in the LaTeX document.
 * <p>
 * Citation keys are collected from the "\cite*{}" commands of the LaTeX file
 * (plain and natbib packages and BibTeX styles) with a regular expression.
 * Text following an unescaped "%" on a line is treated as a LaTeX comment and
 * ignored.
 * <p>
 * BibTeX entries are located by a regular expression on the entry header
 * ("@type{key,") and then extended to the matching closing brace, so nested
 * braces inside field values do not cut an entry short.
 * <p>
 * If no BibTeX file is given on the command line, the file named by the last
 * "\bibliography{}" command of the LaTeX document is used as the source.
 * <p>
 * TODO: make the single-file "\bibliography{}" fallback consistent with the
 * multiple BibTeX files case (comma-separated lists, -o option for file
 * output).
 *
 * @author heinrich
 */
public class BibFileMod {

    /** Matches a "\cite*{...}" command; group 1 is the raw, comma-separated key list. */
    private static final Pattern CITATION_PATTERN = Pattern
        .compile("\\\\cite[^\\{]*\\{([^\\{\\}]+)\\}");

    /** Matches "\bibliography{...}"; group 1 is the referenced database name. */
    private static final Pattern BIBFILE_PATTERN = Pattern
        .compile("\\\\bibliography\\{([^\\}]+)\\}");

    /** Matches the header of a BibTeX entry, "@type{key,"; group 1 is the key. */
    private static final Pattern ENTRY_HEADER_PATTERN = Pattern
        .compile("@\\w+\\s*\\{\\s*([^,\\s\\{\\}]+)\\s*,");

    /** Matches a LaTeX line comment: an unescaped "%" and the rest of the line. */
    private static final Pattern LINE_COMMENT_PATTERN = Pattern
        .compile("(?<!\\\\)%.*");

    /** Matches any single whitespace character. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");

    /**
     * BibTeX database referenced by the most recently parsed LaTeX file, or
     * null if that file contains no "\bibliography{}" command.
     */
    private static String bibtexfile = null;

    /**
     * Command line entry point.
     *
     * @param args args[0] is the LaTeX file; args[1..] are the BibTeX
     *        databases to search. The result is written to args[0] + ".bib".
     */
    public static void main(String[] args) {
        System.out.println("BibFileMod BibTeX file modifier");
        if (args.length < 1) {
            System.out.println("Use with arguments: <texfile> (<bibfile>)*");
            System.out.println("Writes to <texfile>.bib");
            System.out
                .println("If no bib file given, the one referenced in the \n"
                    + "tex file is used as source.");
            System.exit(1);
        }
        HashSet<String> citations = getCitations(args[0]);
        if (bibtexfile != null) {
            System.out.println("Referenced BibTeX file: "
                + withBibExtension(bibtexfile));
        }
        System.out.println("Found citation keys:");
        for (String cit : citations) {
            System.out.println(cit);
        }
        TreeMap<String, String> bibentries = new TreeMap<String, String>();
        for (int i = 1; i < args.length; i++) {
            getBibEntries(args[i], citations, bibentries);
        }
        if (args.length < 2) {
            if (bibtexfile != null) {
                // fall back to the database named in the LaTeX document
                getBibEntries(withBibExtension(bibtexfile), citations,
                    bibentries);
            } else {
                System.err.println("No BibTeX file given and none referenced"
                    + " in " + args[0]);
            }
        }
        String outfile = args[0] + ".bib";
        System.out.println("Write to " + outfile);
        write(outfile, bibentries);
    }

    /**
     * Write the collected entries to a new BibTeX file, sorted by key (the
     * iteration order of the TreeMap). An I/O failure is reported on stderr.
     *
     * @param filename path of the BibTeX file to create
     * @param bibentries map of citation key -> BibTeX entry string
     */
    private static void write(String filename,
        TreeMap<String, String> bibentries) {
        // try-with-resources closes (and flushes) the writer even on failure
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(filename))) {
            bw
                .write("%% Created with BibFileMod by GH (http://www.arbylon.net/projects)");
            bw.write("\n");
            System.out.println("Write entries found in database:");
            for (String key : bibentries.keySet()) {
                System.out.println(key);
                bw.write(bibentries.get(key));
                bw.write("\n");
            }
        } catch (IOException e) {
            System.err.println("Cannot write " + filename + ": " + e);
        }
    }

    /**
     * Open the BibTeX file and add to the bibentries map all BibTeX entries
     * whose key is in citations. An entry whose key is already in the map
     * replaces the previous one, so the last occurrence wins.
     * <p>
     * An entry starts at its "@type{key," header and ends at the brace that
     * balances the opening one. Headers without a balancing brace are
     * skipped.
     *
     * @param bibfile BibTeX file path
     * @param citations set of citation keys
     * @param bibentries map of citation key -> BibTeX entry string
     */
    private static void getBibEntries(String bibfile,
        HashSet<String> citations, TreeMap<String, String> bibentries) {
        String content = readFile(bibfile, "\n", false);
        Matcher m = ENTRY_HEADER_PATTERN.matcher(content);
        int from = 0;
        while (m.find(from)) {
            String ref = m.group(1);
            // the header pattern allows no brace before the entry's opening one
            int open = content.indexOf('{', m.start());
            int close = findClosingBrace(content, open);
            if (close < 0) {
                // unbalanced entry: resume the search right after its header
                from = m.end();
                continue;
            }
            if (citations.contains(ref)) {
                bibentries.put(ref, content.substring(m.start(), close + 1));
            }
            from = close + 1;
        }
    }

    /**
     * Find the closing brace that balances the opening brace at index open.
     *
     * @param text text to scan
     * @param open index of an opening brace in text
     * @return index of the balancing closing brace, or -1 if there is none
     */
    private static int findClosingBrace(String text, int open) {
        int depth = 0;
        for (int i = open; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '{') {
                depth++;
            } else if (c == '}') {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Parse the LaTeX file for citations with the usual "\cite*{}" format and
     * remember the database of the last "\bibliography{}" command in
     * {@link #bibtexfile} (reset to null first). A relative database name is
     * resolved against the directory of the LaTeX file.
     *
     * @param texfile LaTeX file path
     * @return set of citation keys found; empty if the file cannot be read
     */
    private static HashSet<String> getCitations(String texfile) {
        HashSet<String> citations = new HashSet<String>();
        // forget the database found by a previous invocation
        bibtexfile = null;
        String content = readFile(texfile, " ", true);

        Matcher m = CITATION_PATTERN.matcher(content);
        while (m.find()) {
            for (String oneref : m.group(1).split(",")) {
                String key = oneref.trim();
                // a key is a single token; anything else is not a key list
                if (key.length() > 0 && !WHITESPACE_PATTERN.matcher(key).find()) {
                    citations.add(key);
                }
            }
        }

        m = BIBFILE_PATTERN.matcher(content);
        while (m.find()) {
            bibtexfile = m.group(1).trim();
        }
        if (bibtexfile != null && !new File(bibtexfile).isAbsolute()) {
            // getParentFile() is null for a tex file given without directory;
            // File(null, child) then resolves against the working directory
            File resolved = new File(new File(texfile).getParentFile(),
                bibtexfile);
            try {
                bibtexfile = resolved.getCanonicalPath();
            } catch (IOException e) {
                bibtexfile = resolved.getAbsolutePath();
            }
        }
        return citations;
    }

    /**
     * Read a text file line by line into one string. A read failure is
     * reported on stderr and whatever was read up to that point is returned.
     *
     * @param filename file to read
     * @param separator string appended after every line
     * @param stripTexComments if true, drop everything from an unescaped "%"
     *        to the end of each line
     * @return the file content, empty if the file cannot be opened
     */
    private static String readFile(String filename, String separator,
        boolean stripTexComments) {
        StringBuilder content = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (stripTexComments) {
                    line = LINE_COMMENT_PATTERN.matcher(line).replaceFirst("");
                }
                content.append(line).append(separator);
            }
        } catch (IOException e) {
            System.err.println("Cannot read " + filename + ": " + e);
        }
        return content.toString();
    }

    /**
     * Append ".bib" to a database name unless it already ends with it
     * (compared case-insensitively).
     *
     * @param name database name or path
     * @return name with a ".bib" extension
     */
    private static String withBibExtension(String name) {
        if (name.regionMatches(true, name.length() - 4, ".bib", 0, 4)) {
            return name;
        }
        return name + ".bib";
    }
}