|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
edu.iastate.jtm.jmed.StreamXMLParser
edu.iastate.jtm.jmed.MedlineParser
public class MedlineParser
Extract citation info from Medline/PubMed XML files.
| Field Summary |
|---|
| Fields inherited from class edu.iastate.jtm.jmed.StreamXMLParser |
|---|
unit |
| Constructor Summary | |
|---|---|
MedlineParser()
Creates a new blank instance of MedlineParser in default MEDLINE mode. |
|
MedlineParser(boolean pubmode)
|
|
MedlineParser(org.dom4j.io.SAXReader xp)
Creates a new blank instance of MedlineParser in default MEDLINE mode. |
|
MedlineParser(org.dom4j.io.SAXReader xp,
boolean pubmode)
|
|
| Method Summary | |
|---|---|
void |
extractPlainText(java.io.File infile,
java.io.File outDir)
Extract plain text abstracts from an XML file into a directory. |
void |
extractPlainText(java.io.File infile,
java.lang.String outDir,
int size)
Extract plain text abstracts from an XML file into multiple directories. |
void |
extractPlainText(java.io.InputStream instream,
java.io.File outDir)
Extract plain text abstracts from an input stream into a directory. |
void |
extractPlainText(java.io.InputStream instream,
java.lang.String outDir,
int size)
Extract plain text abstracts from an input stream into multiple directories. |
void |
extractPmids(java.io.File output)
Extract all PMIDs to a single file. |
void |
extractPmids(java.lang.String prefix,
int size)
Extract all PMIDs to multiple files. |
boolean |
isPubMode()
|
static void |
main(java.lang.String[] args)
For test. |
protected boolean |
open(boolean parseHeader)
|
void |
setAutoAdjustMode(boolean auto)
|
void |
setPubMode(boolean mode)
|
void |
splitXml(java.io.File infile,
java.lang.String prefix,
int size,
boolean compress)
Split a large XML file into small ones. |
void |
splitXml(java.io.InputStream instream,
java.lang.String prefix,
int size,
boolean compress)
Split a large XML stream into small files. |
| Methods inherited from class edu.iastate.jtm.jmed.StreamXMLParser |
|---|
close, copyUnit, getHeader, getRootName, getSingleField, getUnit, getUnitCount, nextUnit, nextUnit, open, open, open, open, open, open, setIgnoreXmlError, setOutputPlain, setRefillZone, setRootMatcher, setSaxParser, setUnitMatcher, setVerbose |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public MedlineParser(boolean pubmode)
throws org.xml.sax.SAXException
org.xml.sax.SAXException
public MedlineParser()
throws org.xml.sax.SAXException
org.xml.sax.SAXException
public MedlineParser(org.dom4j.io.SAXReader xp,
boolean pubmode)
throws org.xml.sax.SAXException
org.xml.sax.SAXException
public MedlineParser(org.dom4j.io.SAXReader xp)
throws org.xml.sax.SAXException
org.xml.sax.SAXException
| Method Detail |
|---|
protected boolean open(boolean parseHeader)
throws java.io.IOException
open in class StreamXMLParser
java.io.IOException
public void setAutoAdjustMode(boolean auto)
public void setPubMode(boolean mode)
public boolean isPubMode()
public void extractPmids(java.io.File output)
throws org.dom4j.DocumentException,
java.io.IOException
output - the output file.
org.dom4j.DocumentException
java.io.IOException
public void extractPmids(java.lang.String prefix,
int size)
throws org.dom4j.DocumentException,
java.io.IOException
prefix - prefix of output filenames.
size - number of PMIDs in each file.
org.dom4j.DocumentException
java.io.IOException
public void extractPlainText(java.io.File infile,
java.io.File outDir)
throws org.dom4j.DocumentException,
java.io.IOException
infile - input XML file
outDir - output directory
org.dom4j.DocumentException
java.io.IOException
public void extractPlainText(java.io.File infile,
java.lang.String outDir,
int size)
throws org.dom4j.DocumentException,
java.io.IOException
infile - input XML file
outDir - prefix of output directories
size - number of abstracts per directory
org.dom4j.DocumentException
java.io.IOException
public void extractPlainText(java.io.InputStream instream,
java.io.File outDir)
throws org.dom4j.DocumentException,
java.io.IOException
instream - input stream
outDir - output directory
org.dom4j.DocumentException
java.io.IOException
public void extractPlainText(java.io.InputStream instream,
java.lang.String outDir,
int size)
throws org.dom4j.DocumentException,
java.io.IOException
instream - input stream
outDir - prefix of output directories
size - number of abstracts per directory
org.dom4j.DocumentException
java.io.IOException
public void splitXml(java.io.File infile,
java.lang.String prefix,
int size,
boolean compress)
throws org.dom4j.DocumentException,
java.io.IOException
infile - input XML file
prefix - prefix of output XML files
size - number of abstracts in each output file
org.dom4j.DocumentException
java.io.IOException
public void splitXml(java.io.InputStream instream,
java.lang.String prefix,
int size,
boolean compress)
throws org.dom4j.DocumentException,
java.io.IOException
instream - input stream
prefix - prefix of output XML files
size - number of abstracts in each output file
org.dom4j.DocumentException
java.io.IOException
public static void main(java.lang.String[] args)
args - the command line arguments
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||