In financial databases, we often face difficulties with handling large amount of trade data, which include XML. In a lot of cases I have seen XML stored as either XMLTYPE or CLOB, whilst 99.9% of the XML parsing/processing is actually done outside the database, in Java.
Not only this use a large amount of disk space – in some particular system I have seen 2 million trades equates to around 300GB worth of XML data — this also means that everytime we transfer the data to the java side for processing, we have to transfer large amount of data across the network. Wasteful.
The alternative is to either move the processing to the database (which is not covered in the scope of this article), but to try a different approach, i.e. storing the XML data as compressed.
Compressed, XML data could shrink by as much as 90% or better, which means not only the disk usage is going to shrink considerably, but also the network traffic. And if we have multiple layer of applications all using this XML, the benefit will be multiplied considerably.
To do this, here is some examples of what is needed:
1) a java procedure to zip data
CREATE OR REPLACE AND RESOLVE JAVA SOURCE NAMED “ZipXml” as package com.oracle;
import java.io.*;
import java.util.zip.*;
import java.sql.*;
import oracle.sql.*;
public class ZipXml
{
public static void zip(oracle.sql.CLOB src, oracle.sql.BLOB dst[]) throws Exception {
InputStream in = null;
OutputStream out = null;
try {
in = src.getAsciiStream();
out = new GZIPOutputStream(dst[0].getBinaryOutputStream());
byte[] buffer = new byte[1024];
for (int read = 0; (read = in.read(buffer)) > 0; ) {
out.write(buffer, 0, read);
}
out.flush();
} finally {
if (in != null) {
try { in.close(); } catch(IOException e) { e.printStackTrace(); }
}
if (out != null) {
try { out.close(); } catch(IOException e) { e.printStackTrace(); }
}
}
}
};
/
2) a java procedure to unzip data
CREATE OR REPLACE AND RESOLVE JAVA SOURCE NAMED “UnzipXml” as package com.oracle;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.GZIPInputStream;
public class UnzipXml
{
public static void unzipClob(oracle.sql.BLOB src, oracle.sql.CLOB dst[]) {
ByteArrayOutputStream bout = null;
InputStream in = null;
try {
dst[0].putString(1, “Pending Unzip”);
bout = new ByteArrayOutputStream ();
in = new GZIPInputStream(src.getBinaryStream());
byte[] buffer = new byte[1024];
for (int read = 0; (read = in.read(buffer)) > 0; ) {
bout.write(buffer, 0, read);
}
bout.flush();
String s = new String(bout.toByteArray());
dst[0].putString(1, s);
} catch (Exception e) {
try { dst[0].putString(1, e.getMessage()); }
catch (Exception es) {}
e.printStackTrace();
} finally {
if (in != null) {
try { in.close(); } catch(IOException e) { e.printStackTrace(); }
}
}
}
public static void unzipBlob(oracle.sql.BLOB src, oracle.sql.BLOB dst[]) {
OutputStream out = null;
InputStream in = null;
try {
out = dst[0].getBinaryOutputStream();
in = new GZIPInputStream(src.getBinaryStream());
byte[] buffer = new byte[1024];
for (int read = 0; (read = in.read(buffer)) > 0; ) {
out.write(buffer, 0, read);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (in != null) {
try { in.close(); } catch(IOException e) { e.printStackTrace(); }
}
if (out != null) {
try { out.close(); } catch(IOException e) { e.printStackTrace(); }
}
}
}
};
/
3) a package to 2 simple functions, zip and unzip
CREATE OR REPLACE PACKAGE xml_zip_pkg AS
PROCEDURE ZIP(src IN CLOB, dst IN OUT BLOB);
PROCEDURE UNZIP(src IN BLOB, dst IN OUT CLOB);
FUNCTION ZIP(src IN CLOB) RETURN BLOB;
FUNCTION UNZIP (src IN BLOB) RETURN CLOB;
END;
/
CREATE OR REPLACE PACKAGE BODY xml_zip_pkg AS
PROCEDURE ZIP(src IN CLOB, dst IN OUT BLOB)
AS LANGUAGE JAVA
NAME ‘com.oracle.ZipXml.zip(oracle.sql.CLOB, oracle.sql.BLOB[])’;
PROCEDURE UNZIP(src IN BLOB, dst IN OUT CLOB)
AS LANGUAGE JAVA
NAME ‘com.oracle.UnzipXml.unzipClob(oracle.sql.BLOB, oracle.sql.CLOB[])’;
FUNCTION ZIP(src IN clob) RETURN blob
IS
lvResult blob;
BEGIN
IF src is not null THEN
DBMS_LOB.createtemporary(lvResult, true, DBMS_LOB.CALL);
xml_zip_pkg.ZIP(src, lvResult);
END IF;
RETURN lvResult;
END ZIP;
FUNCTION UNZIP(src IN blob) RETURN clob
IS
lvResult clob;
BEGIN
IF src is not null THEN
DBMS_LOB.createtemporary(lvResult, true, DBMS_LOB.CALL);
xml_zip_pkg.UNZIP(src, lvResult);
END IF;
RETURN lvResult;
END UNZIP;
END;
/
An example usage will be as followed:
create
table hd_trade as select * from trade where rownum < 10 ;
alter
table hd_trade add zip_trade_xml blob;
update
hd_trade set zip_trade_xml = xml_zip_pkg.zip(xmltype.getclobval(trade_xml));commit;
To perform manipulation on the fly
select
substr(extract(xmltype(xml_zip_pkg.unzip(zip_trade_xml)),‘//notional’),1) from hd_trade where rownum=1
select
dbms_lob.getlength(xmltype.getclobval(trade_xml)) original_length, dbms_lob.getlength(zip_trade_xml) zipped_length from hd_trade