Thursday, 15 March 2018

External Tables and XMLTAG to Load XML Documents in Oracle Database 12c Release 2 (12.2)

This article describes how to use the XMLTAG clause of an external table to load XML fragments from XML documents in Oracle Database 12c Release 2 (12.2).



1. Setup


Create a file called "/tmp/test1.xml" with the following contents.

<employees>
  <employee><employee_number>7369</employee_number><employee_name>SMITH</employee_name><job>CLERK</job></employee>
  <employee><employee_number>7499</employee_number><employee_name>ALLEN</employee_name><job>SALESMAN</job></employee>
  <employee><employee_number>7521</employee_number><employee_name>WARD</employee_name><job>SALESMAN</job></employee>
  <employee><employee_number>7566</employee_number><employee_name>JONES</employee_name><job>MANAGER</job></employee>
  <employee><employee_number>7654</employee_number><employee_name>MARTIN</employee_name><job>SALESMAN</job></employee>
  <employee><employee_number>7698</employee_number><employee_name>BLAKE</employee_name><job>MANAGER</job></employee>
  <employee><employee_number>7782</employee_number><employee_name>CLARK</employee_name><job>MANAGER</job></employee>
  <employee><employee_number>7788</employee_number><employee_name>SCOTT</employee_name><job>ANALYST</job></employee>
  <employee><employee_number>7839</employee_number><employee_name>KING</employee_name><job>PRESIDENT</job></employee>
  <employee><employee_number>7844</employee_number><employee_name>TURNER</employee_name><job>SALESMAN</job></employee>
  <employee><employee_number>7876</employee_number><employee_name>ADAMS</employee_name><job>CLERK</job></employee>
  <employee><employee_number>7900</employee_number><employee_name>JAMES</employee_name><job>CLERK</job></employee>
  <employee><employee_number>7902</employee_number><employee_name>FORD</employee_name><job>ANALYST</job></employee>
  <employee><employee_number>7934</employee_number><employee_name>MILLER</employee_name><job>CLERK</job></employee>
</employees>

Create a directory object to access the file.

CONN / AS SYSDBA
ALTER SESSION SET CONTAINER = pdb1;

CREATE OR REPLACE DIRECTORY tmp_dir AS '/tmp/';
GRANT READ, WRITE ON DIRECTORY tmp_dir TO test;

CONN test/test@pdb1

2. External Table (XMLTAG) : Single Tag


Create an external table using the XMLTAG clause, which in this case indicates we want to return XML fragments with employee as a tag name.

DROP TABLE tab_ext;

CREATE TABLE tab_ext (
  doc1 VARCHAR2(4000)
)
ORGANIZATION EXTERNAL (
  TYPE oracle_loader
  DEFAULT DIRECTORY tmp_dir1
  ACCESS PARAMETERS (
    RECORDS
    XMLTAG ("employee")
    FIELDS NOTRIM
    MISSING FIELD VALUES ARE NULL
  )
  LOCATION ('test1.xml')
)
REJECT LIMIT UNLIMITED;

We can see the XML fragments are being read correctly with the following query.

COLUMN doc1 FORMAT A120

SELECT doc1 FROM tab_ext;

DOC1
------------------------------------------------------------------------------------------
<employee><employee_number>7369</employee_number><employee_name>SMITH</employee_name><job>CLERK</job></employee>
<employee><employee_number>7499</employee_number><employee_name>ALLEN</employee_name><job>SALESMAN</job></employee>
<employee><employee_number>7521</employee_number><employee_name>WARD</employee_name><job>SALESMAN</job></employee>
<employee><employee_number>7566</employee_number><employee_name>JONES</employee_name><job>MANAGER</job></employee>
<employee><employee_number>7654</employee_number><employee_name>MARTIN</employee_name><job>SALESMAN</job></employee>
<employee><employee_number>7698</employee_number><employee_name>BLAKE</employee_name><job>MANAGER</job></employee>
<employee><employee_number>7782</employee_number><employee_name>CLARK</employee_name><job>MANAGER</job></employee>
<employee><employee_number>7788</employee_number><employee_name>SCOTT</employee_name><job>ANALYST</job></employee>
<employee><employee_number>7839</employee_number><employee_name>KING</employee_name><job>PRESIDENT</job></employee>
<employee><employee_number>7844</employee_number><employee_name>TURNER</employee_name><job>SALESMAN</job></employee>
<employee><employee_number>7876</employee_number><employee_name>ADAMS</employee_name><job>CLERK</job></employee>
<employee><employee_number>7900</employee_number><employee_name>JAMES</employee_name><job>CLERK</job></employee>
<employee><employee_number>7902</employee_number><employee_name>FORD</employee_name><job>ANALYST</job></employee>
<employee><employee_number>7934</employee_number><employee_name>MILLER</employee_name><job>CLERK</job></employee>

14 rows selected.

SQL>

We can present it as relational columns using the XMLTABLE operator.

SELECT xt.*
FROM   tab_ext x,
       XMLTABLE('/employee'
         PASSING XMLTYPE(x.doc1)
         COLUMNS 
           "EMPLOYEE_NUMBER" NUMBER(4)    PATH 'employee_number',
           "EMPLOYEE_NAME"   VARCHAR2(10) PATH 'employee_name',
           "JOB"             VARCHAR2(9)  PATH 'job'
         ) xt
ORDER BY 1;

EMPLOYEE_NUMBER EMPLOYEE_N JOB
--------------- ---------- ---------
           7369   SMITH      CLERK
           7499   ALLEN      SALESMAN
           7521   WARD       SALESMAN
           7566   JONES      MANAGER
           7654   MARTIN     SALESMAN
           7698   BLAKE      MANAGER
           7782   CLARK      MANAGER
           7788   SCOTT      ANALYST
           7839   KING       PRESIDENT
           7844   TURNER     SALESMAN
           7876   ADAMS      CLERK
           7900   JAMES      CLERK
           7902   FORD       ANALYST
           7934   MILLER     CLERK

14 rows selected.

SQL>

3. External Table (XMLTAG) : Multiple Tags


Create an external table using the XMLTAG clause, which indicates we want to return XML fragments with employee_number, employee_name or jobs as a tag name.

DROP TABLE tab_ext;

CREATE TABLE tab_ext (
  doc1 VARCHAR2(4000)
)
ORGANIZATION EXTERNAL (
  TYPE oracle_loader
  DEFAULT DIRECTORY tmp_dir1
  ACCESS PARAMETERS (
    RECORDS
    XMLTAG ("employee_number", "employee_name", "job")
    FIELDS NOTRIM
    MISSING FIELD VALUES ARE NULL
  )
  LOCATION ('test1.xml')
)
REJECT LIMIT UNLIMITED;

We can see the XML fragments are being read correctly with the following query.

COLUMN doc1 FORMAT A120

SELECT doc1 FROM tab_ext;

DOC1
-----------------------------------------------------------------------------------------
<employee_number>7369</employee_number>
<employee_name>SMITH</employee_name>
<job>CLERK</job>
<employee_number>7499</employee_number>
<employee_name>ALLEN</employee_name>
<job>SALESMAN</job>
<employee_number>7521</employee_number>
<employee_name>WARD</employee_name>
<job>SALESMAN</job>
<employee_number>7566</employee_number>
<employee_name>JONES</employee_name>
<job>MANAGER</job>
<employee_number>7654</employee_number>
<employee_name>MARTIN</employee_name>
<job>SALESMAN</job>
<employee_number>7698</employee_number>
<employee_name>BLAKE</employee_name>
<job>MANAGER</job>
<employee_number>7782</employee_number>
<employee_name>CLARK</employee_name>
<job>MANAGER</job>
<employee_number>7788</employee_number>
<employee_name>SCOTT</employee_name>
<job>ANALYST</job>
<employee_number>7839</employee_number>
<employee_name>KING</employee_name>
<job>PRESIDENT</job>
<employee_number>7844</employee_number>
<employee_name>TURNER</employee_name>
<job>SALESMAN</job>
<employee_number>7876</employee_number>
<employee_name>ADAMS</employee_name>
<job>CLERK</job>
<employee_number>7900</employee_number>
<employee_name>JAMES</employee_name>
<job>CLERK</job>
<employee_number>7902</employee_number>
<employee_name>FORD</employee_name>
<job>ANALYST</job>
<employee_number>7934</employee_number>
<employee_name>MILLER</employee_name>
<job>CLERK</job>

42 rows selected.

SQL>

Since each fragment is treated separately it is presented as a separate row, and there is no relationship between the fragments.