Heute (so zwischendurch) mal ein ganz anderes Thema: Wenn es darum geht, Kommaseparierte Dateien in die Datenbank zu laden, gibt es zahlreiche Wege: SQL*Loader, External Tables, Application Express, 3rd-Party Tools und zahlreiche Varianten mehr ... Alle erfordern mehr oder weniger Aufwand - Um eine external table anzulegen, muss man die SQL*Loader-Syntax schon gut kennen, auch wenn die Datei noch so einfach strukturiert ist.
Ich habe mich schon länger gefragt, ob die im Post
LDAP Server abfragen mit SQL verwendeten völlig dynamischen
Table Functions hier Verwendung finden können. Hintergrund ist ja, dass, verwendet man für Table Functions den sog.
Interface Approach, die Struktur der zurückgegebenen Tabelle völlig dynamisch sein kann.
Also habe ich es mal probiert ... der Code ist zwar ein wenig länger geworden, aber eine erste Version ist fertig. Es funktioniert wie folgt:
select * from table (easy_csv_parser.parse(
'CSV_DIR', -- Directory-Objekt, in welchem die Datei liegt
'file.txt', -- Dateiname
',', -- Feld-Delimiter
'"', -- Felder optional umschlossen von
1, -- 1: Es soll versucht werden, die Datentypen selbst zu erkennen
1 -- 1: Spaltennamen aus erster Zeile entnehmen
))
/
Insbesondere wenn man mit dem CSV-Format noch experimentiert, sollte man stets noch einen SQL-Kommentar hinzufügen, um das SQL-Kommando für die Datenbank neu zu machen. Das Directory-Objekt muss auf ein Verzeichnis im Betriebssystem zeigen und ggfs. zunächst mit CREATE DIRECTORY erzeugt werden. Das Ergebnis mit bspw. dieser Datei hier ...
NAME,VORNAME,ORT,GEHALT,DATUM
Czarski,Carsten,München,400099,01-DEC-01
Franz,Muster,Trier,3000,01-JAN-01
"Mustermann, uu","Karl", Bad Homburg, 500, NODAT
... sieht dann etwa so aus:
NAME VORNAME ORT GEHALT DATUM
--------------- --------------- --------------- ---------- --------------------
Czarski Carsten München 400099 01-DEC-01
Franz Muster Trier 3000 01-JAN-01
Mustermann, uu Karl Bad Homburg 500
3 Zeilen ausgewählt.
Probiert es einmal aus - Feedback ist erwünscht. Nächstes mal geht es wieder mit XML in der Datenbank, Ausführungsplänen und Performance weiter. Und hier kommt nun endlich der Code:
CREATE type easy_csv_parser as object(
row_types anytype,
utlfile_id number,
utlfile_datatype number,
ecp_delimiter varchar2(10),
ecp_enclosed varchar2(10),
ecp_sample number,
static function parse(
p_dir in varchar2,
p_filename in varchar2,
p_delimiter in varchar2 default ',',
p_enclosed in varchar2 default '',
p_sample in number default 1,
p_cols_firstline in number default 1
) return anydataset pipelined using easy_csv_parser,
static function ODCITableDescribe(
record_table out anytype,
p_dir in varchar2,
p_filename in varchar2,
p_delimiter in varchar2 default ',',
p_enclosed in varchar2 default '',
p_sample in number default 1,
p_cols_firstline in number default 1
) return number,
static function ODCITablePrepare (
sctx out easy_csv_parser,
tab_func_info in sys.ODCITabFuncInfo,
p_dir in varchar2,
p_filename in varchar2,
p_delimiter in varchar2 default ',',
p_enclosed in varchar2 default '',
p_sample in number default 1 ,
p_cols_firstline in number default 1
) return number,
static function ODCITableStart (
sctx in out easy_csv_parser,
p_dir in varchar2,
p_filename in varchar2,
p_delimiter in varchar2 default ',',
p_enclosed in varchar2 default '',
p_sample in number default 1 ,
p_cols_firstline in number default 1
) return number,
member function ODCITableFetch (
self in out easy_csv_parser,
nrows in number,
record_out out anydataset
) return number,
member function ODCITableClose (
self in easy_csv_parser
) return number
)
/
sho err
CREATE or replace type BODY easy_csv_parser as
static function ODCITableDescribe(
record_table out anytype,
p_dir in varchar2,
p_filename in varchar2,
p_delimiter in varchar2 default ',',
p_enclosed in varchar2 default '',
p_sample in number default 1 ,
p_cols_firstline in number default 1
) return number as
v_record_structure anytype;
v_field_name varchar2(4000);
v_field_data varchar2(4000);
v_filehandle utl_file.file_type;
v_line_fields varchar2(32767);
v_line_data varchar2(32767);
v_f_ep pls_integer := -1;
v_f_sp pls_integer := 1;
v_d_ep pls_integer := -1;
v_d_sp pls_integer := 1;
v_enc_p pls_integer := 1;
v_field_pos pls_integer := 1;
v_typecode pls_integer := dbms_types.typecode_varchar2;
v_typecode_prec pls_integer;
v_data_date date;
v_data_number number;
begin
anytype.begincreate(dbms_types.typecode_object, v_record_structure);
v_filehandle := utl_file.fopen(p_dir, p_filename, 'R', 32767);
utl_file.get_line(v_filehandle, v_line_fields, 32767);
if p_cols_firstline = 1 then
utl_file.get_line(v_filehandle, v_line_data, 32767);
else
v_line_data := v_line_fields;
end if;
while v_f_ep != 0 loop
v_f_ep := instr(v_line_fields, p_delimiter, v_f_sp);
v_d_ep := instr(v_line_data, p_delimiter, v_d_sp);
if v_f_ep > 0 then
v_enc_p := instr(
replace(v_line_fields, p_enclosed||p_enclosed, ''),
p_enclosed,
v_f_sp
);
if v_enc_p > 0 and v_enc_p < v_f_ep then
v_enc_p := instr(
replace(v_line_fields, p_enclosed||p_enclosed, ''),
p_enclosed,
v_enc_p + 1
);
if v_enc_p > 0 then
v_f_ep := instr(v_line_fields, p_delimiter, v_enc_p + 1);
else
v_f_ep := instr(v_line_fields, p_delimiter, v_f_sp);
end if;
end if;
end if;
if v_d_ep > 0 then
v_enc_p := instr(
replace(v_line_data, p_enclosed||p_enclosed, ''),
p_enclosed,
v_d_sp
);
if v_enc_p > 0 and v_enc_p < v_d_ep then
v_enc_p := instr(replace(
v_line_data, p_enclosed||p_enclosed, ''),
p_enclosed,
v_enc_p + 1
);
if v_enc_p > 0 then
v_d_ep := instr(v_line_data, p_delimiter, v_enc_p + 1);
else
v_d_ep := instr(v_line_data, p_delimiter, v_d_sp);
end if;
end if;
end if;
if v_f_ep = 0 then
v_field_name := substr(v_line_fields, v_f_sp);
v_field_data := substr(v_line_data, v_d_sp);
else
v_field_name := substr(v_line_fields, v_f_sp, (v_f_ep - v_f_sp));
v_field_data := substr(v_line_data, v_d_sp, (v_d_ep - v_d_sp));
end if;
if p_enclosed is not null then
v_field_name := regexp_replace(v_field_name, '^'||p_enclosed, '');
v_field_name := regexp_replace(v_field_name, p_enclosed||'$', '');
v_field_data := regexp_replace(v_field_data, '^'||p_enclosed, '');
v_field_data := regexp_replace(v_field_data, p_enclosed||'$', '');
v_field_data := rtrim(ltrim(v_field_data));
end if;
v_typecode := dbms_types.typecode_varchar2;
if p_sample = 1 and v_field_data is not null then
begin
v_data_date := to_date(v_field_data);
v_typecode := dbms_types.typecode_date;
exception when others then
begin
v_data_number := to_number(v_field_data);
v_typecode := dbms_types.typecode_number;
exception when others then null;
end;
end;
end if;
v_record_structure.addattr(
ANAME => case
when p_cols_firstline = 1 then upper(replace(v_field_name,' ','_'))
else 'COLUMN_'||lpad(v_field_pos, 3, '0')
end,
TYPECODE => v_typecode,
PREC => null,
SCALE => null,
LEN => case
when v_typecode = dbms_types.typecode_varchar2 then 4000
else null
end,
CSID => null,
CSFRM => null,
ATTR_TYPE => null
);
v_f_sp := v_f_ep + 1;
v_d_sp := v_d_ep + 1;
v_field_pos := v_field_pos + 1;
v_enc_p := -1;
end loop;
v_record_structure.endcreate();
anytype.begincreate(dbms_types.typecode_table, record_table);
record_table.setinfo(
null, null, null, null, null, v_record_structure, dbms_types.typecode_object, 0
);
record_table.endcreate();
utl_file.fclose(v_filehandle);
return odciconst.success;
-- exception when others then
-- return odciconst.error;
end;
static function ODCITablePrepare (
sctx out easy_csv_parser,
tab_func_info in sys.ODCITabFuncInfo,
p_dir in varchar2,
p_filename in varchar2,
p_delimiter in varchar2 default ',',
p_enclosed in varchar2 default '',
p_sample in number default 1 ,
p_cols_firstline in number default 1
) return number is
prec pls_integer;
scale pls_integer;
len pls_integer;
csid pls_integer;
csfrm pls_integer;
record_desc anytype;
aname varchar2(30);
dummy pls_integer;
begin
-- insert into log values ('ODCITablePrepare entered') ; commit;
dummy := tab_func_info.RetType.GetAttrElemInfo(
null, prec, scale, len, csid, csfrm, record_desc, aname
);
sctx := easy_csv_parser(
record_desc, null, null, p_delimiter, p_enclosed, p_sample
);
return odciconst.success;
end;
static function ODCITableStart (
sctx in out easy_csv_parser,
p_dir in varchar2,
p_filename in varchar2,
p_delimiter in varchar2 default ',',
p_enclosed in varchar2 default '',
p_sample in number default 1 ,
p_cols_firstline in number default 1
) return number is
v_filehandle utl_file.file_type;
v_line varchar2(32767);
begin
v_filehandle := utl_file.fopen(p_dir, p_filename, 'R', 32767);
if p_cols_firstline = 1 then
utl_file.get_line(v_filehandle, v_line);
end if;
sctx.utlfile_id := v_filehandle.id;
sctx.utlfile_datatype := v_filehandle.datatype;
return odciconst.success;
end;
member function ODCITableFetch (
self in out easy_csv_parser,
nrows in number,
record_out out anydataset
) return number is
v_filehandle utl_file.file_type;
v_field varchar2(4000);
v_line varchar2(32767);
v_f_ep pls_integer := -1;
v_f_sp pls_integer := 1;
v_enc_p pls_integer := -1;
v_field_pos pls_integer := 1;
d_prec PLS_INTEGER;
d_scale PLS_INTEGER;
d_len PLS_INTEGER;
d_csid PLS_INTEGER;
d_csfrm PLS_INTEGER;
d_schema_name VARCHAR2(30);
d_type_name VARCHAR2(30);
d_version varchar2(100);
d_count PLS_INTEGER;
d_desc pls_integer;
d_attr_elt_type anytype;
d_aname varchar2(30);
begin
v_filehandle.id := self.utlfile_id;
v_filehandle.datatype := self.utlfile_datatype;
d_desc := self.row_types.getinfo(
d_prec, d_scale, d_len, d_csid, d_csfrm,
d_schema_name, d_type_name, d_version, d_count
);
record_out := null;
anydataset.begincreate(dbms_types.typecode_object, self.row_types, record_out);
record_out.addinstance;
record_out.piecewise();
begin
utl_file.get_line(v_filehandle, v_line, 32767);
while v_f_ep != 0 and v_field_pos <= d_count loop
v_f_ep := instr(v_line, ecp_delimiter, v_f_sp);
if v_f_ep > 0 then
v_enc_p := instr(
replace(v_line, self.ecp_enclosed||self.ecp_enclosed, ''),
self.ecp_enclosed,
v_f_sp
);
if v_enc_p > 0 and v_enc_p < v_f_ep then
v_enc_p := instr(
replace(v_line, self.ecp_enclosed||self.ecp_enclosed, ''),
self.ecp_enclosed,
v_enc_p + 1
);
if v_enc_p > 0 then
v_f_ep := instr(v_line, ecp_delimiter, v_enc_p + 1);
else
v_f_ep := instr(v_line, ecp_delimiter, v_f_sp);
end if;
end if;
end if;
if v_f_ep = 0 then
v_field := substr(v_line, v_f_sp);
else
v_field := substr(v_line, v_f_sp, (v_f_ep - v_f_sp));
end if;
if self.ecp_enclosed is not null then
v_field := regexp_replace(v_field, '^'||self.ecp_enclosed, '');
v_field := regexp_replace(v_field, self.ecp_enclosed||'$', '');
end if;
d_desc := self.row_types.getattreleminfo(
v_field_pos, d_prec, d_scale, d_len, d_csid, d_csfrm, d_attr_elt_type, d_aname
);
if d_desc = dbms_types.typecode_varchar2 or self.ecp_sample = 0 then
record_out.setvarchar2(v_field);
elsif d_desc = dbms_types.typecode_date then
begin
record_out.setdate(to_date(ltrim(rtrim(v_field))));
exception when others then
record_out.setdate(to_date(null));
end;
elsif d_desc = dbms_types.typecode_number then
begin
record_out.setnumber(to_number(v_field));
exception when others then
record_out.setnumber(to_number(null));
end;
end if;
v_f_sp := v_f_ep + 1;
v_field_pos := v_field_pos + 1;
end loop;
while v_field_pos <= d_count loop
d_desc := self.row_types.getattreleminfo(
v_field_pos, d_prec, d_scale, d_len, d_csid, d_csfrm, d_attr_elt_type, d_aname
);
if d_desc = dbms_types.typecode_varchar2 or self.ecp_sample = 0 then
record_out.setvarchar2(null);
elsif d_desc = dbms_types.typecode_date then
record_out.setdate(to_date(null));
elsif d_desc = dbms_types.typecode_number then
record_out.setnumber(to_number(null));
end if;
v_field_pos := v_field_pos + 1;
end loop;
record_out.endcreate;
exception
when NO_DATA_FOUND then
begin
record_out.endcreate;
exception when others then null;
end;
record_out := null;
end;
return odciconst.success;
end;
member function ODCITableClose (
self in easy_csv_parser
) return number is
v_filehandle utl_file.file_type;
begin
v_filehandle.id := self.utlfile_id;
v_filehandle.datatype := self.utlfile_datatype;
utl_file.fclose(v_filehandle);
return odciconst.success;
end;
end;
/
sho err