"""
This module handles conversion of ROOT's TFile and
contained TTrees into HDF5 format with PyTables
"""
from __future__ import absolute_import
import os
import sys
import warnings
from pkg_resources import parse_version
import tables
TABLES_NEW_API = parse_version(tables.__version__) >= parse_version('3')
if TABLES_NEW_API:
tables_open = tables.open_file
else:
tables_open = tables.openFile
from root_numpy import tree2array, RootNumpyUnconvertibleWarning
from numpy.lib import recfunctions
from .io import root_open, TemporaryFile
from . import log; log = log[__name__]
from .extern.progressbar import ProgressBar, Bar, ETA, Percentage
from .extern.six import string_types
from .logger.utils import check_tty
from . import QROOT
__all__ = [
'tree2hdf5',
'root2hdf5',
]
def _drop_object_col(rec, warn=True):
# ignore columns of type `object` since PyTables does not support these
if rec.dtype.hasobject:
object_fields = []
fields = rec.dtype.fields
for name in rec.dtype.names:
if fields[name][0].kind == 'O':
object_fields.append(name)
if warn:
log.warning(
"ignoring unsupported object branch '{0}'".format(
name))
# NumPy 1.7.1: TypeError: Cannot change data-type for object array.
#return rec[non_object_fields]
if object_fields:
rec = recfunctions.rec_drop_fields(rec, object_fields)
return rec
def tree2hdf5(tree, hfile, group=None,
entries=-1, show_progress=False, **kwargs):
"""
Convert a TTree into a HDF5 table.
Parameters
----------
tree : ROOT.TTree
A ROOT TTree.
hfile : string or PyTables HDF5 File
A PyTables HDF5 File handle or string path to an existing HDF5 file.
group : string or PyTables Group instance, optional (default=None)
Write the table at this location in the HDF5 file.
entries : int, optional (default=-1)
The number of entries to read at once while converting a ROOT TTree
into an HDF5 table. By default read the entire TTree into memory (this
may not be desired if your TTrees are large).
show_progress : bool, optional (default=False)
If True, then display and update a progress bar on stdout as the TTree
is converted.
kwargs : dict, optional
Additional keyword arguments for the tree2array function.
"""
show_progress = show_progress and check_tty(sys.stdout)
if show_progress:
widgets = [Percentage(), ' ', Bar(), ' ', ETA()]
own_h5file = False
if isinstance(hfile, string_types):
hfile = tables_open(filename=hfile, mode="w", title="Data")
own_h5file = True
log.info("Converting tree '{0}' with {1:d} entries ...".format(
tree.GetName(),
tree.GetEntries()))
if not group:
group = hfile.root
elif isinstance(group, string_types):
group_where = '/' + os.path.dirname(group)
group_name = os.path.basename(group)
if TABLES_NEW_API:
group = hfile.create_group(group_where, group_name,
createparents=True)
else:
group = hfile.createGroup(group_where, group_name)
if tree.GetName() in group:
log.warning(
"Tree '{0}' already exists "
"in the output file".format(tree.GetName()))
return
total_entries = tree.GetEntries()
pbar = None
if show_progress and total_entries > 0:
pbar = ProgressBar(widgets=widgets, maxval=total_entries)
if entries <= 0:
# read the entire tree
if pbar is not None:
pbar.start()
array = tree2array(tree, **kwargs)
array = _drop_object_col(array)
if TABLES_NEW_API:
table = hfile.create_table(
group, tree.GetName(),
array, tree.GetTitle())
else:
table = hfile.createTable(
group, tree.GetName(),
array, tree.GetTitle())
# flush data in the table
table.flush()
# flush all pending data
hfile.flush()
else:
# read the tree in chunks
start = 0
while start < total_entries or start == 0:
if start > 0:
with warnings.catch_warnings():
warnings.simplefilter(
"ignore",
RootNumpyUnconvertibleWarning)
warnings.simplefilter(
"ignore",
tables.NaturalNameWarning)
array = tree2array(
tree,
start=start,
stop=start + entries,
**kwargs)
array = _drop_object_col(array, warn=False)
table.append(array)
else:
array = tree2array(
tree,
start=start,
stop=start + entries,
**kwargs)
array = _drop_object_col(array)
if pbar is not None:
# start after any output from root_numpy
pbar.start()
if TABLES_NEW_API:
table = hfile.create_table(
group, tree.GetName(),
array, tree.GetTitle())
else:
table = hfile.createTable(
group, tree.GetName(),
array, tree.GetTitle())
start += entries
if start <= total_entries and pbar is not None:
pbar.update(start)
# flush data in the table
table.flush()
# flush all pending data
hfile.flush()
if pbar is not None:
pbar.finish()
if own_h5file:
hfile.close()
[docs]def root2hdf5(rfile, hfile, rpath='',
entries=-1, userfunc=None,
show_progress=False,
ignore_exception=False,
**kwargs):
"""
Convert all trees in a ROOT file into tables in an HDF5 file.
Parameters
----------
rfile : string or asrootpy'd ROOT File
A ROOT File handle or string path to an existing ROOT file.
hfile : string or PyTables HDF5 File
A PyTables HDF5 File handle or string path to an existing HDF5 file.
rpath : string, optional (default='')
Top level path to begin traversal through the ROOT file. By default
convert everything in and below the root directory.
entries : int, optional (default=-1)
The number of entries to read at once while converting a ROOT TTree
into an HDF5 table. By default read the entire TTree into memory (this
may not be desired if your TTrees are large).
userfunc : callable, optional (default=None)
A function that will be called on every tree and that must return a
tree or list of trees that will be converted instead of the original
tree.
show_progress : bool, optional (default=False)
If True, then display and update a progress bar on stdout as each tree
is converted.
ignore_exception : bool, optional (default=False)
If True, then ignore exceptions raised in converting trees and instead
skip such trees.
kwargs : dict, optional
Additional keyword arguments for the tree2array function.
"""
own_rootfile = False
if isinstance(rfile, string_types):
rfile = root_open(rfile)
own_rootfile = True
own_h5file = False
if isinstance(hfile, string_types):
hfile = tables_open(filename=hfile, mode="w", title="Data")
own_h5file = True
for dirpath, dirnames, treenames in rfile.walk(
rpath, class_ref=QROOT.TTree):
# skip directories w/o trees
if not treenames:
continue
treenames.sort()
group_where = '/' + os.path.dirname(dirpath)
group_name = os.path.basename(dirpath)
if not group_name:
group = hfile.root
elif TABLES_NEW_API:
group = hfile.create_group(group_where, group_name,
createparents=True)
else:
group = hfile.createGroup(group_where, group_name)
ntrees = len(treenames)
log.info(
"Will convert {0:d} tree{1} in {2}".format(
ntrees, 's' if ntrees != 1 else '',
os.path.join(group_where, group_name)))
for treename in treenames:
input_tree = rfile.Get(os.path.join(dirpath, treename))
if userfunc is not None:
tmp_file = TemporaryFile()
# call user-defined function on tree and get output trees
log.info("Calling user function on tree '{0}'".format(
input_tree.GetName()))
trees = userfunc(input_tree)
if not isinstance(trees, list):
trees = [trees]
else:
trees = [input_tree]
tmp_file = None
for tree in trees:
try:
tree2hdf5(tree, hfile, group=group,
entries=entries,
show_progress=show_progress,
**kwargs)
except Exception as e:
if ignore_exception:
log.error("Failed to convert tree '{0}': {1}".format(
tree.GetName(), str(e)))
else:
raise
input_tree.Delete()
if userfunc is not None:
for tree in trees:
tree.Delete()
tmp_file.Close()
if own_h5file:
hfile.close()
if own_rootfile:
rfile.Close()
def main():
import rootpy
from rootpy.extern.argparse import (
ArgumentParser,
ArgumentDefaultsHelpFormatter, RawTextHelpFormatter)
class formatter_class(ArgumentDefaultsHelpFormatter,
RawTextHelpFormatter):
pass
parser = ArgumentParser(formatter_class=formatter_class,
description="Convert ROOT files containing TTrees into HDF5 files "
"containing HDF5 tables")
parser.add_argument('--version', action='version',
version=rootpy.__version__,
help="show the version number and exit")
parser.add_argument('-n', '--entries', type=int, default=100000,
help="number of entries to read at once")
parser.add_argument('-f', '--force', action='store_true', default=False,
help="overwrite existing output files")
parser.add_argument('-u', '--update', action='store_true', default=False,
help="update existing output files")
parser.add_argument('--ext', default='h5',
help="output file extension")
parser.add_argument('-c', '--complevel', type=int, default=5,
choices=range(0, 10),
help="compression level")
parser.add_argument('-l', '--complib', default='zlib',
choices=('zlib', 'lzo', 'bzip2', 'blosc'),
help="compression algorithm")
parser.add_argument('-s', '--selection', default=None,
help="apply a selection on each "
"tree with a cut expression")
parser.add_argument(
'--script', default=None,
help="Python script containing a function with the same name \n"
"that will be called on each tree and must return a tree or \n"
"list of trees that will be converted instead of the \n"
"original tree")
parser.add_argument('-q', '--quiet', action='store_true', default=False,
help="suppress all warnings")
parser.add_argument('-d', '--debug', action='store_true', default=False,
help="show stack trace in the event of "
"an uncaught exception")
parser.add_argument('--no-progress-bar', action='store_true', default=False,
help="do not show the progress bar")
parser.add_argument('--ignore-exception', action='store_true',
default=False,
help="ignore exceptions raised in converting trees "
"and instead skip such trees")
parser.add_argument('files', nargs='+')
args = parser.parse_args()
import logging
if hasattr(logging, 'captureWarnings'):
logging.captureWarnings(True)
def formatwarning(message, category, filename, lineno, line=None):
return "{0}: {1}".format(category.__name__, message)
warnings.formatwarning = formatwarning
args.ext = args.ext.strip('.')
if args.quiet:
warnings.simplefilter(
"ignore",
RootNumpyUnconvertibleWarning)
warnings.simplefilter(
"ignore",
tables.NaturalNameWarning)
userfunc = None
if args.script is not None:
# get user-defined function
try:
exec(compile(open(args.script).read(), args.script, 'exec'),
globals(), locals())
except IOError:
sys.exit('Could not open script {0}'.format(args.script))
funcname = os.path.splitext(os.path.basename(args.script))[0]
try:
userfunc = locals()[funcname]
except KeyError:
sys.exit(
"Could not find the function '{0}' in the script {1}".format(
funcname, args.script))
for inputname in args.files:
outputname = os.path.splitext(inputname)[0] + '.' + args.ext
output_exists = os.path.exists(outputname)
if output_exists and not (args.force or args.update):
sys.exit(
"Output {0} already exists. "
"Use the --force option to overwrite it".format(outputname))
try:
rootfile = root_open(inputname)
except IOError:
sys.exit("Could not open {0}".format(inputname))
try:
if args.complevel > 0:
filters = tables.Filters(complib=args.complib,
complevel=args.complevel)
else:
filters = None
hd5file = tables_open(filename=outputname,
mode='a' if args.update else 'w',
title='Data', filters=filters)
except IOError:
sys.exit("Could not create {0}".format(outputname))
try:
log.info("Converting {0} ...".format(inputname))
root2hdf5(rootfile, hd5file,
entries=args.entries,
userfunc=userfunc,
selection=args.selection,
show_progress=not args.no_progress_bar,
ignore_exception=args.ignore_exception)
log.info("{0} {1}".format(
"Updated" if output_exists and args.update else "Created",
outputname))
except KeyboardInterrupt:
log.info("Caught Ctrl-c ... cleaning up")
hd5file.close()
rootfile.Close()
if not output_exists:
log.info("Removing {0}".format(outputname))
os.unlink(outputname)
sys.exit(1)
except Exception as e:
if args.debug:
# If in debug mode show full stack trace
import traceback
traceback.print_exception(*sys.exc_info())
log.error(str(e))
sys.exit(1)
finally:
hd5file.close()
rootfile.Close()