Reading Files

Reading back files is fairly simple. For this tutorial, we’ll use some internal tools to larcv to create some files, then read them back and vaildate the results.

As a side note … this is exactly what the CI Tests are doing to validate the serialization / deserialization process.

import larcv
import numpy
from random import Random

random = Random()

Step 1 - Create a file with a data product in it

Any data product will do, but we’ll use Sparse Clusters in 3D because that’s one of the more complex data products.

# This is from the larcv/data_generator.py file:

def build_sparse_cluster_list(rand_num_events, n_projections=3):


    voxel_set_array_list = []
     
    for event in range(rand_num_events):
        #  Add space for an event:
        voxel_set_array_list.append([])
        # Get a piece of data, sparse tensor:
        for projection in range(n_projections):
            # In this event, add space for a projection:
            voxel_set_array_list[event].append([])
            n_clusters = random.randint(1,25)
            for cluster in range(n_clusters):

                n_voxels = random.randint(1,50)
                cluster_d = {                    
                    'values'  : [],
                    'indexes' : random.sample(range(128*128), n_voxels),
                    'n_voxels': n_voxels}
                for j in range(n_voxels):
                    cluster_d['values'].append(random.uniform(-1e3, 1e3) )

                voxel_set_array_list[event][projection].append(cluster_d)

    return voxel_set_array_list

And here is the function to write them to file:

def write_sparse_clusters(file_name, voxel_set_array_list, dimension=2, n_projections=3):


    import copy

    io_manager = larcv.IOManager(larcv.IOManager.kWRITE)
    io_manager.set_out_file(file_name)
    io_manager.initialize()


    # For this test, the meta is pretty irrelevant as long as it is consistent
    meta_list = []
    for projection in range(n_projections):
        if dimension == 2:
            meta_list.append(larcv.ImageMeta2D())
        else:
            meta_list.append(larcv.ImageMeta3D())

        for dim in range(dimension):
            L = 10.
            N = 128
            meta_list[-1].set_dimension(dim, L, N)

        meta_list[-1].set_projection_id(projection)

    for i in range(len(voxel_set_array_list)):
        io_manager.set_id(1001, 0, i)
        # Get a piece of data, sparse cluster:
        if dimension== 2:
            ev_cluster = io_manager.get_data("cluster2d","test")
        else:
            ev_cluster = io_manager.get_data("cluster3d","test")


        for projection in range(n_projections):
            clusters = voxel_set_array_list[i][projection]
            if dimension == 2:
                vsa = larcv.SparseCluster2D()
            else:
                vsa = larcv.SparseCluster3D()
            for cluster in range(len(clusters)):
                vs = larcv.VoxelSet()

                vs.id(cluster)
                indexes = clusters[cluster]['indexes']
                values = clusters[cluster]['values']
                for j in range(clusters[cluster]['n_voxels']):
                    vs.emplace(indexes[j], values[j], False)
                vsa.insert(vs)          
            vsa.meta(meta_list[projection])
            
            ev_cluster.set(vsa)

        io_manager.save_entry()




    io_manager.finalize()

    return

So, let’s write a file to disk:

voxel_set_array_list = build_sparse_cluster_list(rand_num_events=10, n_projections=2)
file_name = "test_write_sparse_clusters.h5"
write_sparse_clusters(file_name, voxel_set_array_list, dimension=3, n_projections=2)
    [NORMAL]  <IOManager::finalize> Closing output file

Note that the dimension is pretty arbitrary here, as long as it is 2 or 3. These data are just noise; there is no change in how noisy it is if you are raveling 2D or 3D data, as long as the total voxel space is big enough. We used 128 sided spaces in both 2D and 3D above, so it’s ok.

Read back the sparse clusters

io_manager = larcv.IOManager(larcv.IOManager.kREAD)
io_manager.add_in_file(file_name)
io_manager.initialize()


loaded_voxel_set_array_list = []

for event in range(io_manager.get_n_entries()):
    # append a list of projections for this event:
    loaded_voxel_set_array_list.append([])

    io_manager.read_entry(event)

    # This would be cluster2d, if we used 2d above!
    ev_cluster = io_manager.get_data("cluster3d","test")

    for projection in range(ev_cluster.size()):
        # Append a list of clusters for this projection:
        loaded_voxel_set_array_list[event].append([])
        print("Number of clusters: ", ev_cluster.sparse_cluster(projection).size())
        sparse_cluster = ev_cluster.sparse_cluster(projection)
        print("Current voxel_set_array_list length: ", len(loaded_voxel_set_array_list))
        print(f"Current voxel_set_array_list[{event}] length: ", len(loaded_voxel_set_array_list[event]))
        for cluster in range(sparse_cluster.size()):
            # Append a dict of values for this cluster
            loaded_voxel_set_array_list[event][projection].append({
                "indexes" : [],
                "values"  : [],
                "n_voxels": sparse_cluster.voxel_set(cluster).size()
                })
            for j in range(sparse_cluster.voxel_set(cluster).size()):
                loaded_voxel_set_array_list[event][projection][cluster]['indexes'].append(sparse_cluster.voxel_set(cluster).as_vector()[j].id())
                loaded_voxel_set_array_list[event][projection][cluster]['values'].append(sparse_cluster.voxel_set(cluster).as_vector()[j].value())
Number of clusters:  1
Current voxel_set_array_list length:  1
Current voxel_set_array_list[0] length:  1
Number of clusters:  12
Current voxel_set_array_list length:  1
Current voxel_set_array_list[0] length:  2
    [NORMAL]  <IOManager::prepare_input> Opening a file in READ mode: "test_write_sparse_clusters.h5"
    [NORMAL]  <IOManager::prepare_input> File "test_write_sparse_clusters.h5" has 10 entries
    [NORMAL]  <IOManager::initialize> Prepared input with 10 entries...
Number of clusters:  1
Current voxel_set_array_list length:  2
Current voxel_set_array_list[1] length:  1
Number of clusters:  13
Current voxel_set_array_list length:  2
Current voxel_set_array_list[1] length:  2
Number of clusters:  10
Current voxel_set_array_list length:  3
Current voxel_set_array_list[2] length:  1
Number of clusters:  10
Current voxel_set_array_list length:  3
Current voxel_set_array_list[2] length:  2
Number of clusters:  7
Current voxel_set_array_list length:  4
Current voxel_set_array_list[3] length:  1
Number of clusters:  15
Current voxel_set_array_list length:  4
Current voxel_set_array_list[3] length:  2
Number of clusters:  4
Current voxel_set_array_list length:  5
Current voxel_set_array_list[4] length:  1
Number of clusters:  16
Current voxel_set_array_list length:  5
Current voxel_set_array_list[4] length:  2
Number of clusters:  4
Current voxel_set_array_list length:  6
Current voxel_set_array_list[5] length:  1
Number of clusters:  19
Current voxel_set_array_list length:  6
Current voxel_set_array_list[5] length:  2
Number of clusters:  24
Current voxel_set_array_list length:  7
Current voxel_set_array_list[6] length:  1
Number of clusters:  24
Current voxel_set_array_list length:  7
Current voxel_set_array_list[6] length:  2
Number of clusters:  15
Current voxel_set_array_list length:  8
Current voxel_set_array_list[7] length:  1
Number of clusters:  7
Current voxel_set_array_list length:  8
Current voxel_set_array_list[7] length:  2
Number of clusters:  4
Current voxel_set_array_list length:  9
Current voxel_set_array_list[8] length:  1
Number of clusters:  6
Current voxel_set_array_list length:  9
Current voxel_set_array_list[8] length:  2
Number of clusters:  4
Current voxel_set_array_list length:  10
Current voxel_set_array_list[9] length:  1
Number of clusters:  18
Current voxel_set_array_list length:  10
Current voxel_set_array_list[9] length:  2

Step 3 - Check it’s consistent

These are some basic checks (the ones in the CI test) to make sure the data read back matches the data that went in.

# Check the same number of events came back:
assert(len(loaded_voxel_set_array_list) == 10)
for event in range(10):
    # Check the same number of projections per event:
    print
    assert(len(loaded_voxel_set_array_list[event]) == len(voxel_set_array_list[event]))

    for projection in range(2):
        # CHeck the number of clusters in this projection:
        assert(len(loaded_voxel_set_array_list[event][projection]) == len(voxel_set_array_list[event][projection]))

        for cluster in range(len(loaded_voxel_set_array_list[event][projection])):
            # Check the same number of voxels:
            input_voxelset = voxel_set_array_list[event][projection][cluster]
            read_voxelset = loaded_voxel_set_array_list[event][projection][cluster]
            assert(read_voxelset['n_voxels'] == input_voxelset['n_voxels'])

            # Check voxel properties:
            # Sum of indexes
            # Sum of values
            # std of values
            if input_voxelset['n_voxels'] == 0:
                continue
            # print(input_voxelset['values'])
            assert(numpy.sum(input_voxelset['indexes']) == numpy.sum(read_voxelset['indexes']))
            assert( abs( numpy.sum(input_voxelset['values']) - numpy.sum(read_voxelset['values']) ) < 1e-3 )
            assert( abs( numpy.std(input_voxelset['values']) - numpy.std(read_voxelset['values']) ) < 1e-3 )

print("All assertions passed.")
All assertions passed.