RAPIDO
Repeatable Analysis Programming for Interpretability, Durability, and Organization
Loading...
Searching...
No Matches
RAPIDO

Repeatable Analysis Programming for Interpretability, Durability, and Organization

License: MIT Website shields.io

RAPIDO is a C++ framework designed to make writing HEP analyses more ergonomic and readable. It wraps some basic functionality of ROOT. The idea is that an analysis, in general, consists of a few objects: a TTree (to hold some skimmed N-Tuple and/or a set of histograms), a cutflow (a collection of boolean logic for filtering events), and a looper (some way to run over multiple files). RAPIDO is designed to handle all three of these tasks such that every analysis that uses it is structured in the same way. In addition, the way in which it is structured lends itself to the common workflow of a HEPEx-er.

RAPIDO Tools

  1. Arbol: TTree wrapper that reduces the hassle of setting up and using TTrees
    • Arbusto: TTree wrapper for skimming
  2. Cutflow: Binary search tree with lambda nodes and other bells and whistles
    • Histflow: An extension of the Cutflow object that handles histogramming at any given step of the cutflow
  3. Looper: Basic looper for a TChain of TFiles that uses any selector
  1. HEPCLI: Command Line Interface (CLI) with relevant arguments for physics analysis

Set Up Instructions

  1. Clone this repository
  2. cd into the cloned repository and run make -j5
  3. Write your script (e.g. main.cc) and #include whatever you need
  4. Compile and run using your favorite Makefile:
    $ make
    $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/rapido/src
    $ export ROOT_INCLUDE_PATH=$ROOT_INCLUDE_PATH:$PWD/rapido/src
    $ ./main

Examples

  1. Minimal Cutflow example
    #include "cutflow.h"
    #include <stdlib.h>
    using namepsace std;
    int main()
    {
    Cutflow dummy_cutflow = Cutflow();
    Cut* dummy_root = new LambdaCut("root", []() { return bool(rand() % 2); });
    dummy_cutflow.setRoot(dummy_root);
    Cut* node0 = new LambdaCut("node0", []() { return bool(rand() % 2); });
    dummy_cutflow.insert("root", node0, Left);
    Cut* node1 = new LambdaCut("node1", []() { return bool(rand() % 2); });
    dummy_cutflow.insert("root", node1, Right);
    Cut* node2 = new LambdaCut("node2", []() { return bool(rand() % 2); });
    dummy_cutflow.insert("node1", node2, Right);
    Cut* node3 = new LambdaCut("node3", []() { return bool(rand() % 2); });
    dummy_cutflow.insert("node1", node3, Left);
    Cut* node4 = new LambdaCut("node4", []() { return bool(rand() % 2); });
    dummy_cutflow.insert("node2", node4, Right);
    for (int i = 0; i < 5; i++)
    {
    Cut* terminal_node = dummy_cutflow.run();
    cout << "terminated at " << terminal_node->name << endl;
    }
    dummy_cutflow.print();
    return 0;
    }
    Definition cutflow.h:25
    std::string name
    Definition cutflow.h:28
    Definition cutflow.h:151
    void print(bool show_timing=false)
    Definition cutflow.cc:384
    void insert(std::string target_cut_name, Cut *new_cut, Direction direction)
    Definition cutflow.cc:156
    virtual bool run()
    Definition cutflow.cc:292
    void setRoot(Cut *new_root)
    Definition cutflow.cc:142
    Definition cutflow.h:102
  2. A simple Arbol+Looper example (using ROOT::MakeSelector to read an arbitrary ROOT file)
    $ root # only known to work for ROOT v6.22 and greater
    root[0] TFile* f = new TFile("/path/to/myfile.root")
    root[1] TreeName->MakeSelector("MySelector")
    (int) 0
    root [2] .q
    $ mv MySelector.C MySelector.cc
    $ rootcint myselectordict.cc -c MySelector.h
    $ mv myselectordict* rapdio/
    $ mv MySelector* rapido/
    $ cd rapido/
    $ make clean
    $ make -j5
    // Selector
    #include "MySelector.h"
    // RAPIDO
    #include "arbol.h"
    #include "looper.h"
    int main()
    {
    // Initialize Arbol
    TFile* output_tfile = new Tfile("output.root", "RECREATE");
    Arbol arbol = Arbol(output_tfile);
    // Initialize branches
    arbol.newBranch<int>("event");
    arbol.newBranch<float>("met");
    arbol.newBranch<float>("ht");
    arbol.newBranch<int>("n_jets");
    arbol.newVecBranch<float>("good_jet_pt"); // newVecBranch<float> <--> newBranch<std::vector<float>>
    // Get file
    TChain* tchain = new TChain("TreeName");
    tchain->Add("/path/to/myfile.root");
    // Initialize Looper
    MySelector selector;
    Looper looper = Looper(tchain);
    // Run
    looper.run(
    [&](TTree* ttree) { selector.Init(ttree) },
    [&](int entry)
    {
    selector.GetEntry(entry);
    selector.Process(entry);
    // --> Event-level Logic <--
    // Reset tree
    arbol.resetBranches(); // variables like arbol and selector are captured by reference
    // Loop over jets
    float ht = 0.;
    for (unsigned int i = 0; i < *selector.nJet; i++)
    {
    if (selector.Jet_pt[i] > 30)
    {
    arbol.appendToVecLeaf<float>("good_jet_pt", selector.Jet_pt[i]);
    ht += selector.Jet_pt[i];
    }
    }
    arbol.setLeaf<int>("event", *selector.event);
    arbol.setLeaf<float>("ht", ht);
    arbol.setLeaf<float>("met", *selector.MET_pt);
    arbol.setLeaf<int>("n_jets", arbol.getVecLeaf<float>("goot_jet_pt").size());
    arbol.fill();
    return;
    }
    );
    // Write results to a ROOT file
    arbol.write();
    return 0;
    }
    Definition arbol.h:47
    virtual void fill()
    void resetBranches()
    void newBranch(TString new_branch_name)
    void newVecBranch(TString new_branch_name)
    void setLeaf(TString branch_name, Type new_value)
    void appendToVecLeaf(TString branch_name, Type new_value)
    virtual void write(bool close=true)
    std::vector< Type > getVecLeaf(TString branch_name)
    Definition looper.h:17
    void run(std::function< void(TTree *ttree)> init, std::function< void(int entry)> eval)
  3. Arbol+Cutflow+Looper+HEPCLI example (now using NanoCORE to read NanoAOD)
    // ROOT
    #include "TH1F.h"
    // NanoCORE
    #include "Nano.h" // nt
    #include "tqdm.h" // bar
    #include "ElectronSelections.h" // Electron IDs
    #include "MuonSelections.h" // Muon IDs
    // RAPIDO
    #include "arbol.h"
    #include "cutflow.h"
    #include "looper.h"
    using namespace std;
    using namespace tas;
    int main(int argc, char** argv)
    {
    // CLI
    HEPCLI cli = HEPCLI(argc, argv);
    // Initialize Looper
    Looper looper = Looper(cli.input_tchain);
    // Initialize Arbol
    output_tfile = cli.output_dir+"/"+cli.output_name+".root"
    Arbol arbol = Arbol(output_tfile);
    // Event branches
    arbol.newBranch<int>("event", -999);
    arbol.newBranch<float>("met", -999);
    // Leptons
    arbol.newBranch<int>("leading_lep_id", -999);
    arbol.newBranch<float>("leading_lep_pt", -999);
    arbol.newBranch<float>("leading_lep_eta", -999);
    arbol.newBranch<float>("leading_lep_phi", -999);
    arbol.newBranch<int>("trailing_lep_id", -999);
    arbol.newBranch<float>("trailing_lep_pt", -999);
    arbol.newBranch<float>("trailing_lep_eta", -999);
    arbol.newBranch<float>("trailing_lep_phi", -999);
    // Initialize Cutflow
    Cutflow cutflow = Cutflow(cli.output_name+"_Cutflow");
    // Initialize some hists
    TH1F* ld_lep_pt_hist = new TH1F("ld_lep_pt_hist", "ld_lep_pt_hist", 20, 0, 200);
    TH1F* tr_lep_pt_hist = new TH1F("tr_lep_pt_hist", "tr_lep_pt_hist", 20, 0, 200);
    cutflow.globals.newVar<TH1F>("ld_lep_pt_hist", *ld_lep_pt_hist);
    cutflow.globals.newVar<TH1F>("tr_lep_pt_hist", *tr_lep_pt_hist);
    Cut* root = new LambdaCut(
    "Bookkeeping",
    [&]()
    {
    arbol.setLeaf("event", nt.event());
    arbol.setLeaf("met", nt.MET_pt());
    return true;
    },
    [&]()
    {
    // Dummy weight
    return 0.001;
    }
    );
    cutflow.setRoot(root);
    Cut* dilep_presel = new LambdaCut(
    "DileptonPreselection",
    [&]()
    {
    int n_tight_leps = 0;
    int n_loose_not_tight_leps = 0;
    Leptons leptons = getLeptons();
    Lepton leading_lep;
    Lepton trailing_lep;
    for (auto& lep : leptons)
    {
    if (lep.pt() < 20) { continue; }
    if (lep.idlevel() == SS::IDtight)
    {
    if (lep.pt() > leading_lep.pt())
    {
    trailing_lep = leading_lep;
    leading_lep = lep;
    }
    else if (lep.pt() > trailing_lep.pt()) { trailing_lep = lep; }
    n_tight_leps++;
    }
    if (lep.idlevel() == SS::IDfakable) { n_loose_not_tight_leps++; }
    }
    if (n_tight_leps == 2 && n_loose_not_tight_leps == 0)
    {
    arbol.setLeaf<int>("leading_lep_id", leading_lep.id());
    arbol.setLeaf<float>("leading_lep_pt", leading_lep.pt());
    arbol.setLeaf<float>("leading_lep_eta", leading_lep.eta());
    arbol.setLeaf<float>("leading_lep_phi", leading_lep.phi());
    arbol.setLeaf<int>("trailing_lep_id", trailing_lep.id());
    arbol.setLeaf<float>("trailing_lep_pt", trailing_lep.pt());
    arbol.setLeaf<float>("trailing_lep_eta", trailing_lep.eta());
    arbol.setLeaf<float>("trailing_lep_phi", trailing_lep.phi());
    return true;
    }
    else { return false; }
    }
    );
    cutflow.insert("Bookkeeping", dilep_presel, Right);
    Cut* monolep_or_fakes = new LambdaCut("SingleLepOrFakes", [&]() { return true; });
    cutflow.insert("DileptonPreselection", monolep_or_fakes, Left);
    Cut* dilep_sign = new LambdaCut(
    "CheckDilepSign",
    [&]()
    {
    int leading_lep_id = arbol.getLeaf<int>("leading_lep_id");
    int trailing_lep_id = arbol.getLeaf<int>("trailing_lep_id");
    return leading_lep_id*trailing_lep_id > 0;
    }
    );
    cutflow.insert("DileptonPreselection", dilep_sign, Right);
    Cut* SS_presel = new LambdaCut("SSPreselection", [&]() { return true; });
    cutflow.insert("CheckDilepSign", SS_presel, Right);
    Cut* OS_presel = new LambdaCut(
    "OSPreselection",
    [&]()
    {
    TH1F& ld_lep_pt_hist = cutflow.globals.getRef<TH1F>("ld_lep_pt_hist");
    TH1F& tr_lep_pt_hist = cutflow.globals.getRef<TH1F>("tr_lep_pt_hist");
    ld_lep_pt_hist.Fill(arbol.getLeaf<float>("leading_lep_pt"));
    tr_lep_pt_hist.Fill(arbol.getLeaf<float>("trailing_lep_pt"));
    return true;
    },
    [&]()
    {
    // Dummy weight
    return 0.25;
    }
    );
    cutflow.insert("CheckDilepSign", OS_presel, Left);
    // Run looper
    tqdm bar; // progress bar
    looper.run(
    [&](TTree* ttree)
    {
    // Runs once per file
    nt.Init(ttree);
    },
    [&](int entry)
    {
    // Runs once per event
    bar.progress(looper.n_events_processed, looper.n_events_to_process);
    nt.GetEntry(entry);
    // Reset tree
    arbol.resetBranches();
    // Run cutflow
    bool passed = cutflow.run("OSPreselection");
    if (passed) { arbol.fill(); }
    return;
    }
    );
    // Wrap up
    bar.finish();
    cutflow.print();
    cutflow.writeCSV(cli.output_dir);
    arbol.write();
    return 0;
    }
    Type getLeaf(TString branch_name)
    Definition hepcli.h:16
    std::string output_dir
    Definition hepcli.h:36
    TChain * input_tchain
    Definition hepcli.h:52
    std::string output_name
    Definition hepcli.h:38
    unsigned int n_events_processed
    Definition looper.h:29