#!/usr/bin/env python3

"""
Read the /proc/<PID>/io file for all processes owned by a
given user, and sum them into one comma-separated line:
user,rchar,wchar,syscr,syscw,read_bytes,write_bytes,cancelled_write_bytes

Requires a fairly recent Linux kernel with CONFIG_TASK_IO_ACCOUNTING
turned on, i.e. one which enables the existence of /proc/self/io

See section "/proc/<pid>/io" of
http://www.kernel.org/doc/Documentation/filesystems/proc.txt
for details about the values displayed.
"""

# Author: Troels Arvin <troels@arvin.dk>
# Latest version: http://troels.arvin.dk/code/resusage/

# Copyright (c) 2010, 2024, Troels Arvin.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the  the Danish National Board of Health nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL the Danish National Board of Health BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import getopt
import os
import pickle
import pwd
import sys

# Name of this script, used in the help text and in the snapshot file name
this_prog = os.path.basename(__file__)

# Per-user hidden file holding the counters saved by a previous --save run
snapshot_file = "%s/.%s_snap" % (os.path.expanduser('~'), this_prog)

# Defaults for the command-line switches
print_headers, diff, save = True, False, False
comment = None

# For consistent output, in the same order as in the /proc/.../io file
key_sort_order = [
    'rchar',
    'wchar',
    'syscr',
    'syscw',
    'read_bytes',
    'write_bytes',
    'cancelled_write_bytes',
]

# Global dict which the add_to_struct modifies; one zeroed counter per field
res_struct = dict.fromkeys(key_sort_order, 0)

# --------------------------------------------------------
# Functions
# --------------------------------------------------------

def err(msg):
    """Report a fatal problem on standard error and terminate.

    The message is written as "Error: <msg>" followed by a newline, and the
    process then exits with status 1 (by raising SystemExit).
    """
    text = "Error: %s\n" % msg
    sys.stderr.write(text)
    raise SystemExit(1)

def owned_by(dirent, compare_uid):
    """Tell whether the entry /proc/<dirent> is owned by the given uid.

    dirent:      name of an entry directly below /proc (normally a PID).
    compare_uid: numeric user id to compare the entry's owner against.

    Returns True when the owner of the entry equals compare_uid, otherwise
    False. An entry that cannot be stat'ed is reported as not owned.
    """
    try:
        st_info = os.stat("/proc/%s" % dirent)
    except OSError:
        # Processes come and go: the entry may have disappeared between
        # the directory listing and this stat call. That must not abort
        # the whole run, so simply leave such an entry out.
        return False
    return st_info.st_uid == compare_uid

def usage(do_exit = True):
    """Print the help text to standard output.

    do_exit: when true (the default), terminate the program with exit
             status 0 after the text has been printed.
    """
    lines = [
        "Usage:",
        "  %s -u <user> [ -n | --noheaders ] " % this_prog,
        "    [ -c <comment> | --comment=<comment> ] [ -d | --diff ] ",
        "    [ -s | --save ]",
        "",
        "For all processes owned by the given user, this utility will",
        "print the sum of I/O related resource usage counts, summed",
        "from values in the relevant /proc/<pid>/io files. Output is",
        "comma-separated.",
        "",
        " -c / --comment   : Prefix output with a comment field; the",
        "                    comment will be printed verbatim, so it",
        "                    should normally be a single word, or",
        "                    \"-delimited, in order to respect the CSV",
        "                    format.",
        "",
        " -d / --diff      : Load the results from a previous run, if it",
        "                    exists.",
        "                    Subtract previous values from current",
        "                    values before printing results.",
        "",
        " -h / --help      : Print this help text and exit.",
        "",
        " -n / --noheaders : Don't print a header-line before data.",
        "",
        " -s / --save      : Save current values before exiting; will be",
        "                    used for subsequent diff calculations.",
        "",
        " -u / --user      : Name of the user whose processes are summed",
        "                    (mandatory).",
        "",
        "File used for loading and saving:",
        str(snapshot_file),
        "Note: The snapshots don't save data per-user or per-comment,",
        "so the following invocation sequence will wipe data for the",
        "foo user:",
        "%s -u foo -s" % this_prog,
        "%s -u bar -s" % this_prog,
        "",
        "The meanings of the fields are:",
        "  rchar                : Bytes read, cached or not",
        "  wchar                : Bytes written",
        "  syscr                : Number of read syscalls",
        "  syscw                : Number of write syscalls",
        "  read_bytes           : Bytes read from underlying storage",
        "  write_bytes          : Bytes written to underlying storage",
        "  cancelled_write_bytes: Bytes caused to not happen, by",
        "                         truncating pagecache.",
        "Details at /proc/<pid>/io section of:",
        "http://www.kernel.org/doc/Documentation/filesystems/proc.txt",
    ]
    print("\n".join(lines))
    if do_exit:
        sys.exit(0)

def add_to_struct(pid_part):
    """Add the I/O counters of one process to the global res_struct.

    pid_part: name of the process directory below /proc (the PID as a
              string).

    The file /proc/<pid_part>/io is read and parsed as "name: value"
    lines. Every value whose name is a key of res_struct is added to the
    corresponding total. Lines without a colon, with a non-integer value,
    or with a name that res_struct does not know are skipped, so that an
    unexpected line cannot abort the run.

    Returns 1 if the file could be read, 0 if it could not (for example
    because of missing permissions, or because the process has exited).
    """
    io_path = "/proc/%s/io" % pid_part

    # Read file contents. Only failures to read are treated as "no data";
    # KeyboardInterrupt and SystemExit are deliberately not swallowed.
    try:
        with open(io_path, 'r') as f:
            data = f.read()
    except (OSError, UnicodeDecodeError):
        return 0

    for line in data.splitlines():
        key, sep, value = line.partition(':')
        key = key.strip()
        if not sep or key not in res_struct:
            continue    # empty line, malformed line or unknown counter
        try:
            res_struct[key] += int(value)
        except ValueError:
            continue    # value is not an integer; ignore this line

    return 1


# --------------------------------------------------------
# Args handling
# --------------------------------------------------------
if len(sys.argv) < 2:
    err("At least one argument needed")

try:
    options, args = getopt.getopt(
        sys.argv[1:],
        "c:dhnsu:",
        [
            'comment=',
            'diff',
            'help',
            'noheaders',
            'save',
            'user=',
        ],
    )
except getopt.GetoptError as e:
    # err() already prefixes its message with "Error: ", so the text
    # passed here must not repeat that word.
    err("Could not parse arguments: %s" % e)

# Translate the parsed options into the module-level settings. getopt
# reports long options by their full name, even when abbreviated on the
# command line.
user = None
for name, value in options:
    if name in ('-c', '--comment'):
        comment = value
    elif name in ('-d', '--diff'):
        diff = True
    elif name in ('-h', '--help'):
        usage()     # prints the help text and exits
    elif name in ('-n', '--noheaders'):
        print_headers = False
    elif name in ('-s', '--save'):
        save = True
    elif name in ('-u', '--user'):
        user = value

# The user is the only mandatory piece of information
if user is None:
    err("No user indicated")

# --------------------------------------------------------
# Main work
# --------------------------------------------------------

# Prepare for the diff functionality: the baseline is all zeroes unless a
# snapshot is loaded below, so "current - previous" then yields plain totals.
prev_struct = res_struct.copy()

# Load previous values, if asked for, and such values exist
if diff and os.path.exists(snapshot_file):
    try:
        with open(snapshot_file, 'rb') as sf:
            # NOTE(review): pickle must only be fed trusted data. The
            # snapshot lives in the invoking user's home directory and is
            # written by this script; do not point this at foreign files.
            loaded = pickle.load(sf)
    except OSError as e:
        err("Snapshot file %s exists, cannot be read: %s" % (snapshot_file, e))
    except pickle.PickleError as e:
        err("Loading structure from '%s' failed: %s" % (snapshot_file, e))
    except Exception as e:
        err("Snapshot file %s exists, but seems corrupted (%s)" % (snapshot_file, e))

    # Make sure that the snapshot has the shape which the output section
    # relies on, instead of failing there with a traceback.
    if not isinstance(loaded, dict):
        err("Snapshot file %s exists, but seems corrupted (%s)"
            % (snapshot_file, "not a dictionary"))
    for key in key_sort_order:
        if not isinstance(loaded.get(key), int):
            err("Snapshot file %s exists, but seems corrupted (%s)"
                % (snapshot_file, "missing or non-integer value for '%s'" % key))
    prev_struct = loaded

# Determine uid of given user
try:
    uid = pwd.getpwnam(user).pw_uid
except KeyError:
    err("User '%s' unknown" % user)

# Find sub-directories of /proc related to processes owned by
# specified user (process directories are the all-digit entries)
dirents = os.listdir('/proc')
proc_dirs = [ e for e in dirents if e.isdigit() and owned_by(e,uid) ]

# Add up numbers; add_to_struct returns 1 per process whose io file
# could be read, and 0 otherwise
num_tries = len(proc_dirs)
num_successes = sum(add_to_struct(proc_dir) for proc_dir in proc_dirs)

if num_tries > 0 and num_successes == 0:
    err("Could not read any data: Lack of permissions, or kernel too old?")

# Save for later diff'ing, if asked for.
if save:
    # Only real failures are caught here. A bare "except" would also catch
    # the SystemExit raised by err() and report a second, misleading error.
    try:
        with open(snapshot_file, 'wb') as sf:
            pickle.dump(res_struct, sf)
    except OSError as e:
        err("Could not write snapshot file %s: %s" % (snapshot_file, e))
    except Exception as e:
        err("Couldn't write data structure in '%s': %s" % (snapshot_file, e))

# --------------------------------------------------------
# Output
# --------------------------------------------------------

# A non-empty comment adds one leading column to both the header line
# and the data line
if comment:
    prefix_header, prefix = "comment,", comment + ','
else:
    prefix_header = prefix = ''

# Header line, unless it was switched off
if print_headers:
    columns = ["user"] + key_sort_order
    print(prefix_header + ','.join(columns))

# Data line: current totals minus the previous values, in the same
# column order as the header
deltas = [str(res_struct[k] - prev_struct[k]) for k in key_sort_order]
print(prefix + user + ',' + ','.join(deltas))