blob: e2d57fef0da3bf6dc66c90ef20631725b6248caf [file] [log] [blame]
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* GPL HEADER END
*/
/*
* Copyright (c) 2011, 2012, Intel Corporation.
* Use is subject to license terms.
*
* Author: Niu Yawei <niu@whamcloud.com>
*/
/*
* lustre/obdclass/lprocfs_jobstats.c
*/
#ifndef EXPORT_SYMTAB
# define EXPORT_SYMTAB
#endif
#define DEBUG_SUBSYSTEM S_CLASS
#include <obd_class.h>
#include <lprocfs_status.h>
#include <lustre/lustre_idl.h>
#if defined(LPROCFS)
/*
* JobID formats & JobID environment variable names for supported
* job schedulers:
*
* SLURM:
* JobID format: 32 bit integer.
* JobID env var: SLURM_JOB_ID.
* SGE:
* JobID format: Decimal integer range to 99999.
* JobID env var: JOB_ID.
* LSF:
* JobID format: 6 digit integer by default (up to 999999), can be
* increased to 10 digit (up to 2147483646).
* JobID env var: LSB_JOBID.
* Loadleveler:
* JobID format: String of machine_name.cluster_id.process_id, for
* example: fr2n02.32.0
* JobID env var: LOADL_STEP_ID.
* PBS:
* JobID format: String of sequence_number[.server_name][@server].
* JobID env var: PBS_JOBID.
* Maui/MOAB:
* JobID format: Same as PBS.
* JobID env var: Same as PBS.
*/
struct job_stat {
struct hlist_node js_hash;
struct list_head js_list;
atomic_t js_refcount;
char js_jobid[JOBSTATS_JOBID_SIZE];
time_t js_timestamp; /* seconds */
struct lprocfs_stats *js_stats;
struct obd_job_stats *js_jobstats;
};
static unsigned job_stat_hash(cfs_hash_t *hs, const void *key, unsigned mask)
{
return cfs_hash_djb2_hash(key, strlen(key), mask);
}
static void *job_stat_key(struct hlist_node *hnode)
{
struct job_stat *job;
job = hlist_entry(hnode, struct job_stat, js_hash);
return job->js_jobid;
}
static int job_stat_keycmp(const void *key, struct hlist_node *hnode)
{
struct job_stat *job;
job = hlist_entry(hnode, struct job_stat, js_hash);
return (strlen(job->js_jobid) == strlen(key)) &&
!strncmp(job->js_jobid, key, strlen(key));
}
static void *job_stat_object(struct hlist_node *hnode)
{
return hlist_entry(hnode, struct job_stat, js_hash);
}
static void job_stat_get(cfs_hash_t *hs, struct hlist_node *hnode)
{
struct job_stat *job;
job = hlist_entry(hnode, struct job_stat, js_hash);
atomic_inc(&job->js_refcount);
}
static void job_free(struct job_stat *job)
{
LASSERT(atomic_read(&job->js_refcount) == 0);
LASSERT(job->js_jobstats);
write_lock(&job->js_jobstats->ojs_lock);
list_del_init(&job->js_list);
write_unlock(&job->js_jobstats->ojs_lock);
lprocfs_free_stats(&job->js_stats);
OBD_FREE_PTR(job);
}
static void job_putref(struct job_stat *job)
{
LASSERT(atomic_read(&job->js_refcount) > 0);
if (atomic_dec_and_test(&job->js_refcount))
job_free(job);
}
static void job_stat_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
{
struct job_stat *job;
job = hlist_entry(hnode, struct job_stat, js_hash);
job_putref(job);
}
static void job_stat_exit(cfs_hash_t *hs, struct hlist_node *hnode)
{
CERROR("Should not have any items!");
}
static cfs_hash_ops_t job_stats_hash_ops = {
.hs_hash = job_stat_hash,
.hs_key = job_stat_key,
.hs_keycmp = job_stat_keycmp,
.hs_object = job_stat_object,
.hs_get = job_stat_get,
.hs_put_locked = job_stat_put_locked,
.hs_exit = job_stat_exit,
};
static int job_iter_callback(cfs_hash_t *hs, cfs_hash_bd_t *bd,
struct hlist_node *hnode, void *data)
{
time_t oldest = *((time_t *)data);
struct job_stat *job;
job = hlist_entry(hnode, struct job_stat, js_hash);
if (!oldest || job->js_timestamp < oldest)
cfs_hash_bd_del_locked(hs, bd, hnode);
return 0;
}
static void lprocfs_job_cleanup(struct obd_job_stats *stats, bool force)
{
time_t oldest, now;
if (stats->ojs_cleanup_interval == 0)
return;
now = cfs_time_current_sec();
if (!force && now < stats->ojs_last_cleanup +
stats->ojs_cleanup_interval)
return;
oldest = now - stats->ojs_cleanup_interval;
cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback,
&oldest);
stats->ojs_last_cleanup = cfs_time_current_sec();
}
static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
{
struct job_stat *job;
LASSERT(jobs->ojs_cntr_num && jobs->ojs_cntr_init_fn);
OBD_ALLOC_PTR(job);
if (job == NULL)
return NULL;
job->js_stats = lprocfs_alloc_stats(jobs->ojs_cntr_num, 0);
if (job->js_stats == NULL) {
OBD_FREE_PTR(job);
return NULL;
}
jobs->ojs_cntr_init_fn(job->js_stats);
memcpy(job->js_jobid, jobid, JOBSTATS_JOBID_SIZE);
job->js_timestamp = cfs_time_current_sec();
job->js_jobstats = jobs;
INIT_HLIST_NODE(&job->js_hash);
INIT_LIST_HEAD(&job->js_list);
atomic_set(&job->js_refcount, 1);
return job;
}
int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
int event, long amount)
{
struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
struct job_stat *job, *job2;
ENTRY;
LASSERT(stats && stats->ojs_hash);
lprocfs_job_cleanup(stats, false);
if (!jobid || !strlen(jobid))
RETURN(-EINVAL);
if (strlen(jobid) >= JOBSTATS_JOBID_SIZE) {
CERROR("Invalid jobid size (%lu), expect(%d)\n",
(unsigned long)strlen(jobid) + 1, JOBSTATS_JOBID_SIZE);
RETURN(-EINVAL);
}
job = cfs_hash_lookup(stats->ojs_hash, jobid);
if (job)
goto found;
job = job_alloc(jobid, stats);
if (job == NULL)
RETURN(-ENOMEM);
job2 = cfs_hash_findadd_unique(stats->ojs_hash, job->js_jobid,
&job->js_hash);
if (job2 != job) {
job_putref(job);
job = job2;
/* We cannot LASSERT(!list_empty(&job->js_list)) here,
* since we just lost the race for inserting "job" into the
* ojs_list, and some other thread is doing it _right_now_.
* Instead, be content the other thread is doing this, since
* "job2" was initialized in job_alloc() already. LU-2163 */
} else {
LASSERT(list_empty(&job->js_list));
write_lock(&stats->ojs_lock);
list_add_tail(&job->js_list, &stats->ojs_list);
write_unlock(&stats->ojs_lock);
}
found:
LASSERT(stats == job->js_jobstats);
LASSERT(stats->ojs_cntr_num > event);
job->js_timestamp = cfs_time_current_sec();
lprocfs_counter_add(job->js_stats, event, amount);
job_putref(job);
RETURN(0);
}
EXPORT_SYMBOL(lprocfs_job_stats_log);
void lprocfs_job_stats_fini(struct obd_device *obd)
{
struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
time_t oldest = 0;
if (stats->ojs_hash == NULL)
return;
cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest);
cfs_hash_putref(stats->ojs_hash);
stats->ojs_hash = NULL;
LASSERT(list_empty(&stats->ojs_list));
}
EXPORT_SYMBOL(lprocfs_job_stats_fini);
static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos)
{
struct obd_job_stats *stats = p->private;
loff_t off = *pos;
struct job_stat *job;
read_lock(&stats->ojs_lock);
if (off == 0)
return SEQ_START_TOKEN;
off--;
list_for_each_entry(job, &stats->ojs_list, js_list) {
if (!off--)
return job;
}
return NULL;
}
static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v)
{
struct obd_job_stats *stats = p->private;
read_unlock(&stats->ojs_lock);
}
static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos)
{
struct obd_job_stats *stats = p->private;
struct job_stat *job;
struct list_head *next;
++*pos;
if (v == SEQ_START_TOKEN) {
next = stats->ojs_list.next;
} else {
job = (struct job_stat *)v;
next = job->js_list.next;
}
return next == &stats->ojs_list ? NULL :
list_entry(next, struct job_stat, js_list);
}
/*
* Example of output on MDT:
*
* job_stats:
* - job_id: test_id.222.25844
* snapshot_time: 1322494486
* open: { samples: 3, unit: reqs }
* close: { samples: 3, unit: reqs }
* mknod: { samples: 0, unit: reqs }
* link: { samples: 0, unit: reqs }
* unlink: { samples: 0, unit: reqs }
* mkdir: { samples: 0, unit: reqs }
* rmdir: { samples: 0, unit: reqs }
* rename: { samples: 1, unit: reqs }
* getattr: { samples: 7, unit: reqs }
* setattr: { samples: 0, unit: reqs }
* getxattr: { samples: 0, unit: reqs }
* setxattr: { samples: 0, unit: reqs }
* statfs: { samples: 0, unit: reqs }
* sync: { samples: 0, unit: reqs }
*
* Example of output on OST:
*
* job_stats:
* - job_id 4854
* snapshot_time: 1322494602
* read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 }
* write: { samples: 1, unit: bytes, min: 10, max: 10, sum: 10 }
* setattr: { samples: 0, unit: reqs }
* punch: { samples: 0, unit: reqs }
* sync: { samples: 0, unit: reqs }
*/
static const char spaces[] = " ";
static int inline width(const char *str, int len)
{
return len - min((int)strlen(str), 15);
}
static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v)
{
struct job_stat *job = v;
struct lprocfs_stats *s;
struct lprocfs_counter ret;
struct lprocfs_counter *cntr;
struct lprocfs_counter_header *cntr_header;
int i;
if (v == SEQ_START_TOKEN) {
seq_printf(p, "job_stats:\n");
return 0;
}
seq_printf(p, "- %-16s %s\n", "job_id:", job->js_jobid);
seq_printf(p, " %-16s %ld\n", "snapshot_time:", job->js_timestamp);
s = job->js_stats;
for (i = 0; i < s->ls_num; i++) {
cntr = lprocfs_stats_counter_get(s, 0, i);
cntr_header = &s->ls_cnt_header[i];
lprocfs_stats_collect(s, i, &ret);
seq_printf(p, " %s:%.*s { samples: %11"LPF64"u",
cntr_header->lc_name,
width(cntr_header->lc_name, 15), spaces,
ret.lc_count);
if (cntr_header->lc_units[0] != '\0')
seq_printf(p, ", unit: %5s", cntr_header->lc_units);
if (cntr_header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
seq_printf(p, ", min:%8"LPF64"u, max:%8"LPF64"u,"
" sum:%16"LPF64"u",
ret.lc_count ? ret.lc_min : 0,
ret.lc_count ? ret.lc_max : 0,
ret.lc_count ? ret.lc_sum : 0);
}
if (cntr_header->lc_config & LPROCFS_CNTR_STDDEV) {
seq_printf(p, ", sumsq: %18"LPF64"u",
ret.lc_count ? ret.lc_sumsquare : 0);
}
seq_printf(p, " }\n");
}
return 0;
}
struct seq_operations lprocfs_jobstats_seq_sops = {
start: lprocfs_jobstats_seq_start,
stop: lprocfs_jobstats_seq_stop,
next: lprocfs_jobstats_seq_next,
show: lprocfs_jobstats_seq_show,
};
static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int rc;
rc = seq_open(file, &lprocfs_jobstats_seq_sops);
if (rc)
return rc;
seq = file->private_data;
seq->private = PDE_DATA(inode);
return 0;
}
static ssize_t lprocfs_jobstats_seq_write(struct file *file, const char *buf,
size_t len, loff_t *off)
{
struct seq_file *seq = file->private_data;
struct obd_job_stats *stats = seq->private;
char jobid[JOBSTATS_JOBID_SIZE];
int all = 0;
struct job_stat *job;
if (!memcmp(buf, "clear", strlen("clear"))) {
all = 1;
} else if (len < JOBSTATS_JOBID_SIZE) {
memset(jobid, 0, JOBSTATS_JOBID_SIZE);
/* Trim '\n' if any */
if (buf[len - 1] == '\n')
memcpy(jobid, buf, len - 1);
else
memcpy(jobid, buf, len);
} else {
return -EINVAL;
}
LASSERT(stats->ojs_hash);
if (all) {
time_t oldest = 0;
cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback,
&oldest);
return len;
}
if (!strlen(jobid))
return -EINVAL;
job = cfs_hash_lookup(stats->ojs_hash, jobid);
if (!job)
return -EINVAL;
cfs_hash_del_key(stats->ojs_hash, jobid);
job_putref(job);
return len;
}
struct file_operations lprocfs_jobstats_seq_fops = {
.owner = THIS_MODULE,
.open = lprocfs_jobstats_seq_open,
.read = seq_read,
.write = lprocfs_jobstats_seq_write,
.llseek = seq_lseek,
.release = lprocfs_seq_release,
};
int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
cntr_init_callback init_fn)
{
struct proc_dir_entry *entry;
struct obd_job_stats *stats;
ENTRY;
LASSERT(obd->obd_proc_entry != NULL);
LASSERT(obd->obd_type->typ_name);
if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) &&
strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) {
CERROR("Invalid obd device type.\n");
RETURN(-EINVAL);
}
stats = &obd->u.obt.obt_jobstats;
LASSERT(stats->ojs_hash == NULL);
stats->ojs_hash = cfs_hash_create("JOB_STATS",
HASH_JOB_STATS_CUR_BITS,
HASH_JOB_STATS_MAX_BITS,
HASH_JOB_STATS_BKT_BITS, 0,
CFS_HASH_MIN_THETA,
CFS_HASH_MAX_THETA,
&job_stats_hash_ops,
CFS_HASH_DEFAULT);
if (stats->ojs_hash == NULL)
RETURN(-ENOMEM);
INIT_LIST_HEAD(&stats->ojs_list);
rwlock_init(&stats->ojs_lock);
stats->ojs_cntr_num = cntr_num;
stats->ojs_cntr_init_fn = init_fn;
stats->ojs_cleanup_interval = 600; /* 10 mins by default */
stats->ojs_last_cleanup = cfs_time_current_sec();
entry = proc_create_data("job_stats", 0644, obd->obd_proc_entry,
&lprocfs_jobstats_seq_fops, stats);
if (entry)
RETURN(0);
else
RETURN(-ENOMEM);
}
EXPORT_SYMBOL(lprocfs_job_stats_init);
int lprocfs_rd_job_interval(struct seq_file *m, void *data)
{
struct obd_device *obd = (struct obd_device *)data;
struct obd_job_stats *stats;
LASSERT(obd != NULL);
stats = &obd->u.obt.obt_jobstats;
return seq_printf(m, "%d\n", stats->ojs_cleanup_interval);
}
EXPORT_SYMBOL(lprocfs_rd_job_interval);
int lprocfs_wr_job_interval(struct file *file, const char *buffer,
unsigned long count, void *data)
{
struct obd_device *obd = (struct obd_device *)data;
struct obd_job_stats *stats;
int val, rc;
LASSERT(obd != NULL);
stats = &obd->u.obt.obt_jobstats;
rc = lprocfs_write_helper(buffer, count, &val);
if (rc)
return rc;
stats->ojs_cleanup_interval = val;
lprocfs_job_cleanup(stats, true);
return count;
}
EXPORT_SYMBOL(lprocfs_wr_job_interval);
#endif /* LPROCFS*/