net/mlx5e: Add clock info page to mlx5 core devices
Add a new page to mlx5 core containing clock info data that allows
user-level applications to translate CQE timestamps to nanoseconds.
The information stored in this page is represented by
struct mlx5_ib_clock_info.
In order to synchronize between kernel and user space, a sequence
number is incremented at the beginning and at the end of each update.
An odd number means the data is being updated, while an even number
means the update has completed. To guarantee that the data structure
was read atomically, the user will:
repeat:
seq1 = <read sequence>
if seq1 is odd goto repeat
<read data structure>
seq2 = <read sequence>
if seq1 != seq2 goto repeat
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Eitan Rabin <rabin@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index fa8aed6..4b6cb9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -31,6 +31,8 @@
*/
#include <linux/clocksource.h>
+#include <linux/highmem.h>
+#include <rdma/mlx5-abi.h>
#include "en.h"
enum {
@@ -71,6 +73,28 @@
return mlx5_read_internal_timer(mdev) & cc->mask;
}
+/*
+ * Copy the current timecounter state of @mdev into its clock info page.
+ *
+ * Returns immediately when mdev->clock_info is NULL. Otherwise the sign
+ * field is first stored with MLX5_IB_CLOCK_INFO_KERNEL_UPDATING set, then
+ * cycles, mult, nsec and frac are refreshed from mdev->clock, and finally
+ * sign is stored as its value on entry plus two.
+ *
+ * The call sites visible in this patch invoke it right before
+ * write_unlock_irqrestore(&clock->lock, ...).
+ */
+static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 sign;
+
+ if (!clock_info)
+ return;
+
+ /* Flag the page as "update in progress" before touching the payload. */
+ sign = smp_load_acquire(&clock_info->sign);
+ smp_store_mb(clock_info->sign,
+ sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
+
+ /* Only these four fields are refreshed here. */
+ clock_info->cycles = clock->tc.cycle_last;
+ clock_info->mult = clock->cycles.mult;
+ clock_info->nsec = clock->tc.nsec;
+ clock_info->frac = clock->tc.frac;
+
+ /*
+ * Store the entry value of sign advanced by two; with an even entry
+ * value (it starts at 0) this leaves the updating flag clear.
+ */
+ smp_store_release(&clock_info->sign,
+ sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
+}
+
static void mlx5_pps_out(struct work_struct *work)
{
struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
@@ -109,6 +133,7 @@
write_lock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
}
@@ -123,6 +148,7 @@
write_lock_irqsave(&clock->lock, flags);
timecounter_init(&clock->tc, &clock->cycles, ns);
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
return 0;
@@ -152,6 +178,7 @@
write_lock_irqsave(&clock->lock, flags);
timecounter_adjtime(&clock->tc, delta);
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
return 0;
@@ -179,6 +206,7 @@
timecounter_read(&clock->tc);
clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
clock->nominal_c_mult + diff;
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
return 0;
@@ -470,6 +498,7 @@
clock->cycles.shift);
clock->nominal_c_mult = clock->cycles.mult;
clock->cycles.mask = CLOCKSOURCE_MASK(41);
+ clock->mdev = mdev;
timecounter_init(&clock->tc, &clock->cycles,
ktime_to_ns(ktime_get_real()));
@@ -482,6 +511,25 @@
do_div(ns, NSEC_PER_SEC / 2 / HZ);
clock->overflow_period = ns;
+ mdev->clock_info_page = alloc_page(GFP_KERNEL);
+ if (mdev->clock_info_page) {
+ mdev->clock_info = kmap(mdev->clock_info_page);
+ if (!mdev->clock_info) {
+ __free_page(mdev->clock_info_page);
+ mlx5_core_warn(mdev, "failed to map clock page\n");
+ } else {
+ mdev->clock_info->sign = 0;
+ mdev->clock_info->nsec = clock->tc.nsec;
+ mdev->clock_info->cycles = clock->tc.cycle_last;
+ mdev->clock_info->mask = clock->cycles.mask;
+ mdev->clock_info->mult = clock->nominal_c_mult;
+ mdev->clock_info->shift = clock->cycles.shift;
+ mdev->clock_info->frac = clock->tc.frac;
+ mdev->clock_info->overflow_period =
+ clock->overflow_period;
+ }
+ }
+
INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
if (clock->overflow_period)
@@ -521,5 +569,12 @@
cancel_work_sync(&clock->pps_info.out_work);
cancel_delayed_work_sync(&clock->overflow_work);
+
+ if (mdev->clock_info) {
+ kunmap(mdev->clock_info_page);
+ __free_page(mdev->clock_info_page);
+ mdev->clock_info = NULL;
+ }
+
kfree(clock->ptp_info.pin_config);
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9136e35..c403151 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -797,6 +797,7 @@
u32 nominal_c_mult;
unsigned long overflow_period;
struct delayed_work overflow_work;
+ struct mlx5_core_dev *mdev;
struct ptp_clock *ptp;
struct ptp_clock_info ptp_info;
struct mlx5_pps pps_info;
@@ -844,6 +845,8 @@
struct cpu_rmap *rmap;
#endif
struct mlx5_clock clock;
+ struct mlx5_ib_clock_info *clock_info;
+ struct page *clock_info_page;
};
struct mlx5_db {
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index f6d319d..0299dee 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -381,4 +381,20 @@
__u32 comp_mask;
__u32 reserved;
};
+
+/*
+ * Layout of the clock info page filled in by mlx5 core. The commit
+ * description states it lets user space convert CQE timestamps to
+ * nanoseconds.
+ */
+struct mlx5_ib_clock_info {
+ /*
+  * Update marker: starts at 0, has MLX5_IB_CLOCK_INFO_KERNEL_UPDATING
+  * set while the kernel rewrites the page, and is advanced by two
+  * once an update has finished.
+  */
+ __u32 sign;
+ /* Not written by the code visible in this patch; presumably reserved. */
+ __u32 resv;
+ /* Copy of the kernel timecounter's nsec value. */
+ __u64 nsec;
+ /* Copy of the kernel timecounter's cycle_last value. */
+ __u64 cycles;
+ /* Copy of the kernel timecounter's frac value. */
+ __u64 frac;
+ /* Cyclecounter multiplier; refreshed on every page update. */
+ __u32 mult;
+ /* Cyclecounter shift; written once when the page is set up. */
+ __u32 shift;
+ /* Cyclecounter mask; written once when the page is set up. */
+ __u64 mask;
+ /* Copy of clock->overflow_period; written once when the page is set up. */
+ __u64 overflow_period;
+};
+
+/*
+ * Bit set in mlx5_ib_clock_info.sign while the kernel is rewriting the
+ * clock info page.
+ */
+enum {
+ MLX5_IB_CLOCK_INFO_KERNEL_UPDATING = 1,
+};
#endif /* MLX5_ABI_USER_H */