blob: fcffa519c5351e216f9aee7bf56b18ba134449e7 [file] [log] [blame]
Karthikeyan Ramasubramanian04e3f902016-09-19 09:24:36 -06001/* Copyright (c) 2014,2016 The Linux Foundation. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 */
12#include <linux/cdev.h>
13#include <linux/delay.h>
14#include <linux/device.h>
15#include <linux/errno.h>
16#include <linux/fs.h>
17#include <linux/ioctl.h>
18#include <linux/ipc_logging.h>
19#include <linux/module.h>
20#include <linux/of.h>
21#include <linux/platform_device.h>
22#include <linux/qmi_encdec.h>
23#include <linux/ratelimit.h>
24#include <linux/sched.h>
25#include <linux/slab.h>
26#include <linux/srcu.h>
27#include <linux/thread_info.h>
28#include <linux/uaccess.h>
29
30#include <soc/qcom/msm_qmi_interface.h>
31#include <soc/qcom/subsystem_notif.h>
32#include <soc/qcom/subsystem_restart.h>
33
34#include "system_health_monitor_v01.h"
35
/* Name given to the platform driver registered by this file. */
36#define MODULE_NAME "system_health_monitor"
37
/* Sizes of the subsys_name and ssrestart_string buffers in struct hma_info. */
38#define SUBSYS_NAME_LEN 256
39#define SSRESTART_STRLEN 256
40
/* Bits of shm_debug_mask; they gate SHM_INFO_LOG() and SHM_DEBUG(). */
41enum {
42 SHM_INFO_FLAG = 0x1,
43 SHM_DEBUG_FLAG = 0x2,
44};
/* Logging mask, exposed as the "debug_mask" module parameter. */
45static int shm_debug_mask = SHM_INFO_FLAG;
46module_param_named(debug_mask, shm_debug_mask,
47 int, 0664);
/*
 * Health check timeout used for an HMA that registers without a valid
 * timeout of its own; exposed as the "default_timeout_ms" module parameter.
 */
48static int shm_default_timeout_ms = 2000;
49module_param_named(default_timeout_ms, shm_default_timeout_ms,
50 int, 0664);
51
/*
 * Interval and burst passed to ratelimit_state_init() for every HMA at
 * registration time.
 */
52#define DEFAULT_SHM_RATELIMIT_INTERVAL (HZ / 5)
53#define DEFAULT_SHM_RATELIMIT_BURST 2
54
/* Page count passed to ipc_log_context_create() for the SHM log. */
55#define SHM_ILCTXT_NUM_PAGES 2
/* IPC logging context; stays NULL if ipc_log_context_create() failed. */
56static void *shm_ilctxt;
/* Log to the IPC log only, when SHM_INFO_FLAG is set and the context exists. */
57#define SHM_INFO_LOG(x...) do { \
58 if ((shm_debug_mask & SHM_INFO_FLAG) && shm_ilctxt) \
59 ipc_log_string(shm_ilctxt, x); \
60} while (0)
61
/* Log to the IPC log only, when SHM_DEBUG_FLAG is set and the context exists. */
62#define SHM_DEBUG(x...) do { \
63 if ((shm_debug_mask & SHM_DEBUG_FLAG) && shm_ilctxt) \
64 ipc_log_string(shm_ilctxt, x); \
65} while (0)
66
/*
 * Log an error: always through pr_err(), and additionally to the IPC log
 * when the context exists. Not gated by shm_debug_mask.
 */
67#define SHM_ERR(x...) do { \
68 if (shm_ilctxt) \
69 ipc_log_string(shm_ilctxt, x); \
70 pr_err(x); \
71} while (0)
72
/* Character device state, set up by system_health_monitor_init(). */
73struct class *system_health_monitor_classp;
74static dev_t system_health_monitor_dev;
75static struct cdev system_health_monitor_cdev;
76static struct device *system_health_monitor_devp;
77
78#define SYSTEM_HEALTH_MONITOR_IOCTL_MAGIC (0xC3)
79
/* ioctl command that makes the driver run kern_check_system_health(). */
80#define CHECK_SYSTEM_HEALTH_IOCTL \
81 _IOR(SYSTEM_HEALTH_MONITOR_IOCTL_MAGIC, 0, unsigned int)
82
/*
 * Workqueue on which all deferred SHM work (receive, request, disconnect
 * and restart workers) is queued, and the QMI service handle; both are
 * created by start_system_health_monitor_service().
 */
83static struct workqueue_struct *shm_svc_workqueue;
84static void shm_svc_recv_msg(struct work_struct *work);
85static DECLARE_DELAYED_WORK(work_recv_msg, shm_svc_recv_msg);
86static struct qmi_handle *shm_svc_handle;
87
/*
 * Work item allocated by shm_svc_disconnect_cb() and freed by
 * shm_svc_disconnect_worker(); conn_h identifies the connection that went
 * away.
 */
88struct disconnect_work {
89 struct work_struct work;
90 void *conn_h;
91};
92static void shm_svc_disconnect_worker(struct work_struct *work);
93
/*
 * Work item allocated by shm_svc_req_cb() and freed by shm_svc_req_worker().
 * req points to a private copy of the request structure, which the worker
 * frees together with the work item.
 */
94struct req_work {
95 struct work_struct work;
96 void *conn_h;
97 void *req_h;
98 unsigned int msg_id;
99 void *req;
100};
101static void shm_svc_req_worker(struct work_struct *work);
102
103/**
 * struct hma_info - Information about a Health Monitor Agent (HMA)
 * @list: Links this entry into hma_info_list.
 * @subsys_name: Name of the remote subsystem that hosts this HMA; matched
 *               against the name carried in a register request.
 * @ssrestart_string: String passed to subsystem_restart() and to the
 *                    subsystem restart notifier registration.
 * @conn_h: Opaque connection handle to the HMA; NULL while no HMA is
 *          connected.
 * @timeout: Health check timeout in milliseconds, taken from the register
 *           request or from the default_timeout_ms module parameter.
 * @check_count: Count of the health check attempts.
 * @report_count: Count of the health reports handled; set equal to
 *                @check_count when the HMA disconnects.
 * @reset_srcu: Sleepable RCU to protect the reset state.
 * @is_in_reset: Non-zero between the SUBSYS_BEFORE_SHUTDOWN and
 *               SUBSYS_AFTER_POWERUP notifications.
 * @restart_nb: Notifier block to receive subsystem restart events.
 * @restart_nb_h: Handle returned by subsys_notif_register_notifier().
 * @rs: Rate-limit state for health checks of this HMA.
 */
118struct hma_info {
119 struct list_head list;
120 char subsys_name[SUBSYS_NAME_LEN];
121 char ssrestart_string[SSRESTART_STRLEN];
122 void *conn_h;
123 uint32_t timeout;
124 atomic_t check_count;
125 atomic_t report_count;
126 struct srcu_struct reset_srcu;
127 atomic_t is_in_reset;
128 struct notifier_block restart_nb;
129 void *restart_nb_h;
130 struct ratelimit_state rs;
131};
132
/*
 * Delayed work item allocated per health check by
 * shm_send_health_check_ind() and freed by shm_svc_restart_worker().
 * conn_h and check_count record the connection handle and the check
 * attempt number at the time the indication was sent.
 */
133struct restart_work {
134 struct delayed_work dwork;
135 struct hma_info *hmap;
136 void *conn_h;
137 int check_count;
138};
139static void shm_svc_restart_worker(struct work_struct *work);
140
/* List of all known HMAs and the mutex under which it is walked. */
141static DEFINE_MUTEX(hma_info_list_lock);
142static LIST_HEAD(hma_info_list);
143
/* Descriptor of the HMA register request. */
144static struct msg_desc shm_svc_register_req_desc = {
145 .max_msg_len = HMON_REGISTER_REQ_MSG_V01_MAX_MSG_LEN,
146 .msg_id = QMI_HEALTH_MON_REG_REQ_V01,
147 .ei_array = hmon_register_req_msg_v01_ei,
148};
149
/* Descriptor of the response sent for an HMA register request. */
150static struct msg_desc shm_svc_register_resp_desc = {
151 .max_msg_len = HMON_REGISTER_RESP_MSG_V01_MAX_MSG_LEN,
152 .msg_id = QMI_HEALTH_MON_REG_RESP_V01,
153 .ei_array = hmon_register_resp_msg_v01_ei,
154};
155
/* Descriptor of the health check indication sent to an HMA. */
156static struct msg_desc shm_svc_health_check_ind_desc = {
157 .max_msg_len = HMON_HEALTH_CHECK_IND_MSG_V01_MAX_MSG_LEN,
158 .msg_id = QMI_HEALTH_MON_HEALTH_CHECK_IND_V01,
159 .ei_array = hmon_health_check_ind_msg_v01_ei,
160};
161
/* Descriptor of the health report (health check complete) request. */
162static struct msg_desc shm_svc_health_check_complete_req_desc = {
163 .max_msg_len = HMON_HEALTH_CHECK_COMPLETE_REQ_MSG_V01_MAX_MSG_LEN,
164 .msg_id = QMI_HEALTH_MON_HEALTH_CHECK_COMPLETE_REQ_V01,
165 .ei_array = hmon_health_check_complete_req_msg_v01_ei,
166};
167
/* Descriptor of the response sent for a health report. */
168static struct msg_desc shm_svc_health_check_complete_resp_desc = {
169 .max_msg_len = HMON_HEALTH_CHECK_COMPLETE_RESP_MSG_V01_MAX_MSG_LEN,
170 .msg_id = QMI_HEALTH_MON_HEALTH_CHECK_COMPLETE_RESP_V01,
171 .ei_array = hmon_health_check_complete_resp_msg_v01_ei,
172};
173
174/**
175 * restart_notifier_cb() - Callback to handle SSR events
176 * @this: Reference to the notifier block.
177 * @code: Type of SSR event.
178 * @data: Data that needs to be handled as part of SSR event.
179 *
180 * This function is used to identify if a subsystem which hosts an HMA
181 * is already in reset, so that a duplicate subsystem restart is not
182 * triggered.
183 *
184 * Return: 0 on success, standard Linux error codes on failure.
185 */
186static int restart_notifier_cb(struct notifier_block *this,
187 unsigned long code, void *data)
188{
189 struct hma_info *tmp_hma_info =
190 container_of(this, struct hma_info, restart_nb);
191
192 if (code == SUBSYS_BEFORE_SHUTDOWN) {
193 atomic_set(&tmp_hma_info->is_in_reset, 1);
194 synchronize_srcu(&tmp_hma_info->reset_srcu);
195 SHM_INFO_LOG("%s: %s going to shutdown\n",
196 __func__, tmp_hma_info->ssrestart_string);
197 } else if (code == SUBSYS_AFTER_POWERUP) {
198 atomic_set(&tmp_hma_info->is_in_reset, 0);
199 SHM_INFO_LOG("%s: %s powered up\n",
200 __func__, tmp_hma_info->ssrestart_string);
201 }
202 return 0;
203}
204
205/**
206 * shm_svc_restart_worker() - Worker to restart a subsystem
207 * @work: Reference to the work item being handled.
208 *
209 * This function restarts the subsystem which hosts an HMA. This function
210 * checks the following before triggering a restart:
211 * 1) Health check report is not received.
212 * 2) The subsystem has not undergone a reset.
213 * 3) The subsystem is not undergoing a reset.
214 */
215static void shm_svc_restart_worker(struct work_struct *work)
216{
217 int rc;
218 struct delayed_work *dwork = to_delayed_work(work);
219 struct restart_work *rwp =
220 container_of(dwork, struct restart_work, dwork);
221 struct hma_info *tmp_hma_info = rwp->hmap;
222 int rcu_id;
223
224 if (rwp->check_count <= atomic_read(&tmp_hma_info->report_count)) {
225 SHM_INFO_LOG("%s: No Action on Health Check Attempt %d to %s\n",
226 __func__, rwp->check_count,
227 tmp_hma_info->subsys_name);
228 kfree(rwp);
229 return;
230 }
231
232 if (!tmp_hma_info->conn_h || rwp->conn_h != tmp_hma_info->conn_h) {
233 SHM_INFO_LOG(
234 "%s: Connection to %s is reset. No further action\n",
235 __func__, tmp_hma_info->subsys_name);
236 kfree(rwp);
237 return;
238 }
239
240 rcu_id = srcu_read_lock(&tmp_hma_info->reset_srcu);
241 if (atomic_read(&tmp_hma_info->is_in_reset)) {
242 SHM_INFO_LOG(
243 "%s: %s is going thru restart. No further action\n",
244 __func__, tmp_hma_info->subsys_name);
245 srcu_read_unlock(&tmp_hma_info->reset_srcu, rcu_id);
246 kfree(rwp);
247 return;
248 }
249
250 SHM_ERR("%s: HMA in %s failed to respond in time. Restarting %s...\n",
251 __func__, tmp_hma_info->subsys_name,
252 tmp_hma_info->ssrestart_string);
253 rc = subsystem_restart(tmp_hma_info->ssrestart_string);
254 if (rc < 0)
255 SHM_ERR("%s: Error %d restarting %s\n",
256 __func__, rc, tmp_hma_info->ssrestart_string);
257 srcu_read_unlock(&tmp_hma_info->reset_srcu, rcu_id);
258 kfree(rwp);
259}
260
261/**
262 * shm_send_health_check_ind() - Initiate a subsystem health check
263 * @tmp_hma_info: Info about an HMA which resides in a subsystem.
264 *
265 * This function initiates a health check of a subsytem, which hosts the
266 * HMA, by sending a health check QMI indication message.
267 *
268 * Return: 0 on success, standard Linux error codes on failure.
269 */
270static int shm_send_health_check_ind(struct hma_info *tmp_hma_info)
271{
272 int rc;
273 struct restart_work *rwp;
274
275 if (!tmp_hma_info->conn_h)
276 return 0;
277
278 /* Rate limit the health check as configured by the subsystem */
279 if (!__ratelimit(&tmp_hma_info->rs))
280 return 0;
281
282 rwp = kzalloc(sizeof(*rwp), GFP_KERNEL);
283 if (!rwp) {
284 SHM_ERR("%s: Error allocating restart work\n", __func__);
285 return -ENOMEM;
286 }
287
288 INIT_DELAYED_WORK(&rwp->dwork, shm_svc_restart_worker);
289 rwp->hmap = tmp_hma_info;
290 rwp->conn_h = tmp_hma_info->conn_h;
291
292 rc = qmi_send_ind(shm_svc_handle, tmp_hma_info->conn_h,
293 &shm_svc_health_check_ind_desc, NULL, 0);
294 if (rc < 0) {
295 SHM_ERR("%s: Send Error %d to %s\n",
296 __func__, rc, tmp_hma_info->subsys_name);
297 kfree(rwp);
298 return rc;
299 }
300
301 rwp->check_count = atomic_inc_return(&tmp_hma_info->check_count);
302 queue_delayed_work(shm_svc_workqueue, &rwp->dwork,
303 msecs_to_jiffies(tmp_hma_info->timeout));
304 return 0;
305}
306
307/**
308 * kern_check_system_health() - Check the system health
309 *
310 * This function is used by the kernel drivers to initiate the
311 * system health check. This function in turn triggers SHM to send
312 * QMI message to all the HMAs connected to it.
313 *
314 * Return: 0 on success, standard Linux error codes on failure.
315 */
316int kern_check_system_health(void)
317{
318 int rc;
319 int final_rc = 0;
320 struct hma_info *tmp_hma_info;
321
322 mutex_lock(&hma_info_list_lock);
323 list_for_each_entry(tmp_hma_info, &hma_info_list, list) {
324 rc = shm_send_health_check_ind(tmp_hma_info);
325 if (rc < 0) {
326 SHM_ERR("%s by %s failed for %s - rc %d\n", __func__,
327 current->comm, tmp_hma_info->subsys_name, rc);
328 final_rc = rc;
329 }
330 }
331 mutex_unlock(&hma_info_list_lock);
332 return final_rc;
333}
334EXPORT_SYMBOL(kern_check_system_health);
335
/**
 * shm_svc_connect_cb() - Callback to handle connect event from an HMA
 * @handle: QMI Service handle in which a connect event is received.
 * @conn_h: Opaque reference to the connection handle.
 *
 * Only logs the event; the connection is associated with an HMA later,
 * when its register request is handled.
 *
 * Return: Always 0.
 */
static int shm_svc_connect_cb(struct qmi_handle *handle, void *conn_h)
{
	SHM_DEBUG("%s: conn_h %p\n", __func__, conn_h);

	return 0;
}
348
349/**
350 * shm_svc_disconnect_worker() - Worker to handle disconnect event from an HMA
351 * @work: Reference to the work item.
352 *
353 * This function handles the disconnect event from an HMA in a deferred manner.
354 */
355static void shm_svc_disconnect_worker(struct work_struct *work)
356{
357 struct hma_info *tmp_hma_info;
358 struct disconnect_work *dwp =
359 container_of(work, struct disconnect_work, work);
360
361 mutex_lock(&hma_info_list_lock);
362 list_for_each_entry(tmp_hma_info, &hma_info_list, list) {
363 if (dwp->conn_h == tmp_hma_info->conn_h) {
364 SHM_INFO_LOG("%s: conn_h %p to HMA in %s exited\n",
365 __func__, dwp->conn_h,
366 tmp_hma_info->subsys_name);
367 tmp_hma_info->conn_h = NULL;
368 atomic_set(&tmp_hma_info->report_count,
369 atomic_read(&tmp_hma_info->check_count));
370 break;
371 }
372 }
373 mutex_unlock(&hma_info_list_lock);
374 kfree(dwp);
375}
376
377/**
378 * shm_svc_disconnect_cb() - Callback to handle disconnect event from an HMA
379 * @handle: QMI Service handle in which a disconnect event is received.
380 * @conn_h: Opaque reference to the connection handle.
381 *
382 * Return: 0 on success, standard Linux error codes on failure.
383 */
384static int shm_svc_disconnect_cb(struct qmi_handle *handle, void *conn_h)
385{
386 struct disconnect_work *dwp;
387
388 dwp = kzalloc(sizeof(*dwp), GFP_ATOMIC);
389 if (!dwp) {
390 SHM_ERR("%s: Error allocating work item\n", __func__);
391 return -ENOMEM;
392 }
393
394 INIT_WORK(&dwp->work, shm_svc_disconnect_worker);
395 dwp->conn_h = conn_h;
396 queue_work(shm_svc_workqueue, &dwp->work);
397 return 0;
398}
399
400/**
401 * shm_svc_req_desc_cb() - Callback to identify the request descriptor
402 * @msg_id: Message ID of the QMI request.
403 * @req_desc: Request Descriptor of the QMI request.
404 *
405 * Return: 0 on success, standard Linux error codes on failure.
406 */
407static int shm_svc_req_desc_cb(unsigned int msg_id,
408 struct msg_desc **req_desc)
409{
410 int rc;
411
412 SHM_DEBUG("%s: called for msg_id %d\n", __func__, msg_id);
413 switch (msg_id) {
414 case QMI_HEALTH_MON_REG_REQ_V01:
415 *req_desc = &shm_svc_register_req_desc;
416 rc = sizeof(struct hmon_register_req_msg_v01);
417 break;
418
419 case QMI_HEALTH_MON_HEALTH_CHECK_COMPLETE_REQ_V01:
420 *req_desc = &shm_svc_health_check_complete_req_desc;
421 rc = sizeof(struct hmon_health_check_complete_req_msg_v01);
422 break;
423
424 default:
425 SHM_ERR("%s: Invalid msg_id %d\n", __func__, msg_id);
426 rc = -ENOTSUPP;
427 }
428 return rc;
429}
430
431/**
432 * handle_health_mon_reg_req() - Handle the HMA register QMI request
433 * @conn_h: Opaque reference to the connection handle to an HMA.
434 * @req_h: Opaque reference to the request handle.
435 * @buf: Pointer to the QMI request structure.
436 *
437 * This function handles the register request from an HMA. The request
438 * contains the subsystem name which hosts the HMA and health check
439 * timeout for the HMA.
440 *
441 * Return: 0 on success, standard Linux error codes on failure.
442 */
443static int handle_health_mon_reg_req(void *conn_h, void *req_h, void *buf)
444{
445 int rc;
446 struct hma_info *tmp_hma_info;
447 struct hmon_register_req_msg_v01 *req =
448 (struct hmon_register_req_msg_v01 *)buf;
449 struct hmon_register_resp_msg_v01 resp;
450 bool hma_info_found = false;
451
452 if (!req->name_valid) {
453 SHM_ERR("%s: host name invalid\n", __func__);
454 goto send_reg_resp;
455 }
456
457 mutex_lock(&hma_info_list_lock);
458 list_for_each_entry(tmp_hma_info, &hma_info_list, list) {
459 if (!strcmp(tmp_hma_info->subsys_name, req->name) &&
460 !tmp_hma_info->conn_h) {
461 tmp_hma_info->conn_h = conn_h;
462 if (req->timeout_valid)
463 tmp_hma_info->timeout = req->timeout;
464 else
465 tmp_hma_info->timeout = shm_default_timeout_ms;
466 ratelimit_state_init(&tmp_hma_info->rs,
467 DEFAULT_SHM_RATELIMIT_INTERVAL,
468 DEFAULT_SHM_RATELIMIT_BURST);
469 SHM_INFO_LOG("%s: from %s timeout_ms %d\n",
470 __func__, req->name, tmp_hma_info->timeout);
471 hma_info_found = true;
472 } else if (!strcmp(tmp_hma_info->subsys_name, req->name)) {
473 SHM_ERR("%s: Duplicate HMA from %s - cur %p, new %p\n",
474 __func__, req->name, tmp_hma_info->conn_h,
475 conn_h);
476 }
477 }
478 mutex_unlock(&hma_info_list_lock);
479
480send_reg_resp:
481 if (hma_info_found) {
482 memset(&resp, 0, sizeof(resp));
483 } else {
484 resp.resp.result = QMI_RESULT_FAILURE_V01;
485 resp.resp.error = QMI_ERR_INVALID_ID_V01;
486 }
487 rc = qmi_send_resp(shm_svc_handle, conn_h, req_h,
488 &shm_svc_register_resp_desc, &resp, sizeof(resp));
489 if (rc < 0)
490 SHM_ERR("%s: send_resp failed to %s - rc %d\n",
491 __func__, req->name, rc);
492 return rc;
493}
494
495/**
496 * handle_health_mon_health_check_complete_req() - Handle the HMA health report
497 * @conn_h: Opaque reference to the connection handle to an HMA.
498 * @req_h: Opaque reference to the request handle.
499 * @buf: Pointer to the QMI request structure.
500 *
501 * This function handles health reports from an HMA. The health report is sent
502 * in response to a health check QMI indication sent by SHM.
503 *
504 * Return: 0 on success, standard Linux error codes on failure.
505 */
506static int handle_health_mon_health_check_complete_req(void *conn_h,
507 void *req_h, void *buf)
508{
509 int rc;
510 struct hma_info *tmp_hma_info;
511 struct hmon_health_check_complete_req_msg_v01 *req =
512 (struct hmon_health_check_complete_req_msg_v01 *)buf;
513 struct hmon_health_check_complete_resp_msg_v01 resp;
514 bool hma_info_found = false;
515
516 if (!req->result_valid) {
517 SHM_ERR("%s: Invalid result\n", __func__);
518 goto send_resp;
519 }
520
521 mutex_lock(&hma_info_list_lock);
522 list_for_each_entry(tmp_hma_info, &hma_info_list, list) {
523 if (tmp_hma_info->conn_h != conn_h)
524 continue;
525 hma_info_found = true;
526 if (req->result == HEALTH_MONITOR_CHECK_SUCCESS_V01) {
527 atomic_inc(&tmp_hma_info->report_count);
528 SHM_INFO_LOG("%s: %s Health Check Success\n",
529 __func__, tmp_hma_info->subsys_name);
530 } else {
531 SHM_INFO_LOG("%s: %s Health Check Failure\n",
532 __func__, tmp_hma_info->subsys_name);
533 }
534 }
535 mutex_unlock(&hma_info_list_lock);
536
537send_resp:
538 if (hma_info_found) {
539 memset(&resp, 0, sizeof(resp));
540 } else {
541 resp.resp.result = QMI_RESULT_FAILURE_V01;
542 resp.resp.error = QMI_ERR_INVALID_ID_V01;
543 }
544 rc = qmi_send_resp(shm_svc_handle, conn_h, req_h,
545 &shm_svc_health_check_complete_resp_desc,
546 &resp, sizeof(resp));
547 if (rc < 0)
548 SHM_ERR("%s: send_resp failed - rc %d\n",
549 __func__, rc);
550 return rc;
551}
552
553/**
554 * shm_svc_req_worker() - Worker to handle QMI requests
555 * @work: Reference to the work item.
556 *
557 * This function handles QMI requests from HMAs in a deferred manner.
558 */
559static void shm_svc_req_worker(struct work_struct *work)
560{
561 struct req_work *rwp =
562 container_of(work, struct req_work, work);
563
564 switch (rwp->msg_id) {
565 case QMI_HEALTH_MON_REG_REQ_V01:
566 handle_health_mon_reg_req(rwp->conn_h, rwp->req_h, rwp->req);
567 break;
568
569 case QMI_HEALTH_MON_HEALTH_CHECK_COMPLETE_REQ_V01:
570 handle_health_mon_health_check_complete_req(rwp->conn_h,
571 rwp->req_h, rwp->req);
572 break;
573 default:
574 SHM_ERR("%s: Invalid msg_id %d\n", __func__, rwp->msg_id);
575 }
576 kfree(rwp->req);
577 kfree(rwp);
578}
579
580/**
581 * shm_svc_req_cb() - Callback to notify about QMI requests from HMA
582 * @handle; QMI Service handle in which the request is received.
583 * @conn_h: Opaque reference to the connection handle to an HMA.
584 * @req_h: Opaque reference to the request handle.
585 * @msg_id: Message ID of the request.
586 * @req: Pointer to the request structure.
587 *
588 * This function is called by kernel QMI Service Interface to notify the
589 * incoming QMI request on the SHM service handle.
590 *
591 * Return: 0 on success, standard Linux error codes on failure.
592 */
593static int shm_svc_req_cb(struct qmi_handle *handle, void *conn_h,
594 void *req_h, unsigned int msg_id, void *req)
595{
596 struct req_work *rwp;
597 void *req_buf;
598 uint32_t req_sz = 0;
599
600 rwp = kzalloc(sizeof(*rwp), GFP_KERNEL);
601 if (!rwp) {
602 SHM_ERR("%s: Error allocating work item\n", __func__);
603 return -ENOMEM;
604 }
605
606 switch (msg_id) {
607 case QMI_HEALTH_MON_REG_REQ_V01:
608 req_sz = sizeof(struct hmon_register_req_msg_v01);
609 break;
610
611 case QMI_HEALTH_MON_HEALTH_CHECK_COMPLETE_REQ_V01:
612 req_sz = sizeof(struct hmon_health_check_complete_req_msg_v01);
613 break;
614
615 default:
616 SHM_ERR("%s: Invalid msg_id %d\n", __func__, msg_id);
617 kfree(rwp);
618 return -ENOTSUPP;
619 }
620
621 req_buf = kzalloc(req_sz, GFP_KERNEL);
622 if (!req_buf) {
623 SHM_ERR("%s: Error allocating request buffer\n", __func__);
624 kfree(rwp);
625 return -ENOMEM;
626 }
627 memcpy(req_buf, req, req_sz);
628
629 INIT_WORK(&rwp->work, shm_svc_req_worker);
630 rwp->conn_h = conn_h;
631 rwp->req_h = req_h;
632 rwp->msg_id = msg_id;
633 rwp->req = req_buf;
634 queue_work(shm_svc_workqueue, &rwp->work);
635 return 0;
636}
637
638/**
639 * shm_svc_recv_msg() - Worker to receive a QMI message
640 * @work: Reference to the work item.
641 *
642 * This function handles any incoming QMI messages to the SHM QMI service.
643 */
644static void shm_svc_recv_msg(struct work_struct *work)
645{
646 int rc;
647
648 do {
649 SHM_DEBUG("%s: Notified about a receive event\n", __func__);
650 } while ((rc = qmi_recv_msg(shm_svc_handle)) == 0);
651
652 if (rc != -ENOMSG)
653 SHM_ERR("%s: Error %d receiving message\n", __func__, rc);
654}
655
656/**
657 * shm_svc_notify() - Callback function to receive SHM QMI service events
658 * @handle: QMI handle in which the event is received.
659 * @event: Type of the QMI event.
660 * @priv: Opaque reference to the private data as registered by the
661 * service.
662 */
663static void shm_svc_notify(struct qmi_handle *handle,
664 enum qmi_event_type event, void *priv)
665{
666 switch (event) {
667 case QMI_RECV_MSG:
668 queue_delayed_work(shm_svc_workqueue, &work_recv_msg, 0);
669 break;
670 default:
671 break;
672 }
673}
674
/*
 * Service description and callbacks handed to qmi_svc_register() when the
 * SHM QMI service is started.
 */
675static struct qmi_svc_ops_options shm_svc_ops_options = {
676 .version = 1,
677 .service_id = HMON_SERVICE_ID_V01,
678 .service_vers = HMON_SERVICE_VERS_V01,
679 .service_ins = 0,
680 .connect_cb = shm_svc_connect_cb,
681 .disconnect_cb = shm_svc_disconnect_cb,
682 .req_desc_cb = shm_svc_req_desc_cb,
683 .req_cb = shm_svc_req_cb,
684};
685
686static int system_health_monitor_open(struct inode *inode, struct file *file)
687{
688 SHM_DEBUG("%s by %s\n", __func__, current->comm);
689 return 0;
690}
691
692static int system_health_monitor_release(struct inode *inode,
693 struct file *file)
694{
695 SHM_DEBUG("%s by %s\n", __func__, current->comm);
696 return 0;
697}
698
699static ssize_t system_health_monitor_write(struct file *file,
700 const char __user *buf, size_t count, loff_t *ppos)
701{
702 SHM_ERR("%s by %s\n", __func__, current->comm);
703 return -ENOTSUPP;
704}
705
706static ssize_t system_health_monitor_read(struct file *file, char __user *buf,
707 size_t count, loff_t *ppos)
708{
709 SHM_ERR("%s by %s\n", __func__, current->comm);
710 return -ENOTSUPP;
711}
712
713static long system_health_monitor_ioctl(struct file *file, unsigned int cmd,
714 unsigned long arg)
715{
716 int rc;
717
718 switch (cmd) {
719 case CHECK_SYSTEM_HEALTH_IOCTL:
720 SHM_INFO_LOG("%s by %s\n", __func__, current->comm);
721 rc = kern_check_system_health();
722 break;
723 default:
724 SHM_ERR("%s: Invalid cmd %d by %s\n",
725 __func__, cmd, current->comm);
726 rc = -EINVAL;
727 }
728 return rc;
729}
730
/*
 * File operations of the SHM character device. The same handler serves
 * both the native and the compat ioctl entry points.
 */
731static const struct file_operations system_health_monitor_fops = {
732 .owner = THIS_MODULE,
733 .open = system_health_monitor_open,
734 .release = system_health_monitor_release,
735 .read = system_health_monitor_read,
736 .write = system_health_monitor_write,
737 .unlocked_ioctl = system_health_monitor_ioctl,
738 .compat_ioctl = system_health_monitor_ioctl,
739};
740
741/**
742 * start_system_health_monitor_service() - Start the SHM QMI service
743 *
744 * This function registers the SHM QMI service, if it is not already
745 * registered.
746 */
747static int start_system_health_monitor_service(void)
748{
749 int rc;
750
751 shm_svc_workqueue = create_singlethread_workqueue("shm_svc");
752 if (!shm_svc_workqueue) {
753 SHM_ERR("%s: Error creating workqueue\n", __func__);
754 return -EFAULT;
755 }
756
757 shm_svc_handle = qmi_handle_create(shm_svc_notify, NULL);
758 if (!shm_svc_handle) {
759 SHM_ERR("%s: Creating shm_svc_handle failed\n", __func__);
760 rc = -ENOMEM;
761 goto start_svc_error1;
762 }
763
764 rc = qmi_svc_register(shm_svc_handle, &shm_svc_ops_options);
765 if (rc < 0) {
766 SHM_ERR("%s: Registering shm svc failed - %d\n", __func__, rc);
767 goto start_svc_error2;
768 }
769 return 0;
770start_svc_error2:
771 qmi_handle_destroy(shm_svc_handle);
772start_svc_error1:
773 destroy_workqueue(shm_svc_workqueue);
774 return rc;
775}
776
777/**
778 * parse_devicetree() - Parse the device tree for HMA information
779 * @node: Pointer to the device tree node.
780 * @hma: HMA information which needs to be extracted.
781 *
782 * This function parses the device tree, extracts the HMA information.
783 *
784 * Return: 0 on success, standard Linux error codes on failure.
785 */
786static int parse_devicetree(struct device_node *node,
787 struct hma_info *hma)
788{
789 char *key;
790 const char *subsys_name;
791 const char *ssrestart_string;
792
793 key = "qcom,subsys-name";
794 subsys_name = of_get_property(node, key, NULL);
795 if (!subsys_name)
796 goto error;
797 strlcpy(hma->subsys_name, subsys_name, SUBSYS_NAME_LEN);
798
799 key = "qcom,ssrestart-string";
800 ssrestart_string = of_get_property(node, key, NULL);
801 if (!ssrestart_string)
802 goto error;
803 strlcpy(hma->ssrestart_string, ssrestart_string, SSRESTART_STRLEN);
804 return 0;
805error:
806 SHM_ERR("%s: missing key: %s\n", __func__, key);
807 return -ENODEV;
808}
809
810/**
811 * system_health_monitor_probe() - Probe function to construct HMA info
812 * @pdev: Platform device pointing to a device tree node.
813 *
814 * This function extracts the HMA information from the device tree, constructs
815 * it and adds it to the global list.
816 *
817 * Return: 0 on success, standard Linux error codes on failure.
818 */
819static int system_health_monitor_probe(struct platform_device *pdev)
820{
821 int rc;
822 struct hma_info *hma, *tmp_hma;
823 struct device_node *node;
824
825 mutex_lock(&hma_info_list_lock);
826 for_each_child_of_node(pdev->dev.of_node, node) {
827 hma = kzalloc(sizeof(*hma), GFP_KERNEL);
828 if (!hma) {
829 SHM_ERR("%s: Error allocation hma_info\n", __func__);
830 rc = -ENOMEM;
831 goto probe_err;
832 }
833
834 rc = parse_devicetree(node, hma);
835 if (rc) {
836 SHM_ERR("%s Failed to parse Device Tree\n", __func__);
837 kfree(hma);
838 goto probe_err;
839 }
840
841 init_srcu_struct(&hma->reset_srcu);
842 hma->restart_nb.notifier_call = restart_notifier_cb;
843 hma->restart_nb_h = subsys_notif_register_notifier(
844 hma->ssrestart_string, &hma->restart_nb);
845 if (IS_ERR_OR_NULL(hma->restart_nb_h)) {
846 cleanup_srcu_struct(&hma->reset_srcu);
847 kfree(hma);
848 rc = -EFAULT;
849 SHM_ERR("%s: Error registering restart notif for %s\n",
850 __func__, hma->ssrestart_string);
851 goto probe_err;
852 }
853
854 list_add_tail(&hma->list, &hma_info_list);
855 SHM_INFO_LOG("%s: Added HMA info for %s\n",
856 __func__, hma->subsys_name);
857 }
858
859 rc = start_system_health_monitor_service();
860 if (rc) {
861 SHM_ERR("%s Failed to start service %d\n", __func__, rc);
862 goto probe_err;
863 }
864 mutex_unlock(&hma_info_list_lock);
865 return 0;
866probe_err:
867 list_for_each_entry_safe(hma, tmp_hma, &hma_info_list, list) {
868 list_del(&hma->list);
869 subsys_notif_unregister_notifier(hma->restart_nb_h,
870 &hma->restart_nb);
871 cleanup_srcu_struct(&hma->reset_srcu);
872 kfree(hma);
873 }
874 mutex_unlock(&hma_info_list_lock);
875 return rc;
876}
877
/* Device tree compatible string handled by this driver. */
878static const struct of_device_id system_health_monitor_match_table[] = {
879 { .compatible = "qcom,system-health-monitor" },
880 {},
881};
882
/* Platform driver; only a probe callback is provided. */
883static struct platform_driver system_health_monitor_driver = {
884 .probe = system_health_monitor_probe,
885 .driver = {
886 .name = MODULE_NAME,
887 .owner = THIS_MODULE,
888 .of_match_table = system_health_monitor_match_table,
889 },
890};
891
892/**
893 * system_health_monitor_init() - Initialize the system health monitor module
894 *
895 * This functions registers a platform driver to probe for and extract the HMA
896 * information. This function registers the character device interface to the
897 * user-space.
898 *
899 * Return: 0 on success, standard Linux error codes on failure.
900 */
901static int __init system_health_monitor_init(void)
902{
903 int rc;
904
905 shm_ilctxt = ipc_log_context_create(SHM_ILCTXT_NUM_PAGES, "shm", 0);
906 if (!shm_ilctxt) {
907 SHM_ERR("%s: Unable to create SHM logging context\n", __func__);
908 shm_debug_mask = 0;
909 }
910
911 rc = platform_driver_register(&system_health_monitor_driver);
912 if (rc) {
913 SHM_ERR("%s: system_health_monitor_driver register failed %d\n",
914 __func__, rc);
915 return rc;
916 }
917
918 rc = alloc_chrdev_region(&system_health_monitor_dev,
919 0, 1, "system_health_monitor");
920 if (rc < 0) {
921 SHM_ERR("%s: alloc_chrdev_region() failed %d\n", __func__, rc);
922 return rc;
923 }
924
925 system_health_monitor_classp = class_create(THIS_MODULE,
926 "system_health_monitor");
927 if (IS_ERR_OR_NULL(system_health_monitor_classp)) {
928 SHM_ERR("%s: class_create() failed\n", __func__);
929 rc = -ENOMEM;
930 goto init_error1;
931 }
932
933 cdev_init(&system_health_monitor_cdev, &system_health_monitor_fops);
934 system_health_monitor_cdev.owner = THIS_MODULE;
935 rc = cdev_add(&system_health_monitor_cdev,
936 system_health_monitor_dev, 1);
937 if (rc < 0) {
938 SHM_ERR("%s: cdev_add() failed - rc %d\n",
939 __func__, rc);
940 goto init_error2;
941 }
942
943 system_health_monitor_devp = device_create(system_health_monitor_classp,
944 NULL, system_health_monitor_dev, NULL,
945 "system_health_monitor");
946 if (IS_ERR_OR_NULL(system_health_monitor_devp)) {
947 SHM_ERR("%s: device_create() failed - rc %d\n",
948 __func__, rc);
949 rc = PTR_ERR(system_health_monitor_devp);
950 goto init_error3;
951 }
952 SHM_INFO_LOG("%s: Complete\n", __func__);
953 return 0;
954init_error3:
955 cdev_del(&system_health_monitor_cdev);
956init_error2:
957 class_destroy(system_health_monitor_classp);
958init_error1:
959 unregister_chrdev_region(MAJOR(system_health_monitor_dev), 1);
960 return rc;
961}
962
963module_init(system_health_monitor_init);
964MODULE_DESCRIPTION("System Health Monitor");
965MODULE_LICENSE("GPL v2");